This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

73 lines
1.9 KiB

  1. """
  2. Custom SPPF Prioritizer
  3. =======================
  4. This example demonstrates how to subclass ``ForestVisitor`` to make a custom
  5. SPPF node prioritizer to be used in conjunction with ``TreeForestTransformer``.
  6. Our prioritizer will count the number of descendants of a node that are tokens.
  7. By negating this count, our prioritizer will prefer nodes with fewer token
  8. descendants. Thus, we choose the more specific parse.
  9. """
  10. from lark import Lark
  11. from lark.parsers.earley_forest import ForestVisitor, TreeForestTransformer
  12. class TokenPrioritizer(ForestVisitor):
  13. def visit_symbol_node_in(self, node):
  14. # visit the entire forest by returning node.children
  15. return node.children
  16. def visit_packed_node_in(self, node):
  17. return node.children
  18. def visit_symbol_node_out(self, node):
  19. priority = 0
  20. for child in node.children:
  21. # Tokens do not have a priority attribute
  22. # count them as -1
  23. priority += getattr(child, 'priority', -1)
  24. node.priority = priority
  25. def visit_packed_node_out(self, node):
  26. priority = 0
  27. for child in node.children:
  28. priority += getattr(child, 'priority', -1)
  29. node.priority = priority
  30. def on_cycle(self, node, path):
  31. raise Exception("Oops, we encountered a cycle.")
  32. grammar = """
  33. start: hello " " world | hello_world
  34. hello: "Hello"
  35. world: "World"
  36. hello_world: "Hello World"
  37. """
  38. parser = Lark(grammar, parser='earley', ambiguity='forest')
  39. forest = parser.parse("Hello World")
  40. print("Default prioritizer:")
  41. tree = TreeForestTransformer(resolve_ambiguity=True).transform(forest)
  42. print(tree.pretty())
  43. forest = parser.parse("Hello World")
  44. print("Custom prioritizer:")
  45. tree = TreeForestTransformer(resolve_ambiguity=True, prioritizer=TokenPrioritizer()).transform(forest)
  46. print(tree.pretty())
  47. # Output:
  48. #
  49. # Default prioritizer:
  50. # start
  51. # hello Hello
  52. #
  53. # world World
  54. #
  55. # Custom prioritizer:
  56. # start
  57. # hello_world Hello World