""" Custom SPPF Prioritizer ======================= This example demonstrates how to subclass ``ForestVisitor`` to make a custom SPPF node prioritizer to be used in conjunction with ``TreeForestTransformer``. Our prioritizer will count the number of descendants of a node that are tokens. By negating this count, our prioritizer will prefer nodes with fewer token descendants. Thus, we choose the more specific parse. """ from lark import Lark from lark.parsers.earley_forest import ForestVisitor, TreeForestTransformer class TokenPrioritizer(ForestVisitor): def visit_symbol_node_in(self, node): # visit the entire forest by returning node.children return node.children def visit_packed_node_in(self, node): return node.children def visit_symbol_node_out(self, node): priority = 0 for child in node.children: # Tokens do not have a priority attribute # count them as -1 priority += getattr(child, 'priority', -1) node.priority = priority def visit_packed_node_out(self, node): priority = 0 for child in node.children: priority += getattr(child, 'priority', -1) node.priority = priority def on_cycle(self, node, path): raise Exception("Oops, we encountered a cycle.") grammar = """ start: hello " " world | hello_world hello: "Hello" world: "World" hello_world: "Hello World" """ parser = Lark(grammar, parser='earley', ambiguity='forest') forest = parser.parse("Hello World") print("Default prioritizer:") tree = TreeForestTransformer(resolve_ambiguity=True).transform(forest) print(tree.pretty()) forest = parser.parse("Hello World") print("Custom prioritizer:") tree = TreeForestTransformer(resolve_ambiguity=True, prioritizer=TokenPrioritizer()).transform(forest) print(tree.pretty()) # Output: # # Default prioritizer: # start # hello Hello # # world World # # Custom prioritizer: # start # hello_world Hello World