class AmbiguousIntermediateExpander:
    """
    Propagate ambiguous intermediate nodes and their derivations up to the
    current rule.

    In general, converts

    rule
      _iambig
        _inter
          someChildren1
          ...
        _inter
          someChildren2
          ...
      someChildren3
      ...

    to

    _ambig
      rule
        someChildren1
        ...
        someChildren3
        ...
      rule
        someChildren2
        ...
        someChildren3
        ...
      rule
        childrenFromNestedIambigs
        ...
        someChildren3
        ...
      ...

    propagating up any nested '_iambig' nodes along the way.

    ParseTreeBuilder adds ``partial(AmbiguousIntermediateExpander, tree_class)``
    to its wrapper chain when ambiguity handling is enabled, so the remaining
    constructor argument (``node_builder``) is the callback being wrapped.

    Parameters:
        tree_class: callable ``tree_class(data, children)`` used to create the
            temporary '_inter' nodes and the resulting '_ambig' node.
        node_builder: callable taking a list of children and returning the
            node for the current rule.
    """

    def __init__(self, tree_class, node_builder):
        self.node_builder = node_builder
        self.tree_class = tree_class

    @staticmethod
    def _is_iambig_tree(child):
        """Return True if ``child`` has a ``data`` attribute equal to '_iambig'."""
        # Children without a ``data`` attribute (e.g. plain strings) are
        # never '_iambig' nodes.
        return getattr(child, 'data', None) == '_iambig'

    def _collapse_iambig(self, children):
        """
        Recursively flatten the derivations of the parent of an '_iambig'
        node.

        Returns a list of '_inter' nodes, one per derivation, each holding the
        derivation's children followed by the siblings that came after the
        '_iambig' node. Returns None if ``children`` does not start with an
        '_iambig' node.
        """
        # Due to the structure of the SPPF,
        # an '_iambig' node can only appear as the first child
        if not children or not self._is_iambig_tree(children[0]):
            return None

        # Siblings following the '_iambig' node are shared by every derivation.
        trailing = children[1:]
        result = []
        for derivation in children[0].children:
            collapsed = self._collapse_iambig(derivation.children)
            if collapsed:
                # The nested call returned freshly created '_inter' nodes, so
                # extending their child lists in place does not touch the input.
                for inter in collapsed:
                    inter.children += trailing
                result += collapsed
            else:
                result.append(self.tree_class('_inter', derivation.children + trailing))
        return result

    def __call__(self, children):
        """
        Build the node for the current rule.

        If ``children`` starts with an '_iambig' node, return an '_ambig' node
        whose children are ``node_builder`` results, one per flattened
        derivation. Otherwise return ``node_builder(children)``.
        """
        collapsed = self._collapse_iambig(children)
        if collapsed:
            return self.tree_class('_ambig', [self.node_builder(c.children) for c in collapsed])
        return self.node_builder(children)
class CompleteForestToAmbiguousTreeVisitor(ForestToTreeVisitor):
    """
    An augmented version of ForestToAmbiguousTreeVisitor that is designed to
    handle ambiguous intermediate nodes as well as ambiguous symbol nodes.

    On the way down:

    - When an ambiguous intermediate node is encountered, an '_iambig' node
      is inserted into the tree.
    - Each possible derivation of an ambiguous intermediate node is represented
      by an '_inter' node added as a child of the corresponding '_iambig' node.

    On the way up, these nodes are propagated up the tree and collapsed
    into a single '_ambig' node for the nearest symbol node ancestor.
    This is achieved by the AmbiguousIntermediateExpander contained in
    the callbacks.
    """

    def _collapse_ambig(self, children):
        """
        Return ``children`` with every direct '_ambig' child replaced by that
        child's own children (one level of flattening).
        """
        new_children = []
        for child in children:
            # A derivation callback may return something that is not a tree
            # and has no ``data`` attribute; such children are kept as they are
            # instead of raising AttributeError.
            if getattr(child, 'data', None) == '_ambig':
                new_children += child.children
            else:
                new_children.append(child)
        return new_children

    def visit_token_node(self, node):
        """Append the token to the children of the node on top of the stack."""
        self.output_stack[-1].children.append(node)

    def visit_symbol_node_in(self, node):
        """
        Open a collector node for an ambiguous node and return an iterator
        over ``node.children``.

        Ambiguous intermediate nodes get an '_iambig' collector, other
        ambiguous nodes an '_ambig' collector. Unambiguous nodes push nothing.
        """
        if node.is_ambiguous:
            # NOTE(review): an infinite priority presumably means the priority
            # has not been computed yet — confirm against the forest node class.
            if self.forest_sum_visitor and isinf(node.priority):
                self.forest_sum_visitor.visit(node)
            if node.is_intermediate:
                self.output_stack.append(Tree('_iambig', []))
            else:
                self.output_stack.append(Tree('_ambig', []))
        return iter(node.children)

    def visit_symbol_node_out(self, node):
        """
        Close the collector opened in ``visit_symbol_node_in`` and hand it to
        the enclosing node, or store it as the final result when the stack is
        empty. Does nothing for unambiguous nodes.
        """
        if node.is_ambiguous:
            result = self.output_stack.pop()
            if not node.is_intermediate:
                # Merge '_ambig' nodes produced by the derivations (e.g. by the
                # intermediate expander) into this single '_ambig' node.
                result = Tree('_ambig', self._collapse_ambig(result.children))
            if self.output_stack:
                self.output_stack[-1].children.append(result)
            else:
                self.result = result

    def visit_packed_node_in(self, node):
        """
        Open a collector for one derivation and return an iterator over the
        packed node's ``left`` and ``right`` parts.

        A 'drv' collector is pushed when the parent is not intermediate, an
        '_inter' collector when the parent is an ambiguous intermediate node,
        and nothing for an unambiguous intermediate parent (its children go
        straight into the collector already on the stack).
        """
        if not node.parent.is_intermediate:
            self.output_stack.append(Tree('drv', []))
        elif node.parent.is_ambiguous:
            self.output_stack.append(Tree('_inter', []))
        return iter([node.left, node.right])

    def visit_packed_node_out(self, node):
        """
        Close the collector opened in ``visit_packed_node_in``.

        For a non-intermediate parent the collected children are passed to the
        callback registered for ``node.rule``; for an ambiguous intermediate
        parent the '_inter' node itself is the result. The result is appended
        to the enclosing node, or stored as the final result when the stack is
        empty.
        """
        if not node.parent.is_intermediate:
            result = self.callbacks[node.rule](self.output_stack.pop().children)
        elif node.parent.is_ambiguous:
            result = self.output_stack.pop()
        else:
            # Unambiguous intermediate parent: nothing was pushed, nothing to emit.
            return
        if self.output_stack:
            self.output_stack[-1].children.append(result)
        else:
            self.result = result
def test_ambiguous_intermediate_node(self):
    """Two ways to assign the 'B' between ``ab`` and ``bc`` yield two derivations."""
    grammar = """
    start: ab bc d?
    !ab: "A" "B"?
    !bc: "B"? "C"
    !d: "D"
    """

    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    tree = parser.parse("ABCD")
    derivations = [
        (['A'], ['B', 'C'], ['D']),
        (['A', 'B'], ['C'], ['D']),
    ]
    expected = {
        Tree('start', [Tree('ab', ab), Tree('bc', bc), Tree('d', d)])
        for ab, bc, d in derivations
    }
    self.assertEqual(tree.data, '_ambig')
    self.assertEqual(set(tree.children), expected)

def test_ambiguous_symbol_and_intermediate_nodes(self):
    """Ambiguity in both symbol and intermediate nodes is reported in full."""
    grammar = """
    start: ab bc cd
    !ab: "A" "B"?
    !bc: "B"? "C"?
    !cd: "C"? "D"
    """

    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    tree = parser.parse("ABCD")
    derivations = [
        (['A', 'B'], ['C'], ['D']),
        (['A', 'B'], [], ['C', 'D']),
        (['A'], ['B', 'C'], ['D']),
        (['A'], ['B'], ['C', 'D']),
    ]
    expected = {
        Tree('start', [Tree('ab', ab), Tree('bc', bc), Tree('cd', cd)])
        for ab, bc, cd in derivations
    }
    self.assertEqual(tree.data, '_ambig')
    self.assertEqual(set(tree.children), expected)

def test_nested_ambiguous_intermediate_nodes(self):
    """Nested ambiguous intermediate nodes are all propagated to ``start``."""
    grammar = """
    start: ab bc cd e?
    !ab: "A" "B"?
    !bc: "B"? "C"?
    !cd: "C"? "D"
    !e: "E"
    """

    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    tree = parser.parse("ABCDE")
    derivations = [
        (['A', 'B'], ['C'], ['D'], ['E']),
        (['A'], ['B', 'C'], ['D'], ['E']),
        (['A'], ['B'], ['C', 'D'], ['E']),
        (['A', 'B'], [], ['C', 'D'], ['E']),
    ]
    expected = {
        Tree('start', [Tree('ab', ab), Tree('bc', bc), Tree('cd', cd), Tree('e', e)])
        for ab, bc, cd, e in derivations
    }
    self.assertEqual(tree.data, '_ambig')
    self.assertEqual(set(tree.children), expected)

def test_nested_ambiguous_intermediate_nodes2(self):
    """A longer chain of optional tokens produces eight distinct derivations."""
    grammar = """
    start: ab bc cd de f
    !ab: "A" "B"?
    !bc: "B"? "C"?
    !cd: "C"? "D"?
    !de: "D"? "E"
    !f: "F"
    """

    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    tree = parser.parse("ABCDEF")
    derivations = [
        (['A', 'B'], ['C'], ['D'], ['E'], ['F']),
        (['A'], ['B', 'C'], ['D'], ['E'], ['F']),
        (['A'], ['B'], ['C', 'D'], ['E'], ['F']),
        (['A'], ['B'], ['C'], ['D', 'E'], ['F']),
        (['A', "B"], [], ['C'], ['D', 'E'], ['F']),
        (['A'], ['B', 'C'], [], ['D', 'E'], ['F']),
        (['A', 'B'], [], ['C', 'D'], ['E'], ['F']),
        (['A', 'B'], ['C'], [], ['D', 'E'], ['F']),
    ]
    expected = {
        Tree('start', [
            Tree('ab', ab),
            Tree('bc', bc),
            Tree('cd', cd),
            Tree('de', de),
            Tree('f', f),
        ])
        for ab, bc, cd, de, f in derivations
    }
    self.assertEqual(tree.data, '_ambig')
    self.assertEqual(set(tree.children), expected)

def test_ambiguous_intermediate_node_unnamed_token(self):
    """A trailing anonymous token is filtered out of every derivation."""
    grammar = """
    start: ab bc "D"
    !ab: "A" "B"?
    !bc: "B"? "C"
    """

    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    tree = parser.parse("ABCD")
    derivations = [
        (['A'], ['B', 'C']),
        (['A', 'B'], ['C']),
    ]
    expected = {
        Tree('start', [Tree('ab', ab), Tree('bc', bc)])
        for ab, bc in derivations
    }
    self.assertEqual(tree.data, '_ambig')
    self.assertEqual(set(tree.children), expected)

def test_ambiguous_intermediate_node_inlined_rule(self):
    """An inlined rule (``_bc``) contributes no children to either derivation."""
    grammar = """
    start: ab _bc d?
    !ab: "A" "B"?
    _bc: "B"? "C"
    !d: "D"
    """

    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    tree = parser.parse("ABCD")
    derivations = [
        (['A'], ['D']),
        (['A', 'B'], ['D']),
    ]
    expected = {
        Tree('start', [Tree('ab', ab), Tree('d', d)])
        for ab, d in derivations
    }
    self.assertEqual(tree.data, '_ambig')
    self.assertEqual(set(tree.children), expected)

def test_ambiguous_intermediate_node_conditionally_inlined_rule(self):
    """A ``?`` rule is inlined only in the derivation where it has one child."""
    grammar = """
    start: ab bc d?
    !ab: "A" "B"?
    !?bc: "B"? "C"
    !d: "D"
    """

    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    tree = parser.parse("ABCD")
    kept = Tree('start', [Tree('ab', ['A']), Tree('bc', ['B', 'C']), Tree('d', ['D'])])
    inlined = Tree('start', [Tree('ab', ['A', 'B']), 'C', Tree('d', ['D'])])
    expected = {kept, inlined}
    self.assertEqual(tree.data, '_ambig')
    self.assertEqual(set(tree.children), expected)