@@ -195,6 +195,86 @@ def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens): | |||
if to_expand: | |||
return partial(AmbiguousExpander, to_expand, tree_class) | |||
class AmbiguousIntermediateExpander:
    """
    Lift ambiguous intermediate nodes, together with each of their
    derivations, up to the rule currently being built.

    A rule whose first child is an '_iambig' node, i.e.

        rule
          _iambig
            _inter
              someChildren1
              ...
            _inter
              someChildren2
              ...
          someChildren3
          ...

    is rewritten as

        _ambig
          rule
            someChildren1
            ...
            someChildren3
            ...
          rule
            someChildren2
            ...
            someChildren3
            ...
          rule
            childrenFromNestedIambigs
            ...
            someChildren3
            ...
          ...

    Any '_iambig' node nested at the front of an '_inter' derivation is
    expanded the same way on the way up.
    """

    def __init__(self, tree_class, node_builder):
        # node_builder: the wrapped callback that builds one node from a
        # list of children; tree_class: used to create '_inter'/'_ambig'.
        self.node_builder = node_builder
        self.tree_class = tree_class

    def __call__(self, children):
        """
        Build the node(s) for `children`.

        Returns an '_ambig' tree holding one built node per derivation when
        `children` starts with an '_iambig' node; otherwise returns the
        result of calling the node builder on `children` unchanged.
        """
        make_tree = self.tree_class
        build = self.node_builder

        def _flatten(nodes):
            """
            Expand the '_iambig' node leading `nodes`, recursively.

            Returns a list of '_inter' trees, none of which starts with an
            '_iambig' node, or None when `nodes` does not start with one.
            """
            # Only the first child is inspected: per the SPPF structure an
            # '_iambig' node can only ever be the first child.
            if not nodes:
                return None
            head = nodes[0]
            if not (hasattr(head, 'data') and head.data == '_iambig'):
                return None

            tail = nodes[1:]
            derivations = []
            for inter in head.children:
                nested = _flatten(inter.children)
                if nested:
                    # Trees in `nested` were created by the recursive call,
                    # so extending their child lists in place is safe.
                    for tree in nested:
                        tree.children += tail
                    derivations += nested
                else:
                    derivations.append(make_tree('_inter', inter.children + tail))
            return derivations

        alternatives = _flatten(children)
        if not alternatives:
            return build(children)

        return make_tree('_ambig', [build(alt.children) for alt in alternatives])
def ptb_inline_args(func): | |||
@wraps(func) | |||
def f(children): | |||
@@ -239,6 +319,7 @@ class ParseTreeBuilder: | |||
maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None), | |||
self.propagate_positions and PropagatePositions, | |||
self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens), | |||
self.ambiguous and partial(AmbiguousIntermediateExpander, self.tree_class) | |||
])) | |||
yield rule, wrapper_chain | |||
@@ -18,7 +18,7 @@ from ..utils import logger | |||
from .grammar_analysis import GrammarAnalyzer | |||
from ..grammar import NonTerminal | |||
from .earley_common import Item, TransitiveItem | |||
from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode, ForestToAmbiguousTreeVisitor | |||
from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode, CompleteForestToAmbiguousTreeVisitor | |||
class Parser: | |||
def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, debug=False): | |||
@@ -313,7 +313,7 @@ class Parser: | |||
assert False, 'Earley should not generate multiple start symbol items!' | |||
# Perform our SPPF -> AST conversion using the right ForestVisitor. | |||
forest_tree_visitor_cls = ForestToTreeVisitor if self.resolve_ambiguity else ForestToAmbiguousTreeVisitor | |||
forest_tree_visitor_cls = ForestToTreeVisitor if self.resolve_ambiguity else CompleteForestToAmbiguousTreeVisitor | |||
forest_tree_visitor = forest_tree_visitor_cls(self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor()) | |||
return forest_tree_visitor.visit(solutions[0]) | |||
@@ -363,6 +363,75 @@ class ForestToAmbiguousTreeVisitor(ForestToTreeVisitor): | |||
else: | |||
self.result = result | |||
class CompleteForestToAmbiguousTreeVisitor(ForestToTreeVisitor):
    """
    An augmented version of ForestToAmbiguousTreeVisitor that is designed to
    handle ambiguous intermediate nodes as well as ambiguous symbol nodes.

    On the way down:

    - When an ambiguous intermediate node is encountered, an '_iambig' node
      is inserted into the tree.
    - Each possible derivation of an ambiguous intermediate node is represented
      by an '_inter' node added as a child of the corresponding '_iambig' node.

    On the way up, these nodes are propagated up the tree and collapsed
    into a single '_ambig' node for the nearest symbol node ancestor.
    This is achieved by the AmbiguousIntermediateExpander contained in
    the callbacks.
    """

    def _collapse_ambig(self, children):
        """
        Return `children` with every direct '_ambig' child replaced by that
        child's own children (a one-level flatten).
        """
        new_children = []
        for child in children:
            # A child is whatever a rule callback returned, which is not
            # necessarily a tree (e.g. a bare token); such values have no
            # `data` attribute and must be kept as-is rather than crash.
            if hasattr(child, 'data') and child.data == '_ambig':
                new_children += child.children
            else:
                new_children.append(child)
        return new_children

    def visit_token_node(self, node):
        """Append the token to the children of the tree on top of the stack."""
        self.output_stack[-1].children.append(node)

    def visit_symbol_node_in(self, node):
        """
        Push a container tree for an ambiguous symbol node and return an
        iterator over the node's children.

        Ambiguous intermediate nodes get an '_iambig' container, other
        ambiguous nodes an '_ambig' one. Nothing is pushed for an
        unambiguous node.
        """
        if node.is_ambiguous:
            # Run the sum visitor only when one is configured and the
            # node's priority is infinite.
            if self.forest_sum_visitor and isinf(node.priority):
                self.forest_sum_visitor.visit(node)
            if node.is_intermediate:
                self.output_stack.append(Tree('_iambig', []))
            else:
                self.output_stack.append(Tree('_ambig', []))
        return iter(node.children)

    def visit_symbol_node_out(self, node):
        """
        Pop the container pushed for an ambiguous node and hand it to the
        enclosing tree, or store it as the final result if the stack is empty.
        """
        if node.is_ambiguous:
            result = self.output_stack.pop()
            if not node.is_intermediate:
                # Merge '_ambig' children into this node's own '_ambig'.
                result = Tree('_ambig', self._collapse_ambig(result.children))
            if self.output_stack:
                self.output_stack[-1].children.append(result)
            else:
                self.result = result

    def visit_packed_node_in(self, node):
        """
        Push a collector tree for this derivation and return an iterator over
        the packed node's left and right children.

        A 'drv' tree is pushed when the parent is not intermediate, an
        '_inter' tree when the parent is an ambiguous intermediate node, and
        nothing otherwise.
        """
        if not node.parent.is_intermediate:
            self.output_stack.append(Tree('drv', []))
        elif node.parent.is_ambiguous:
            self.output_stack.append(Tree('_inter', []))
        return iter([node.left, node.right])

    def visit_packed_node_out(self, node):
        """
        Finish the derivation started in visit_packed_node_in.

        For a non-intermediate parent the collected children are passed to
        the callback registered for the node's rule. For an ambiguous
        intermediate parent the '_inter' tree itself is the result.
        """
        if not node.parent.is_intermediate:
            result = self.callbacks[node.rule](self.output_stack.pop().children)
        elif node.parent.is_ambiguous:
            result = self.output_stack.pop()
        else:
            # Unambiguous intermediate parent: nothing was pushed on the way
            # in, so there is nothing to pop or attach here.
            return

        if self.output_stack:
            self.output_stack[-1].children.append(result)
        else:
            self.result = result
class ForestToPyDotVisitor(ForestVisitor): | |||
""" | |||
A Forest visitor which writes the SPPF to a PNG. | |||
@@ -460,6 +460,221 @@ def _make_full_earley_test(LEXER): | |||
]) | |||
self.assertEqual(res, expected) | |||
def test_ambiguous_intermediate_node(self):
    """Both ways of assigning "B" to `ab` or `bc` must appear under '_ambig'."""
    grammar = """
    start: ab bc d?
    !ab: "A" "B"?
    !bc: "B"? "C"
    !d: "D"
    """
    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    tree = parser.parse("ABCD")

    # (children of ab, children of bc) for each expected derivation.
    splits = [
        (['A'], ['B', 'C']),
        (['A', 'B'], ['C']),
    ]
    expected = {
        Tree('start', [Tree('ab', ab), Tree('bc', bc), Tree('d', ['D'])])
        for ab, bc in splits
    }

    self.assertEqual(tree.data, '_ambig')
    self.assertEqual(set(tree.children), expected)
def test_ambiguous_symbol_and_intermediate_nodes(self):
    """All four splits of "ABCD" across `ab`, `bc` and `cd` must be reported."""
    grammar = """
    start: ab bc cd
    !ab: "A" "B"?
    !bc: "B"? "C"?
    !cd: "C"? "D"
    """
    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    tree = parser.parse("ABCD")

    # (children of ab, children of bc, children of cd) per derivation.
    splits = [
        (['A', 'B'], ['C'], ['D']),
        (['A', 'B'], [], ['C', 'D']),
        (['A'], ['B', 'C'], ['D']),
        (['A'], ['B'], ['C', 'D']),
    ]
    expected = {
        Tree('start', [Tree('ab', ab), Tree('bc', bc), Tree('cd', cd)])
        for ab, bc, cd in splits
    }

    self.assertEqual(tree.data, '_ambig')
    self.assertEqual(set(tree.children), expected)
def test_nested_ambiguous_intermediate_nodes(self):
    """Four splits of "ABCD" must each be followed by the same `e` subtree."""
    grammar = """
    start: ab bc cd e?
    !ab: "A" "B"?
    !bc: "B"? "C"?
    !cd: "C"? "D"
    !e: "E"
    """
    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    tree = parser.parse("ABCDE")

    # (children of ab, children of bc, children of cd) per derivation.
    splits = [
        (['A', 'B'], ['C'], ['D']),
        (['A'], ['B', 'C'], ['D']),
        (['A'], ['B'], ['C', 'D']),
        (['A', 'B'], [], ['C', 'D']),
    ]
    expected = {
        Tree('start', [
            Tree('ab', ab),
            Tree('bc', bc),
            Tree('cd', cd),
            Tree('e', ['E']),
        ])
        for ab, bc, cd in splits
    }

    self.assertEqual(tree.data, '_ambig')
    self.assertEqual(set(tree.children), expected)
def test_nested_ambiguous_intermediate_nodes2(self):
    """All eight splits of "ABCDE" across four overlapping rules are expected."""
    grammar = """
    start: ab bc cd de f
    !ab: "A" "B"?
    !bc: "B"? "C"?
    !cd: "C"? "D"?
    !de: "D"? "E"
    !f: "F"
    """
    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    tree = parser.parse("ABCDEF")

    # (children of ab, bc, cd, de) for each expected derivation.
    splits = [
        (['A', 'B'], ['C'], ['D'], ['E']),
        (['A'], ['B', 'C'], ['D'], ['E']),
        (['A'], ['B'], ['C', 'D'], ['E']),
        (['A'], ['B'], ['C'], ['D', 'E']),
        (['A', 'B'], [], ['C'], ['D', 'E']),
        (['A'], ['B', 'C'], [], ['D', 'E']),
        (['A', 'B'], [], ['C', 'D'], ['E']),
        (['A', 'B'], ['C'], [], ['D', 'E']),
    ]
    expected = {
        Tree('start', [
            Tree('ab', ab),
            Tree('bc', bc),
            Tree('cd', cd),
            Tree('de', de),
            Tree('f', ['F']),
        ])
        for ab, bc, cd, de in splits
    }

    self.assertEqual(tree.data, '_ambig')
    self.assertEqual(set(tree.children), expected)
def test_ambiguous_intermediate_node_unnamed_token(self):
    """The anonymous "D" token must not appear in either derivation."""
    grammar = """
    start: ab bc "D"
    !ab: "A" "B"?
    !bc: "B"? "C"
    """
    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    tree = parser.parse("ABCD")

    # (children of ab, children of bc) for each expected derivation.
    splits = [
        (['A'], ['B', 'C']),
        (['A', 'B'], ['C']),
    ]
    expected = {
        Tree('start', [Tree('ab', ab), Tree('bc', bc)])
        for ab, bc in splits
    }

    self.assertEqual(tree.data, '_ambig')
    self.assertEqual(set(tree.children), expected)
def test_ambiguous_intermediate_node_inlined_rule(self):
    """With `_bc` inlined, each derivation keeps only the `ab` and `d` subtrees."""
    grammar = """
    start: ab _bc d?
    !ab: "A" "B"?
    _bc: "B"? "C"
    !d: "D"
    """
    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    tree = parser.parse("ABCD")

    # The two derivations differ only in what `ab` consumed.
    ab_variants = [['A'], ['A', 'B']]
    expected = {
        Tree('start', [Tree('ab', ab), Tree('d', ['D'])])
        for ab in ab_variants
    }

    self.assertEqual(tree.data, '_ambig')
    self.assertEqual(set(tree.children), expected)
def test_ambiguous_intermediate_node_conditionally_inlined_rule(self):
    """`?bc` stays a tree with two children and collapses to 'C' with one."""
    grammar = """
    start: ab bc d?
    !ab: "A" "B"?
    !?bc: "B"? "C"
    !d: "D"
    """
    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    tree = parser.parse("ABCD")

    # `bc` matched "B" "C": the rule keeps its own node.
    bc_kept = Tree('start', [
        Tree('ab', ['A']),
        Tree('bc', ['B', 'C']),
        Tree('d', ['D']),
    ])
    # `bc` matched only "C": the single child replaces the node.
    bc_inlined = Tree('start', [
        Tree('ab', ['A', 'B']),
        'C',
        Tree('d', ['D']),
    ])
    expected = {bc_kept, bc_inlined}

    self.assertEqual(tree.data, '_ambig')
    self.assertEqual(set(tree.children), expected)
def test_fruitflies_ambig(self): | |||
grammar = """ | |||
start: noun verb noun -> simple | |||