From c017525e50d92bad9b1219f0b9148ed33bce602d Mon Sep 17 00:00:00 2001 From: Chanic Panic Date: Mon, 2 Nov 2020 21:56:10 -0800 Subject: [PATCH 1/3] Fix issues with SPPF cycle handling --- lark/parsers/earley_forest.py | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/lark/parsers/earley_forest.py b/lark/parsers/earley_forest.py index 532dedf..630ea63 100644 --- a/lark/parsers/earley_forest.py +++ b/lark/parsers/earley_forest.py @@ -490,19 +490,22 @@ class ForestToParseTree(ForestTransformer): self.prioritizer = prioritizer self.resolve_ambiguity = resolve_ambiguity self._on_cycle_retreat = False + self._cycle_node = None + self._successful_visits = set() def on_cycle(self, node, path): logger.warning("Cycle encountered in the SPPF at node: %s. " "As infinite ambiguities cannot be represented in a tree, " "this family of derivations will be discarded.", node) - if self.resolve_ambiguity: - # TODO: choose a different path if cycle is encountered - logger.warning("At this time, using ambiguity resolution for SPPFs " - "with cycles may result in None being returned.") + self._cycle_node = node self._on_cycle_retreat = True def _check_cycle(self, node): if self._on_cycle_retreat: + if id(node) == id(self._cycle_node): + self._cycle_node = None + self._on_cycle_retreat = False + return raise Discard() def _collapse_ambig(self, children): @@ -531,11 +534,17 @@ class ForestToParseTree(ForestTransformer): raise Discard() def transform_symbol_node(self, node, data): + if id(node) not in self._successful_visits: + raise Discard() + self._successful_visits.remove(id(node)) self._check_cycle(node) data = self._collapse_ambig(data) return self._call_ambig_func(node, data) def transform_intermediate_node(self, node, data): + if id(node) not in self._successful_visits: + raise Discard() + self._successful_visits.remove(id(node)) self._check_cycle(node) if len(data) > 1: children = [self.tree_class('_inter', c) for c in data] @@ -544,6 +553,8 @@ class ForestToParseTree(ForestTransformer): def transform_packed_node(self, node, data): self._check_cycle(node) + if self.resolve_ambiguity and id(node.parent) in self._successful_visits: + raise Discard() children = [] assert len(data) <= 2 data = PackedData(node, data) @@ -559,21 +570,23 @@ class ForestToParseTree(ForestTransformer): return self._call_rule_func(node, children) def visit_symbol_node_in(self, node): - self._on_cycle_retreat = False super(ForestToParseTree, self).visit_symbol_node_in(node) + if self._on_cycle_retreat: + return if self.prioritizer and node.is_ambiguous and isinf(node.priority): self.prioritizer.visit(node) - if self.resolve_ambiguity: - return node.children[0] return node.children def visit_packed_node_in(self, node): self._on_cycle_retreat = False - return super(ForestToParseTree, self).visit_packed_node_in(node) + to_visit = super(ForestToParseTree, self).visit_packed_node_in(node) + if not self.resolve_ambiguity or id(node.parent) not in self._successful_visits: + return to_visit - def visit_token_node(self, node): - self._on_cycle_retreat = False - return super(ForestToParseTree, self).visit_token_node(node) + def visit_packed_node_out(self, node): + super(ForestToParseTree, self).visit_packed_node_out(node) + if not self._on_cycle_retreat: + self._successful_visits.add(id(node.parent)) def handles_ambiguity(func): """Decorator for methods of subclasses of ``TreeForestTransformer``. From d6ef5391991fdf5b856e95eb34f854a5d94adc8c Mon Sep 17 00:00:00 2001 From: Chanic Panic Date: Mon, 2 Nov 2020 22:36:31 -0800 Subject: [PATCH 2/3] Add tests for SPPF cycles --- tests/test_parser.py | 70 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/tests/test_parser.py b/tests/test_parser.py index 32aa4fc..6d0981f 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -746,6 +746,76 @@ def _make_full_earley_test(LEXER): tree = parser.parse(text) self.assertEqual(tree.children, ['foo', 'bar']) + def test_cycle(self): + grammar = """ + start: start? + """ + + l = Lark(grammar, ambiguity='resolve', lexer=LEXER) + tree = l.parse('') + self.assertEqual(tree, Tree('start', [])) + + l = Lark(grammar, ambiguity='explicit', lexer=LEXER) + tree = l.parse('') + self.assertEqual(tree, Tree('start', [])) + + def test_cycles(self): + grammar = """ + a: b + b: c* + c: a + """ + + l = Lark(grammar, start='a', ambiguity='resolve', lexer=LEXER) + tree = l.parse('') + self.assertEqual(tree, Tree('a', [Tree('b', [])])) + + l = Lark(grammar, start='a', ambiguity='explicit', lexer=LEXER) + tree = l.parse('') + self.assertEqual(tree, Tree('a', [Tree('b', [])])) + + def test_many_cycles(self): + grammar = """ + start: a? | start start + !a: "a" + """ + + l = Lark(grammar, ambiguity='resolve', lexer=LEXER) + tree = l.parse('a') + self.assertEqual(tree, Tree('start', [Tree('a', ['a'])])) + + l = Lark(grammar, ambiguity='explicit', lexer=LEXER) + tree = l.parse('a') + self.assertEqual(tree, Tree('start', [Tree('a', ['a'])])) + + def test_cycles_with_child_filter(self): + grammar = """ + a: _x + _x: _x? b + b: + """ + + grammar2 = """ + a: x + x: x? b + b: + """ + + l = Lark(grammar, start='a', ambiguity='resolve', lexer=LEXER) + tree = l.parse('') + self.assertEqual(tree, Tree('a', [Tree('b', [])])) + + l = Lark(grammar, start='a', ambiguity='explicit', lexer=LEXER) + tree = l.parse(''); + self.assertEqual(tree, Tree('a', [Tree('b', [])])) + + l = Lark(grammar2, start='a', ambiguity='resolve', lexer=LEXER) + tree = l.parse(''); + self.assertEqual(tree, Tree('a', [Tree('x', [Tree('b', [])])])) + + l = Lark(grammar2, start='a', ambiguity='explicit', lexer=LEXER) + tree = l.parse(''); + self.assertEqual(tree, Tree('a', [Tree('x', [Tree('b', [])])])) From e3c7564fa02d3f5f1f71494588dbd80a60c8525a Mon Sep 17 00:00:00 2001 From: Chanic Panic Date: Sat, 7 Nov 2020 19:38:13 -0800 Subject: [PATCH 3/3] Add debug attribute to ForestToParseTree --- lark/parsers/earley.py | 2 +- lark/parsers/earley_forest.py | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index 3ccba01..d6afa71 100644 --- a/lark/parsers/earley.py +++ b/lark/parsers/earley.py @@ -315,7 +315,7 @@ class Parser: if self.tree_class is not None: # Perform our SPPF -> AST conversion - transformer = ForestToParseTree(self.tree_class, self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor(), self.resolve_ambiguity) + transformer = ForestToParseTree(self.tree_class, self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor(), self.resolve_ambiguity, self.debug) return transformer.transform(solutions[0]) # return the root of the SPPF diff --git a/lark/parsers/earley_forest.py b/lark/parsers/earley_forest.py index 630ea63..7a56bbc 100644 --- a/lark/parsers/earley_forest.py +++ b/lark/parsers/earley_forest.py @@ -483,20 +483,22 @@ class ForestToParseTree(ForestTransformer): tree. """ - def __init__(self, tree_class=Tree, callbacks=dict(), prioritizer=ForestSumVisitor(), resolve_ambiguity=True): + def __init__(self, tree_class=Tree, callbacks=dict(), prioritizer=ForestSumVisitor(), resolve_ambiguity=True, debug=False): super(ForestToParseTree, self).__init__() self.tree_class = tree_class self.callbacks = callbacks self.prioritizer = prioritizer self.resolve_ambiguity = resolve_ambiguity + self.debug = debug self._on_cycle_retreat = False self._cycle_node = None self._successful_visits = set() def on_cycle(self, node, path): - logger.warning("Cycle encountered in the SPPF at node: %s. " - "As infinite ambiguities cannot be represented in a tree, " - "this family of derivations will be discarded.", node) + if self.debug: + logger.warning("Cycle encountered in the SPPF at node: %s. " + "As infinite ambiguities cannot be represented in a tree, " + "this family of derivations will be discarded.", node) self._cycle_node = node self._on_cycle_retreat = True