From 1685f94ea3d069f1b9645b63d2e827ea2448c1e0 Mon Sep 17 00:00:00 2001
From: Erez Shinan
Date: Wed, 5 Apr 2017 17:32:56 +0300
Subject: [PATCH] BUGFIX: Solved an elusive bug in Earley parser, when empty rules repeat in the same column

---
 lark/parsers/earley.py |  4 ++--
 tests/test_parser.py   | 16 ++++++++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py
index b903167..95061e1 100644
--- a/lark/parsers/earley.py
+++ b/lark/parsers/earley.py
@@ -98,7 +98,7 @@ class Column:
         for item in items:
 
             if item.is_complete:
-                # XXX TODO Potential bug: What happens if there's ambiguity in an empty rule?
+                # XXX Potential bug: What happens if there's ambiguity in an empty rule?
                 if item.rule.expansion and item in self.completed:
                     old_tree = self.completed[item].tree
                     if old_tree.data != 'ambig':
@@ -110,7 +110,7 @@ class Column:
                     old_tree.children.append(item.tree)
                 else:
                     self.completed[item] = item
-                    self.to_reduce.append(item)
+                self.to_reduce.append(item)
             else:
                 if item not in added:
                     added.add(item)
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 83085e2..3e3ee14 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -19,6 +19,7 @@ logging.basicConfig(level=logging.INFO)
 from lark.lark import Lark
 from lark.common import GrammarError, ParseError
 from lark.lexer import LexError
+from lark.tree import Tree
 
 __path__ = os.path.dirname(__file__)
 def _read(n, *args):
@@ -104,6 +105,21 @@ class TestEarley(unittest.TestCase):
         res = l.parse("aaa")
         self.assertEqual(res.children, ['aaa'])
 
+    def test_earley_repeating_empty(self):
+        # This was a sneaky bug!
+
+        grammar = """
+        !start: "a" empty empty "b"
+        empty: empty2
+        empty2:
+        """
+
+        parser = Lark(grammar, parser='earley', lexer=None)
+        res = parser.parse('ab')
+
+        empty_tree = Tree('empty', [Tree('empty2', [])])
+        self.assertSequenceEqual(res.children, ['a', empty_tree, empty_tree, 'b'])
+
 def _make_parser_test(LEXER, PARSER):
     def _Lark(grammar, **kwargs):
         return Lark(grammar, lexer=LEXER, parser=PARSER, **kwargs)