Browse Source

BUGFIX: Solved an elusive bug in Earley parser, when empty rules repeat in the same column

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 7 years ago
parent
commit
1685f94ea3
2 changed files with 18 additions and 2 deletions
  1. +2
    -2
      lark/parsers/earley.py
  2. +16
    -0
      tests/test_parser.py

+ 2
- 2
lark/parsers/earley.py View File

@@ -98,7 +98,7 @@ class Column:
for item in items:

if item.is_complete:
# XXX TODO Potential bug: What happens if there's ambiguity in an empty rule?
# XXX Potential bug: What happens if there's ambiguity in an empty rule?
if item.rule.expansion and item in self.completed:
old_tree = self.completed[item].tree
if old_tree.data != 'ambig':
@@ -110,7 +110,7 @@ class Column:
old_tree.children.append(item.tree)
else:
self.completed[item] = item
self.to_reduce.append(item)
self.to_reduce.append(item)
else:
if item not in added:
added.add(item)


+ 16
- 0
tests/test_parser.py View File

@@ -19,6 +19,7 @@ logging.basicConfig(level=logging.INFO)
from lark.lark import Lark
from lark.common import GrammarError, ParseError
from lark.lexer import LexError
from lark.tree import Tree

__path__ = os.path.dirname(__file__)
def _read(n, *args):
@@ -104,6 +105,21 @@ class TestEarley(unittest.TestCase):
res = l.parse("aaa")
self.assertEqual(res.children, ['aaa'])

def test_earley_repeating_empty(self):
# Regression test (this was a sneaky bug!): the same empty rule is used
# twice in a row, so both occurrences match zero characters at the same
# input position, between "a" and "b".

# `empty2` has no expansion at all, and `empty` derives only `empty2`,
# so `empty` is itself an empty rule with one level of nesting.
grammar = """
!start: "a" empty empty "b"
empty: empty2
empty2:
"""

parser = Lark(grammar, parser='earley', lexer=None)
res = parser.parse('ab')

# Each occurrence of `empty` must contribute its own subtree to the
# result; neither one may be dropped or merged with the other.
empty_tree = Tree('empty', [Tree('empty2', [])])
self.assertSequenceEqual(res.children, ['a', empty_tree, empty_tree, 'b'])

def _make_parser_test(LEXER, PARSER):
def _Lark(grammar, **kwargs):
return Lark(grammar, lexer=LEXER, parser=PARSER, **kwargs)


Loading…
Cancel
Save