Browse Source

Validate against zero-width terminals in XEarley (Issue #63)

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.3
Erez Shinan 6 years ago
parent
commit
d173d6d66b
2 changed files with 6 additions and 2 deletions
  1. lark/parser_frontends.py (+5, -1)
  2. tests/test_parser.py (+1, -1)

+ 5
- 1
lark/parser_frontends.py View File

@@ -122,9 +122,13 @@ class XEarley:
         for t in lexer_conf.tokens:
             regexp = t.pattern.to_regexp()
             try:
-                assert get_regexp_width(regexp)
+                width = get_regexp_width(regexp)[0]
             except ValueError:
                 raise ValueError("Bad regexp in token %s: %s" % (t.name, regexp))
+            else:
+                if width == 0:
+                    raise ValueError("Dynamic Earley doesn't allow zero-width regexps")
+
             self.regexps[t.name] = re.compile(regexp)
 
     def parse(self, text):


+ 1
- 1
tests/test_parser.py View File

@@ -159,7 +159,7 @@ def _make_full_earley_test(LEXER):
             # Fails an Earley implementation without special handling for empty rules,
             # or re-processing of already completed rules.
             g = Lark(r"""start: B
-                         B: ("ab"|/[^b]/)*
+                         B: ("ab"|/[^b]/)+
                       """, lexer=LEXER)
 
             self.assertEqual( g.parse('abc').children[0], 'abc')


Loading…
Cancel
Save