Validate against zero-width terminals in XEarley (Issue #63)

6 years ago · d173d6d66b
--- a/lark/parser_frontends.py
+++ b/lark/parser_frontends.py
@@ -122,9 +122,13 @@ class XEarley:
        for t in lexer_conf.tokens:
            regexp = t.pattern.to_regexp()
            try:
-               assert get_regexp_width(regexp)
+               width = get_regexp_width(regexp)[0]
            except ValueError:
                raise ValueError("Bad regexp in token %s: %s" % (t.name, regexp))
+           else:
+               if width == 0:
+                   raise ValueError("Dynamic Earley doesn't allow zero-width regexps")
            self.regexps[t.name] = re.compile(regexp)
    def parse(self, text):
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -159,7 +159,7 @@ def _make_full_earley_test(LEXER):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = Lark(r"""start: B
-                        B: ("ab"|/[^b]/)*
+                        B: ("ab"|/[^b]/)+
                      """, lexer=LEXER)
            self.assertEqual( g.parse('abc').children[0], 'abc')