From c1fbb3214c621015fd0693d92721d8fdee8294ae Mon Sep 17 00:00:00 2001
From: Erez Shinan
Date: Wed, 1 Mar 2017 18:14:47 +0200
Subject: [PATCH] Earley.lexer = None as default now

---
 examples/conf_nolex.py |  2 +-
 lark/lark.py           |  4 ++--
 tests/test_parser.py   | 32 +++++++++++++++-----------------
 3 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/examples/conf_nolex.py b/examples/conf_nolex.py
index 6c16baf..b30087b 100644
--- a/examples/conf_nolex.py
+++ b/examples/conf_nolex.py
@@ -25,7 +25,7 @@ parser = Lark(r"""
 
         %import common.WS_INLINE
         %ignore WS_INLINE
-    """, lexer=None)
+    """)
 
 def test():
     sample_conf = """
diff --git a/lark/lark.py b/lark/lark.py
index aa0c210..7bc546b 100644
--- a/lark/lark.py
+++ b/lark/lark.py
@@ -22,7 +22,7 @@ class LarkOptions(object):
                  Note: "lalr" requires a lexer
         lexer - Decides whether or not to use a lexer stage
-            None: Don't use a lexer
+            None: Don't use a lexer (scanless, only works with parser="earley")
             "standard": Use a standard lexer
             "contextual": Stronger lexer (only works with parser="lalr")
             "auto" (default): Choose for me based on grammar and parser
@@ -124,7 +124,7 @@ class Lark:
         if self.options.parser == 'lalr':
             lexer = 'standard'
         elif self.options.parser == 'earley':
-            lexer = 'standard'
+            lexer = None
         self.options.lexer = lexer
 
         self.grammar = load_grammar(grammar, source)
diff --git a/tests/test_parser.py b/tests/test_parser.py
index fc1691a..0b1f342 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -35,19 +35,11 @@ class TestParsers(unittest.TestCase):
         g = Lark("""start: "(" name_list ("," "*" NAME)? ")"
                     name_list: NAME | name_list "," NAME
-                    NAME: /\w+/ """)
+                    NAME: /\w/+ """)
         l2 = g.parse('(a,b,c,*x)')
         assert l == l2, '%s != %s' % (l.pretty(), l2.pretty())
 
-    def test_earley_nolex(self):
-        g = Lark("""start: A "b" c
-                    A: "a"+
-                    c: "abc"
-                    """, parser="earley", lexer=None)
-        x = g.parse('aaaababc')
-
-
 class TestEarley(unittest.TestCase):
     def test_anon_in_scanless(self):
         # Fails an Earley implementation without special handling for empty rules,
@@ -58,6 +50,14 @@ class TestEarley(unittest.TestCase):
 
         assertEqual( g.parse('abc'), 'abc')
 
+    def test_earley_scanless(self):
+        g = Lark("""start: A "b" c
+                    A: "a"+
+                    c: "abc"
+                    """, parser="earley", lexer=None)
+        x = g.parse('aaaababc')
+
+
 def _make_parser_test(LEXER, PARSER):
     def _Lark(grammar, **kwargs):
@@ -78,8 +78,8 @@ def _make_parser_test(LEXER, PARSER):
     def test_basic2(self):
         # Multiple parsers and colliding tokens
         g = _Lark("""start: B A
-                    B: "12"
-                    A: "1" """)
+                     B: "12"
+                     A: "1" """)
         g2 = _Lark("""start: B A
                       B: "12"
                       A: "2" """)
@@ -123,14 +123,12 @@ def _make_parser_test(LEXER, PARSER):
         g.parse(u'\xa3\u0101\u00a3\u0203\n')
 
-    def test_recurse_expansion(self):
-        """Verify that stack depth doesn't get exceeded on recursive rules marked for expansion."""
-        g = _Lark(r"""start: a | start a
+    def test_stack_for_ebnf(self):
+        """Verify that stack depth isn't an issue for EBNF grammars"""
+        g = _Lark(r"""start: a+
                     a : "a" """)
-        # Force PLY to write to the debug log, but prevent writing it to the terminal (uses repr() on the half-built
-        # STree data structures, which uses recursion).
-        g.parse("a" * (sys.getrecursionlimit() // 4))
+        g.parse("a" * (sys.getrecursionlimit()*2 ))
 
     def test_expand1_lists_with_one_item(self):
         g = _Lark(r"""start: list
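
Editor's note (not part of the patch): the user-visible effect of this change is that parser="earley" now defaults to scanless parsing, so callers no longer need to pass lexer=None explicitly, as the updated conf_nolex.py example shows. Below is a minimal usage sketch of the new default, assuming the 2017-era Lark API that this patch targets; the grammar is the one from the new test_earley_scanless test, everything else is illustrative.

    # Sketch only: assumes the lark version this patch applies to.
    from lark import Lark

    # Grammar copied from the test_earley_scanless test added above.
    grammar = """start: A "b" c
                 A: "a"+
                 c: "abc"
                 """

    # Before this patch, scanless Earley parsing required an explicit lexer=None.
    # After it, the "auto" lexer setting resolves to None whenever parser="earley".
    parser = Lark(grammar, parser="earley")
    tree = parser.parse('aaaababc')
    print(tree.pretty())  # render the resulting parse tree for inspection

Passing lexer="standard" explicitly should still opt back into the tokenizing pipeline, consistent with the docstring updated in lark/lark.py.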