From 816266a5eba0f7c959cedc990f646c33b7c53dbf Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Tue, 31 Oct 2017 13:36:54 +0200 Subject: [PATCH] BUGFIX for issue #24: Dynamic Earley mishandled %ignore tokens --- lark/parsers/xearley.py | 7 ++++--- tests/test_parser.py | 17 +++++++++++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/lark/parsers/xearley.py b/lark/parsers/xearley.py index c12108f..86a6b99 100644 --- a/lark/parsers/xearley.py +++ b/lark/parsers/xearley.py @@ -77,12 +77,13 @@ class Parser: column.add(new_items) def scan(i, token, column): + to_scan = column.to_scan.get_news() + for x in self.ignore: m = x.match(stream, i) if m: - return column - - to_scan = column.to_scan.get_news() + # TODO add partial matches for ignore too? + delayed_matches[m.end()] += to_scan for item in to_scan: m = item.expect.match(stream, i) diff --git a/tests/test_parser.py b/tests/test_parser.py index 838b0ef..76a6a33 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -186,6 +186,7 @@ def _make_full_earley_test(LEXER): l = Lark(grammar, parser='earley', lexer=LEXER) l.parse(program) + def test_earley_scanless3(self): "Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)" @@ -940,6 +941,22 @@ def _make_parser_test(LEXER, PARSER): + @unittest.skipIf(LEXER==None, "Scanless doesn't support regular expressions") + def test_ignore(self): + grammar = r""" + COMMENT: /(!|(\/\/))[^\n]*/ + %ignore COMMENT + %import common.WS -> _WS + %import common.INT + start: "INT"i _WS+ INT _WS* + """ + + parser = _Lark(grammar) + + tree = parser.parse("int 1 ! This is a comment\n") + self.assertEqual(tree.children, ['1']) + + _NAME = "Test" + PARSER.capitalize() + (LEXER or 'Scanless').capitalize() _TestParser.__name__ = _NAME