diff --git a/lark/parsers/xearley.py b/lark/parsers/xearley.py index c12108f..86a6b99 100644 --- a/lark/parsers/xearley.py +++ b/lark/parsers/xearley.py @@ -77,12 +77,13 @@ class Parser: column.add(new_items) def scan(i, token, column): + to_scan = column.to_scan.get_news() + for x in self.ignore: m = x.match(stream, i) if m: - return column - - to_scan = column.to_scan.get_news() + # TODO add partial matches for ignore too? + delayed_matches[m.end()] += to_scan for item in to_scan: m = item.expect.match(stream, i) diff --git a/tests/test_parser.py b/tests/test_parser.py index 838b0ef..76a6a33 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -186,6 +186,7 @@ def _make_full_earley_test(LEXER): l = Lark(grammar, parser='earley', lexer=LEXER) l.parse(program) + def test_earley_scanless3(self): "Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)" @@ -940,6 +941,22 @@ def _make_parser_test(LEXER, PARSER): + @unittest.skipIf(LEXER==None, "Scanless doesn't support regular expressions") + def test_ignore(self): + grammar = r""" + COMMENT: /(!|(\/\/))[^\n]*/ + %ignore COMMENT + %import common.WS -> _WS + %import common.INT + start: "INT"i _WS+ INT _WS* + """ + + parser = _Lark(grammar) + + tree = parser.parse("int 1 ! This is a comment\n") + self.assertEqual(tree.children, ['1']) + + _NAME = "Test" + PARSER.capitalize() + (LEXER or 'Scanless').capitalize() _TestParser.__name__ = _NAME