BUGFIX for issue #24: Dynamic Earley mishandled %ignore tokens

6 lat temu · 816266a5eb
--- a/lark/parsers/xearley.py
+++ b/lark/parsers/xearley.py
@@ -77,12 +77,13 @@ class Parser:
                    column.add(new_items)

        def scan(i, token, column):
            to_scan = column.to_scan.get_news()

            for x in self.ignore:
                m = x.match(stream, i)
                if m:
                    return column

            to_scan = column.to_scan.get_news()
                    # TODO add partial matches for ignore too?
                    delayed_matches[m.end()] += to_scan

            for item in to_scan:
                m = item.expect.match(stream, i)
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -186,6 +186,7 @@ def _make_full_earley_test(LEXER):
            l = Lark(grammar, parser='earley', lexer=LEXER)
            l.parse(program)


        def test_earley_scanless3(self):
            "Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)"

@@ -940,6 +941,22 @@ def _make_parser_test(LEXER, PARSER):



        @unittest.skipIf(LEXER is None, "Scanless doesn't support regular expressions")
        def test_ignore(self):
            """Check that input matched by an %ignore'd terminal is left out of the tree.

            The grammar declares COMMENT (a ``!`` or ``//`` running to the end of
            the line) as ignored, and the input ends with such a comment.  The
            parse must succeed and the resulting tree must contain only the INT
            token as a child.
            """
            grammar = r"""
            COMMENT: /(!|(\/\/))[^\n]*/
            %ignore COMMENT
            %import common.WS -> _WS
            %import common.INT
            start: "INT"i _WS+ INT _WS*
            """

            parser = _Lark(grammar)

            # The trailing "! This is a comment" part is what exercises %ignore.
            tree = parser.parse("int 1 ! This is a comment\n")
            self.assertEqual(tree.children, ['1'])



    # Compose a name from the parser and lexer names (using 'Scanless' when
    # LEXER is falsy, e.g. None) and assign it to _TestParser.__name__.
    _NAME = "Test" + PARSER.capitalize() + (LEXER or 'Scanless').capitalize()
    _TestParser.__name__ = _NAME