@@ -561,8 +561,8 @@ class GrammarLoader:
         ignore = []
         for (stmt,) in statements:
             if stmt.data == 'ignore':
-                expansions ,= stmt.children
-                ignore.append(expansions)
+                t ,= stmt.children
+                ignore.append(t)
             elif stmt.data == 'import':
                 dotted_path = stmt.children[0].children
                 name = stmt.children[1] if len(stmt.children)>1 else dotted_path[-1]
@@ -580,9 +580,22 @@ class GrammarLoader:
                 raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name)

         # Handle ignore tokens
-        ignore_defs = [('__IGNORE_%d'%i, t) for i, t in enumerate(ignore)]
-        ignore_names = [name for name,_ in ignore_defs]
-        token_defs += ignore_defs
+        # XXX A slightly hacky solution. Recognition of %ignore TOKEN as separate comes from the lexer's
+        #     inability to handle duplicate tokens (two names, one value)
+        ignore_names = []
+        for t in ignore:
+            if t.data=='expansions' and len(t.children) == 1:
+                t2 ,= t.children
+                if t2.data=='expansion' and len(t2.children) == 1:
+                    item ,= t2.children
+                    if isinstance(item, Token) and item.type == 'TOKEN':
+                        ignore_names.append(item.value)
+                        continue
+
+            name = '__IGNORE_%d'% len(ignore_names)
+            ignore_names.append(name)
+            token_defs.append((name, t))

         # Verify correctness 2
         token_names = set()
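
From the grammar author's side, the new loop distinguishes two forms of %ignore: a bare named terminal (e.g. %ignore WS) is now recorded under its own name, while any other ignore expression still gets a synthetic __IGNORE_n token definition, so the lexer never sees two names for one value. A minimal sketch of both forms, assuming a standard "from lark import Lark" import; the grammars below are illustrative and not part of this diff:

    # Illustrative sketch, not part of the diff. '%ignore WS' names an existing
    # terminal, so the loader records 'WS' directly in ignore_names; the anonymous
    # pattern form falls through to the synthetic '__IGNORE_0' token definition.
    from lark import Lark

    named_ignore = Lark(r"""
        start: "Hello" NAME
        NAME: /\w/+
        %import common.WS
        %ignore WS
    """, parser='earley')

    pattern_ignore = Lark(r"""
        start: "Hello" NAME
        NAME: /\w/+
        %ignore /\s/+
    """, parser='earley')

    # Both accept whitespace between the words, mirroring the assertions in the tests below.
    assert named_ignore.parse('Hello World').children == ['World']
    assert pattern_ignore.parse('Hello World').children == ['World']
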
@@ -47,7 +47,10 @@ class TestParsers(unittest.TestCase):
         self.assertRaises(GrammarError, Lark, g, parser='lalr')

-        l = Lark(g, parser='earley')
+        l = Lark(g, parser='earley', lexer=None)
+        self.assertRaises(ParseError, l.parse, 'a')
+
+        l = Lark(g, parser='earley', lexer='dynamic')
         self.assertRaises(ParseError, l.parse, 'a')
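
The updated test now exercises both Earley configurations separately: scanless mode (lexer=None) and the dynamic lexer (lexer='dynamic'). A rough usage sketch, with an assumed toy grammar rather than the g used in the test above:

    # Sketch only; the toy grammar is an assumption, the parser/lexer keyword
    # arguments are the ones exercised by the updated test above.
    from lark import Lark

    grammar = r"""
        start: "(" NAME ")"
        NAME: /\w/+
    """

    scanless = Lark(grammar, parser='earley', lexer=None)           # no separate lexing stage
    with_dynamic = Lark(grammar, parser='earley', lexer='dynamic')  # lexer driven by the parser

    for p in (scanless, with_dynamic):
        assert p.parse('(World)').children == ['World']
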
@@ -385,6 +388,18 @@ def _make_parser_test(LEXER, PARSER):
         x = g.parse('Hello HelloWorld')
         self.assertSequenceEqual(x.children, ['HelloWorld'])

+    def test_token_collision_WS(self):
+        g = _Lark("""start: "Hello" NAME
+                    NAME: /\w/+
+                    %import common.WS
+                    %ignore WS
+                    """)
+        x = g.parse('Hello World')
+        self.assertSequenceEqual(x.children, ['World'])
+        x = g.parse('Hello HelloWorld')
+        self.assertSequenceEqual(x.children, ['HelloWorld'])
+
     def test_token_collision2(self):
         # NOTE: This test reveals a bug in token reconstruction in Scanless Earley
         # I probably need to re-write grammar transformation
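
test_token_collision_WS is the test-suite counterpart of the loader change above: WS is an imported named terminal, so %ignore WS goes through the new bare-TOKEN branch and keeps its own name instead of an __IGNORE_n alias. For contrast, a hypothetical sketch (not part of the diff) of the behaviour it guards against, i.e. the same grammar with no ignore directive rejecting input that contains a space:

    # Hypothetical counterpart, not in the diff: with no %ignore directive nothing
    # in the grammar can consume the space, so 'Hello World' should be rejected.
    # The exact exception type depends on the lexer configuration (a lex error
    # with a standard lexer, a parse error otherwise), hence the broad check.
    from lark import Lark

    no_ws = Lark(r"""
        start: "Hello" NAME
        NAME: /\w/+
    """, parser='earley')

    try:
        no_ws.parse('Hello World')
    except Exception:
        pass  # expected: the space between the two words cannot be matched
    else:
        raise AssertionError('expected the unignorable space to be rejected')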