@@ -561,8 +561,8 @@ class GrammarLoader:
         ignore = []
         for (stmt,) in statements:
             if stmt.data == 'ignore':
-                expansions ,= stmt.children
-                ignore.append(expansions)
+                t ,= stmt.children
+                ignore.append(t)
             elif stmt.data == 'import':
                 dotted_path = stmt.children[0].children
                 name = stmt.children[1] if len(stmt.children)>1 else dotted_path[-1]
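
A side note on the "t ,= stmt.children" pattern introduced above (and reused in the next hunk): it is ordinary single-element tuple unpacking, so it also acts as an assertion that the node has exactly one child. A small standalone sketch, independent of the lark codebase:

    # Single-element unpacking: succeeds only if the sequence holds exactly one item.
    children = ['only-child']
    t ,= children            # same as: (t,) = children
    print(t)                 # -> only-child

    try:
        t ,= ['a', 'b']      # two items -> ValueError
    except ValueError as err:
        print(err)           # -> too many values to unpack (expected 1)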
@@ -580,9 +580,22 @@ class GrammarLoader:
                 raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name)
 
         # Handle ignore tokens
-        ignore_defs = [('__IGNORE_%d'%i, t) for i, t in enumerate(ignore)]
-        ignore_names = [name for name,_ in ignore_defs]
-        token_defs += ignore_defs
+        # XXX A slightly hacky solution. Recognition of %ignore TOKEN as separate comes from the lexer's
+        #     inability to handle duplicate tokens (two names, one value)
+        ignore_names = []
+        for t in ignore:
+            if t.data=='expansions' and len(t.children) == 1:
+                t2 ,= t.children
+                if t2.data=='expansion' and len(t2.children) == 1:
+                    item ,= t2.children
+                    if isinstance(item, Token) and item.type == 'TOKEN':
+                        ignore_names.append(item.value)
+                        continue
+
+            name = '__IGNORE_%d'% len(ignore_names)
+            ignore_names.append(name)
+            token_defs.append((name, t))
 
         # Verify correctness 2
         token_names = set()
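
For context, the effect of the new branch shows up at the grammar level: when an %ignore statement names a single terminal, that terminal's own name is reused instead of minting a fresh __IGNORE_n token with the same pattern, which is what previously collided in the lexer. A minimal sketch of the behaviour this enables, mirroring the new test further down (the grammar here is illustrative, not part of this diff):

    from lark import Lark

    # %ignore of an imported, named terminal (WS) reuses the name 'WS';
    # an anonymous pattern under %ignore would still get a generated __IGNORE_n name.
    grammar = r"""
    start: "Hello" NAME
    NAME: /\w+/
    %import common.WS
    %ignore WS
    """

    parser = Lark(grammar, parser='earley')
    tree = parser.parse("Hello World")
    print(tree.children)     # the NAME token 'World'; the whitespace was skipped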
@@ -47,7 +47,10 @@ class TestParsers(unittest.TestCase):
         self.assertRaises(GrammarError, Lark, g, parser='lalr')
 
-        l = Lark(g, parser='earley')
+        l = Lark(g, parser='earley', lexer=None)
+        self.assertRaises(ParseError, l.parse, 'a')
+
+        l = Lark(g, parser='earley', lexer='dynamic')
         self.assertRaises(ParseError, l.parse, 'a')
@@ -385,6 +388,18 @@ def _make_parser_test(LEXER, PARSER):
             x = g.parse('Hello HelloWorld')
             self.assertSequenceEqual(x.children, ['HelloWorld'])
 
+        def test_token_collision_WS(self):
+            g = _Lark("""start: "Hello" NAME
+                        NAME: /\w/+
+                        %import common.WS
+                        %ignore WS
+                    """)
+            x = g.parse('Hello World')
+            self.assertSequenceEqual(x.children, ['World'])
+            x = g.parse('Hello HelloWorld')
+            self.assertSequenceEqual(x.children, ['HelloWorld'])
+
         def test_token_collision2(self):
             # NOTE: This test reveals a bug in token reconstruction in Scanless Earley
             #       I probably need to re-write grammar transformation