@@ -561,8 +561,8 @@ class GrammarLoader:
         ignore = []
         for (stmt,) in statements:
             if stmt.data == 'ignore':
-                expansions ,= stmt.children
-                ignore.append(expansions)
+                t ,= stmt.children
+                ignore.append(t)
             elif stmt.data == 'import':
                 dotted_path = stmt.children[0].children
                 name = stmt.children[1] if len(stmt.children)>1 else dotted_path[-1]
@@ -580,9 +580,22 @@ class GrammarLoader:
                 raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name)

         # Handle ignore tokens
-        ignore_defs = [('__IGNORE_%d'%i, t) for i, t in enumerate(ignore)]
-        ignore_names = [name for name,_ in ignore_defs]
-        token_defs += ignore_defs
+        # XXX A slightly hacky solution. Recognition of %ignore TOKEN as separate comes from the lexer's
+        #     inability to handle duplicate tokens (two names, one value)
+        ignore_names = []
+        for t in ignore:
+            if t.data=='expansions' and len(t.children) == 1:
+                t2 ,= t.children
+                if t2.data=='expansion' and len(t2.children) == 1:
+                    item ,= t2.children
+                    if isinstance(item, Token) and item.type == 'TOKEN':
+                        ignore_names.append(item.value)
+                        continue
+
+            name = '__IGNORE_%d'% len(ignore_names)
+            ignore_names.append(name)
+            token_defs.append((name, t))

         # Verify correctness 2
         token_names = set()
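
From the grammar author's side, the new loop distinguishes two forms of %ignore: a bare named terminal (e.g. %ignore WS) is now recorded under its own name, while any other ignore expression still gets a synthetic __IGNORE_n token definition, so the lexer never sees two names for one value. A minimal sketch of both forms, assuming a standard "from lark import Lark" import; the grammars below are illustrative and not part of this diff:

    # Illustrative sketch, not part of the diff. '%ignore WS' names an existing
    # terminal, so the loader records 'WS' directly in ignore_names; the anonymous
    # pattern form falls through to the synthetic '__IGNORE_0' token definition.
    from lark import Lark

    named_ignore = Lark(r"""
        start: "Hello" NAME
        NAME: /\w/+
        %import common.WS
        %ignore WS
    """, parser='earley')

    pattern_ignore = Lark(r"""
        start: "Hello" NAME
        NAME: /\w/+
        %ignore /\s/+
    """, parser='earley')

    # Both accept whitespace between the words, mirroring the assertions in the tests below.
    assert named_ignore.parse('Hello World').children == ['World']
    assert pattern_ignore.parse('Hello World').children == ['World']
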
@@ -47,7 +47,10 @@ class TestParsers(unittest.TestCase):
         self.assertRaises(GrammarError, Lark, g, parser='lalr')

-        l = Lark(g, parser='earley')
+        l = Lark(g, parser='earley', lexer=None)
+        self.assertRaises(ParseError, l.parse, 'a')
+
+        l = Lark(g, parser='earley', lexer='dynamic')
         self.assertRaises(ParseError, l.parse, 'a')
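
The updated test now exercises both Earley configurations separately: scanless mode (lexer=None) and the dynamic lexer (lexer='dynamic'). A rough usage sketch, with an assumed toy grammar rather than the g used in the test above:

    # Sketch only; the toy grammar is an assumption, the parser/lexer keyword
    # arguments are the ones exercised by the updated test above.
    from lark import Lark

    grammar = r"""
        start: "(" NAME ")"
        NAME: /\w/+
    """

    scanless = Lark(grammar, parser='earley', lexer=None)           # no separate lexing stage
    with_dynamic = Lark(grammar, parser='earley', lexer='dynamic')  # lexer driven by the parser

    for p in (scanless, with_dynamic):
        assert p.parse('(World)').children == ['World']
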
@@ -385,6 +388,18 @@ def _make_parser_test(LEXER, PARSER):
         x = g.parse('Hello HelloWorld')
         self.assertSequenceEqual(x.children, ['HelloWorld'])

+    def test_token_collision_WS(self):
+        g = _Lark("""start: "Hello" NAME
+                    NAME: /\w/+
+                    %import common.WS
+                    %ignore WS
+                    """)
+        x = g.parse('Hello World')
+        self.assertSequenceEqual(x.children, ['World'])
+        x = g.parse('Hello HelloWorld')
+        self.assertSequenceEqual(x.children, ['HelloWorld'])
+
     def test_token_collision2(self):
         # NOTE: This test reveals a bug in token reconstruction in Scanless Earley
         # I probably need to re-write grammar transformation
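
test_token_collision_WS is the test-suite counterpart of the loader change above: WS is an imported named terminal, so %ignore WS goes through the new bare-TOKEN branch and keeps its own name instead of an __IGNORE_n alias. For contrast, a hypothetical sketch (not part of the diff) of the behaviour it guards against, i.e. the same grammar with no ignore directive rejecting input that contains a space:

    # Hypothetical counterpart, not in the diff: with no %ignore directive nothing
    # in the grammar can consume the space, so 'Hello World' should be rejected.
    # The exact exception type depends on the lexer configuration (a lex error
    # with a standard lexer, a parse error otherwise), hence the broad check.
    from lark import Lark

    no_ws = Lark(r"""
        start: "Hello" NAME
        NAME: /\w/+
    """, parser='earley')

    try:
        no_ws.parse('Hello World')
    except Exception:
        pass  # expected: the space between the two words cannot be matched
    else:
        raise AssertionError('expected the unignorable space to be rejected')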