From 2e8f74c7746e3d091e76cbddded2c705d42d0c69 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Tue, 7 Feb 2017 17:58:02 +0200 Subject: [PATCH] Better error reporting in lexer and lalr parser --- lark/lexer.py | 4 ++-- lark/load_grammar.py | 1 + lark/parser.py | 7 +++++-- lark/tests/test_parser.py | 2 ++ 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/lark/lexer.py b/lark/lexer.py index 74821f1..22323cd 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -61,7 +61,7 @@ class Lexer(object): def lex(self, stream): lex_pos = 0 - line = 0 + line = 1 col_start_pos = 0 while True: i = 0 @@ -87,7 +87,7 @@ class Lexer(object): else: if lex_pos < len(stream): context = stream[lex_pos:lex_pos+5] - raise LexError("No token defined for: '%s' in %s" % (stream[lex_pos], context)) + raise LexError("No token defined for: '%s' in %s at line %d" % (stream[lex_pos], context, line)) break diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 9a93da9..57e1412 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -231,6 +231,7 @@ class ExtractAnonTokens(InlineTransformer): self.tokens = tokens self.token_set = token_set self.token_reverse = {value[1:-1]: name for name, value, _flags in tokens} + self.i = 0 def anontoken(self, token): if token.type == 'STRING': diff --git a/lark/parser.py b/lark/parser.py index 37edf18..07d6924 100644 --- a/lark/parser.py +++ b/lark/parser.py @@ -22,8 +22,11 @@ class Parser(object): return states_idx[state][key] except KeyError: expected = states_idx[state].keys() - context = ' '.join(['%s(%r)' % (t.type, t.value) for t in seq[i:i+5]]) - raise ParseError("Unexpected input %r.\nExpected: %s\nContext: %s" % (key, expected, context)) + context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[i:i+5]]) + token = seq[i] + raise ParseError("Unexpected input %r at line %d, column %d.\n" + "Expected: %s\n" + "Context: %s" % (token.value, token.line, token.column, expected, context)) def reduce(rule): if rule.expansion: diff --git a/lark/tests/test_parser.py b/lark/tests/test_parser.py index e9d6e01..3000ad4 100644 --- a/lark/tests/test_parser.py +++ b/lark/tests/test_parser.py @@ -267,6 +267,8 @@ class TestLalr(unittest.TestCase): """, parser='lalr') x = g.parse('Hello World') self.assertSequenceEqual(x.children, ['World']) + x = g.parse('HelloWorld') + self.assertSequenceEqual(x.children, ['HelloWorld']) def test_undefined_rule(self): self.assertRaises(GrammarError, Lark, """start: a""", parser='lalr')