diff --git a/lark/lexer.py b/lark/lexer.py
index 6fb6572..c24a5b3 100644
--- a/lark/lexer.py
+++ b/lark/lexer.py
@@ -11,7 +11,7 @@ class LexError(Exception):
 class UnexpectedInput(LexError):
     def __init__(self, seq, lex_pos, line, column, allowed=None):
         context = seq[lex_pos:lex_pos+5]
-        message = "No token defined for: '%s' in %r at line %d" % (seq[lex_pos], context, line)
+        message = "No token defined for: '%s' in %r at line %d col %d" % (seq[lex_pos], context, line, column)
 
         super(UnexpectedInput, self).__init__(message)
 
diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py
index f90c000..9ec04c3 100644
--- a/lark/parsers/earley.py
+++ b/lark/parsers/earley.py
@@ -137,14 +137,17 @@ class Column:
             if isinstance(item.expect, Terminal):
                 self.to_scan.append(item)
             else:
-                if item not in self.added:
-                    self.added.add(item)
-                    self.to_predict.append(item)
+                if item in self.added:
+                    continue
+                self.added.add(item)
+                self.to_predict.append(item)
 
             self.item_count += 1    # Only count if actually added
 
-    def __nonzero__(self):
+
+    def __bool__(self):
         return bool(self.item_count)
+    __nonzero__ = __bool__  # Py2 backwards-compatibility
 
 class Parser:
     def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None):
diff --git a/lark/parsers/xearley.py b/lark/parsers/xearley.py
index 693e54f..762c2a8 100644
--- a/lark/parsers/xearley.py
+++ b/lark/parsers/xearley.py
@@ -21,7 +21,7 @@ from collections import defaultdict
 
 from ..common import ParseError, UnexpectedToken, Terminal
-from ..lexer import Token
+from ..lexer import Token, UnexpectedInput
 from ..tree import Tree
 from .grammar_analysis import GrammarAnalyzer
 
@@ -115,6 +115,9 @@ class Parser:
                     next_set.add(delayed_matches[i+1])
                     del delayed_matches[i+1]    # No longer needed, so unburden memory
 
+            if not next_set and not delayed_matches:
+                raise UnexpectedInput(stream, i, text_line, text_column, to_scan)
+
             return next_set
 
         # Main loop starts
@@ -128,7 +131,7 @@ class Parser:
 
             if token == '\n':
                 text_line += 1
-                text_column = 0
+                text_column = 1
             else:
                 text_column += 1
 
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 35c1a44..ffed772 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -18,7 +18,7 @@ from io import (
 logging.basicConfig(level=logging.INFO)
 
 from lark.lark import Lark
-from lark.common import GrammarError, ParseError
+from lark.common import GrammarError, ParseError, UnexpectedToken
 from lark.lexer import LexError, UnexpectedInput
 from lark.tree import Tree, Transformer
 
@@ -718,6 +718,8 @@ def _make_parser_test(LEXER, PARSER):
         %s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items())))
 
     def test_float_without_lexer(self):
+        expected_error = UnexpectedInput if LEXER == 'dynamic' else UnexpectedToken
+
         g = _Lark("""start: ["+"|"-"] float
                      float: digit* "." digit+ exp?
                           | digit+ exp
@@ -727,7 +729,7 @@ def _make_parser_test(LEXER, PARSER):
         g.parse("1.2")
         g.parse("-.2e9")
         g.parse("+2e-9")
-        self.assertRaises(ParseError, g.parse, "+2e-9e")
+        self.assertRaises( expected_error, g.parse, "+2e-9e")
 
     def test_keep_all_tokens(self):
         l = _Lark("""start: "a"+ """, keep_all_tokens=True)
@@ -963,19 +965,16 @@ def _make_parser_test(LEXER, PARSER):
 
     @unittest.skipIf(LEXER==None, "Scanless doesn't support regular expressions")
     def test_regex_escaping(self):
-        expected_error = ParseError if LEXER == 'dynamic' else UnexpectedInput
-        # TODO Make dynamic parser raise UnexpectedInput if nothing scans?
-
         g = _Lark("start: /[ab]/")
         g.parse('a')
         g.parse('b')
-        self.assertRaises( expected_error, g.parse, 'c')
+        self.assertRaises( UnexpectedInput, g.parse, 'c')
 
         _Lark(r'start: /\w/').parse('a')
 
         g = _Lark(r'start: /\\w/')
-        self.assertRaises( expected_error, g.parse, 'a')
+        self.assertRaises( UnexpectedInput, g.parse, 'a')
         g.parse(r'\w')
 
         _Lark(r'start: /\[/').parse('[')