@@ -11,7 +11,7 @@ class LexError(Exception):
 class UnexpectedInput(LexError):
     def __init__(self, seq, lex_pos, line, column, allowed=None):
         context = seq[lex_pos:lex_pos+5]
-        message = "No token defined for: '%s' in %r at line %d" % (seq[lex_pos], context, line)
+        message = "No token defined for: '%s' in %r at line %d col %d" % (seq[lex_pos], context, line, column)

         super(UnexpectedInput, self).__init__(message)
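
With `column` now threaded into the message, the lexer error pins down an exact
position. A quick sanity check (not part of the patch) that constructs the
exception directly:

    from lark.lexer import UnexpectedInput

    # seq[6] is 'w'; seq[6:6+5] is 'world'
    err = UnexpectedInput("hello world", 6, line=1, column=7)
    print(err)
    # -> No token defined for: 'w' in 'world' at line 1 col 7
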
@@ -137,14 +137,17 @@ class Column:
                 if isinstance(item.expect, Terminal):
                     self.to_scan.append(item)
                 else:
-                    if item not in self.added:
-                        self.added.add(item)
-                        self.to_predict.append(item)
+                    if item in self.added:
+                        continue
+                    self.added.add(item)
+                    self.to_predict.append(item)

             self.item_count += 1    # Only count if actually added

-    def __nonzero__(self):
+    def __bool__(self):
         return bool(self.item_count)
+    __nonzero__ = __bool__    # Py2 backwards-compatibility


 class Parser:
     def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None):
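
The `__bool__`/`__nonzero__` pair is the usual Py2/Py3 truthiness idiom:
Python 3 looks up `__bool__`, Python 2 looks up `__nonzero__`, so aliasing one
to the other lets `if not next_set: ...` (added in the scanner hunk below) work
on both. A standalone sketch of the idiom, outside of lark:

    class Column:
        def __init__(self):
            self.item_count = 0

        def __bool__(self):                # Python 3 truth test
            return bool(self.item_count)
        __nonzero__ = __bool__             # Python 2 falls back to this

    col = Column()
    assert not col         # nothing counted yet -> falsy
    col.item_count += 1
    assert col             # truthy once an item was actually added
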
@@ -21,7 +21,7 @@
 from collections import defaultdict

 from ..common import ParseError, UnexpectedToken, Terminal
-from ..lexer import Token
+from ..lexer import Token, UnexpectedInput
 from ..tree import Tree
 from .grammar_analysis import GrammarAnalyzer
@@ -115,6 +115,9 @@ class Parser:
                     next_set.add(delayed_matches[i+1])
                     del delayed_matches[i+1]    # No longer needed, so unburden memory

+            if not next_set and not delayed_matches:
+                raise UnexpectedInput(stream, i, text_line, text_column, to_scan)
+
             return next_set

         # Main loop starts
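
This guard is the behavioral core of the patch, and it retires the TODO removed
from test_regex_escaping below: when nothing scans and no delayed matches are
pending, the dynamic lexer now raises UnexpectedInput instead of failing with a
less specific error later. A hedged usage sketch, assuming the dynamic-lexer
Earley configuration that the tests exercise:

    from lark.lark import Lark
    from lark.lexer import UnexpectedInput

    g = Lark('start: /[ab]/', parser='earley', lexer='dynamic')
    g.parse('a')               # scans fine
    try:
        g.parse('c')           # nothing scans -> UnexpectedInput
    except UnexpectedInput as e:
        print(e)               # names the offending character, line and column
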
@@ -128,7 +131,7 @@ class Parser:
             if token == '\n':
                 text_line += 1
-                text_column = 0
+                text_column = 1
             else:
                 text_column += 1
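
Resetting the column to 1 rather than 0 after a newline keeps the count
1-based, consistent with the 1-based `col %d` now reported by UnexpectedInput.
A minimal sketch (not lark code) of 1-based position tracking:

    def positions(stream):
        # Columns restart at 1 after '\n', mirroring the convention above.
        line, column = 1, 1
        for ch in stream:
            yield ch, line, column
            if ch == '\n':
                line, column = line + 1, 1
            else:
                column += 1

    assert list(positions("ab\nc")) == [
        ('a', 1, 1), ('b', 1, 2), ('\n', 1, 3), ('c', 2, 1)]
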
@@ -18,7 +18,7 @@ from io import (
 logging.basicConfig(level=logging.INFO)

 from lark.lark import Lark
-from lark.common import GrammarError, ParseError
+from lark.common import GrammarError, ParseError, UnexpectedToken
 from lark.lexer import LexError, UnexpectedInput
 from lark.tree import Tree, Transformer
@@ -718,6 +718,8 @@ def _make_parser_test(LEXER, PARSER):
 %s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items())))

     def test_float_without_lexer(self):
+        expected_error = UnexpectedInput if LEXER == 'dynamic' else UnexpectedToken
         g = _Lark("""start: ["+"|"-"] float
                      float: digit* "." digit+ exp?
                           | digit+ exp
@@ -727,7 +729,7 @@ def _make_parser_test(LEXER, PARSER):
         g.parse("1.2")
         g.parse("-.2e9")
         g.parse("+2e-9")
-        self.assertRaises(ParseError, g.parse, "+2e-9e")
+        self.assertRaises( expected_error, g.parse, "+2e-9e")

     def test_keep_all_tokens(self):
         l = _Lark("""start: "a"+ """, keep_all_tokens=True)
@@ -963,19 +965,16 @@ def _make_parser_test(LEXER, PARSER):
     @unittest.skipIf(LEXER==None, "Scanless doesn't support regular expressions")
     def test_regex_escaping(self):
-        expected_error = ParseError if LEXER == 'dynamic' else UnexpectedInput
-        # TODO Make dynamic parser raise UnexpectedInput if nothing scans?
         g = _Lark("start: /[ab]/")
         g.parse('a')
         g.parse('b')
-        self.assertRaises( expected_error, g.parse, 'c')
+        self.assertRaises( UnexpectedInput, g.parse, 'c')

         _Lark(r'start: /\w/').parse('a')

         g = _Lark(r'start: /\\w/')
-        self.assertRaises( expected_error, g.parse, 'a')
+        self.assertRaises( UnexpectedInput, g.parse, 'a')
         g.parse(r'\w')

         _Lark(r'start: /\[/').parse('[')