class LexError(Exception):
    """Base class for errors raised by the lexer module."""


class UnexpectedInput(LexError):
    """Raised when no token definition matches the input at a position.

    Parameters:
        seq: the input sequence being lexed.
        lex_pos: index into ``seq`` where no token could be matched.
        line: line number reported in the error message.
        column: column value supplied by the caller; stored as-is.

    Attributes:
        line, column: copied from the constructor arguments.
        context: up to five items of ``seq`` starting at ``lex_pos``.
    """

    def __init__(self, seq, lex_pos, line, column):
        context = seq[lex_pos:lex_pos+5]
        # Indexing past the end would raise IndexError and mask the real
        # error, so fall back to an empty string at end of input.
        offending = seq[lex_pos] if lex_pos < len(seq) else ''
        message = "No token defined for: '%s' in %r at line %d" % (offending, context, line)

        # super() must name *this* class; naming LexError would start the MRO
        # lookup after LexError and skip any cooperative base in between.
        super(UnexpectedInput, self).__init__(message)

        self.line = line
        self.column = column
        self.context = context
class UnexpectedToken(ParseError):
    """Raised by the parser when the current token has no valid action.

    Parameters:
        token: the offending token; its ``value`` is shown in the message and
            its ``line``/``column`` attributes are used when present.
        expected: the collection of acceptable token names, shown verbatim.
        seq: the token sequence being parsed.
        index: position in ``seq`` from which the context excerpt is taken.

    Attributes:
        token, expected: copied from the constructor arguments.
        line, column: taken from ``token``, or ``'?'`` when it lacks them.
    """

    def __init__(self, token, expected, seq, index):
        self.token = token
        self.expected = expected
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')

        # Show up to five tokens starting at the failure point.
        context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]])
        message = ("Unexpected input %r at line %s, column %s.\n"
                   "Expected: %s\n"
                   "Context: %s" % (token.value, self.line, self.column, expected, context))

        # super() must name *this* class; naming ParseError would skip
        # ParseError (and any class between it and this one) in the MRO.
        super(UnexpectedToken, self).__init__(message)
'$end' token = seq[-1] - raise ParseError("Unexpected input %r at line %s, column %s.\n" - "Expected: %s\n" - "Context: %s" % (token.value, - getattr(token, 'line', '?'), - getattr(token, 'column', '?'), - expected, context)) + + raise UnexpectedToken(token, expected, seq, i) def reduce(rule): if rule.expansion: