| @@ -7,6 +7,17 @@ from .utils import Str | |||||
class LexError(Exception):
    """Base class for errors raised while lexing an input stream."""
    pass


class UnexpectedInput(LexError):
    """Raised when the lexer finds a character no token definition matches.

    Carries the error position (``line``, ``column``) and a short
    ``context`` snippet (up to 5 characters from the failure point) so
    callers can produce a precise diagnostic.
    """
    def __init__(self, seq, lex_pos, line, column):
        # Up to 5 characters of lookahead around the failure point,
        # included verbatim in the message for easier debugging.
        context = seq[lex_pos:lex_pos+5]
        message = "No token defined for: '%s' in %r at line %d" % (seq[lex_pos], context, line)

        # BUGFIX: was super(LexError, self) — that skips LexError in the
        # MRO; the first argument to super() must be the current class.
        super(UnexpectedInput, self).__init__(message)

        self.line = line
        self.column = column
        self.context = context
| class Token(Str): | class Token(Str): | ||||
| def __new__(cls, type, value, pos_in_stream=None): | def __new__(cls, type, value, pos_in_stream=None): | ||||
| inst = Str.__new__(cls, value) | inst = Str.__new__(cls, value) | ||||
| @@ -103,8 +114,7 @@ class Lexer(object): | |||||
| break | break | ||||
| else: | else: | ||||
| if lex_pos < len(stream): | if lex_pos < len(stream): | ||||
| context = stream[lex_pos:lex_pos+5] | |||||
| raise LexError("No token defined for: '%s' in %s at line %d" % (stream[lex_pos], context, line)) | |||||
| raise UnexpectedInput(stream, lex_pos, line, lex_pos - col_start_pos) | |||||
| break | break | ||||
| @@ -1,10 +1,11 @@ | |||||
| import re | import re | ||||
| import codecs | import codecs | ||||
| from .lexer import Lexer, Token | |||||
| from .lexer import Lexer, Token, UnexpectedInput | |||||
| from .parse_tree_builder import ParseTreeBuilder | from .parse_tree_builder import ParseTreeBuilder | ||||
| from .parser_frontends import LALR | from .parser_frontends import LALR | ||||
| from .parsers.lalr_parser import UnexpectedToken | |||||
| from .common import is_terminal, GrammarError | from .common import is_terminal, GrammarError | ||||
| from .tree import Tree as T, Transformer, InlineTransformer, Visitor | from .tree import Tree as T, Transformer, InlineTransformer, Visitor | ||||
| @@ -285,9 +286,19 @@ class GrammarLoader: | |||||
| self.rule_tree_to_text = RuleTreeToText() | self.rule_tree_to_text = RuleTreeToText() | ||||
| def load_grammar(self, grammar_text): | def load_grammar(self, grammar_text): | ||||
| token_stream = list(self.lexer.lex(grammar_text+"\n")) | |||||
| tree = self.simplify_tree.transform( self.parser.parse(token_stream) ) | |||||
| try: | |||||
| token_stream = list(self.lexer.lex(grammar_text+"\n")) | |||||
| except UnexpectedInput as e: | |||||
| raise GrammarError("Unexpected input %r at line %d column %d" % (e.context, e.line, e.column)) | |||||
| try: | |||||
| tree = self.simplify_tree.transform( self.parser.parse(token_stream) ) | |||||
| except UnexpectedToken as e: | |||||
| if '_COLON' in e.expected: | |||||
| raise GrammarError("Missing colon at line %s column %s" % (e.line, e.column)) | |||||
| elif 'tokenvalue' in e.expected: | |||||
| raise GrammarError("Expecting a value at line %s column %s" % (e.line, e.column)) | |||||
| raise | |||||
| # ================= | # ================= | ||||
| # Process Tokens | # Process Tokens | ||||
| @@ -1,4 +1,4 @@ | |||||
| from .common import is_terminal | |||||
| from .common import is_terminal, GrammarError | |||||
| class Callback(object): | class Callback(object): | ||||
| pass | pass | ||||
| @@ -70,7 +70,8 @@ class ParseTreeBuilder: | |||||
| alias_handler = create_rule_handler(expansion, f) | alias_handler = create_rule_handler(expansion, f) | ||||
| assert not hasattr(callback, _alias) | |||||
| if hasattr(callback, _alias): | |||||
| raise GrammarError("Rule expansion '%s' already exists in rule %s" % (' '.join(expansion), origin)) | |||||
| setattr(callback, _alias, alias_handler) | setattr(callback, _alias, alias_handler) | ||||
| new_rules.append(( _origin, expansion, _alias )) | new_rules.append(( _origin, expansion, _alias )) | ||||
| @@ -1,6 +1,23 @@ | |||||
| from .lalr_analysis import ACTION_SHIFT | from .lalr_analysis import ACTION_SHIFT | ||||
| from ..common import ParseError | from ..common import ParseError | ||||
class UnexpectedToken(ParseError):
    """Raised by the LALR parser when the next token has no valid action.

    Exposes the offending ``token``, the set of ``expected`` token types,
    and the token's ``line``/``column`` (``'?'`` when the token carries no
    position info) so callers can build targeted error messages.
    """
    def __init__(self, token, expected, seq, index):
        self.token = token
        self.expected = expected
        # Position attrs may be absent on synthetic tokens; default to '?'.
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')

        # Show up to 5 upcoming tokens as "'value'(TYPE)" for context.
        context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]])
        message = ("Unexpected input %r at line %s, column %s.\n"
                   "Expected: %s\n"
                   "Context: %s" % (token.value, self.line, self.column, expected, context))

        # BUGFIX: was super(ParseError, self) — that skips ParseError in
        # the MRO; the first argument to super() must be the current class.
        super(UnexpectedToken, self).__init__(message)
| class Parser(object): | class Parser(object): | ||||
| def __init__(self, ga, callback): | def __init__(self, ga, callback): | ||||
| self.ga = ga | self.ga = ga | ||||
| @@ -20,18 +37,13 @@ class Parser(object): | |||||
| return states_idx[state][key] | return states_idx[state][key] | ||||
| except KeyError: | except KeyError: | ||||
| expected = states_idx[state].keys() | expected = states_idx[state].keys() | ||||
| context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[i:i+5]]) | |||||
| try: | try: | ||||
| token = seq[i] | token = seq[i] | ||||
| except IndexError: | except IndexError: | ||||
| assert key == '$end' | assert key == '$end' | ||||
| token = seq[-1] | token = seq[-1] | ||||
| raise ParseError("Unexpected input %r at line %s, column %s.\n" | |||||
| "Expected: %s\n" | |||||
| "Context: %s" % (token.value, | |||||
| getattr(token, 'line', '?'), | |||||
| getattr(token, 'column', '?'), | |||||
| expected, context)) | |||||
| raise UnexpectedToken(token, expected, seq, i) | |||||
| def reduce(rule): | def reduce(rule): | ||||
| if rule.expansion: | if rule.expansion: | ||||