Browse Source

Improved error reporting

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 7 years ago
parent
commit
772f1cc0aa
4 changed files with 49 additions and 15 deletions
  1. +12
    -2
      lark/lexer.py
  2. +15
    -4
      lark/load_grammar.py
  3. +3
    -2
      lark/parse_tree_builder.py
  4. +19
    -7
      lark/parsers/lalr_parser.py

+ 12
- 2
lark/lexer.py View File

@@ -7,6 +7,17 @@ from .utils import Str
class LexError(Exception):
    """Base class for errors raised by the lexer."""


class UnexpectedInput(LexError):
    """Raised when no token definition matches the input at a position.

    Args:
        seq: The input sequence being lexed.
        lex_pos: Index into ``seq`` at which no token could be matched.
        line: Line number to report in the error message.
        column: Column to report; stored on the exception but not part of
            the message text.

    Attributes:
        line: The ``line`` argument.
        column: The ``column`` argument.
        context: Up to five items of ``seq`` starting at ``lex_pos``.
    """

    def __init__(self, seq, lex_pos, line, column):
        context = seq[lex_pos:lex_pos+5]
        message = "No token defined for: '%s' in %r at line %d" % (seq[lex_pos], context, line)

        # Name this class (not its parent) so the MRO lookup starts right
        # after UnexpectedInput and LexError's initializer is not skipped.
        super(UnexpectedInput, self).__init__(message)

        self.line = line
        self.column = column
        self.context = context

class Token(Str):
def __new__(cls, type, value, pos_in_stream=None):
inst = Str.__new__(cls, value)
@@ -103,8 +114,7 @@ class Lexer(object):
break
else:
if lex_pos < len(stream):
context = stream[lex_pos:lex_pos+5]
raise LexError("No token defined for: '%s' in %s at line %d" % (stream[lex_pos], context, line))
raise UnexpectedInput(stream, lex_pos, line, lex_pos - col_start_pos)
break



+ 15
- 4
lark/load_grammar.py View File

@@ -1,10 +1,11 @@
import re
import codecs

from .lexer import Lexer, Token
from .lexer import Lexer, Token, UnexpectedInput

from .parse_tree_builder import ParseTreeBuilder
from .parser_frontends import LALR
from .parsers.lalr_parser import UnexpectedToken
from .common import is_terminal, GrammarError

from .tree import Tree as T, Transformer, InlineTransformer, Visitor
@@ -285,9 +286,19 @@ class GrammarLoader:
self.rule_tree_to_text = RuleTreeToText()

def load_grammar(self, grammar_text):

token_stream = list(self.lexer.lex(grammar_text+"\n"))
tree = self.simplify_tree.transform( self.parser.parse(token_stream) )
try:
token_stream = list(self.lexer.lex(grammar_text+"\n"))
except UnexpectedInput as e:
raise GrammarError("Unexpected input %r at line %d column %d" % (e.context, e.line, e.column))

try:
tree = self.simplify_tree.transform( self.parser.parse(token_stream) )
except UnexpectedToken as e:
if '_COLON' in e.expected:
raise GrammarError("Missing colon at line %s column %s" % (e.line, e.column))
elif 'tokenvalue' in e.expected:
raise GrammarError("Expecting a value at line %s column %s" % (e.line, e.column))
raise

# =================
# Process Tokens


+ 3
- 2
lark/parse_tree_builder.py View File

@@ -1,4 +1,4 @@
from .common import is_terminal
from .common import is_terminal, GrammarError

class Callback(object):
    """Empty attribute holder; handlers are attached to instances with setattr()."""
@@ -70,7 +70,8 @@ class ParseTreeBuilder:

alias_handler = create_rule_handler(expansion, f)

assert not hasattr(callback, _alias)
if hasattr(callback, _alias):
raise GrammarError("Rule expansion '%s' already exists in rule %s" % (' '.join(expansion), origin))
setattr(callback, _alias, alias_handler)

new_rules.append(( _origin, expansion, _alias ))


+ 19
- 7
lark/parsers/lalr_parser.py View File

@@ -1,6 +1,23 @@
from .lalr_analysis import ACTION_SHIFT
from ..common import ParseError

class UnexpectedToken(ParseError):
    """Parse error raised when the parser has no action for the current token.

    Args:
        token: The offending token. Its ``value`` goes into the message;
            ``line`` and ``column`` are read from it when present.
        expected: The names the parser would have accepted at this point.
        seq: The token sequence being parsed.
        index: Position in ``seq``; up to five tokens starting here are
            rendered as the context part of the message.

    Attributes:
        token: The ``token`` argument.
        expected: The ``expected`` argument.
        line: ``token.line`` or ``'?'`` if the token has no such attribute.
        column: ``token.column`` or ``'?'`` if the token has no such attribute.
    """

    def __init__(self, token, expected, seq, index):
        self.token = token
        self.expected = expected
        # Position info is optional on tokens; fall back to a placeholder.
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')

        context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]])
        message = ("Unexpected input %r at line %s, column %s.\n"
                   "Expected: %s\n"
                   "Context: %s" % (token.value, self.line, self.column, expected, context))

        # Name this class (not its parent) so the MRO lookup starts right
        # after UnexpectedToken and ParseError's initializer is not skipped.
        super(UnexpectedToken, self).__init__(message)




class Parser(object):
def __init__(self, ga, callback):
self.ga = ga
@@ -20,18 +37,13 @@ class Parser(object):
return states_idx[state][key]
except KeyError:
expected = states_idx[state].keys()
context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[i:i+5]])
try:
token = seq[i]
except IndexError:
assert key == '$end'
token = seq[-1]
raise ParseError("Unexpected input %r at line %s, column %s.\n"
"Expected: %s\n"
"Context: %s" % (token.value,
getattr(token, 'line', '?'),
getattr(token, 'column', '?'),
expected, context))

raise UnexpectedToken(token, expected, seq, i)

def reduce(rule):
if rule.expansion:


Loading…
Cancel
Save