@@ -0,0 +1,79 @@ | |||
""" | |||
Example-Driven Error Reporting | |||
============================== | |||
A demonstration of example-driven error reporting with the Earley parser | |||
(See also: error_reporting_lalr.py) | |||
""" | |||
from lark import Lark, UnexpectedInput | |||
from _json_parser import json_grammar # Using the grammar from the json_parser example | |||
# This example is specifically about the Earley parser (see the module
# docstring), so request it explicitly instead of relying on Lark's default.
json_parser = Lark(json_grammar, parser='earley')
class JsonSyntaxError(SyntaxError):
    """Base class for the labelled JSON syntax errors of this example.

    Instances are created as ``cls(context, line, column)``; ``__str__``
    reads those three positional values back from ``self.args`` and prefixes
    them with the class-level ``label``.
    """

    # Generic fallback so that formatting the base class itself does not fail
    # with AttributeError; every subclass below overrides it.
    label = 'Syntax Error'

    def __str__(self):
        context, line, column = self.args
        return '%s at line %s, column %s.\n\n%s' % (self.label, line, column, context)


class JsonMissingValue(JsonSyntaxError):
    """JSON syntax error reported with the label 'Missing Value'."""
    label = 'Missing Value'


class JsonMissingOpening(JsonSyntaxError):
    """JSON syntax error reported with the label 'Missing Opening'."""
    label = 'Missing Opening'


class JsonMissingClosing(JsonSyntaxError):
    """JSON syntax error reported with the label 'Missing Closing'."""
    label = 'Missing Closing'


class JsonMissingComma(JsonSyntaxError):
    """JSON syntax error reported with the label 'Missing Comma'."""
    label = 'Missing Comma'


class JsonTrailingComma(JsonSyntaxError):
    """JSON syntax error reported with the label 'Trailing Comma'."""
    label = 'Trailing Comma'
def parse(json_text):
    """Parse ``json_text`` with ``json_parser`` and return the parse result.

    When parsing fails with ``UnexpectedInput``, the error is compared (via
    ``match_examples``) against the malformed samples listed below, grouped
    by the exception class that should be reported for them.

    Raises:
        JsonSyntaxError: the subclass selected by ``match_examples``, built
            from the error's context, line and column.
        UnexpectedInput: the original error, re-raised unchanged, when no
            example matches.
    """
    try:
        # Return the result; it used to be bound to an unused local, which
        # made a successful parse return None.
        return json_parser.parse(json_text)
    except UnexpectedInput as u:
        exc_class = u.match_examples(json_parser.parse, {
            JsonMissingOpening: ['{"foo": ]}',
                                 '{"foor": }}',
                                 '{"foo": }'],
            JsonMissingClosing: ['{"foo": [}',
                                 '{',
                                 '{"a": 1',
                                 '[1'],
            JsonMissingComma: ['[1 2]',
                               '[false 1]',
                               '["b" 1]',
                               '{"a":true 1:4}',
                               '{"a":1 1:4}',
                               '{"a":"b" 1:4}'],
            JsonTrailingComma: ['[,]',
                                '[1,]',
                                '[1,2,]',
                                '{"foo":1,}',
                                '{"foo":false,"bar":true,}'],
        }, use_accepts=True)
        if not exc_class:
            # No example matched: let the original parser error propagate.
            raise
        raise exc_class(u.get_context(json_text), u.line, u.column)
def test():
    """Run ``parse`` on two malformed inputs and print the matched errors.

    Each sample is paired with the only exception class that is caught for
    it; any other exception propagates to the caller.
    """
    samples = (
        ('{"example1": "value"', JsonMissingClosing),
        ('{"example2": ] ', JsonMissingOpening),
    )
    for text, expected_error in samples:
        try:
            parse(text)
        except expected_error as err:
            print(err)


if __name__ == '__main__':
    test()
@@ -3,7 +3,7 @@ Example-Driven Error Reporting | |||
============================== | |||
A demonstration of example-driven error reporting with the LALR parser | |||
(See also: error_reporting_earley.py) | |||
""" | |||
from lark import Lark, UnexpectedInput | |||
@@ -3,7 +3,7 @@ from .tree import Tree | |||
from .visitors import Transformer, Visitor, v_args, Discard, Transformer_NonRecursive | |||
from .visitors import InlineTransformer, inline_args # XXX Deprecated | |||
from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken, | |||
UnexpectedInput, UnexpectedCharacters, LarkError) | |||
UnexpectedInput, UnexpectedCharacters, UnexpectedEOF, LarkError) | |||
from .lexer import Token | |||
from .lark import Lark | |||
@@ -19,14 +19,6 @@ class LexError(LarkError): | |||
pass | |||
class UnexpectedEOF(ParseError):
    """Parse error reporting an unexpected end of input.

    ``expected`` is an iterable of objects exposing a ``name`` attribute; it
    is stored on the instance and listed, one bullet per name, in the message.
    """

    def __init__(self, expected):
        self.expected = expected

        bullet_list = '\n\t* '.join(term.name for term in self.expected)
        message = "Unexpected end-of-input. Expected one of: \n\t* %s\n" % bullet_list
        super(UnexpectedEOF, self).__init__(message)
class UnexpectedInput(LarkError): | |||
"""UnexpectedInput Error. | |||
@@ -47,6 +39,7 @@ class UnexpectedInput(LarkError): | |||
The parser doesn't hold a copy of the text it has to parse, | |||
so you have to provide it again | |||
""" | |||
assert self.pos_in_stream is not None, self | |||
pos = self.pos_in_stream | |||
start = max(pos - span, 0) | |||
end = pos + span | |||
@@ -91,7 +84,7 @@ class UnexpectedInput(LarkError): | |||
parse_fn(malformed) | |||
except UnexpectedInput as ut: | |||
if ut.state == self.state: | |||
if use_accepts and ut.accepts != self.accepts: | |||
if use_accepts and hasattr(self, 'accepts') and ut.accepts != self.accepts: | |||
logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" % | |||
(self.state, self.accepts, ut.accepts, i, j)) | |||
continue | |||
@@ -114,6 +107,19 @@ class UnexpectedInput(LarkError): | |||
return candidate[0] | |||
class UnexpectedEOF(ParseError, UnexpectedInput):
    """Parse error reporting an unexpected end of input.

    ``expected`` is an iterable of objects exposing a ``name`` attribute and
    is listed in the message; ``state`` is stored as given (default ``None``).
    The instance also carries a synthetic ``<EOF>`` token and -1 placeholders
    for ``pos_in_stream``, ``line`` and ``column``.
    """

    def __init__(self, expected, state=None):
        # Function-scope import, kept as in the original -- presumably to
        # avoid a circular import with the lexer module; confirm before moving.
        from .lexer import Token

        self.expected = expected
        self.state = state
        self.token = Token("<EOF>", "")

        # NOTE(review): -1 is "not None", so it passes the assertion at the
        # top of get_context(); whether the resulting context window is
        # meaningful for end-of-input should be confirmed.
        self.pos_in_stream = -1
        self.line = -1
        self.column = -1

        bullet_list = '\n\t* '.join(term.name for term in self.expected)
        message = "Unexpected end-of-input. Expected one of: \n\t* %s\n" % bullet_list
        super(UnexpectedEOF, self).__init__(message)
class UnexpectedCharacters(LexError, UnexpectedInput): | |||
def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None): | |||
@@ -299,7 +299,7 @@ class Parser: | |||
solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0] | |||
if not solutions: | |||
expected_terminals = [t.expect for t in to_scan] | |||
raise UnexpectedEOF(expected_terminals) | |||
raise UnexpectedEOF(expected_terminals, state={i.s for i in to_scan}) | |||
if self.debug: | |||
from .earley_forest import ForestToPyDotVisitor | |||
@@ -113,7 +113,7 @@ class Parser(BaseParser): | |||
del delayed_matches[i+1] # No longer needed, so unburden memory | |||
if not next_set and not delayed_matches and not next_to_scan: | |||
raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan}, set(to_scan)) | |||
raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan}, set(to_scan), state={i.s for i in next_to_scan}) | |||
return next_to_scan | |||