diff --git a/examples/advanced/error_reporting_earley.py b/examples/advanced/error_reporting_earley.py
new file mode 100644
index 0000000..f0bcc20
--- /dev/null
+++ b/examples/advanced/error_reporting_earley.py
@@ -0,0 +1,91 @@
+"""
+Example-Driven Error Reporting
+==============================
+
+A demonstration of example-driven error reporting with the Earley parser
+(See also: error_reporting_lalr.py)
+"""
+from lark import Lark, UnexpectedInput
+
+from _json_parser import json_grammar   # Using the grammar from the json_parser example
+
+json_parser = Lark(json_grammar)
+
+
+class JsonSyntaxError(SyntaxError):
+    """Base class for the labelled JSON errors; built from (context, line, column)."""
+
+    def __str__(self):
+        context, line, column = self.args
+        return '%s at line %s, column %s.\n\n%s' % (self.label, line, column, context)
+
+
+class JsonMissingValue(JsonSyntaxError):
+    label = 'Missing Value'
+
+
+class JsonMissingOpening(JsonSyntaxError):
+    label = 'Missing Opening'
+
+
+class JsonMissingClosing(JsonSyntaxError):
+    label = 'Missing Closing'
+
+
+class JsonMissingComma(JsonSyntaxError):
+    label = 'Missing Comma'
+
+
+class JsonTrailingComma(JsonSyntaxError):
+    label = 'Trailing Comma'
+
+
+def parse(json_text):
+    """Parse json_text, re-raising recognised syntax errors as a JsonSyntaxError subclass.
+
+    Returns the parse result. If the failure matches none of the examples
+    below, the original UnexpectedInput is re-raised unchanged.
+    """
+    try:
+        return json_parser.parse(json_text)
+    except UnexpectedInput as u:
+        exc_class = u.match_examples(json_parser.parse, {
+            JsonMissingOpening: ['{"foo": ]}',
+                                 '{"foor": }}',
+                                 '{"foo": }'],
+            JsonMissingClosing: ['{"foo": [}',
+                                 '{',
+                                 '{"a": 1',
+                                 '[1'],
+            JsonMissingComma: ['[1 2]',
+                               '[false 1]',
+                               '["b" 1]',
+                               '{"a":true 1:4}',
+                               '{"a":1 1:4}',
+                               '{"a":"b" 1:4}'],
+            JsonTrailingComma: ['[,]',
+                                '[1,]',
+                                '[1,2,]',
+                                '{"foo":1,}',
+                                '{"foo":false,"bar":true,}']
+        }, use_accepts=True)
+        if not exc_class:
+            raise
+        raise exc_class(u.get_context(json_text), u.line, u.column)
+
+
+def test():
+    """Run two malformed inputs through parse() and print the reported errors."""
+    try:
+        parse('{"example1": "value"')
+    except JsonMissingClosing as e:
+        print(e)
+
+    try:
+        parse('{"example2": ] ')
+    except JsonMissingOpening as e:
+        print(e)
+
+
+if __name__ == '__main__':
+    test()
diff --git a/examples/advanced/error_reporting_lalr.py b/examples/advanced/error_reporting_lalr.py
index 102f7b1..c2cb239 100644
--- a/examples/advanced/error_reporting_lalr.py
+++ b/examples/advanced/error_reporting_lalr.py
@@ -3,7 +3,7 @@ Example-Driven Error Reporting
 ==============================
 
 A demonstration of example-driven error reporting with the LALR parser
-
+(See also: error_reporting_earley.py)
 """
 from lark import Lark, UnexpectedInput
 
diff --git a/lark/__init__.py b/lark/__init__.py
index 814fe66..168a969 100644
--- a/lark/__init__.py
+++ b/lark/__init__.py
@@ -3,7 +3,7 @@ from .tree import Tree
 from .visitors import Transformer, Visitor, v_args, Discard, Transformer_NonRecursive
 from .visitors import InlineTransformer, inline_args   # XXX Deprecated
 from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken,
-                         UnexpectedInput, UnexpectedCharacters, LarkError)
+                         UnexpectedInput, UnexpectedCharacters, UnexpectedEOF, LarkError)
 from .lexer import Token
 from .lark import Lark
 
diff --git a/lark/exceptions.py b/lark/exceptions.py
index ed7b9c7..ab4b139 100644
--- a/lark/exceptions.py
+++ b/lark/exceptions.py
@@ -19,14 +19,6 @@ class LexError(LarkError):
     pass
 
 
-class UnexpectedEOF(ParseError):
-    def __init__(self, expected):
-        self.expected = expected
-
-        message = ("Unexpected end-of-input. Expected one of: \n\t* %s\n" % '\n\t* '.join(x.name for x in self.expected))
-        super(UnexpectedEOF, self).__init__(message)
-
-
 class UnexpectedInput(LarkError):
     """UnexpectedInput Error.
 
@@ -47,6 +39,7 @@ class UnexpectedInput(LarkError):
             The parser doesn't hold a copy of the text it has to parse,
             so you have to provide it again
         """
+        assert self.pos_in_stream is not None, self
         pos = self.pos_in_stream
         start = max(pos - span, 0)
         end = pos + span
@@ -91,7 +84,7 @@ class UnexpectedInput(LarkError):
                     parse_fn(malformed)
                 except UnexpectedInput as ut:
                     if ut.state == self.state:
-                        if use_accepts and ut.accepts != self.accepts:
+                        if use_accepts and hasattr(self, 'accepts') and ut.accepts != self.accepts:
                             logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
                                         (self.state, self.accepts, ut.accepts, i, j))
                             continue
@@ -114,6 +107,26 @@ class UnexpectedInput(LarkError):
         return candidate[0]
 
 
+class UnexpectedEOF(ParseError, UnexpectedInput):
+    """Raised when the input ends while the parser still expects more terminals."""
+
+    def __init__(self, expected, state=None):
+        self.expected = expected
+        self.state = state
+        # Imported here rather than at module level -- presumably to avoid a
+        # circular import with .lexer (TODO confirm).
+        from .lexer import Token
+        # There is no real token or position at end-of-input, so placeholder
+        # values are stored to keep the UnexpectedInput attribute interface.
+        self.token = Token("", "")
+        self.pos_in_stream = -1
+        self.line = -1
+        self.column = -1
+
+        message = ("Unexpected end-of-input. Expected one of: \n\t* %s\n" % '\n\t* '.join(x.name for x in self.expected))
+        super(UnexpectedEOF, self).__init__(message)
+
+
 class UnexpectedCharacters(LexError, UnexpectedInput):
     def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None):
 
diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py
index f0bb7f5..aa18371 100644
--- a/lark/parsers/earley.py
+++ b/lark/parsers/earley.py
@@ -299,7 +299,7 @@ class Parser:
         solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0]
         if not solutions:
             expected_terminals = [t.expect for t in to_scan]
-            raise UnexpectedEOF(expected_terminals)
+            raise UnexpectedEOF(expected_terminals, state={i.s for i in to_scan})
 
         if self.debug:
             from .earley_forest import ForestToPyDotVisitor
diff --git a/lark/parsers/xearley.py b/lark/parsers/xearley.py
index 256fc2c..ae98f0f 100644
--- a/lark/parsers/xearley.py
+++ b/lark/parsers/xearley.py
@@ -113,7 +113,9 @@ class Parser(BaseParser):
             del delayed_matches[i+1]    # No longer needed, so unburden memory
 
             if not next_set and not delayed_matches and not next_to_scan:
-                raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan}, set(to_scan))
+                # next_to_scan is empty on this branch (see the guard above), so the
+                # state is derived from to_scan, as earley.py does for UnexpectedEOF.
+                raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan}, set(to_scan), state={item.s for item in to_scan})
 
             return next_to_scan
 