| @@ -0,0 +1,79 @@ | |||
| """ | |||
| Example-Driven Error Reporting | |||
| ============================== | |||
| A demonstration of example-driven error reporting with the Earley parser | |||
| (See also: error_reporting_lalr.py) | |||
| """ | |||
| from lark import Lark, UnexpectedInput | |||
| from _json_parser import json_grammar # Using the grammar from the json_parser example | |||
| json_parser = Lark(json_grammar) | |||
class JsonSyntaxError(SyntaxError):
    """Base class for the JSON syntax errors reported by this example.

    Instances are created as ``cls(context, line, column)``. The three
    values are kept in ``self.args`` and rendered by ``__str__``.
    Subclasses set ``label`` to a short name for the kind of mistake.
    """

    # Fallback label so the base class itself can be rendered; without it,
    # str() on a plain JsonSyntaxError fails with AttributeError.
    label = 'Syntax Error'

    def __str__(self):
        """Return the label and position, a blank line, then the context."""
        context, line, column = self.args
        return '%s at line %s, column %s.\n\n%s' % (self.label, line, column, context)


class JsonMissingValue(JsonSyntaxError):
    """A value is missing."""
    label = 'Missing Value'


class JsonMissingOpening(JsonSyntaxError):
    """An opening bracket or brace is missing."""
    label = 'Missing Opening'


class JsonMissingClosing(JsonSyntaxError):
    """A closing bracket or brace is missing."""
    label = 'Missing Closing'


class JsonMissingComma(JsonSyntaxError):
    """A comma between two elements is missing."""
    label = 'Missing Comma'


class JsonTrailingComma(JsonSyntaxError):
    """A comma appears with no element after it."""
    label = 'Trailing Comma'
def parse(json_text):
    """Parse ``json_text`` and return the result of ``json_parser.parse``.

    When parsing fails with ``UnexpectedInput``, the error is compared
    (through ``match_examples``) against the malformed snippets below,
    which are grouped by the error class that describes them.

    Raises:
        JsonSyntaxError: the subclass selected by ``match_examples``, built
            from the error's context, line and column.
        UnexpectedInput: re-raised unchanged when no example matches.
    """
    # Known-bad inputs, keyed by the error class each group illustrates.
    examples = {
        JsonMissingOpening: ['{"foo": ]}',
                             '{"foor": }}',
                             '{"foo": }'],
        JsonMissingClosing: ['{"foo": [}',
                             '{',
                             '{"a": 1',
                             '[1'],
        JsonMissingComma: ['[1 2]',
                           '[false 1]',
                           '["b" 1]',
                           '{"a":true 1:4}',
                           '{"a":1 1:4}',
                           '{"a":"b" 1:4}'],
        JsonTrailingComma: ['[,]',
                            '[1,]',
                            '[1,2,]',
                            '{"foo":1,}',
                            '{"foo":false,"bar":true,}'],
    }

    try:
        # Hand the parse result back to the caller instead of discarding it.
        return json_parser.parse(json_text)
    except UnexpectedInput as u:
        exc_class = u.match_examples(json_parser.parse, examples, use_accepts=True)
        if not exc_class:
            # No example matched: let the original parser error propagate.
            raise
        raise exc_class(u.get_context(json_text), u.line, u.column)
def test():
    """Feed two malformed documents to ``parse`` and print each reported error.

    Each input is paired with the error class it is expected to raise; any
    other exception propagates to the caller.
    """
    cases = (
        ('{"example1": "value"', JsonMissingClosing),
        ('{"example2": ] ', JsonMissingOpening),
    )
    for text, expected_error in cases:
        try:
            parse(text)
        except expected_error as err:
            print(err)
| if __name__ == '__main__': | |||
| test() | |||
| @@ -3,7 +3,7 @@ Example-Driven Error Reporting | |||
| ============================== | |||
| A demonstration of example-driven error reporting with the LALR parser | |||
| (See also: error_reporting_earley.py) | |||
| """ | |||
| from lark import Lark, UnexpectedInput | |||
| @@ -3,7 +3,7 @@ from .tree import Tree | |||
| from .visitors import Transformer, Visitor, v_args, Discard, Transformer_NonRecursive | |||
| from .visitors import InlineTransformer, inline_args # XXX Deprecated | |||
| from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken, | |||
| UnexpectedInput, UnexpectedCharacters, LarkError) | |||
| UnexpectedInput, UnexpectedCharacters, UnexpectedEOF, LarkError) | |||
| from .lexer import Token | |||
| from .lark import Lark | |||
| @@ -19,14 +19,6 @@ class LexError(LarkError): | |||
| pass | |||
| class UnexpectedEOF(ParseError): | |||
| def __init__(self, expected): | |||
| self.expected = expected | |||
| message = ("Unexpected end-of-input. Expected one of: \n\t* %s\n" % '\n\t* '.join(x.name for x in self.expected)) | |||
| super(UnexpectedEOF, self).__init__(message) | |||
| class UnexpectedInput(LarkError): | |||
| """UnexpectedInput Error. | |||
| @@ -47,6 +39,7 @@ class UnexpectedInput(LarkError): | |||
| The parser doesn't hold a copy of the text it has to parse, | |||
| so you have to provide it again | |||
| """ | |||
| assert self.pos_in_stream is not None, self | |||
| pos = self.pos_in_stream | |||
| start = max(pos - span, 0) | |||
| end = pos + span | |||
| @@ -91,7 +84,7 @@ class UnexpectedInput(LarkError): | |||
| parse_fn(malformed) | |||
| except UnexpectedInput as ut: | |||
| if ut.state == self.state: | |||
| if use_accepts and ut.accepts != self.accepts: | |||
| if use_accepts and hasattr(self, 'accepts') and ut.accepts != self.accepts: | |||
| logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" % | |||
| (self.state, self.accepts, ut.accepts, i, j)) | |||
| continue | |||
| @@ -114,6 +107,19 @@ class UnexpectedInput(LarkError): | |||
| return candidate[0] | |||
class UnexpectedEOF(ParseError, UnexpectedInput):
    """Error raised when the input ends while more was still expected.

    Args:
        expected: iterable of objects with a ``name`` attribute; the names
            are listed in the error message.
        state: optional parser state, stored on the instance as ``state``.
    """

    def __init__(self, expected, state=None):
        self.expected = expected
        self.state = state

        # Imported locally, as in the original; presumably to avoid a
        # circular import between the exceptions and lexer modules.
        from .lexer import Token

        # Placeholder token and -1 position fields stand in for the
        # end of input.
        self.token = Token("<EOF>", "")
        self.pos_in_stream = self.line = self.column = -1

        expected_names = '\n\t* '.join(x.name for x in self.expected)
        message = "Unexpected end-of-input. Expected one of: \n\t* %s\n" % expected_names
        super(UnexpectedEOF, self).__init__(message)
| class UnexpectedCharacters(LexError, UnexpectedInput): | |||
| def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None): | |||
| @@ -299,7 +299,7 @@ class Parser: | |||
| solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0] | |||
| if not solutions: | |||
| expected_terminals = [t.expect for t in to_scan] | |||
| raise UnexpectedEOF(expected_terminals) | |||
| raise UnexpectedEOF(expected_terminals, state={i.s for i in to_scan}) | |||
| if self.debug: | |||
| from .earley_forest import ForestToPyDotVisitor | |||
| @@ -113,7 +113,7 @@ class Parser(BaseParser): | |||
| del delayed_matches[i+1] # No longer needed, so unburden memory | |||
| if not next_set and not delayed_matches and not next_to_scan: | |||
| raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan}, set(to_scan)) | |||
| raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan}, set(to_scan), state={i.s for i in next_to_scan}) | |||
| return next_to_scan | |||