* All exceptions are now under exceptions.py
* UnexpectedInput is now the superclass of UnexpectedToken and UnexpectedCharacters, both of which support the get_context() and match_examples() methods.
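A minimal usage sketch of the unified hierarchy (hedged; the grammar and input below are illustrative, not part of this changeset). Any parse error, whether the lexer raises UnexpectedCharacters or the parser raises UnexpectedToken, can now be caught as UnexpectedInput and inspected the same way:

    # Hedged sketch: grammar and input are made up for illustration.
    from lark import Lark, UnexpectedInput

    parser = Lark('start: "a" "b"', parser='lalr')

    text = 'ac'
    try:
        parser.parse(text)
    except UnexpectedInput as u:
        # get_context() echoes the offending line with a caret under the error.
        print(u.get_context(text))
        print('line %s, column %s' % (u.line, u.column))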
@@ -2,7 +2,7 @@
 # This demonstrates example-driven error reporting with the LALR parser
 #
-from lark import Lark, UnexpectedToken
+from lark import Lark, UnexpectedInput
 from .json_parser import json_grammar  # Using the grammar from the json_parser example
@@ -32,11 +32,11 @@ class JsonTrailingComma(JsonSyntaxError):
 def parse(json_text):
     try:
         j = json_parser.parse(json_text)
-    except UnexpectedToken as ut:
-        exc_class = ut.match_examples(json_parser.parse, {
-            JsonMissingValue: ['{"foo": }'],
+    except UnexpectedInput as u:
+        exc_class = u.match_examples(json_parser.parse, {
             JsonMissingOpening: ['{"foo": ]}',
-                                 '{"foor": }}'],
+                                 '{"foor": }}',
+                                 '{"foo": }'],
             JsonMissingClosing: ['{"foo": [}',
                                  '{',
                                  '{"a": 1',
@@ -55,15 +55,10 @@ def parse(json_text):
         })
         if not exc_class:
             raise
-        raise exc_class(ut.get_context(json_text), ut.line, ut.column)
+        raise exc_class(u.get_context(json_text), u.line, u.column)
 def test():
-    try:
-        parse('{"key":')
-    except JsonMissingValue:
-        pass
     try:
         parse('{"key": "value"')
     except JsonMissingClosing:
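For reference, a hedged sketch of what the rewritten example reports for one of its own malformed inputs. The exact text depends on JsonSyntaxError.__str__ in the example file, which is not shown in this diff:

    # Hedged: output shape only; labels and formatting come from the
    # example file's JsonSyntaxError subclasses.
    try:
        parse('{"foo": ]}')
    except JsonMissingOpening as e:
        print(e)
        # Expected along the lines of:
        #   Missing Opening at line 1, column 9.
        #   {"foo": ]}
        #           ^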
@@ -1,8 +1,7 @@
 from .tree import Tree
 from .visitors import Transformer, Visitor, v_args, Discard
 from .visitors import InlineTransformer, inline_args  # XXX Deprecated
-from .common import ParseError, GrammarError, UnexpectedToken
-from .lexer import UnexpectedInput, LexError
+from .exceptions import ParseError, LexError, GrammarError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
 from .lark import Lark
 __version__ = "0.5.6"
@@ -7,63 +7,6 @@ Py36 = (sys.version_info[:2] >= (3, 6))
 ###{standalone
-class GrammarError(Exception):
-    pass
-class ParseError(Exception):
-    pass
-class UnexpectedToken(ParseError):
-    def __init__(self, token, expected, seq, index, considered_rules=None, state=None):
-        self.token = token
-        self.expected = expected
-        self.line = getattr(token, 'line', '?')
-        self.column = getattr(token, 'column', '?')
-        self.considered_rules = considered_rules
-        self.state = state
-        try:
-            context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]])
-        except AttributeError:
-            context = seq[index:index+5]
-        except TypeError:
-            context = "<no context>"
-        message = ("Unexpected token %r at line %s, column %s.\n"
-                   "Expected: %s\n"
-                   "Context: %s" % (token, self.line, self.column, expected, context))
-        super(UnexpectedToken, self).__init__(message)
-    def match_examples(self, parse_fn, examples):
-        """ Given a parser instance and a dictionary mapping some label with
-            some malformed syntax examples, it'll return the label for the
-            example that best matches the current error.
-        """
-        assert self.state, "Not supported for this exception"
-        candidate = None
-        for label, example in examples.items():
-            assert not isinstance(example, STRING_TYPE)
-            for malformed in example:
-                try:
-                    parse_fn(malformed)
-                except UnexpectedToken as ut:
-                    if ut.state == self.state:
-                        if ut.token == self.token:  # Try exact match first
-                            return label
-                        elif not candidate:
-                            candidate = label
-        return candidate
-    def get_context(self, text, span=10):
-        pos = self.token.pos_in_stream
-        start = max(pos - span, 0)
-        end = pos + span
-        before = text[start:pos].rsplit('\n', 1)[-1]
-        after = text[pos:end].split('\n', 1)[0]
-        return before + after + '\n' + ' ' * len(before) + '^\n'
 ###}
@@ -0,0 +1,85 @@
+from .utils import STRING_TYPE
+class LarkError(Exception):
+    pass
+class GrammarError(LarkError):
+    pass
+class ParseError(LarkError):
+    pass
+class LexError(LarkError):
+    pass
+class UnexpectedInput(LarkError):
+    def get_context(self, text, span=10):
+        pos = self.pos_in_stream
+        start = max(pos - span, 0)
+        end = pos + span
+        before = text[start:pos].rsplit('\n', 1)[-1]
+        after = text[pos:end].split('\n', 1)[0]
+        return before + after + '\n' + ' ' * len(before) + '^\n'
+    def match_examples(self, parse_fn, examples):
+        """ Given a parser instance and a dictionary mapping labels to lists of
+            malformed syntax examples, return the label for the example that
+            best matches the current error.
+        """
+        assert self.state is not None, "Not supported for this exception"
+        candidate = None
+        for label, example in examples.items():
+            assert not isinstance(example, STRING_TYPE)
+            for malformed in example:
+                try:
+                    parse_fn(malformed)
+                except UnexpectedInput as ut:
+                    if ut.state == self.state:
+                        try:
+                            if ut.token == self.token:  # Try exact match first
+                                return label
+                        except AttributeError:
+                            pass
+                        if not candidate:
+                            candidate = label
+        return candidate
+class UnexpectedCharacters(LexError, UnexpectedInput):
+    def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None):
+        context = seq[lex_pos:lex_pos+10]
+        message = "No token defined for '%s' in %r at line %d col %d" % (seq[lex_pos], context, line, column)
+        if allowed:
+            message += '\n\nExpecting: %s\n' % allowed
+        super(UnexpectedCharacters, self).__init__(message)
+        self.line = line
+        self.column = column
+        self.context = context
+        self.allowed = allowed
+        self.considered_tokens = considered_tokens
+        self.pos_in_stream = lex_pos
+        self.state = state
+class UnexpectedToken(ParseError, UnexpectedInput):
+    def __init__(self, token, expected, considered_rules=None, state=None):
+        self.token = token
+        self.expected = expected  # XXX str should not be necessary
+        self.line = getattr(token, 'line', '?')
+        self.column = getattr(token, 'column', '?')
+        self.considered_rules = considered_rules
+        self.state = state
+        self.pos_in_stream = token.pos_in_stream
+        message = ("Unexpected token %r at line %s, column %s.\n"
+                   "Expected: %s\n"
+                   % (token, self.line, self.column, ', '.join(self.expected)))
+        super(UnexpectedToken, self).__init__(message)
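A hedged sketch of the match_examples() semantics: the method re-runs the parser on each known-bad example and returns the label whose failure lands in the same parser state as the current error, preferring an exact token match. Here json_parser is assumed to be a Lark LALR parser, as in the error-reporting example above:

    # Hedged sketch; labels may be any object, strings used for clarity.
    try:
        json_parser.parse('{"foo": }')
    except UnexpectedInput as u:
        label = u.match_examples(json_parser.parse, {
            'missing value':   ['{"a": }'],
            'unclosed object': ['{"a": 1'],
        })
        print(label)  # should be 'missing value': it fails in the same state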
@@ -4,26 +4,9 @@ import re
 from .utils import Str, classify
 from .common import PatternStr, PatternRE, TokenDef
+from .exceptions import UnexpectedCharacters
 ###{standalone
-class LexError(Exception):
-    pass
-class UnexpectedInput(LexError):
-    def __init__(self, seq, lex_pos, line, column, allowed=None, considered_rules=None):
-        context = seq[lex_pos:lex_pos+5]
-        message = "No token defined for: '%s' in %r at line %d col %d" % (seq[lex_pos], context, line, column)
-        if allowed:
-            message += '\n\nExpecting: %s\n' % allowed
-        super(UnexpectedInput, self).__init__(message)
-        self.line = line
-        self.column = column
-        self.context = context
-        self.allowed = allowed
-        self.considered_rules = considered_rules
 class Token(Str):
     __slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column')
@@ -84,8 +67,9 @@ class LineCounter:
 class _Lex:
     "Built to serve both Lexer and ContextualLexer"
-    def __init__(self, lexer):
+    def __init__(self, lexer, state=None):
         self.lexer = lexer
+        self.state = state
     def lex(self, stream, newline_types, ignore_types):
         newline_types = list(newline_types)
@@ -118,7 +102,7 @@ class _Lex:
                     break
             else:
                 if line_ctr.char_pos < len(stream):
-                    raise UnexpectedInput(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column)
+                    raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, state=self.state)
                 break
 class UnlessCallback:
@@ -251,9 +235,10 @@ class ContextualLexer:
         self.parser_state = state
     def lex(self, stream):
-        l = _Lex(self.lexers[self.parser_state])
+        l = _Lex(self.lexers[self.parser_state], self.parser_state)
         for x in l.lex(stream, self.root_lexer.newline_types, self.root_lexer.ignore_types):
             yield x
             l.lexer = self.lexers[self.parser_state]
+            l.state = self.parser_state
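Threading the parser state through _Lex means that, with the contextual lexer, even character-level errors carry the parser state they occurred in, which is what lets match_examples() work for UnexpectedCharacters too. A hedged sketch, assuming the lexer='contextual' option as accepted in this era of the library:

    # Hedged sketch: grammar and input are illustrative.
    from lark import Lark
    from lark.exceptions import UnexpectedCharacters

    p = Lark('start: "a"+', parser='lalr', lexer='contextual')
    try:
        p.parse('ab')
    except UnexpectedCharacters as e:
        print(e.line, e.column, e.state is not None)  # e.g. 1 2 True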
@@ -6,14 +6,15 @@ import re
 from ast import literal_eval
 from copy import deepcopy
-from .lexer import Token, UnexpectedInput
+from .lexer import Token
 from .parse_tree_builder import ParseTreeBuilder
 from .parser_frontends import LALR
-from .parsers.lalr_parser import UnexpectedToken
-from .common import GrammarError, LexerConf, ParserConf, PatternStr, PatternRE, TokenDef
+from .common import LexerConf, ParserConf, PatternStr, PatternRE, TokenDef
 from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol
 from .utils import classify, suppress
+from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken
 from .tree import Tree, SlottedTree as ST
 from .visitors import Transformer, Visitor, v_args
@@ -576,7 +577,7 @@ class GrammarLoader:
         try:
             tree = self.canonize_tree.transform( self.parser.parse(grammar_text+'\n') )
-        except UnexpectedInput as e:
+        except UnexpectedCharacters as e:
            raise GrammarError("Unexpected input %r at line %d column %d in %s" % (e.context, e.line, e.column, name))
         except UnexpectedToken as e:
             context = e.get_context(grammar_text)
@@ -1,4 +1,4 @@
-from .common import GrammarError
+from .exceptions import GrammarError
 from .utils import suppress
 from .lexer import Token
 from .grammar import Rule
@@ -4,7 +4,7 @@ from .utils import get_regexp_width
 from .parsers.grammar_analysis import GrammarAnalyzer
 from .lexer import Lexer, ContextualLexer, Token
-from .common import GrammarError
+from .exceptions import GrammarError
 from .parsers import lalr_parser, earley, xearley, resolve_ambig, cyk
 from .tree import Tree
@@ -8,7 +8,7 @@
 from collections import defaultdict
 import itertools
-from ..common import ParseError
+from ..exceptions import ParseError
 from ..lexer import Token
 from ..tree import Tree
 from ..grammar import Terminal as T, NonTerminal as NT, Symbol
@@ -15,7 +15,7 @@
 from ..tree import Tree
 from ..visitors import Transformer_InPlace, v_args
-from ..common import ParseError, UnexpectedToken
+from ..exceptions import ParseError, UnexpectedToken
 from .grammar_analysis import GrammarAnalyzer
 from ..grammar import NonTerminal
@@ -197,8 +197,8 @@ class Parser:
         next_set.add(item.advance(token) for item in column.to_scan if match(item.expect, token))
         if not next_set:
-            expect = {i.expect for i in column.to_scan}
-            raise UnexpectedToken(token, expect, stream, set(column.to_scan))
+            expect = {i.expect.name for i in column.to_scan}
+            raise UnexpectedToken(token, expect, considered_rules=set(column.to_scan))
         return next_set
@@ -1,6 +1,6 @@
 from ..utils import bfs, fzset, classify
-from ..common import GrammarError
+from ..exceptions import GrammarError
 from ..grammar import Rule, Terminal, NonTerminal
@@ -10,7 +10,7 @@ import logging
 from collections import defaultdict
 from ..utils import classify, classify_bool, bfs, fzset
-from ..common import GrammarError
+from ..exceptions import GrammarError
 from .grammar_analysis import GrammarAnalyzer, Terminal
@@ -2,7 +2,7 @@
 """
 # Author: Erez Shinan (2017)
 # Email : erezshin@gmail.com
-from ..common import UnexpectedToken
+from ..exceptions import UnexpectedToken
 from .lalr_analysis import LALR_Analyzer, Shift
@@ -46,7 +46,7 @@ class _Parser:
                 return states[state][key]
             except KeyError:
                 expected = states[state].keys()
-                raise UnexpectedToken(token, expected, seq, i, state=state)
+                raise UnexpectedToken(token, expected, state=state)  # TODO filter out rules from expected
         def reduce(rule):
             size = len(rule.expansion)
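UnexpectedToken no longer needs the token sequence and index; the position comes from token.pos_in_stream, and the remaining attributes are enough for both reporting and example matching. A hedged sketch of inspecting one (grammar and input are illustrative):

    # Hedged sketch: attributes now carried by UnexpectedToken.
    from lark import Lark
    from lark.exceptions import UnexpectedToken

    p = Lark('start: "a" "b"', parser='lalr')
    try:
        p.parse('aa')  # second token is unexpected
    except UnexpectedToken as e:
        print(e.token, e.expected, e.state, e.pos_in_stream)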
@@ -20,8 +20,8 @@
 from collections import defaultdict
-from ..common import ParseError
-from ..lexer import Token, UnexpectedInput
+from ..exceptions import ParseError, UnexpectedInput
+from ..lexer import Token
 from ..tree import Tree
 from .grammar_analysis import GrammarAnalyzer
 from ..grammar import NonTerminal, Terminal
@@ -110,6 +110,21 @@ class Tree(object):
         self.data = data
         self.children = children
+    # XXX Deprecated! Here for backwards compatibility <0.6.0
+    @property
+    def line(self):
+        return self.meta.line
+    @property
+    def column(self):
+        return self.meta.column
+    @property
+    def end_line(self):
+        return self.meta.end_line
+    @property
+    def end_column(self):
+        return self.meta.end_column
 class SlottedTree(Tree):
     __slots__ = 'data', 'children', 'rule', '_meta'
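Tree positions now live on tree.meta, with the old attributes kept as read-only aliases. A hedged sketch, assuming propagate_positions populates meta for kept tokens:

    # Hedged sketch: prefer tree.meta; tree.line etc. are deprecated aliases.
    from lark import Lark

    p = Lark(r'''start: NAME
                 NAME: /\w+/''', propagate_positions=True)
    t = p.parse('hello')
    print(t.meta.line, t.meta.column)  # preferred access
    print(t.line, t.column)            # deprecated, same values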
@@ -18,8 +18,7 @@ from io import (
 logging.basicConfig(level=logging.INFO)
 from lark.lark import Lark
-from lark.common import GrammarError, ParseError, UnexpectedToken
-from lark.lexer import LexError, UnexpectedInput
+from lark.exceptions import GrammarError, ParseError, UnexpectedToken, LexError, UnexpectedInput
 from lark.tree import Tree
 from lark.visitors import Transformer