* All exceptions are now under exceptions.py
* UnexpectedInput is now the superclass of UnexpectedToken and UnexpectedCharacters, all of which support the get_context() and match_examples() methods.

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.6.0
| @@ -2,7 +2,7 @@ | |||||
| # This demonstrates example-driven error reporting with the LALR parser | # This demonstrates example-driven error reporting with the LALR parser | ||||
| # | # | ||||
| from lark import Lark, UnexpectedToken | |||||
| from lark import Lark, UnexpectedInput | |||||
| from .json_parser import json_grammar # Using the grammar from the json_parser example | from .json_parser import json_grammar # Using the grammar from the json_parser example | ||||
| @@ -32,11 +32,11 @@ class JsonTrailingComma(JsonSyntaxError): | |||||
| def parse(json_text): | def parse(json_text): | ||||
| try: | try: | ||||
| j = json_parser.parse(json_text) | j = json_parser.parse(json_text) | ||||
| except UnexpectedToken as ut: | |||||
| exc_class = ut.match_examples(json_parser.parse, { | |||||
| JsonMissingValue: ['{"foo": }'], | |||||
| except UnexpectedInput as u: | |||||
| exc_class = u.match_examples(json_parser.parse, { | |||||
| JsonMissingOpening: ['{"foo": ]}', | JsonMissingOpening: ['{"foo": ]}', | ||||
| '{"foor": }}'], | |||||
| '{"foor": }}', | |||||
| '{"foo": }'], | |||||
| JsonMissingClosing: ['{"foo": [}', | JsonMissingClosing: ['{"foo": [}', | ||||
| '{', | '{', | ||||
| '{"a": 1', | '{"a": 1', | ||||
| @@ -55,15 +55,10 @@ def parse(json_text): | |||||
| }) | }) | ||||
| if not exc_class: | if not exc_class: | ||||
| raise | raise | ||||
| raise exc_class(ut.get_context(json_text), ut.line, ut.column) | |||||
| raise exc_class(u.get_context(json_text), u.line, u.column) | |||||
| def test(): | def test(): | ||||
| try: | |||||
| parse('{"key":') | |||||
| except JsonMissingValue: | |||||
| pass | |||||
| try: | try: | ||||
| parse('{"key": "value"') | parse('{"key": "value"') | ||||
| except JsonMissingClosing: | except JsonMissingClosing: | ||||
| @@ -1,8 +1,7 @@ | |||||
| from .tree import Tree | from .tree import Tree | ||||
| from .visitors import Transformer, Visitor, v_args, Discard | from .visitors import Transformer, Visitor, v_args, Discard | ||||
| from .visitors import InlineTransformer, inline_args # XXX Deprecated | from .visitors import InlineTransformer, inline_args # XXX Deprecated | ||||
| from .common import ParseError, GrammarError, UnexpectedToken | |||||
| from .lexer import UnexpectedInput, LexError | |||||
| from .exceptions import ParseError, LexError, GrammarError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters | |||||
| from .lark import Lark | from .lark import Lark | ||||
| __version__ = "0.5.6" | __version__ = "0.5.6" | ||||
| @@ -7,63 +7,6 @@ Py36 = (sys.version_info[:2] >= (3, 6)) | |||||
| ###{standalone | ###{standalone | ||||
| class GrammarError(Exception): | |||||
| pass | |||||
| class ParseError(Exception): | |||||
| pass | |||||
| class UnexpectedToken(ParseError): | |||||
| def __init__(self, token, expected, seq, index, considered_rules=None, state=None): | |||||
| self.token = token | |||||
| self.expected = expected | |||||
| self.line = getattr(token, 'line', '?') | |||||
| self.column = getattr(token, 'column', '?') | |||||
| self.considered_rules = considered_rules | |||||
| self.state = state | |||||
| try: | |||||
| context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]]) | |||||
| except AttributeError: | |||||
| context = seq[index:index+5] | |||||
| except TypeError: | |||||
| context = "<no context>" | |||||
| message = ("Unexpected token %r at line %s, column %s.\n" | |||||
| "Expected: %s\n" | |||||
| "Context: %s" % (token, self.line, self.column, expected, context)) | |||||
| super(UnexpectedToken, self).__init__(message) | |||||
| def match_examples(self, parse_fn, examples): | |||||
| """ Given a parser instance and a dictionary mapping some label with | |||||
| some malformed syntax examples, it'll return the label for the | |||||
| example that bests matches the current error. | |||||
| """ | |||||
| assert self.state, "Not supported for this exception" | |||||
| candidate = None | |||||
| for label, example in examples.items(): | |||||
| assert not isinstance(example, STRING_TYPE) | |||||
| for malformed in example: | |||||
| try: | |||||
| parse_fn(malformed) | |||||
| except UnexpectedToken as ut: | |||||
| if ut.state == self.state: | |||||
| if ut.token == self.token: # Try exact match first | |||||
| return label | |||||
| elif not candidate: | |||||
| candidate = label | |||||
| return candidate | |||||
| def get_context(self, text, span=10): | |||||
| pos = self.token.pos_in_stream | |||||
| start = max(pos - span, 0) | |||||
| end = pos + span | |||||
| before = text[start:pos].rsplit('\n', 1)[-1] | |||||
| after = text[pos:end].split('\n', 1)[0] | |||||
| return before + after + '\n' + ' ' * len(before) + '^\n' | |||||
| ###} | ###} | ||||
| @@ -0,0 +1,85 @@ | |||||
| from .utils import STRING_TYPE | |||||
| class LarkError(Exception): | |||||
| pass | |||||
| class GrammarError(LarkError): | |||||
| pass | |||||
| class ParseError(LarkError): | |||||
| pass | |||||
| class LexError(LarkError): | |||||
| pass | |||||
| class UnexpectedInput(LarkError): | |||||
| def get_context(self, text, span=10): | |||||
| pos = self.pos_in_stream | |||||
| start = max(pos - span, 0) | |||||
| end = pos + span | |||||
| before = text[start:pos].rsplit('\n', 1)[-1] | |||||
| after = text[pos:end].split('\n', 1)[0] | |||||
| return before + after + '\n' + ' ' * len(before) + '^\n' | |||||
| def match_examples(self, parse_fn, examples): | |||||
| """ Given a parser instance and a dictionary mapping some label with | |||||
| some malformed syntax examples, it'll return the label for the | |||||
| example that bests matches the current error. | |||||
| """ | |||||
| assert self.state is not None, "Not supported for this exception" | |||||
| candidate = None | |||||
| for label, example in examples.items(): | |||||
| assert not isinstance(example, STRING_TYPE) | |||||
| for malformed in example: | |||||
| try: | |||||
| parse_fn(malformed) | |||||
| except UnexpectedInput as ut: | |||||
| if ut.state == self.state: | |||||
| try: | |||||
| if ut.token == self.token: # Try exact match first | |||||
| return label | |||||
| except AttributeError: | |||||
| pass | |||||
| if not candidate: | |||||
| candidate = label | |||||
| return candidate | |||||
| class UnexpectedCharacters(LexError, UnexpectedInput): | |||||
| def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None): | |||||
| context = seq[lex_pos:lex_pos+10] | |||||
| message = "No token defined for '%s' in %r at line %d col %d" % (seq[lex_pos], context, line, column) | |||||
| if allowed: | |||||
| message += '\n\nExpecting: %s\n' % allowed | |||||
| super(UnexpectedCharacters, self).__init__(message) | |||||
| self.line = line | |||||
| self.column = column | |||||
| self.context = context | |||||
| self.allowed = allowed | |||||
| self.considered_tokens = considered_tokens | |||||
| self.pos_in_stream = lex_pos | |||||
| self.state = state | |||||
| class UnexpectedToken(ParseError, UnexpectedInput): | |||||
| def __init__(self, token, expected, considered_rules=None, state=None): | |||||
| self.token = token | |||||
| self.expected = expected # XXX str shouldn't necessary | |||||
| self.line = getattr(token, 'line', '?') | |||||
| self.column = getattr(token, 'column', '?') | |||||
| self.considered_rules = considered_rules | |||||
| self.state = state | |||||
| self.pos_in_stream = token.pos_in_stream | |||||
| message = ("Unexpected token %r at line %s, column %s.\n" | |||||
| "Expected: %s\n" | |||||
| % (token, self.line, self.column, ', '.join(self.expected))) | |||||
| super(UnexpectedToken, self).__init__(message) | |||||
| @@ -4,26 +4,9 @@ import re | |||||
| from .utils import Str, classify | from .utils import Str, classify | ||||
| from .common import PatternStr, PatternRE, TokenDef | from .common import PatternStr, PatternRE, TokenDef | ||||
| from .exceptions import UnexpectedCharacters | |||||
| ###{standalone | ###{standalone | ||||
| class LexError(Exception): | |||||
| pass | |||||
| class UnexpectedInput(LexError): | |||||
| def __init__(self, seq, lex_pos, line, column, allowed=None, considered_rules=None): | |||||
| context = seq[lex_pos:lex_pos+5] | |||||
| message = "No token defined for: '%s' in %r at line %d col %d" % (seq[lex_pos], context, line, column) | |||||
| if allowed: | |||||
| message += '\n\nExpecting: %s\n' % allowed | |||||
| super(UnexpectedInput, self).__init__(message) | |||||
| self.line = line | |||||
| self.column = column | |||||
| self.context = context | |||||
| self.allowed = allowed | |||||
| self.considered_rules = considered_rules | |||||
| class Token(Str): | class Token(Str): | ||||
| __slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column') | __slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column') | ||||
| @@ -84,8 +67,9 @@ class LineCounter: | |||||
| class _Lex: | class _Lex: | ||||
| "Built to serve both Lexer and ContextualLexer" | "Built to serve both Lexer and ContextualLexer" | ||||
| def __init__(self, lexer): | |||||
| def __init__(self, lexer, state=None): | |||||
| self.lexer = lexer | self.lexer = lexer | ||||
| self.state = state | |||||
| def lex(self, stream, newline_types, ignore_types): | def lex(self, stream, newline_types, ignore_types): | ||||
| newline_types = list(newline_types) | newline_types = list(newline_types) | ||||
| @@ -118,7 +102,7 @@ class _Lex: | |||||
| break | break | ||||
| else: | else: | ||||
| if line_ctr.char_pos < len(stream): | if line_ctr.char_pos < len(stream): | ||||
| raise UnexpectedInput(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column) | |||||
| raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, state=self.state) | |||||
| break | break | ||||
| class UnlessCallback: | class UnlessCallback: | ||||
| @@ -251,9 +235,10 @@ class ContextualLexer: | |||||
| self.parser_state = state | self.parser_state = state | ||||
| def lex(self, stream): | def lex(self, stream): | ||||
| l = _Lex(self.lexers[self.parser_state]) | |||||
| l = _Lex(self.lexers[self.parser_state], self.parser_state) | |||||
| for x in l.lex(stream, self.root_lexer.newline_types, self.root_lexer.ignore_types): | for x in l.lex(stream, self.root_lexer.newline_types, self.root_lexer.ignore_types): | ||||
| yield x | yield x | ||||
| l.lexer = self.lexers[self.parser_state] | l.lexer = self.lexers[self.parser_state] | ||||
| l.state = self.parser_state | |||||
| @@ -6,14 +6,15 @@ import re | |||||
| from ast import literal_eval | from ast import literal_eval | ||||
| from copy import deepcopy | from copy import deepcopy | ||||
| from .lexer import Token, UnexpectedInput | |||||
| from .lexer import Token | |||||
| from .parse_tree_builder import ParseTreeBuilder | from .parse_tree_builder import ParseTreeBuilder | ||||
| from .parser_frontends import LALR | from .parser_frontends import LALR | ||||
| from .parsers.lalr_parser import UnexpectedToken | |||||
| from .common import GrammarError, LexerConf, ParserConf, PatternStr, PatternRE, TokenDef | |||||
| from .common import LexerConf, ParserConf, PatternStr, PatternRE, TokenDef | |||||
| from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol | from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol | ||||
| from .utils import classify, suppress | from .utils import classify, suppress | ||||
| from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken | |||||
| from .tree import Tree, SlottedTree as ST | from .tree import Tree, SlottedTree as ST | ||||
| from .visitors import Transformer, Visitor, v_args | from .visitors import Transformer, Visitor, v_args | ||||
| @@ -576,7 +577,7 @@ class GrammarLoader: | |||||
| try: | try: | ||||
| tree = self.canonize_tree.transform( self.parser.parse(grammar_text+'\n') ) | tree = self.canonize_tree.transform( self.parser.parse(grammar_text+'\n') ) | ||||
| except UnexpectedInput as e: | |||||
| except UnexpectedCharacters as e: | |||||
| raise GrammarError("Unexpected input %r at line %d column %d in %s" % (e.context, e.line, e.column, name)) | raise GrammarError("Unexpected input %r at line %d column %d in %s" % (e.context, e.line, e.column, name)) | ||||
| except UnexpectedToken as e: | except UnexpectedToken as e: | ||||
| context = e.get_context(grammar_text) | context = e.get_context(grammar_text) | ||||
| @@ -1,4 +1,4 @@ | |||||
| from .common import GrammarError | |||||
| from .exceptions import GrammarError | |||||
| from .utils import suppress | from .utils import suppress | ||||
| from .lexer import Token | from .lexer import Token | ||||
| from .grammar import Rule | from .grammar import Rule | ||||
| @@ -4,7 +4,7 @@ from .utils import get_regexp_width | |||||
| from .parsers.grammar_analysis import GrammarAnalyzer | from .parsers.grammar_analysis import GrammarAnalyzer | ||||
| from .lexer import Lexer, ContextualLexer, Token | from .lexer import Lexer, ContextualLexer, Token | ||||
| from .common import GrammarError | |||||
| from .exceptions import GrammarError | |||||
| from .parsers import lalr_parser, earley, xearley, resolve_ambig, cyk | from .parsers import lalr_parser, earley, xearley, resolve_ambig, cyk | ||||
| from .tree import Tree | from .tree import Tree | ||||
| @@ -8,7 +8,7 @@ | |||||
| from collections import defaultdict | from collections import defaultdict | ||||
| import itertools | import itertools | ||||
| from ..common import ParseError | |||||
| from ..exceptions import ParseError | |||||
| from ..lexer import Token | from ..lexer import Token | ||||
| from ..tree import Tree | from ..tree import Tree | ||||
| from ..grammar import Terminal as T, NonTerminal as NT, Symbol | from ..grammar import Terminal as T, NonTerminal as NT, Symbol | ||||
| @@ -15,7 +15,7 @@ | |||||
| from ..tree import Tree | from ..tree import Tree | ||||
| from ..visitors import Transformer_InPlace, v_args | from ..visitors import Transformer_InPlace, v_args | ||||
| from ..common import ParseError, UnexpectedToken | |||||
| from ..exceptions import ParseError, UnexpectedToken | |||||
| from .grammar_analysis import GrammarAnalyzer | from .grammar_analysis import GrammarAnalyzer | ||||
| from ..grammar import NonTerminal | from ..grammar import NonTerminal | ||||
| @@ -197,8 +197,8 @@ class Parser: | |||||
| next_set.add(item.advance(token) for item in column.to_scan if match(item.expect, token)) | next_set.add(item.advance(token) for item in column.to_scan if match(item.expect, token)) | ||||
| if not next_set: | if not next_set: | ||||
| expect = {i.expect for i in column.to_scan} | |||||
| raise UnexpectedToken(token, expect, stream, set(column.to_scan)) | |||||
| expect = {i.expect.name for i in column.to_scan} | |||||
| raise UnexpectedToken(token, expect, considered_rules=set(column.to_scan)) | |||||
| return next_set | return next_set | ||||
| @@ -1,6 +1,6 @@ | |||||
| from ..utils import bfs, fzset, classify | from ..utils import bfs, fzset, classify | ||||
| from ..common import GrammarError | |||||
| from ..exceptions import GrammarError | |||||
| from ..grammar import Rule, Terminal, NonTerminal | from ..grammar import Rule, Terminal, NonTerminal | ||||
| @@ -10,7 +10,7 @@ import logging | |||||
| from collections import defaultdict | from collections import defaultdict | ||||
| from ..utils import classify, classify_bool, bfs, fzset | from ..utils import classify, classify_bool, bfs, fzset | ||||
| from ..common import GrammarError | |||||
| from ..exceptions import GrammarError | |||||
| from .grammar_analysis import GrammarAnalyzer, Terminal | from .grammar_analysis import GrammarAnalyzer, Terminal | ||||
| @@ -2,7 +2,7 @@ | |||||
| """ | """ | ||||
| # Author: Erez Shinan (2017) | # Author: Erez Shinan (2017) | ||||
| # Email : erezshin@gmail.com | # Email : erezshin@gmail.com | ||||
| from ..common import UnexpectedToken | |||||
| from ..exceptions import UnexpectedToken | |||||
| from .lalr_analysis import LALR_Analyzer, Shift | from .lalr_analysis import LALR_Analyzer, Shift | ||||
| @@ -46,7 +46,7 @@ class _Parser: | |||||
| return states[state][key] | return states[state][key] | ||||
| except KeyError: | except KeyError: | ||||
| expected = states[state].keys() | expected = states[state].keys() | ||||
| raise UnexpectedToken(token, expected, seq, i, state=state) | |||||
| raise UnexpectedToken(token, expected, state=state) # TODO filter out rules from expected | |||||
| def reduce(rule): | def reduce(rule): | ||||
| size = len(rule.expansion) | size = len(rule.expansion) | ||||
| @@ -20,8 +20,8 @@ | |||||
| from collections import defaultdict | from collections import defaultdict | ||||
| from ..common import ParseError | |||||
| from ..lexer import Token, UnexpectedInput | |||||
| from ..exceptions import ParseError, UnexpectedInput | |||||
| from ..lexer import Token | |||||
| from ..tree import Tree | from ..tree import Tree | ||||
| from .grammar_analysis import GrammarAnalyzer | from .grammar_analysis import GrammarAnalyzer | ||||
| from ..grammar import NonTerminal, Terminal | from ..grammar import NonTerminal, Terminal | ||||
| @@ -110,6 +110,21 @@ class Tree(object): | |||||
| self.data = data | self.data = data | ||||
| self.children = children | self.children = children | ||||
| # XXX Deprecated! Here for backwards compatibility <0.6.0 | |||||
| @property | |||||
| def line(self): | |||||
| return self.meta.line | |||||
| @property | |||||
| def column(self): | |||||
| return self.meta.column | |||||
| @property | |||||
| def end_line(self): | |||||
| return self.meta.end_line | |||||
| @property | |||||
| def end_column(self): | |||||
| return self.meta.end_column | |||||
| class SlottedTree(Tree): | class SlottedTree(Tree): | ||||
| __slots__ = 'data', 'children', 'rule', '_meta' | __slots__ = 'data', 'children', 'rule', '_meta' | ||||
| @@ -18,8 +18,7 @@ from io import ( | |||||
| logging.basicConfig(level=logging.INFO) | logging.basicConfig(level=logging.INFO) | ||||
| from lark.lark import Lark | from lark.lark import Lark | ||||
| from lark.common import GrammarError, ParseError, UnexpectedToken | |||||
| from lark.lexer import LexError, UnexpectedInput | |||||
| from lark.exceptions import GrammarError, ParseError, UnexpectedToken, LexError, UnexpectedInput | |||||
| from lark.tree import Tree | from lark.tree import Tree | ||||
| from lark.visitors import Transformer | from lark.visitors import Transformer | ||||