Browse Source

Moved and restructured exceptions

* All exceptions are now under exceptions.py
* UnexpectedInput is now superclass of UnexpectedToken and UnexpectedCharacters,
  all of which support the get_context() and match_examples() methods.
tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.6.0
Erez Shinan 7 years ago
parent
commit
5c6df8e825
16 changed files with 131 additions and 109 deletions
  1. +6
    -11
      examples/error_reporting_lalr.py
  2. +1
    -2
      lark/__init__.py
  3. +0
    -57
      lark/common.py
  4. +85
    -0
      lark/exceptions.py
  5. +6
    -21
      lark/lexer.py
  6. +5
    -4
      lark/load_grammar.py
  7. +1
    -1
      lark/parse_tree_builder.py
  8. +1
    -1
      lark/parser_frontends.py
  9. +1
    -1
      lark/parsers/cyk.py
  10. +3
    -3
      lark/parsers/earley.py
  11. +1
    -1
      lark/parsers/grammar_analysis.py
  12. +1
    -1
      lark/parsers/lalr_analysis.py
  13. +2
    -2
      lark/parsers/lalr_parser.py
  14. +2
    -2
      lark/parsers/xearley.py
  15. +15
    -0
      lark/tree.py
  16. +1
    -2
      tests/test_parser.py

+ 6
- 11
examples/error_reporting_lalr.py View File

@@ -2,7 +2,7 @@
# This demonstrates example-driven error reporting with the LALR parser # This demonstrates example-driven error reporting with the LALR parser
# #


from lark import Lark, UnexpectedToken
from lark import Lark, UnexpectedInput


from .json_parser import json_grammar # Using the grammar from the json_parser example from .json_parser import json_grammar # Using the grammar from the json_parser example


@@ -32,11 +32,11 @@ class JsonTrailingComma(JsonSyntaxError):
def parse(json_text): def parse(json_text):
try: try:
j = json_parser.parse(json_text) j = json_parser.parse(json_text)
except UnexpectedToken as ut:
exc_class = ut.match_examples(json_parser.parse, {
JsonMissingValue: ['{"foo": }'],
except UnexpectedInput as u:
exc_class = u.match_examples(json_parser.parse, {
JsonMissingOpening: ['{"foo": ]}', JsonMissingOpening: ['{"foo": ]}',
'{"foor": }}'],
'{"foor": }}',
'{"foo": }'],
JsonMissingClosing: ['{"foo": [}', JsonMissingClosing: ['{"foo": [}',
'{', '{',
'{"a": 1', '{"a": 1',
@@ -55,15 +55,10 @@ def parse(json_text):
}) })
if not exc_class: if not exc_class:
raise raise
raise exc_class(ut.get_context(json_text), ut.line, ut.column)
raise exc_class(u.get_context(json_text), u.line, u.column)




def test(): def test():
try:
parse('{"key":')
except JsonMissingValue:
pass

try: try:
parse('{"key": "value"') parse('{"key": "value"')
except JsonMissingClosing: except JsonMissingClosing:


+ 1
- 2
lark/__init__.py View File

@@ -1,8 +1,7 @@
from .tree import Tree from .tree import Tree
from .visitors import Transformer, Visitor, v_args, Discard from .visitors import Transformer, Visitor, v_args, Discard
from .visitors import InlineTransformer, inline_args # XXX Deprecated from .visitors import InlineTransformer, inline_args # XXX Deprecated
from .common import ParseError, GrammarError, UnexpectedToken
from .lexer import UnexpectedInput, LexError
from .exceptions import ParseError, LexError, GrammarError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
from .lark import Lark from .lark import Lark


__version__ = "0.5.6" __version__ = "0.5.6"

+ 0
- 57
lark/common.py View File

@@ -7,63 +7,6 @@ Py36 = (sys.version_info[:2] >= (3, 6))




###{standalone ###{standalone
class GrammarError(Exception):
    # Raised for errors in the grammar definition itself.
    pass

class ParseError(Exception):
    # Raised when the input text cannot be parsed.
    pass

class UnexpectedToken(ParseError):
    """Parser error: the token stream contained a token no rule accepts.

    Carries the parser ``state`` and offending ``token`` so that
    ``match_examples`` can map the error onto user-supplied examples
    of malformed input.
    """
    def __init__(self, token, expected, seq, index, considered_rules=None, state=None):
        self.token = token
        self.expected = expected
        # Some token-like objects carry no position info, hence the fallbacks.
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')
        self.considered_rules = considered_rules
        self.state = state

        # Build a short human-readable context from the next few tokens.
        try:
            context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]])
        except AttributeError:
            # seq items lack .value/.type; show the raw slice instead.
            context = seq[index:index+5]
        except TypeError:
            # seq is not sliceable -- no context available.
            context = "<no context>"
        message = ("Unexpected token %r at line %s, column %s.\n"
                   "Expected: %s\n"
                   "Context: %s" % (token, self.line, self.column, expected, context))

        super(UnexpectedToken, self).__init__(message)

    def match_examples(self, parse_fn, examples):
        """ Given a parser instance and a dictionary mapping some label with
            some malformed syntax examples, it'll return the label for the
            example that best matches the current error.
        """
        # Matching requires the parser state recorded at error time.
        assert self.state, "Not supported for this exception"

        candidate = None
        for label, example in examples.items():
            # Each value must be a collection of strings, not a bare string.
            assert not isinstance(example, STRING_TYPE)

            for malformed in example:
                try:
                    parse_fn(malformed)
                except UnexpectedToken as ut:
                    if ut.state == self.state:
                        if ut.token == self.token:  # Try exact match first
                            return label
                        elif not candidate:
                            # Same state but different token: weaker match.
                            candidate = label

        return candidate

    def get_context(self, text, span=10):
        # Return the input line around the error with a caret under the column.
        pos = self.token.pos_in_stream
        start = max(pos - span, 0)
        end = pos + span
        before = text[start:pos].rsplit('\n', 1)[-1]
        after = text[pos:end].split('\n', 1)[0]
        return before + after + '\n' + ' ' * len(before) + '^\n'
###} ###}






+ 85
- 0
lark/exceptions.py View File

@@ -0,0 +1,85 @@
from .utils import STRING_TYPE

class LarkError(Exception):
    """Base class for all errors raised by lark."""
    pass


class GrammarError(LarkError):
    """Raised for errors in the grammar definition itself."""
    pass


class ParseError(LarkError):
    """Raised when the token stream cannot be parsed."""
    pass


class LexError(LarkError):
    """Raised when the input text cannot be tokenized."""
    pass


class UnexpectedInput(LarkError):
    """Mixin for errors that point at a position in the input stream.

    Subclasses set ``pos_in_stream`` (character offset of the error) and,
    for ``match_examples`` support, ``state``.
    """

    def get_context(self, text, span=10):
        """Return the slice of *text* around the error (up to *span* chars on
        each side, clipped to the current line), followed by a caret line
        marking the exact position."""
        pos = self.pos_in_stream
        start = max(pos - span, 0)
        end = pos + span
        before = text[start:pos].rsplit('\n', 1)[-1]
        after = text[pos:end].split('\n', 1)[0]
        return before + after + '\n' + ' ' * len(before) + '^\n'

    def match_examples(self, parse_fn, examples):
        """ Given a parser instance and a dictionary mapping some label with
            some malformed syntax examples, it'll return the label for the
            example that best matches the current error.
        """
        # Matching requires the parser/lexer state recorded at error time.
        assert self.state is not None, "Not supported for this exception"

        candidate = None
        for label, example in examples.items():
            # Each value must be a collection of strings, not a bare string.
            assert not isinstance(example, STRING_TYPE)

            for malformed in example:
                try:
                    parse_fn(malformed)
                except UnexpectedInput as ut:
                    if ut.state == self.state:
                        try:
                            # Exact token match is the strongest signal.
                            if ut.token == self.token:  # Try exact match first
                                return label
                        except AttributeError:
                            # Not all UnexpectedInput subclasses carry a token.
                            pass
                        if not candidate:
                            # Same state, different token: weaker match.
                            candidate = label

        return candidate


class UnexpectedCharacters(LexError, UnexpectedInput):
    """Lexer error: no terminal matches at the current position.

    ``seq`` is the full input text, ``lex_pos`` the failing offset;
    ``allowed`` optionally lists the terminals that were expected.
    """

    def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None):
        # Keep a short window of the input for the error message / inspection.
        context = seq[lex_pos:lex_pos+10]
        message = "No token defined for '%s' in %r at line %d col %d" % (seq[lex_pos], context, line, column)
        if allowed:
            message += '\n\nExpecting: %s\n' % allowed

        super(UnexpectedCharacters, self).__init__(message)

        self.line = line
        self.column = column
        self.context = context
        self.allowed = allowed
        self.considered_tokens = considered_tokens
        self.pos_in_stream = lex_pos
        self.state = state


class UnexpectedToken(ParseError, UnexpectedInput):
    """Parser error: *token* is not acceptable in the current parser *state*;
    *expected* lists the acceptable token names."""

    def __init__(self, token, expected, considered_rules=None, state=None):
        self.token = token
        self.expected = expected    # XXX should be a collection of str token names
        # Token-like objects may lack position info; fall back gracefully
        # instead of raising from inside the exception constructor.
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')
        self.considered_rules = considered_rules
        self.state = state
        self.pos_in_stream = getattr(token, 'pos_in_stream', None)

        message = ("Unexpected token %r at line %s, column %s.\n"
                   "Expected: %s\n"
                   % (token, self.line, self.column, ', '.join(self.expected)))

        super(UnexpectedToken, self).__init__(message)



+ 6
- 21
lark/lexer.py View File

@@ -4,26 +4,9 @@ import re


from .utils import Str, classify from .utils import Str, classify
from .common import PatternStr, PatternRE, TokenDef from .common import PatternStr, PatternRE, TokenDef
from .exceptions import UnexpectedCharacters


###{standalone ###{standalone
class LexError(Exception):
pass

class UnexpectedInput(LexError):
def __init__(self, seq, lex_pos, line, column, allowed=None, considered_rules=None):
context = seq[lex_pos:lex_pos+5]
message = "No token defined for: '%s' in %r at line %d col %d" % (seq[lex_pos], context, line, column)
if allowed:
message += '\n\nExpecting: %s\n' % allowed

super(UnexpectedInput, self).__init__(message)

self.line = line
self.column = column
self.context = context
self.allowed = allowed
self.considered_rules = considered_rules

class Token(Str): class Token(Str):
__slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column') __slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column')


@@ -84,8 +67,9 @@ class LineCounter:


class _Lex: class _Lex:
"Built to serve both Lexer and ContextualLexer" "Built to serve both Lexer and ContextualLexer"
def __init__(self, lexer):
def __init__(self, lexer, state=None):
self.lexer = lexer self.lexer = lexer
self.state = state


def lex(self, stream, newline_types, ignore_types): def lex(self, stream, newline_types, ignore_types):
newline_types = list(newline_types) newline_types = list(newline_types)
@@ -118,7 +102,7 @@ class _Lex:
break break
else: else:
if line_ctr.char_pos < len(stream): if line_ctr.char_pos < len(stream):
raise UnexpectedInput(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column)
raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, state=self.state)
break break


class UnlessCallback: class UnlessCallback:
@@ -251,9 +235,10 @@ class ContextualLexer:
self.parser_state = state self.parser_state = state


def lex(self, stream): def lex(self, stream):
l = _Lex(self.lexers[self.parser_state])
l = _Lex(self.lexers[self.parser_state], self.parser_state)
for x in l.lex(stream, self.root_lexer.newline_types, self.root_lexer.ignore_types): for x in l.lex(stream, self.root_lexer.newline_types, self.root_lexer.ignore_types):
yield x yield x
l.lexer = self.lexers[self.parser_state] l.lexer = self.lexers[self.parser_state]
l.state = self.parser_state





+ 5
- 4
lark/load_grammar.py View File

@@ -6,14 +6,15 @@ import re
from ast import literal_eval from ast import literal_eval
from copy import deepcopy from copy import deepcopy


from .lexer import Token, UnexpectedInput
from .lexer import Token



from .parse_tree_builder import ParseTreeBuilder from .parse_tree_builder import ParseTreeBuilder
from .parser_frontends import LALR from .parser_frontends import LALR
from .parsers.lalr_parser import UnexpectedToken
from .common import GrammarError, LexerConf, ParserConf, PatternStr, PatternRE, TokenDef
from .common import LexerConf, ParserConf, PatternStr, PatternRE, TokenDef
from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol
from .utils import classify, suppress from .utils import classify, suppress
from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken


from .tree import Tree, SlottedTree as ST from .tree import Tree, SlottedTree as ST
from .visitors import Transformer, Visitor, v_args from .visitors import Transformer, Visitor, v_args
@@ -576,7 +577,7 @@ class GrammarLoader:


try: try:
tree = self.canonize_tree.transform( self.parser.parse(grammar_text+'\n') ) tree = self.canonize_tree.transform( self.parser.parse(grammar_text+'\n') )
except UnexpectedInput as e:
except UnexpectedCharacters as e:
raise GrammarError("Unexpected input %r at line %d column %d in %s" % (e.context, e.line, e.column, name)) raise GrammarError("Unexpected input %r at line %d column %d in %s" % (e.context, e.line, e.column, name))
except UnexpectedToken as e: except UnexpectedToken as e:
context = e.get_context(grammar_text) context = e.get_context(grammar_text)


+ 1
- 1
lark/parse_tree_builder.py View File

@@ -1,4 +1,4 @@
from .common import GrammarError
from .exceptions import GrammarError
from .utils import suppress from .utils import suppress
from .lexer import Token from .lexer import Token
from .grammar import Rule from .grammar import Rule


+ 1
- 1
lark/parser_frontends.py View File

@@ -4,7 +4,7 @@ from .utils import get_regexp_width
from .parsers.grammar_analysis import GrammarAnalyzer from .parsers.grammar_analysis import GrammarAnalyzer
from .lexer import Lexer, ContextualLexer, Token from .lexer import Lexer, ContextualLexer, Token


from .common import GrammarError
from .exceptions import GrammarError
from .parsers import lalr_parser, earley, xearley, resolve_ambig, cyk from .parsers import lalr_parser, earley, xearley, resolve_ambig, cyk
from .tree import Tree from .tree import Tree




+ 1
- 1
lark/parsers/cyk.py View File

@@ -8,7 +8,7 @@
from collections import defaultdict from collections import defaultdict
import itertools import itertools


from ..common import ParseError
from ..exceptions import ParseError
from ..lexer import Token from ..lexer import Token
from ..tree import Tree from ..tree import Tree
from ..grammar import Terminal as T, NonTerminal as NT, Symbol from ..grammar import Terminal as T, NonTerminal as NT, Symbol


+ 3
- 3
lark/parsers/earley.py View File

@@ -15,7 +15,7 @@


from ..tree import Tree from ..tree import Tree
from ..visitors import Transformer_InPlace, v_args from ..visitors import Transformer_InPlace, v_args
from ..common import ParseError, UnexpectedToken
from ..exceptions import ParseError, UnexpectedToken
from .grammar_analysis import GrammarAnalyzer from .grammar_analysis import GrammarAnalyzer
from ..grammar import NonTerminal from ..grammar import NonTerminal


@@ -197,8 +197,8 @@ class Parser:
next_set.add(item.advance(token) for item in column.to_scan if match(item.expect, token)) next_set.add(item.advance(token) for item in column.to_scan if match(item.expect, token))


if not next_set: if not next_set:
expect = {i.expect for i in column.to_scan}
raise UnexpectedToken(token, expect, stream, set(column.to_scan))
expect = {i.expect.name for i in column.to_scan}
raise UnexpectedToken(token, expect, considered_rules=set(column.to_scan))


return next_set return next_set




+ 1
- 1
lark/parsers/grammar_analysis.py View File

@@ -1,6 +1,6 @@


from ..utils import bfs, fzset, classify from ..utils import bfs, fzset, classify
from ..common import GrammarError
from ..exceptions import GrammarError
from ..grammar import Rule, Terminal, NonTerminal from ..grammar import Rule, Terminal, NonTerminal






+ 1
- 1
lark/parsers/lalr_analysis.py View File

@@ -10,7 +10,7 @@ import logging
from collections import defaultdict from collections import defaultdict


from ..utils import classify, classify_bool, bfs, fzset from ..utils import classify, classify_bool, bfs, fzset
from ..common import GrammarError
from ..exceptions import GrammarError


from .grammar_analysis import GrammarAnalyzer, Terminal from .grammar_analysis import GrammarAnalyzer, Terminal




+ 2
- 2
lark/parsers/lalr_parser.py View File

@@ -2,7 +2,7 @@
""" """
# Author: Erez Shinan (2017) # Author: Erez Shinan (2017)
# Email : erezshin@gmail.com # Email : erezshin@gmail.com
from ..common import UnexpectedToken
from ..exceptions import UnexpectedToken


from .lalr_analysis import LALR_Analyzer, Shift from .lalr_analysis import LALR_Analyzer, Shift


@@ -46,7 +46,7 @@ class _Parser:
return states[state][key] return states[state][key]
except KeyError: except KeyError:
expected = states[state].keys() expected = states[state].keys()
raise UnexpectedToken(token, expected, seq, i, state=state)
raise UnexpectedToken(token, expected, state=state) # TODO filter out rules from expected


def reduce(rule): def reduce(rule):
size = len(rule.expansion) size = len(rule.expansion)


+ 2
- 2
lark/parsers/xearley.py View File

@@ -20,8 +20,8 @@


from collections import defaultdict from collections import defaultdict


from ..common import ParseError
from ..lexer import Token, UnexpectedInput
from ..exceptions import ParseError, UnexpectedInput
from ..lexer import Token
from ..tree import Tree from ..tree import Tree
from .grammar_analysis import GrammarAnalyzer from .grammar_analysis import GrammarAnalyzer
from ..grammar import NonTerminal, Terminal from ..grammar import NonTerminal, Terminal


+ 15
- 0
lark/tree.py View File

@@ -110,6 +110,21 @@ class Tree(object):
self.data = data self.data = data
self.children = children self.children = children


    # XXX Deprecated! Here for backwards compatibility <0.6.0
    # Position info now lives on self.meta; these read-only properties just
    # forward to it so old code reading tree.line etc. keeps working.
    @property
    def line(self):
        return self.meta.line
    @property
    def column(self):
        return self.meta.column
    @property
    def end_line(self):
        return self.meta.end_line
    @property
    def end_column(self):
        return self.meta.end_column


class SlottedTree(Tree): class SlottedTree(Tree):
__slots__ = 'data', 'children', 'rule', '_meta' __slots__ = 'data', 'children', 'rule', '_meta'




+ 1
- 2
tests/test_parser.py View File

@@ -18,8 +18,7 @@ from io import (
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)


from lark.lark import Lark from lark.lark import Lark
from lark.common import GrammarError, ParseError, UnexpectedToken
from lark.lexer import LexError, UnexpectedInput
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, LexError, UnexpectedInput
from lark.tree import Tree from lark.tree import Tree
from lark.visitors import Transformer from lark.visitors import Transformer




Loading…
Cancel
Save