Browse Source

Moved and restructured exceptions

* All exceptions are now under exceptions.py
* UnexpectedInput is now superclass of UnexpectedToken and UnexpectedCharacters,
  all of which support the get_context() and match_examples() methods.
tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.6.0
Erez Shinan 6 years ago
parent
commit
5c6df8e825
16 changed files with 131 additions and 109 deletions
  1. +6
    -11
      examples/error_reporting_lalr.py
  2. +1
    -2
      lark/__init__.py
  3. +0
    -57
      lark/common.py
  4. +85
    -0
      lark/exceptions.py
  5. +6
    -21
      lark/lexer.py
  6. +5
    -4
      lark/load_grammar.py
  7. +1
    -1
      lark/parse_tree_builder.py
  8. +1
    -1
      lark/parser_frontends.py
  9. +1
    -1
      lark/parsers/cyk.py
  10. +3
    -3
      lark/parsers/earley.py
  11. +1
    -1
      lark/parsers/grammar_analysis.py
  12. +1
    -1
      lark/parsers/lalr_analysis.py
  13. +2
    -2
      lark/parsers/lalr_parser.py
  14. +2
    -2
      lark/parsers/xearley.py
  15. +15
    -0
      lark/tree.py
  16. +1
    -2
      tests/test_parser.py

+ 6
- 11
examples/error_reporting_lalr.py View File

@@ -2,7 +2,7 @@
# This demonstrates example-driven error reporting with the LALR parser
#

from lark import Lark, UnexpectedToken
from lark import Lark, UnexpectedInput

from .json_parser import json_grammar # Using the grammar from the json_parser example

@@ -32,11 +32,11 @@ class JsonTrailingComma(JsonSyntaxError):
def parse(json_text):
try:
j = json_parser.parse(json_text)
except UnexpectedToken as ut:
exc_class = ut.match_examples(json_parser.parse, {
JsonMissingValue: ['{"foo": }'],
except UnexpectedInput as u:
exc_class = u.match_examples(json_parser.parse, {
JsonMissingOpening: ['{"foo": ]}',
'{"foor": }}'],
'{"foor": }}',
'{"foo": }'],
JsonMissingClosing: ['{"foo": [}',
'{',
'{"a": 1',
@@ -55,15 +55,10 @@ def parse(json_text):
})
if not exc_class:
raise
raise exc_class(ut.get_context(json_text), ut.line, ut.column)
raise exc_class(u.get_context(json_text), u.line, u.column)


def test():
try:
parse('{"key":')
except JsonMissingValue:
pass

try:
parse('{"key": "value"')
except JsonMissingClosing:


+ 1
- 2
lark/__init__.py View File

@@ -1,8 +1,7 @@
from .tree import Tree
from .visitors import Transformer, Visitor, v_args, Discard
from .visitors import InlineTransformer, inline_args # XXX Deprecated
from .common import ParseError, GrammarError, UnexpectedToken
from .lexer import UnexpectedInput, LexError
from .exceptions import ParseError, LexError, GrammarError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
from .lark import Lark

__version__ = "0.5.6"

+ 0
- 57
lark/common.py View File

@@ -7,63 +7,6 @@ Py36 = (sys.version_info[:2] >= (3, 6))


###{standalone
class GrammarError(Exception):
pass

class ParseError(Exception):
pass

class UnexpectedToken(ParseError):
def __init__(self, token, expected, seq, index, considered_rules=None, state=None):
self.token = token
self.expected = expected
self.line = getattr(token, 'line', '?')
self.column = getattr(token, 'column', '?')
self.considered_rules = considered_rules
self.state = state

try:
context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]])
except AttributeError:
context = seq[index:index+5]
except TypeError:
context = "<no context>"
message = ("Unexpected token %r at line %s, column %s.\n"
"Expected: %s\n"
"Context: %s" % (token, self.line, self.column, expected, context))

super(UnexpectedToken, self).__init__(message)

def match_examples(self, parse_fn, examples):
""" Given a parser instance and a dictionary mapping some label with
some malformed syntax examples, it'll return the label for the
example that best matches the current error.
"""
assert self.state, "Not supported for this exception"

candidate = None
for label, example in examples.items():
assert not isinstance(example, STRING_TYPE)

for malformed in example:
try:
parse_fn(malformed)
except UnexpectedToken as ut:
if ut.state == self.state:
if ut.token == self.token: # Try exact match first
return label
elif not candidate:
candidate = label

return candidate

def get_context(self, text, span=10):
pos = self.token.pos_in_stream
start = max(pos - span, 0)
end = pos + span
before = text[start:pos].rsplit('\n', 1)[-1]
after = text[pos:end].split('\n', 1)[0]
return before + after + '\n' + ' ' * len(before) + '^\n'
###}




+ 85
- 0
lark/exceptions.py View File

@@ -0,0 +1,85 @@
from .utils import STRING_TYPE

class LarkError(Exception):
    """Root of the lark exception hierarchy; catch this to handle any lark error."""


class GrammarError(LarkError):
    """Raised when a grammar definition is invalid."""


class ParseError(LarkError):
    """Raised when the parser cannot process its input."""


class LexError(LarkError):
    """Raised when the lexer cannot tokenize its input."""

class UnexpectedInput(LarkError):
    """Mixin base for errors triggered by unexpected input.

    Subclasses are expected to set ``pos_in_stream`` (and, where
    available, ``state`` and ``token``), which is what enables the
    ``get_context()`` and ``match_examples()`` helpers below.
    """

    def get_context(self, text, span=10):
        """Return the slice of *text* around the error position, followed by
        a second line with a caret (^) pointing at the exact spot."""
        err_pos = self.pos_in_stream
        window_start = max(err_pos - span, 0)
        window_end = err_pos + span
        # Trim each side at the nearest newline so the context stays on one line.
        preceding = text[window_start:err_pos].rsplit('\n', 1)[-1]
        following = text[err_pos:window_end].split('\n', 1)[0]
        return preceding + following + '\n' + ' ' * len(preceding) + '^\n'

    def match_examples(self, parse_fn, examples):
        """Given a parse callable and a mapping of label -> iterable of
        malformed inputs, return the label whose example best matches
        the current error, or None if nothing matches.

        Matching is by parser state; an example that also reproduces the
        exact offending token wins immediately.
        """
        assert self.state is not None, "Not supported for this exception"

        fallback = None
        for label, malformed_inputs in examples.items():
            assert not isinstance(malformed_inputs, STRING_TYPE)

            for sample in malformed_inputs:
                try:
                    parse_fn(sample)
                except UnexpectedInput as ut:
                    if ut.state != self.state:
                        continue
                    try:
                        if ut.token == self.token:  # exact token match wins outright
                            return label
                    except AttributeError:
                        # Not every UnexpectedInput subtype carries a token.
                        pass
                    if not fallback:
                        fallback = label

        return fallback


class UnexpectedCharacters(LexError, UnexpectedInput):
    """Lexer error: no terminal matches the text at the current position."""

    def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None):
        # Record the failure location and surroundings first; the message is
        # then built entirely from these attributes.
        self.line = line
        self.column = column
        self.context = seq[lex_pos:lex_pos+10]
        self.allowed = allowed
        self.considered_tokens = considered_tokens
        self.pos_in_stream = lex_pos
        self.state = state

        message = "No token defined for '%s' in %r at line %d col %d" % (seq[lex_pos], self.context, line, column)
        if allowed:
            message += '\n\nExpecting: %s\n' % allowed

        super(UnexpectedCharacters, self).__init__(message)


class UnexpectedToken(ParseError, UnexpectedInput):
    """Parser error: the next token is not allowed in the current parser state."""

    def __init__(self, token, expected, considered_rules=None, state=None):
        self.token = token
        self.expected = expected    # XXX expected should not have to be strings
        # Tokens from a lexer carry position info; fall back to '?' otherwise.
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')
        self.considered_rules = considered_rules
        self.state = state
        self.pos_in_stream = token.pos_in_stream

        expected_str = ', '.join(self.expected)
        message = "Unexpected token %r at line %s, column %s.\nExpected: %s\n" % (
            token, self.line, self.column, expected_str)

        super(UnexpectedToken, self).__init__(message)



+ 6
- 21
lark/lexer.py View File

@@ -4,26 +4,9 @@ import re

from .utils import Str, classify
from .common import PatternStr, PatternRE, TokenDef
from .exceptions import UnexpectedCharacters

###{standalone
class LexError(Exception):
pass

class UnexpectedInput(LexError):
def __init__(self, seq, lex_pos, line, column, allowed=None, considered_rules=None):
context = seq[lex_pos:lex_pos+5]
message = "No token defined for: '%s' in %r at line %d col %d" % (seq[lex_pos], context, line, column)
if allowed:
message += '\n\nExpecting: %s\n' % allowed

super(UnexpectedInput, self).__init__(message)

self.line = line
self.column = column
self.context = context
self.allowed = allowed
self.considered_rules = considered_rules

class Token(Str):
__slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column')

@@ -84,8 +67,9 @@ class LineCounter:

class _Lex:
"Built to serve both Lexer and ContextualLexer"
def __init__(self, lexer):
def __init__(self, lexer, state=None):
self.lexer = lexer
self.state = state

def lex(self, stream, newline_types, ignore_types):
newline_types = list(newline_types)
@@ -118,7 +102,7 @@ class _Lex:
break
else:
if line_ctr.char_pos < len(stream):
raise UnexpectedInput(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column)
raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, state=self.state)
break

class UnlessCallback:
@@ -251,9 +235,10 @@ class ContextualLexer:
self.parser_state = state

def lex(self, stream):
l = _Lex(self.lexers[self.parser_state])
l = _Lex(self.lexers[self.parser_state], self.parser_state)
for x in l.lex(stream, self.root_lexer.newline_types, self.root_lexer.ignore_types):
yield x
l.lexer = self.lexers[self.parser_state]
l.state = self.parser_state



+ 5
- 4
lark/load_grammar.py View File

@@ -6,14 +6,15 @@ import re
from ast import literal_eval
from copy import deepcopy

from .lexer import Token, UnexpectedInput
from .lexer import Token


from .parse_tree_builder import ParseTreeBuilder
from .parser_frontends import LALR
from .parsers.lalr_parser import UnexpectedToken
from .common import GrammarError, LexerConf, ParserConf, PatternStr, PatternRE, TokenDef
from .common import LexerConf, ParserConf, PatternStr, PatternRE, TokenDef
from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol
from .utils import classify, suppress
from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken

from .tree import Tree, SlottedTree as ST
from .visitors import Transformer, Visitor, v_args
@@ -576,7 +577,7 @@ class GrammarLoader:

try:
tree = self.canonize_tree.transform( self.parser.parse(grammar_text+'\n') )
except UnexpectedInput as e:
except UnexpectedCharacters as e:
raise GrammarError("Unexpected input %r at line %d column %d in %s" % (e.context, e.line, e.column, name))
except UnexpectedToken as e:
context = e.get_context(grammar_text)


+ 1
- 1
lark/parse_tree_builder.py View File

@@ -1,4 +1,4 @@
from .common import GrammarError
from .exceptions import GrammarError
from .utils import suppress
from .lexer import Token
from .grammar import Rule


+ 1
- 1
lark/parser_frontends.py View File

@@ -4,7 +4,7 @@ from .utils import get_regexp_width
from .parsers.grammar_analysis import GrammarAnalyzer
from .lexer import Lexer, ContextualLexer, Token

from .common import GrammarError
from .exceptions import GrammarError
from .parsers import lalr_parser, earley, xearley, resolve_ambig, cyk
from .tree import Tree



+ 1
- 1
lark/parsers/cyk.py View File

@@ -8,7 +8,7 @@
from collections import defaultdict
import itertools

from ..common import ParseError
from ..exceptions import ParseError
from ..lexer import Token
from ..tree import Tree
from ..grammar import Terminal as T, NonTerminal as NT, Symbol


+ 3
- 3
lark/parsers/earley.py View File

@@ -15,7 +15,7 @@

from ..tree import Tree
from ..visitors import Transformer_InPlace, v_args
from ..common import ParseError, UnexpectedToken
from ..exceptions import ParseError, UnexpectedToken
from .grammar_analysis import GrammarAnalyzer
from ..grammar import NonTerminal

@@ -197,8 +197,8 @@ class Parser:
next_set.add(item.advance(token) for item in column.to_scan if match(item.expect, token))

if not next_set:
expect = {i.expect for i in column.to_scan}
raise UnexpectedToken(token, expect, stream, set(column.to_scan))
expect = {i.expect.name for i in column.to_scan}
raise UnexpectedToken(token, expect, considered_rules=set(column.to_scan))

return next_set



+ 1
- 1
lark/parsers/grammar_analysis.py View File

@@ -1,6 +1,6 @@

from ..utils import bfs, fzset, classify
from ..common import GrammarError
from ..exceptions import GrammarError
from ..grammar import Rule, Terminal, NonTerminal




+ 1
- 1
lark/parsers/lalr_analysis.py View File

@@ -10,7 +10,7 @@ import logging
from collections import defaultdict

from ..utils import classify, classify_bool, bfs, fzset
from ..common import GrammarError
from ..exceptions import GrammarError

from .grammar_analysis import GrammarAnalyzer, Terminal



+ 2
- 2
lark/parsers/lalr_parser.py View File

@@ -2,7 +2,7 @@
"""
# Author: Erez Shinan (2017)
# Email : erezshin@gmail.com
from ..common import UnexpectedToken
from ..exceptions import UnexpectedToken

from .lalr_analysis import LALR_Analyzer, Shift

@@ -46,7 +46,7 @@ class _Parser:
return states[state][key]
except KeyError:
expected = states[state].keys()
raise UnexpectedToken(token, expected, seq, i, state=state)
raise UnexpectedToken(token, expected, state=state) # TODO filter out rules from expected

def reduce(rule):
size = len(rule.expansion)


+ 2
- 2
lark/parsers/xearley.py View File

@@ -20,8 +20,8 @@

from collections import defaultdict

from ..common import ParseError
from ..lexer import Token, UnexpectedInput
from ..exceptions import ParseError, UnexpectedInput
from ..lexer import Token
from ..tree import Tree
from .grammar_analysis import GrammarAnalyzer
from ..grammar import NonTerminal, Terminal


+ 15
- 0
lark/tree.py View File

@@ -110,6 +110,21 @@ class Tree(object):
self.data = data
self.children = children

# XXX Deprecated! Here for backwards compatibility <0.6.0
@property
def line(self):
return self.meta.line
@property
def column(self):
return self.meta.column
@property
def end_line(self):
return self.meta.end_line
@property
def end_column(self):
return self.meta.end_column


class SlottedTree(Tree):
__slots__ = 'data', 'children', 'rule', '_meta'



+ 1
- 2
tests/test_parser.py View File

@@ -18,8 +18,7 @@ from io import (
logging.basicConfig(level=logging.INFO)

from lark.lark import Lark
from lark.common import GrammarError, ParseError, UnexpectedToken
from lark.lexer import LexError, UnexpectedInput
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, LexError, UnexpectedInput
from lark.tree import Tree
from lark.visitors import Transformer



Loading…
Cancel
Save