Ver código fonte

Refactored all likely exceptions to inherit from LarkError, and improved error messages.

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.2
Erez Sh 3 anos atrás
pai
commit
c6819a0ed7
6 arquivos alterados com 69 adições e 54 exclusões
  1. +7
    -0
      lark/exceptions.py
  2. +22
    -16
      lark/lark.py
  3. +1
    -1
      lark/lexer.py
  4. +28
    -1
      lark/load_grammar.py
  5. +11
    -10
      lark/parser_frontends.py
  6. +0
    -26
      lark/utils.py

+ 7
- 0
lark/exceptions.py Ver arquivo

@@ -7,9 +7,16 @@ class LarkError(Exception):
pass


class ConfigurationError(LarkError, ValueError):
    """Raised when Lark is given invalid options or configuration.

    Also inherits from ValueError, so pre-existing ``except ValueError``
    handlers continue to catch these errors.
    """


class GrammarError(LarkError):
    """Raised when the grammar definition itself is invalid."""

# NOTE(review): this class name violates PEP 8 class naming (CapWords without
# underscores), it is never raised or referenced anywhere in the visible diff,
# and the file's change stats (+7/-0) do not account for these lines — it
# looks like a stray duplicate of GrammarError. Confirm against the upstream
# commit before relying on it.
class GrammarError_Value(LarkError):
    pass


class ParseError(LarkError):
    """Raised when the input text cannot be parsed under the grammar."""


+ 22
- 16
lark/lark.py Ver arquivo

@@ -1,5 +1,5 @@
from __future__ import absolute_import
from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken
from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken, ConfigurationError

import sys, os, pickle, hashlib
from io import open
@@ -24,6 +24,10 @@ except ImportError:

###{standalone

def assert_config(value, options, msg='Got %r, expected one of %s'):
    """Validate that *value* is one of *options*.

    Raises ConfigurationError with ``msg % (value, options)`` when the value
    is not an allowed option; returns None otherwise.
    """
    if value in options:
        return
    raise ConfigurationError(msg % (value, options))


class LarkOptions(Serialize):
"""Specifies the options for Lark
@@ -155,14 +159,15 @@ class LarkOptions(Serialize):

self.__dict__['options'] = options

assert self.parser in ('earley', 'lalr', 'cyk', None)

assert_config(self.parser, ('earley', 'lalr', 'cyk', None))

if self.parser == 'earley' and self.transformer:
raise ValueError('Cannot specify an embedded transformer when using the Earley algorithm.'
raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm.'
'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)')

if o:
raise ValueError("Unknown options: %s" % o.keys())
raise ConfigurationError("Unknown options: %s" % o.keys())

def __getattr__(self, name):
try:
@@ -171,7 +176,7 @@ class LarkOptions(Serialize):
raise AttributeError(e)

def __setattr__(self, name, value):
assert name in self.options
assert_config(name, self.options.keys(), "%r isn't a valid option. Expected one of: %s")
self.options[name] = value

def serialize(self, memo):
@@ -237,20 +242,20 @@ class Lark(Serialize):
self.source_grammar = grammar
if self.options.use_bytes:
if not isascii(grammar):
raise ValueError("Grammar must be ascii only, when use_bytes=True")
raise ConfigurationError("Grammar must be ascii only, when use_bytes=True")
if sys.version_info[0] == 2 and self.options.use_bytes != 'force':
raise NotImplementedError("`use_bytes=True` may have issues on python2."
raise ConfigurationError("`use_bytes=True` may have issues on python2."
"Use `use_bytes='force'` to use it at your own risk.")

cache_fn = None
if self.options.cache:
if self.options.parser != 'lalr':
raise NotImplementedError("cache only works with parser='lalr' for now")
raise ConfigurationError("cache only works with parser='lalr' for now")
if isinstance(self.options.cache, STRING_TYPE):
cache_fn = self.options.cache
else:
if self.options.cache is not True:
raise ValueError("cache argument must be bool or str")
raise ConfigurationError("cache argument must be bool or str")
unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals')
from . import __version__
options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable)
@@ -277,24 +282,25 @@ class Lark(Serialize):
else:
assert False, self.options.parser
lexer = self.options.lexer
assert lexer in ('standard', 'contextual', 'dynamic', 'dynamic_complete') or issubclass(lexer, Lexer)
if isinstance(lexer, type):
assert issubclass(lexer, Lexer) # XXX Is this really important? Maybe just ensure interface compliance
else:
assert_config(lexer, ('standard', 'contextual', 'dynamic', 'dynamic_complete'))

if self.options.ambiguity == 'auto':
if self.options.parser == 'earley':
self.options.ambiguity = 'resolve'
else:
disambig_parsers = ['earley', 'cyk']
assert self.options.parser in disambig_parsers, (
'Only %s supports disambiguation right now') % ', '.join(disambig_parsers)
assert_config(self.options.parser, ('earley', 'cyk'), "%r doesn't support disambiguation. Use one of these parsers instead: %s")

if self.options.priority == 'auto':
self.options.priority = 'normal'

if self.options.priority not in _VALID_PRIORITY_OPTIONS:
raise ValueError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS))
raise ConfigurationError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS))
assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"'
if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS:
raise ValueError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))
raise ConfigurationError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))

# Parse the grammar file and compose the grammars
self.grammar = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens)
@@ -401,7 +407,7 @@ class Lark(Serialize):
memo = SerializeMemoizer.deserialize(memo, {'Rule': Rule, 'TerminalDef': TerminalDef}, {})
options = dict(data['options'])
if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults):
raise ValueError("Some options are not allowed when loading a Parser: {}"
raise ConfigurationError("Some options are not allowed when loading a Parser: {}"
.format(set(kwargs) - _LOAD_ALLOWED_OPTIONS))
options.update(kwargs)
self.options = LarkOptions.deserialize(options, memo)


+ 1
- 1
lark/lexer.py Ver arquivo

@@ -366,7 +366,7 @@ class TraditionalLexer(Lexer):
if t.type in self.callback:
t = self.callback[t.type](t)
if not isinstance(t, Token):
raise ValueError("Callbacks must return a token (returned %r)" % t)
raise LexError("Callbacks must return a token (returned %r)" % t)
lex_state.last_token = t
return t
else:


+ 28
- 1
lark/load_grammar.py Ver arquivo

@@ -5,8 +5,9 @@ import sys
from copy import copy, deepcopy
from io import open
import pkgutil
from ast import literal_eval

from .utils import bfs, eval_escaping, Py36, logger, classify_bool
from .utils import bfs, Py36, logger, classify_bool
from .lexer import Token, TerminalDef, PatternStr, PatternRE

from .parse_tree_builder import ParseTreeBuilder
@@ -405,6 +406,32 @@ def _rfind(s, choices):
return max(s.rfind(c) for c in choices)


def eval_escaping(s):
    """Expand backslash escape sequences in a grammar string literal.

    A backslash followed by one of u, x, n, f, t, r (or another backslash) is
    forwarded for Python's own literal parser to interpret; a backslash before
    any other character is preserved literally in the result.

    Raises GrammarError on a trailing backslash or any sequence that Python's
    parser rejects.
    """
    chars = iter(s)
    pieces = []
    for ch in chars:
        pieces.append(ch)
        if ch != '\\':
            continue
        try:
            esc = next(chars)
        except StopIteration:
            raise GrammarError("Literal ended unexpectedly (bad escaping): `%r`" % s)
        if esc == '\\':
            # Double it so literal_eval collapses it back to a single backslash.
            pieces.append('\\\\')
        elif esc not in 'uxnftr':
            # Not a recognized escape: keep the backslash as a literal character.
            pieces.append('\\')
        pieces.append(esc)
    body = ''.join(pieces).replace('\\"', '"').replace("'", "\\'")

    to_eval = "u'''%s'''" % body
    try:
        return literal_eval(to_eval)
    except SyntaxError as e:
        raise GrammarError(s, e)


def _literal_to_pattern(literal):
v = literal.value
flag_start = _rfind(v, '/"')+1


+ 11
- 10
lark/parser_frontends.py Ver arquivo

@@ -1,3 +1,4 @@
from .exceptions import ConfigurationError, GrammarError
from .utils import get_regexp_width, Serialize
from .parsers.grammar_analysis import GrammarAnalyzer
from .lexer import LexerThread, TraditionalLexer, ContextualLexer, Lexer, Token, TerminalDef
@@ -29,7 +30,7 @@ def _wrap_lexer(lexer_class):
def get_frontend(parser, lexer):
if parser=='lalr':
if lexer is None:
raise ValueError('The LALR parser requires use of a lexer')
raise ConfigurationError('The LALR parser requires use of a lexer')
elif lexer == 'standard':
return LALR_TraditionalLexer
elif lexer == 'contextual':
@@ -41,7 +42,7 @@ def get_frontend(parser, lexer):
self.lexer = wrapped(self.lexer_conf)
return LALR_CustomLexerWrapper
else:
raise ValueError('Unknown lexer: %s' % lexer)
raise ConfigurationError('Unknown lexer: %s' % lexer)
elif parser=='earley':
if lexer=='standard':
return Earley_Traditional
@@ -50,7 +51,7 @@ def get_frontend(parser, lexer):
elif lexer=='dynamic_complete':
return XEarley_CompleteLex
elif lexer=='contextual':
raise ValueError('The Earley parser does not support the contextual parser')
raise ConfigurationError('The Earley parser does not support the contextual parser')
elif issubclass(lexer, Lexer):
wrapped = _wrap_lexer(lexer)
class Earley_CustomLexerWrapper(Earley_WithLexer):
@@ -58,14 +59,14 @@ def get_frontend(parser, lexer):
self.lexer = wrapped(self.lexer_conf)
return Earley_CustomLexerWrapper
else:
raise ValueError('Unknown lexer: %s' % lexer)
raise ConfigurationError('Unknown lexer: %s' % lexer)
elif parser == 'cyk':
if lexer == 'standard':
return CYK
else:
raise ValueError('CYK parser requires using standard parser.')
raise ConfigurationError('CYK parser requires using standard parser.')
else:
raise ValueError('Unknown parser: %s' % parser)
raise ConfigurationError('Unknown parser: %s' % parser)


class _ParserFrontend(Serialize):
@@ -73,7 +74,7 @@ class _ParserFrontend(Serialize):
if start is None:
start = self.start
if len(start) > 1:
raise ValueError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start)
raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start)
start ,= start
return self.parser.parse(input, start, *args)

@@ -215,15 +216,15 @@ class XEarley(_ParserFrontend):
self.regexps = {}
for t in lexer_conf.tokens:
if t.priority != 1:
raise ValueError("Dynamic Earley doesn't support weights on terminals", t, t.priority)
raise GrammarError("Dynamic Earley doesn't support weights on terminals", t, t.priority)
regexp = t.pattern.to_regexp()
try:
width = get_regexp_width(regexp)[0]
except ValueError:
raise ValueError("Bad regexp in token %s: %s" % (t.name, regexp))
raise GrammarError("Bad regexp in token %s: %s" % (t.name, regexp))
else:
if width == 0:
raise ValueError("Dynamic Earley doesn't allow zero-width regexps", t)
raise GrammarError("Dynamic Earley doesn't allow zero-width regexps", t)
if lexer_conf.use_bytes:
regexp = regexp.encode('utf-8')



+ 0
- 26
lark/utils.py Ver arquivo

@@ -1,6 +1,5 @@
import os
from functools import reduce
from ast import literal_eval
from collections import deque

###{standalone
@@ -225,31 +224,6 @@ class Enumerator(Serialize):
return r


def eval_escaping(s):
    """Expand backslash escapes in *s* via Python's literal parser.

    A backslash followed by one of u, x, n, f, t, r (or another backslash) is
    left for Python to interpret; a backslash before any other character is
    kept verbatim.  Raises ValueError for a trailing backslash or an
    otherwise unparsable literal.
    """
    buf = ''
    it = iter(s)
    for ch in it:
        buf += ch
        if ch == '\\':
            try:
                nxt = next(it)
            except StopIteration:
                raise ValueError("Literal ended unexpectedly (bad escaping): `%r`" % s)
            if nxt == '\\':
                # Doubled so literal_eval folds it back into one backslash.
                buf += '\\\\'
            elif nxt not in 'uxnftr':
                # Unrecognized escape: retain the backslash literally.
                buf += '\\'
            buf += nxt
    buf = buf.replace('\\"', '"').replace("'", "\\'")

    source = "u'''%s'''" % buf
    try:
        result = literal_eval(source)
    except SyntaxError as e:
        raise ValueError(s, e)
    return result


def combine_alternatives(lists):
"""


Carregando…
Cancelar
Salvar