
Refactored all likely exceptions to inherit from LarkError, and improved error messages.

Erez Sh, 3 years ago
commit c6819a0ed7
6 changed files with 69 additions and 54 deletions
  1. lark/exceptions.py        +7   -0
  2. lark/lark.py              +22  -16
  3. lark/lexer.py             +1   -1
  4. lark/load_grammar.py      +28  -1
  5. lark/parser_frontends.py  +11  -10
  6. lark/utils.py             +0   -26
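
The practical effect of the refactor, for users of the library: configuration mistakes that previously escaped as plain ValueError, NotImplementedError, or bare assertions now raise lark's own exception types, so a single except clause can cover everything lark raises. A minimal sketch of that, assuming nothing beyond what this commit shows (the grammar string is illustrative):

    from lark import Lark
    from lark.exceptions import LarkError, ConfigurationError

    grammar = 'start: "a"'   # illustrative grammar, not part of this commit

    try:
        # cache is only supported with parser='lalr', so this is a configuration mistake
        Lark(grammar, parser='earley', cache=True)
    except ConfigurationError as e:   # was a NotImplementedError before this commit
        print('bad configuration:', e)
    except LarkError as e:            # umbrella for anything else lark raises
        print('other lark error:', e)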

lark/exceptions.py (+7, -0)

@@ -7,9 +7,16 @@ class LarkError(Exception):
     pass
 
 
+class ConfigurationError(LarkError, ValueError):
+    pass
+
+
 class GrammarError(LarkError):
     pass
 
 
+class GrammarError_Value(LarkError):
+    pass
+
 class ParseError(LarkError):
     pass
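
Since ConfigurationError derives from both LarkError and ValueError, callers that were catching ValueError for these mistakes keep working, while new code can narrow to the lark-specific hierarchy. A small sketch of that contract:

    from lark.exceptions import ConfigurationError, LarkError

    err = ConfigurationError("cache argument must be bool or str")
    assert isinstance(err, LarkError)    # new: part of the lark exception hierarchy
    assert isinstance(err, ValueError)   # old: existing `except ValueError` blocks still match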


lark/lark.py (+22, -16)

@@ -1,5 +1,5 @@
 from __future__ import absolute_import
-from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken
+from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken, ConfigurationError
 
 import sys, os, pickle, hashlib
 from io import open
@@ -24,6 +24,10 @@ except ImportError:
 
 ###{standalone
 
+def assert_config(value, options, msg='Got %r, expected one of %s'):
+    if value not in options:
+        raise ConfigurationError(msg % (value, options))
+
 
 class LarkOptions(Serialize):
     """Specifies the options for Lark
@@ -155,14 +159,15 @@ class LarkOptions(Serialize):
 
         self.__dict__['options'] = options
 
-        assert self.parser in ('earley', 'lalr', 'cyk', None)
+
+        assert_config(self.parser, ('earley', 'lalr', 'cyk', None))
 
         if self.parser == 'earley' and self.transformer:
-            raise ValueError('Cannot specify an embedded transformer when using the Earley algorithm.'
+            raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm.'
                              'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)')
 
         if o:
-            raise ValueError("Unknown options: %s" % o.keys())
+            raise ConfigurationError("Unknown options: %s" % o.keys())
 
     def __getattr__(self, name):
         try:
@@ -171,7 +176,7 @@ class LarkOptions(Serialize):
             raise AttributeError(e)
 
     def __setattr__(self, name, value):
-        assert name in self.options
+        assert_config(name, self.options.keys(), "%r isn't a valid option. Expected one of: %s")
        self.options[name] = value
 
     def serialize(self, memo):
@@ -237,20 +242,20 @@ class Lark(Serialize):
             self.source_grammar = grammar
             if self.options.use_bytes:
                 if not isascii(grammar):
-                    raise ValueError("Grammar must be ascii only, when use_bytes=True")
+                    raise ConfigurationError("Grammar must be ascii only, when use_bytes=True")
                 if sys.version_info[0] == 2 and self.options.use_bytes != 'force':
-                    raise NotImplementedError("`use_bytes=True` may have issues on python2."
+                    raise ConfigurationError("`use_bytes=True` may have issues on python2."
                                               "Use `use_bytes='force'` to use it at your own risk.")
 
         cache_fn = None
         if self.options.cache:
             if self.options.parser != 'lalr':
-                raise NotImplementedError("cache only works with parser='lalr' for now")
+                raise ConfigurationError("cache only works with parser='lalr' for now")
             if isinstance(self.options.cache, STRING_TYPE):
                 cache_fn = self.options.cache
             else:
                 if self.options.cache is not True:
-                    raise ValueError("cache argument must be bool or str")
+                    raise ConfigurationError("cache argument must be bool or str")
                 unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals')
                 from . import __version__
                 options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable)
@@ -277,24 +282,25 @@ class Lark(Serialize):
         else:
             assert False, self.options.parser
         lexer = self.options.lexer
-        assert lexer in ('standard', 'contextual', 'dynamic', 'dynamic_complete') or issubclass(lexer, Lexer)
+        if isinstance(lexer, type):
+            assert issubclass(lexer, Lexer)     # XXX Is this really important? Maybe just ensure interface compliance
+        else:
+            assert_config(lexer, ('standard', 'contextual', 'dynamic', 'dynamic_complete'))
 
         if self.options.ambiguity == 'auto':
             if self.options.parser == 'earley':
                 self.options.ambiguity = 'resolve'
         else:
-            disambig_parsers = ['earley', 'cyk']
-            assert self.options.parser in disambig_parsers, (
-                'Only %s supports disambiguation right now') % ', '.join(disambig_parsers)
+            assert_config(self.options.parser, ('earley', 'cyk'), "%r doesn't support disambiguation. Use one of these parsers instead: %s")
 
         if self.options.priority == 'auto':
             self.options.priority = 'normal'
 
         if self.options.priority not in _VALID_PRIORITY_OPTIONS:
-            raise ValueError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS))
+            raise ConfigurationError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS))
         assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"'
         if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS:
-            raise ValueError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))
+            raise ConfigurationError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))
 
         # Parse the grammar file and compose the grammars
         self.grammar = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens)
@@ -401,7 +407,7 @@ class Lark(Serialize):
         memo = SerializeMemoizer.deserialize(memo, {'Rule': Rule, 'TerminalDef': TerminalDef}, {})
         options = dict(data['options'])
         if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults):
-            raise ValueError("Some options are not allowed when loading a Parser: {}"
+            raise ConfigurationError("Some options are not allowed when loading a Parser: {}"
                              .format(set(kwargs) - _LOAD_ALLOWED_OPTIONS))
         options.update(kwargs)
         self.options = LarkOptions.deserialize(options, memo)
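
With assert_config in place, an unknown parser name or a misspelled option no longer fails with a bare AssertionError or ValueError; both paths raise ConfigurationError and name the offending value. A sketch of the two paths above, assuming only what this diff shows (the grammar string is illustrative):

    from lark import Lark
    from lark.exceptions import ConfigurationError

    grammar = 'start: "a"'   # illustrative only

    try:
        Lark(grammar, parser='peg')       # not one of ('earley', 'lalr', 'cyk', None)
    except ConfigurationError as e:
        print(e)                          # roughly: Got 'peg', expected one of (...)

    try:
        Lark(grammar, parsre='lalr')      # typo in the option name
    except ConfigurationError as e:
        print(e)                          # roughly: Unknown options: ...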


lark/lexer.py (+1, -1)

@@ -366,7 +366,7 @@ class TraditionalLexer(Lexer):
             if t.type in self.callback:
                 t = self.callback[t.type](t)
                 if not isinstance(t, Token):
-                    raise ValueError("Callbacks must return a token (returned %r)" % t)
+                    raise LexError("Callbacks must return a token (returned %r)" % t)
             lex_state.last_token = t
             return t
         else:
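
The callback contract for lexer_callbacks is unchanged, but violating it now raises LexError, which is part of the LarkError hierarchy, rather than a generic ValueError. A hedged sketch of a callback that breaks the contract (the grammar and terminal name are made up for illustration):

    from lark import Lark
    from lark.exceptions import LarkError

    grammar = '''
    start: NUMBER+
    NUMBER: /[0-9]+/
    %ignore " "
    '''

    def bad_callback(token):
        return int(token)          # wrong: a lexer callback must return a Token

    parser = Lark(grammar, parser='lalr', lexer_callbacks={'NUMBER': bad_callback})
    try:
        parser.parse('1 2 3')
    except LarkError as e:         # the LexError is caught here, since it subclasses LarkError
        print(e)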


lark/load_grammar.py (+28, -1)

@@ -5,8 +5,9 @@ import sys
 from copy import copy, deepcopy
 from io import open
 import pkgutil
+from ast import literal_eval
 
-from .utils import bfs, eval_escaping, Py36, logger, classify_bool
+from .utils import bfs, Py36, logger, classify_bool
 from .lexer import Token, TerminalDef, PatternStr, PatternRE
 
 from .parse_tree_builder import ParseTreeBuilder
@@ -405,6 +406,32 @@ def _rfind(s, choices):
     return max(s.rfind(c) for c in choices)
 
 
+def eval_escaping(s):
+    w = ''
+    i = iter(s)
+    for n in i:
+        w += n
+        if n == '\\':
+            try:
+                n2 = next(i)
+            except StopIteration:
+                raise GrammarError("Literal ended unexpectedly (bad escaping): `%r`" % s)
+            if n2 == '\\':
+                w += '\\\\'
+            elif n2 not in 'uxnftr':
+                w += '\\'
+            w += n2
+    w = w.replace('\\"', '"').replace("'", "\\'")
+
+    to_eval = "u'''%s'''" % w
+    try:
+        s = literal_eval(to_eval)
+    except SyntaxError as e:
+        raise GrammarError(s, e)
+
+    return s
+
+
 def _literal_to_pattern(literal):
     v = literal.value
     flag_start = _rfind(v, '/"')+1
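
Moving eval_escaping here also changes its failure mode: a malformed escape in a grammar literal now surfaces as GrammarError instead of ValueError. Roughly how the helper behaves, with illustrative inputs:

    from lark.load_grammar import eval_escaping
    from lark.exceptions import GrammarError

    print(eval_escaping(r'a\nb'))     # escape sequences are evaluated: prints 'a', newline, 'b'
    try:
        eval_escaping('oops\\')       # literal ends with a lone backslash
    except GrammarError as e:         # previously a ValueError raised from utils.py
        print(e)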


lark/parser_frontends.py (+11, -10)

@@ -1,3 +1,4 @@
+from .exceptions import ConfigurationError, GrammarError
 from .utils import get_regexp_width, Serialize
 from .parsers.grammar_analysis import GrammarAnalyzer
 from .lexer import LexerThread, TraditionalLexer, ContextualLexer, Lexer, Token, TerminalDef
@@ -29,7 +30,7 @@ def _wrap_lexer(lexer_class):
 def get_frontend(parser, lexer):
     if parser=='lalr':
         if lexer is None:
-            raise ValueError('The LALR parser requires use of a lexer')
+            raise ConfigurationError('The LALR parser requires use of a lexer')
         elif lexer == 'standard':
             return LALR_TraditionalLexer
         elif lexer == 'contextual':
@@ -41,7 +42,7 @@ def get_frontend(parser, lexer):
                     self.lexer = wrapped(self.lexer_conf)
             return LALR_CustomLexerWrapper
         else:
-            raise ValueError('Unknown lexer: %s' % lexer)
+            raise ConfigurationError('Unknown lexer: %s' % lexer)
     elif parser=='earley':
         if lexer=='standard':
             return Earley_Traditional
@@ -50,7 +51,7 @@ def get_frontend(parser, lexer):
         elif lexer=='dynamic_complete':
             return XEarley_CompleteLex
         elif lexer=='contextual':
-            raise ValueError('The Earley parser does not support the contextual parser')
+            raise ConfigurationError('The Earley parser does not support the contextual parser')
         elif issubclass(lexer, Lexer):
             wrapped = _wrap_lexer(lexer)
             class Earley_CustomLexerWrapper(Earley_WithLexer):
@@ -58,14 +59,14 @@ def get_frontend(parser, lexer):
                     self.lexer = wrapped(self.lexer_conf)
             return Earley_CustomLexerWrapper
         else:
-            raise ValueError('Unknown lexer: %s' % lexer)
+            raise ConfigurationError('Unknown lexer: %s' % lexer)
     elif parser == 'cyk':
         if lexer == 'standard':
             return CYK
         else:
-            raise ValueError('CYK parser requires using standard parser.')
+            raise ConfigurationError('CYK parser requires using standard parser.')
     else:
-        raise ValueError('Unknown parser: %s' % parser)
+        raise ConfigurationError('Unknown parser: %s' % parser)
 
 
 class _ParserFrontend(Serialize):
@@ -73,7 +74,7 @@ class _ParserFrontend(Serialize):
         if start is None:
             start = self.start
         if len(start) > 1:
-            raise ValueError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start)
+            raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start)
         start ,= start
         return self.parser.parse(input, start, *args)
 
@@ -215,15 +216,15 @@ class XEarley(_ParserFrontend):
         self.regexps = {}
         for t in lexer_conf.tokens:
             if t.priority != 1:
-                raise ValueError("Dynamic Earley doesn't support weights on terminals", t, t.priority)
+                raise GrammarError("Dynamic Earley doesn't support weights on terminals", t, t.priority)
             regexp = t.pattern.to_regexp()
             try:
                 width = get_regexp_width(regexp)[0]
             except ValueError:
-                raise ValueError("Bad regexp in token %s: %s" % (t.name, regexp))
+                raise GrammarError("Bad regexp in token %s: %s" % (t.name, regexp))
             else:
                 if width == 0:
-                    raise ValueError("Dynamic Earley doesn't allow zero-width regexps", t)
+                    raise GrammarError("Dynamic Earley doesn't allow zero-width regexps", t)
             if lexer_conf.use_bytes:
                 regexp = regexp.encode('utf-8')
 



lark/utils.py (+0, -26)

@@ -1,6 +1,5 @@
 import os
 from functools import reduce
-from ast import literal_eval
 from collections import deque
 
 ###{standalone
@@ -225,31 +224,6 @@ class Enumerator(Serialize):
         return r
 
 
-def eval_escaping(s):
-    w = ''
-    i = iter(s)
-    for n in i:
-        w += n
-        if n == '\\':
-            try:
-                n2 = next(i)
-            except StopIteration:
-                raise ValueError("Literal ended unexpectedly (bad escaping): `%r`" % s)
-            if n2 == '\\':
-                w += '\\\\'
-            elif n2 not in 'uxnftr':
-                w += '\\'
-            w += n2
-    w = w.replace('\\"', '"').replace("'", "\\'")
-
-    to_eval = "u'''%s'''" % w
-    try:
-        s = literal_eval(to_eval)
-    except SyntaxError as e:
-        raise ValueError(s, e)
-
-    return s
-
 
 def combine_alternatives(lists):
     """

