--- a/lark/exceptions.py
+++ b/lark/exceptions.py
@@ -7,9 +7,16 @@ class LarkError(Exception):
     pass
 
 
+class ConfigurationError(LarkError, ValueError):
+    pass
+
+
 class GrammarError(LarkError):
     pass
 
 
 class ParseError(LarkError):
     pass
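Because `ConfigurationError` derives from both `LarkError` and `ValueError`, code that caught the `ValueError`s raised before this change should keep working. A minimal sketch (the misspelled `parser` value is just for illustration):

```python
from lark import Lark
from lark.exceptions import ConfigurationError

try:
    Lark('start: "a"', parser='lalr2')  # typo: not a valid parser name
except ValueError as e:
    # Still caught: ConfigurationError subclasses ValueError,
    # so pre-existing error handling keeps working.
    assert isinstance(e, ConfigurationError)
```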
--- a/lark/lark.py
+++ b/lark/lark.py
@@ -1,5 +1,5 @@
 from __future__ import absolute_import
 
-from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken
+from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken, ConfigurationError
 import sys, os, pickle, hashlib
 from io import open
@@ -24,6 +24,10 @@ except ImportError:
 
 ###{standalone
 
+def assert_config(value, options, msg='Got %r, expected one of %s'):
+    if value not in options:
+        raise ConfigurationError(msg % (value, options))
+
 
 class LarkOptions(Serialize):
     """Specifies the options for Lark
@@ -155,14 +159,15 @@ class LarkOptions(Serialize):
         self.__dict__['options'] = options
 
-        assert self.parser in ('earley', 'lalr', 'cyk', None)
+        assert_config(self.parser, ('earley', 'lalr', 'cyk', None))
 
         if self.parser == 'earley' and self.transformer:
-            raise ValueError('Cannot specify an embedded transformer when using the Earley algorithm.'
+            raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm. '
                              'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)')
 
         if o:
-            raise ValueError("Unknown options: %s" % o.keys())
+            raise ConfigurationError("Unknown options: %s" % o.keys())
 
     def __getattr__(self, name):
         try:
@@ -171,7 +176,7 @@ class LarkOptions(Serialize):
             raise AttributeError(e)
 
     def __setattr__(self, name, value):
-        assert name in self.options
+        assert_config(name, self.options.keys(), "%r isn't a valid option. Expected one of: %s")
         self.options[name] = value
 
     def serialize(self, memo):
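With `__setattr__` routed through `assert_config`, assigning to a misspelled option now fails with a descriptive `ConfigurationError` instead of a bare `AssertionError`. A sketch of the new behavior (the option names are illustrative):

```python
from lark import Lark
from lark.exceptions import ConfigurationError

parser = Lark('start: "a"')
parser.options.debug = True  # valid option: accepted

try:
    parser.options.debgu = True  # misspelled
except ConfigurationError as e:
    print(e)  # 'debgu' isn't a valid option. Expected one of: ...
```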
@@ -237,20 +242,20 @@ class Lark(Serialize):
         self.source_grammar = grammar
         if self.options.use_bytes:
             if not isascii(grammar):
-                raise ValueError("Grammar must be ascii only, when use_bytes=True")
+                raise ConfigurationError("Grammar must be ascii only, when use_bytes=True")
             if sys.version_info[0] == 2 and self.options.use_bytes != 'force':
-                raise NotImplementedError("`use_bytes=True` may have issues on python2."
+                raise ConfigurationError("`use_bytes=True` may have issues on python2. "
                                           "Use `use_bytes='force'` to use it at your own risk.")
 
         cache_fn = None
         if self.options.cache:
             if self.options.parser != 'lalr':
-                raise NotImplementedError("cache only works with parser='lalr' for now")
+                raise ConfigurationError("cache only works with parser='lalr' for now")
             if isinstance(self.options.cache, STRING_TYPE):
                 cache_fn = self.options.cache
             else:
                 if self.options.cache is not True:
-                    raise ValueError("cache argument must be bool or str")
+                    raise ConfigurationError("cache argument must be bool or str")
 
             unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals')
             from . import __version__
             options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable)
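All three failure modes above (non-ASCII grammar with `use_bytes`, `cache` with a non-LALR parser, a bad `cache` value) now raise the same exception type, so callers can handle misconfiguration uniformly. For example:

```python
from lark import Lark
from lark.exceptions import ConfigurationError

try:
    Lark('start: "a"', parser='earley', cache=True)  # cache requires parser='lalr'
except ConfigurationError as e:
    print(e)  # cache only works with parser='lalr' for now
```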
@@ -277,24 +282,25 @@ class Lark(Serialize):
         else:
             assert False, self.options.parser
 
         lexer = self.options.lexer
-        assert lexer in ('standard', 'contextual', 'dynamic', 'dynamic_complete') or issubclass(lexer, Lexer)
+        if isinstance(lexer, type):
+            assert issubclass(lexer, Lexer)     # XXX Is this really important? Maybe just ensure interface compliance
+        else:
+            assert_config(lexer, ('standard', 'contextual', 'dynamic', 'dynamic_complete'))
 
         if self.options.ambiguity == 'auto':
             if self.options.parser == 'earley':
                 self.options.ambiguity = 'resolve'
         else:
-            disambig_parsers = ['earley', 'cyk']
-            assert self.options.parser in disambig_parsers, (
-                'Only %s supports disambiguation right now') % ', '.join(disambig_parsers)
+            assert_config(self.options.parser, ('earley', 'cyk'), "%r doesn't support disambiguation. Use one of these parsers instead: %s")
 
         if self.options.priority == 'auto':
             self.options.priority = 'normal'
 
         if self.options.priority not in _VALID_PRIORITY_OPTIONS:
-            raise ValueError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS))
+            raise ConfigurationError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS))
         assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"'
         if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS:
-            raise ValueError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))
+            raise ConfigurationError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))
 
         # Parse the grammar file and compose the grammars
         self.grammar = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens)
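The reworded disambiguation check now tells the user which parsers would work. A sketch:

```python
from lark import Lark
from lark.exceptions import ConfigurationError

try:
    Lark('start: "a"', parser='lalr', ambiguity='explicit')
except ConfigurationError as e:
    print(e)
    # 'lalr' doesn't support disambiguation. Use one of these parsers instead: ('earley', 'cyk')
```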
@@ -401,7 +407,7 @@ class Lark(Serialize):
         memo = SerializeMemoizer.deserialize(memo, {'Rule': Rule, 'TerminalDef': TerminalDef}, {})
         options = dict(data['options'])
         if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults):
-            raise ValueError("Some options are not allowed when loading a Parser: {}"
+            raise ConfigurationError("Some options are not allowed when loading a Parser: {}"
                              .format(set(kwargs) - _LOAD_ALLOWED_OPTIONS))
         options.update(kwargs)
         self.options = LarkOptions.deserialize(options, memo)
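This path runs when a serialized parser is reloaded (e.g. by the standalone tool) and rejects overrides that can only be applied at grammar-compilation time. A sketch against the internal `_load` keyword interface, assuming the usual save/load round-trip (the rejected option here is illustrative):

```python
import io
from lark import Lark
from lark.exceptions import ConfigurationError

parser = Lark('start: "a"', parser='lalr')
buf = io.BytesIO()
parser.save(buf)
buf.seek(0)

reloaded = Lark.__new__(Lark)
try:
    reloaded._load(buf, start=['b'])  # 'start' can't be overridden at load time
except ConfigurationError as e:
    print(e)  # Some options are not allowed when loading a Parser: {'start'}
```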
--- a/lark/lexer.py
+++ b/lark/lexer.py
@@ -366,7 +366,7 @@ class TraditionalLexer(Lexer):
             if t.type in self.callback:
                 t = self.callback[t.type](t)
                 if not isinstance(t, Token):
-                    raise ValueError("Callbacks must return a token (returned %r)" % t)
+                    raise LexError("Callbacks must return a token (returned %r)" % t)
             lex_state.last_token = t
             return t
         else:
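A callback returning something other than a `Token` is a lexing-stage bug, so `LexError` fits better than `ValueError` here. A sketch of a callback that trips the check (the grammar and callback are illustrative):

```python
from lark import Lark
from lark.exceptions import LexError

# Lexer callbacks must return a Token; returning a plain string is an error.
bad_callbacks = {'WORD': lambda tok: str(tok)}
parser = Lark('start: WORD\n%import common.WORD',
              parser='lalr', lexer_callbacks=bad_callbacks)

try:
    parser.parse('hello')
except LexError as e:
    print(e)  # Callbacks must return a token (returned 'hello')
```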
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -5,8 +5,9 @@ import sys
 from copy import copy, deepcopy
 from io import open
 import pkgutil
+from ast import literal_eval
 
-from .utils import bfs, eval_escaping, Py36, logger, classify_bool
+from .utils import bfs, Py36, logger, classify_bool
 from .lexer import Token, TerminalDef, PatternStr, PatternRE
 
 from .parse_tree_builder import ParseTreeBuilder
@@ -405,6 +406,32 @@ def _rfind(s, choices):
     return max(s.rfind(c) for c in choices)
 
 
+def eval_escaping(s):
+    w = ''
+    i = iter(s)
+    for n in i:
+        w += n
+        if n == '\\':
+            try:
+                n2 = next(i)
+            except StopIteration:
+                raise GrammarError("Literal ended unexpectedly (bad escaping): `%r`" % s)
+            if n2 == '\\':
+                w += '\\\\'
+            elif n2 not in 'uxnftr':
+                w += '\\'
+            w += n2
+    w = w.replace('\\"', '"').replace("'", "\\'")
+
+    to_eval = "u'''%s'''" % w
+    try:
+        s = literal_eval(to_eval)
+    except SyntaxError as e:
+        raise GrammarError(s, e)
+
+    return s
+
+
 def _literal_to_pattern(literal):
     v = literal.value
     flag_start = _rfind(v, '/"')+1
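`eval_escaping` moves here from `utils.py` (removed below) so that bad escape sequences in grammar literals raise `GrammarError`, a grammar-authoring problem, rather than a generic `ValueError`. The logic itself is unchanged; roughly:

```python
from lark.load_grammar import eval_escaping  # its new home, per this patch
from lark.exceptions import GrammarError

assert eval_escaping(r'\n') == '\n'   # recognized escape -> real newline
assert eval_escaping(r'\d') == r'\d'  # unrecognized escape keeps its backslash

try:
    eval_escaping('broken\\')  # trailing backslash with nothing to escape
except GrammarError as e:
    print(e)  # Literal ended unexpectedly (bad escaping): `'broken\\'`
```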
@@ -1,3 +1,4 @@ | |||
from .exceptions import ConfigurationError, GrammarError | |||
from .utils import get_regexp_width, Serialize | |||
from .parsers.grammar_analysis import GrammarAnalyzer | |||
from .lexer import LexerThread, TraditionalLexer, ContextualLexer, Lexer, Token, TerminalDef | |||
@@ -29,7 +30,7 @@ def _wrap_lexer(lexer_class):
 def get_frontend(parser, lexer):
     if parser=='lalr':
         if lexer is None:
-            raise ValueError('The LALR parser requires use of a lexer')
+            raise ConfigurationError('The LALR parser requires use of a lexer')
         elif lexer == 'standard':
             return LALR_TraditionalLexer
         elif lexer == 'contextual':
@@ -41,7 +42,7 @@ def get_frontend(parser, lexer):
                     self.lexer = wrapped(self.lexer_conf)
             return LALR_CustomLexerWrapper
         else:
-            raise ValueError('Unknown lexer: %s' % lexer)
+            raise ConfigurationError('Unknown lexer: %s' % lexer)
     elif parser=='earley':
         if lexer=='standard':
             return Earley_Traditional
@@ -50,7 +51,7 @@ def get_frontend(parser, lexer):
         elif lexer=='dynamic_complete':
             return XEarley_CompleteLex
         elif lexer=='contextual':
-            raise ValueError('The Earley parser does not support the contextual parser')
+            raise ConfigurationError('The Earley parser does not support the contextual lexer')
         elif issubclass(lexer, Lexer):
             wrapped = _wrap_lexer(lexer)
             class Earley_CustomLexerWrapper(Earley_WithLexer):
@@ -58,14 +59,14 @@ def get_frontend(parser, lexer):
                     self.lexer = wrapped(self.lexer_conf)
             return Earley_CustomLexerWrapper
         else:
-            raise ValueError('Unknown lexer: %s' % lexer)
+            raise ConfigurationError('Unknown lexer: %s' % lexer)
     elif parser == 'cyk':
         if lexer == 'standard':
             return CYK
         else:
-            raise ValueError('CYK parser requires using standard parser.')
+            raise ConfigurationError('The CYK parser requires using the standard lexer.')
     else:
-        raise ValueError('Unknown parser: %s' % parser)
+        raise ConfigurationError('Unknown parser: %s' % parser)
 
 
 class _ParserFrontend(Serialize):
@@ -73,7 +74,7 @@ class _ParserFrontend(Serialize):
         if start is None:
             start = self.start
             if len(start) > 1:
-                raise ValueError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start)
+                raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start)
             start ,= start
         return self.parser.parse(input, start, *args)
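In `get_frontend` and `_ParserFrontend._parse`, every invalid parser/lexer combination now raises `ConfigurationError`, making the frontend errors catchable alongside the `Lark.__init__` ones. For example (a sketch against the internal API):

```python
from lark.parser_frontends import get_frontend
from lark.exceptions import ConfigurationError

try:
    get_frontend('lalr', None)  # LALR always needs a lexer
except ConfigurationError as e:
    print(e)  # The LALR parser requires use of a lexer
```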
@@ -215,15 +216,15 @@ class XEarley(_ParserFrontend):
         self.regexps = {}
         for t in lexer_conf.tokens:
             if t.priority != 1:
-                raise ValueError("Dynamic Earley doesn't support weights on terminals", t, t.priority)
+                raise GrammarError("Dynamic Earley doesn't support weights on terminals", t, t.priority)
             regexp = t.pattern.to_regexp()
             try:
                 width = get_regexp_width(regexp)[0]
             except ValueError:
-                raise ValueError("Bad regexp in token %s: %s" % (t.name, regexp))
+                raise GrammarError("Bad regexp in token %s: %s" % (t.name, regexp))
             else:
                 if width == 0:
-                    raise ValueError("Dynamic Earley doesn't allow zero-width regexps", t)
+                    raise GrammarError("Dynamic Earley doesn't allow zero-width regexps", t)
             if lexer_conf.use_bytes:
                 regexp = regexp.encode('utf-8')
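The `XEarley` checks report problems with the grammar itself, so they switch to `GrammarError` rather than `ConfigurationError`. A zero-width terminal illustrates (the grammar is illustrative):

```python
from lark import Lark
from lark.exceptions import GrammarError

try:
    # A: /x*/ can match the empty string, which the dynamic lexer rejects.
    Lark('start: A\nA: /x*/', parser='earley', lexer='dynamic')
except GrammarError as e:
    print(e.args[0])  # Dynamic Earley doesn't allow zero-width regexps
```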
--- a/lark/utils.py
+++ b/lark/utils.py
@@ -1,6 +1,5 @@
 import os
 from functools import reduce
-from ast import literal_eval
 from collections import deque
 
 ###{standalone
@@ -225,31 +224,6 @@ class Enumerator(Serialize):
         return r
 
 
-def eval_escaping(s):
-    w = ''
-    i = iter(s)
-    for n in i:
-        w += n
-        if n == '\\':
-            try:
-                n2 = next(i)
-            except StopIteration:
-                raise ValueError("Literal ended unexpectedly (bad escaping): `%r`" % s)
-            if n2 == '\\':
-                w += '\\\\'
-            elif n2 not in 'uxnftr':
-                w += '\\'
-            w += n2
-    w = w.replace('\\"', '"').replace("'", "\\'")
-
-    to_eval = "u'''%s'''" % w
-    try:
-        s = literal_eval(to_eval)
-    except SyntaxError as e:
-        raise ValueError(s, e)
-
-    return s
-
-
 def combine_alternatives(lists):
     """