diff --git a/lark-stubs/exceptions.pyi b/lark-stubs/exceptions.pyi
index 587d60a..08f7b3e 100644
--- a/lark-stubs/exceptions.pyi
+++ b/lark-stubs/exceptions.pyi
@@ -9,6 +9,10 @@ class LarkError(Exception):
     pass
 
 
+class ConfigurationError(LarkError, ValueError):
+    pass
+
+
 class GrammarError(LarkError):
     pass
 
diff --git a/lark/exceptions.py b/lark/exceptions.py
index 44f8cbb..72f6c6f 100644
--- a/lark/exceptions.py
+++ b/lark/exceptions.py
@@ -7,6 +7,10 @@ class LarkError(Exception):
     pass
 
 
+class ConfigurationError(LarkError, ValueError):
+    pass
+
+
 class GrammarError(LarkError):
     pass
 
diff --git a/lark/lark.py b/lark/lark.py
index dd85f38..b94f26b 100644
--- a/lark/lark.py
+++ b/lark/lark.py
@@ -1,5 +1,5 @@
 from __future__ import absolute_import
-from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken
+from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken, ConfigurationError
 
 import sys, os, pickle, hashlib
 from io import open
@@ -24,6 +24,10 @@ except ImportError:
 
 ###{standalone
 
+def assert_config(value, options, msg='Got %r, expected one of %s'):
+    if value not in options:
+        raise ConfigurationError(msg % (value, options))
+
 
 class LarkOptions(Serialize):
     """Specifies the options for Lark
@@ -155,14 +159,15 @@ class LarkOptions(Serialize):
 
         self.__dict__['options'] = options
 
-        assert self.parser in ('earley', 'lalr', 'cyk', None)
+
+        assert_config(self.parser, ('earley', 'lalr', 'cyk', None))
 
         if self.parser == 'earley' and self.transformer:
-            raise ValueError('Cannot specify an embedded transformer when using the Earley algorithm.'
+            raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm.'
                              'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)')
 
         if o:
-            raise ValueError("Unknown options: %s" % o.keys())
+            raise ConfigurationError("Unknown options: %s" % o.keys())
 
     def __getattr__(self, name):
         try:
@@ -171,7 +176,7 @@
             raise AttributeError(e)
 
     def __setattr__(self, name, value):
-        assert name in self.options
+        assert_config(name, self.options.keys(), "%r isn't a valid option. Expected one of: %s")
         self.options[name] = value
 
     def serialize(self, memo):
@@ -237,20 +242,20 @@ class Lark(Serialize):
         self.source_grammar = grammar
         if self.options.use_bytes:
             if not isascii(grammar):
-                raise ValueError("Grammar must be ascii only, when use_bytes=True")
+                raise ConfigurationError("Grammar must be ascii only, when use_bytes=True")
             if sys.version_info[0] == 2 and self.options.use_bytes != 'force':
-                raise NotImplementedError("`use_bytes=True` may have issues on python2."
+                raise ConfigurationError("`use_bytes=True` may have issues on python2."
                                           "Use `use_bytes='force'` to use it at your own risk.")
 
         cache_fn = None
         if self.options.cache:
             if self.options.parser != 'lalr':
-                raise NotImplementedError("cache only works with parser='lalr' for now")
+                raise ConfigurationError("cache only works with parser='lalr' for now")
             if isinstance(self.options.cache, STRING_TYPE):
                 cache_fn = self.options.cache
             else:
                 if self.options.cache is not True:
-                    raise ValueError("cache argument must be bool or str")
+                    raise ConfigurationError("cache argument must be bool or str")
                 unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals')
                 from . import __version__
                 options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable)
@@ -277,24 +282,25 @@ class Lark(Serialize):
         else:
             assert False, self.options.parser
         lexer = self.options.lexer
-        assert lexer in ('standard', 'contextual', 'dynamic', 'dynamic_complete') or issubclass(lexer, Lexer)
+        if isinstance(lexer, type):
+            assert issubclass(lexer, Lexer)     # XXX Is this really important? Maybe just ensure interface compliance
+        else:
+            assert_config(lexer, ('standard', 'contextual', 'dynamic', 'dynamic_complete'))
 
         if self.options.ambiguity == 'auto':
             if self.options.parser == 'earley':
                 self.options.ambiguity = 'resolve'
             else:
-                disambig_parsers = ['earley', 'cyk']
-                assert self.options.parser in disambig_parsers, (
-                    'Only %s supports disambiguation right now') % ', '.join(disambig_parsers)
+                assert_config(self.options.parser, ('earley', 'cyk'), "%r doesn't support disambiguation. Use one of these parsers instead: %s")
 
         if self.options.priority == 'auto':
             self.options.priority = 'normal'
 
         if self.options.priority not in _VALID_PRIORITY_OPTIONS:
-            raise ValueError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS))
+            raise ConfigurationError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS))
         assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"'
         if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS:
-            raise ValueError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))
+            raise ConfigurationError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))
 
         # Parse the grammar file and compose the grammars
         self.grammar = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens)
@@ -401,7 +407,7 @@ class Lark(Serialize):
         memo = SerializeMemoizer.deserialize(memo, {'Rule': Rule, 'TerminalDef': TerminalDef}, {})
         options = dict(data['options'])
         if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults):
-            raise ValueError("Some options are not allowed when loading a Parser: {}"
+            raise ConfigurationError("Some options are not allowed when loading a Parser: {}"
                              .format(set(kwargs) - _LOAD_ALLOWED_OPTIONS))
         options.update(kwargs)
         self.options = LarkOptions.deserialize(options, memo)
diff --git a/lark/lexer.py b/lark/lexer.py
index bda8497..63735e9 100644
--- a/lark/lexer.py
+++ b/lark/lexer.py
@@ -366,7 +366,7 @@ class TraditionalLexer(Lexer):
             if t.type in self.callback:
                 t = self.callback[t.type](t)
                 if not isinstance(t, Token):
-                    raise ValueError("Callbacks must return a token (returned %r)" % t)
+                    raise LexError("Callbacks must return a token (returned %r)" % t)
             lex_state.last_token = t
             return t
         else:
diff --git a/lark/load_grammar.py b/lark/load_grammar.py
index 4b962fe..70fd7eb 100644
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -5,8 +5,9 @@ import sys
 from copy import copy, deepcopy
 from io import open
 import pkgutil
+from ast import literal_eval
 
-from .utils import bfs, eval_escaping, Py36, logger, classify_bool
+from .utils import bfs, Py36, logger, classify_bool
 from .lexer import Token, TerminalDef, PatternStr, PatternRE
 
 from .parse_tree_builder import ParseTreeBuilder
@@ -405,6 +406,32 @@ def _rfind(s, choices):
     return max(s.rfind(c) for c in choices)
 
 
+def eval_escaping(s):
+    w = ''
+    i = iter(s)
+    for n in i:
+        w += n
+        if n == '\\':
+            try:
+                n2 = next(i)
+            except StopIteration:
+                raise GrammarError("Literal ended unexpectedly (bad escaping): `%r`" % s)
+            if n2 == '\\':
+                w += '\\\\'
+            elif n2 not in 'uxnftr':
+                w += '\\'
+            w += n2
+    w = w.replace('\\"', '"').replace("'", "\\'")
+
+    to_eval = "u'''%s'''" % w
+    try:
+        s = literal_eval(to_eval)
+    except SyntaxError as e:
+        raise GrammarError(s, e)
+
+    return s
+
+
 def _literal_to_pattern(literal):
     v = literal.value
     flag_start = _rfind(v, '/"')+1
diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py
index 1ef1336..5d32589 100644
--- a/lark/parser_frontends.py
+++ b/lark/parser_frontends.py
@@ -1,3 +1,4 @@
+from .exceptions import ConfigurationError, GrammarError
 from .utils import get_regexp_width, Serialize
 from .parsers.grammar_analysis import GrammarAnalyzer
 from .lexer import LexerThread, TraditionalLexer, ContextualLexer, Lexer, Token, TerminalDef
@@ -29,7 +30,7 @@ def _wrap_lexer(lexer_class):
 def get_frontend(parser, lexer):
     if parser=='lalr':
         if lexer is None:
-            raise ValueError('The LALR parser requires use of a lexer')
+            raise ConfigurationError('The LALR parser requires use of a lexer')
         elif lexer == 'standard':
             return LALR_TraditionalLexer
         elif lexer == 'contextual':
@@ -41,7 +42,7 @@
                     self.lexer = wrapped(self.lexer_conf)
             return LALR_CustomLexerWrapper
         else:
-            raise ValueError('Unknown lexer: %s' % lexer)
+            raise ConfigurationError('Unknown lexer: %s' % lexer)
     elif parser=='earley':
         if lexer=='standard':
             return Earley_Traditional
@@ -50,7 +51,7 @@
         elif lexer=='dynamic_complete':
             return XEarley_CompleteLex
         elif lexer=='contextual':
-            raise ValueError('The Earley parser does not support the contextual parser')
+            raise ConfigurationError('The Earley parser does not support the contextual parser')
         elif issubclass(lexer, Lexer):
             wrapped = _wrap_lexer(lexer)
             class Earley_CustomLexerWrapper(Earley_WithLexer):
@@ -58,14 +59,14 @@
                     self.lexer = wrapped(self.lexer_conf)
             return Earley_CustomLexerWrapper
         else:
-            raise ValueError('Unknown lexer: %s' % lexer)
+            raise ConfigurationError('Unknown lexer: %s' % lexer)
     elif parser == 'cyk':
         if lexer == 'standard':
             return CYK
         else:
-            raise ValueError('CYK parser requires using standard parser.')
+            raise ConfigurationError('CYK parser requires using standard parser.')
     else:
-        raise ValueError('Unknown parser: %s' % parser)
+        raise ConfigurationError('Unknown parser: %s' % parser)
 
 
 class _ParserFrontend(Serialize):
@@ -73,7 +74,7 @@
         if start is None:
             start = self.start
             if len(start) > 1:
-                raise ValueError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start)
+                raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start)
             start ,= start
         return self.parser.parse(input, start, *args)
 
@@ -215,15 +216,15 @@ class XEarley(_ParserFrontend):
         self.regexps = {}
         for t in lexer_conf.tokens:
             if t.priority != 1:
-                raise ValueError("Dynamic Earley doesn't support weights on terminals", t, t.priority)
+                raise GrammarError("Dynamic Earley doesn't support weights on terminals", t, t.priority)
             regexp = t.pattern.to_regexp()
             try:
                 width = get_regexp_width(regexp)[0]
             except ValueError:
-                raise ValueError("Bad regexp in token %s: %s" % (t.name, regexp))
+                raise GrammarError("Bad regexp in token %s: %s" % (t.name, regexp))
             else:
                 if width == 0:
-                    raise ValueError("Dynamic Earley doesn't allow zero-width regexps", t)
+                    raise GrammarError("Dynamic Earley doesn't allow zero-width regexps", t)
 
             if lexer_conf.use_bytes:
                 regexp = regexp.encode('utf-8')
diff --git a/lark/utils.py b/lark/utils.py
index b1c3535..366922b 100644
--- a/lark/utils.py
+++ b/lark/utils.py
@@ -1,6 +1,5 @@
 import os
 from functools import reduce
-from ast import literal_eval
 from collections import deque
 
 ###{standalone
@@ -225,31 +224,6 @@ class Enumerator(Serialize):
         return r
 
 
-def eval_escaping(s):
-    w = ''
-    i = iter(s)
-    for n in i:
-        w += n
-        if n == '\\':
-            try:
-                n2 = next(i)
-            except StopIteration:
-                raise ValueError("Literal ended unexpectedly (bad escaping): `%r`" % s)
-            if n2 == '\\':
-                w += '\\\\'
-            elif n2 not in 'uxnftr':
-                w += '\\'
-            w += n2
-    w = w.replace('\\"', '"').replace("'", "\\'")
-
-    to_eval = "u'''%s'''" % w
-    try:
-        s = literal_eval(to_eval)
-    except SyntaxError as e:
-        raise ValueError(s, e)
-
-    return s
-
 
 def combine_alternatives(lists):
     """
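Note (illustrative, not part of the patch): after this change, invalid configuration surfaces as ConfigurationError rather than a mix of ValueError, NotImplementedError, and bare asserts. Because ConfigurationError subclasses both LarkError and ValueError, callers that already catch ValueError keep working. A minimal sketch of the intended behavior, assuming the patch is applied:

    from lark import Lark
    from lark.exceptions import ConfigurationError

    try:
        Lark('start: "a"', parser='lalr2')  # typo: not a known parser
    except ConfigurationError as e:
        # Raised by assert_config():
        # Got 'lalr2', expected one of ('earley', 'lalr', 'cyk', None)
        print('bad config:', e)

    # Backward compatibility: ConfigurationError is also a ValueError,
    # so handlers written against the old behavior still match.
    try:
        Lark('start: "a"', parser='lalr2')
    except ValueError as e:
        print('still caught:', e)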