@@ -9,6 +9,10 @@ class LarkError(Exception):
     pass
 
 
+class ConfigurationError(LarkError, ValueError):
+    pass
+
+
 class GrammarError(LarkError):
     pass
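A note on the new hierarchy: because `ConfigurationError` subclasses both `LarkError` and `ValueError`, callers that previously caught `ValueError` keep working unchanged, while new code can catch the lark-specific type. A minimal self-contained sketch, mirroring the classes above:

```python
class LarkError(Exception):
    pass

class ConfigurationError(LarkError, ValueError):
    pass

try:
    raise ConfigurationError("bad option")
except ValueError as e:                 # pre-existing except-clauses still match
    assert isinstance(e, LarkError)     # ...and so does the new base class
```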
@@ -1,5 +1,5 @@
 from __future__ import absolute_import
 
-from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken
+from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken, ConfigurationError
 
 import sys, os, pickle, hashlib
 from io import open
@@ -24,6 +24,10 @@ except ImportError:
 
 ###{standalone
 
+def assert_config(value, options, msg='Got %r, expected one of %s'):
+    if value not in options:
+        raise ConfigurationError(msg % (value, options))
+
 
 class LarkOptions(Serialize):
     """Specifies the options for Lark
@@ -155,14 +159,15 @@ class LarkOptions(Serialize):
 
         self.__dict__['options'] = options
 
-        assert self.parser in ('earley', 'lalr', 'cyk', None)
+        assert_config(self.parser, ('earley', 'lalr', 'cyk', None))
 
         if self.parser == 'earley' and self.transformer:
-            raise ValueError('Cannot specify an embedded transformer when using the Earley algorithm.'
+            raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm.'
                              'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)')
 
         if o:
-            raise ValueError("Unknown options: %s" % o.keys())
+            raise ConfigurationError("Unknown options: %s" % o.keys())
 
     def __getattr__(self, name):
         try:
@@ -171,7 +176,7 @@ class LarkOptions(Serialize):
             raise AttributeError(e)
 
     def __setattr__(self, name, value):
-        assert name in self.options
+        assert_config(name, self.options.keys(), "%r isn't a valid option. Expected one of: %s")
         self.options[name] = value
 
     def serialize(self, memo):
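With `assert_config` in `__setattr__`, assigning an unknown option now fails with a self-explanatory `ConfigurationError` instead of a bare `AssertionError`. A sketch of the observable behavior (assumes this branch is installed; `lexxer` is a deliberate typo for illustration):

```python
from lark import Lark
from lark.exceptions import ConfigurationError

parser = Lark('start: "a"', parser='lalr')
try:
    parser.options.lexxer = 'contextual'    # typo of "lexer"
except ConfigurationError as e:
    print(e)   # 'lexxer' isn't a valid option. Expected one of: ...
```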
@@ -237,20 +242,20 @@ class Lark(Serialize):
         self.source_grammar = grammar
 
         if self.options.use_bytes:
             if not isascii(grammar):
-                raise ValueError("Grammar must be ascii only, when use_bytes=True")
+                raise ConfigurationError("Grammar must be ascii only, when use_bytes=True")
             if sys.version_info[0] == 2 and self.options.use_bytes != 'force':
-                raise NotImplementedError("`use_bytes=True` may have issues on python2."
+                raise ConfigurationError("`use_bytes=True` may have issues on python2."
                                           "Use `use_bytes='force'` to use it at your own risk.")
 
         cache_fn = None
         if self.options.cache:
             if self.options.parser != 'lalr':
-                raise NotImplementedError("cache only works with parser='lalr' for now")
+                raise ConfigurationError("cache only works with parser='lalr' for now")
             if isinstance(self.options.cache, STRING_TYPE):
                 cache_fn = self.options.cache
             else:
                 if self.options.cache is not True:
-                    raise ValueError("cache argument must be bool or str")
+                    raise ConfigurationError("cache argument must be bool or str")
                 unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals')
                 from . import __version__
                 options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable)
@@ -277,24 +282,25 @@ class Lark(Serialize):
         else:
             assert False, self.options.parser
         lexer = self.options.lexer
-        assert lexer in ('standard', 'contextual', 'dynamic', 'dynamic_complete') or issubclass(lexer, Lexer)
+        if isinstance(lexer, type):
+            assert issubclass(lexer, Lexer)     # XXX Is this really important? Maybe just ensure interface compliance
+        else:
+            assert_config(lexer, ('standard', 'contextual', 'dynamic', 'dynamic_complete'))
 
         if self.options.ambiguity == 'auto':
             if self.options.parser == 'earley':
                 self.options.ambiguity = 'resolve'
         else:
-            disambig_parsers = ['earley', 'cyk']
-            assert self.options.parser in disambig_parsers, (
-                'Only %s supports disambiguation right now') % ', '.join(disambig_parsers)
+            assert_config(self.options.parser, ('earley', 'cyk'), "%r doesn't support disambiguation. Use one of these parsers instead: %s")
 
         if self.options.priority == 'auto':
             self.options.priority = 'normal'
 
         if self.options.priority not in _VALID_PRIORITY_OPTIONS:
-            raise ValueError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS))
+            raise ConfigurationError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS))
         assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"'
         if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS:
-            raise ValueError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))
+            raise ConfigurationError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))
 
         # Parse the grammar file and compose the grammars
         self.grammar = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens)
@@ -401,7 +407,7 @@ class Lark(Serialize):
         memo = SerializeMemoizer.deserialize(memo, {'Rule': Rule, 'TerminalDef': TerminalDef}, {})
         options = dict(data['options'])
         if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults):
-            raise ValueError("Some options are not allowed when loading a Parser: {}"
+            raise ConfigurationError("Some options are not allowed when loading a Parser: {}"
                              .format(set(kwargs) - _LOAD_ALLOWED_OPTIONS))
         options.update(kwargs)
         self.options = LarkOptions.deserialize(options, memo)
@@ -366,7 +366,7 @@ class TraditionalLexer(Lexer):
             if t.type in self.callback:
                 t = self.callback[t.type](t)
                 if not isinstance(t, Token):
-                    raise ValueError("Callbacks must return a token (returned %r)" % t)
+                    raise LexError("Callbacks must return a token (returned %r)" % t)
             lex_state.last_token = t
             return t
         else:
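`LexError` fits this failure better than `ValueError`, since the broken contract belongs to the lexer callback. A sketch of a callback that triggers it (assumes this branch; the callback wrongly returns a plain string instead of a `Token`):

```python
from lark import Lark
from lark.exceptions import LexError

p = Lark('start: WORD\n%import common.WORD',
         parser='lalr',
         lexer_callbacks={'WORD': lambda tok: str(tok)})  # wrong: must return a Token
try:
    p.parse('hello')
except LexError as e:
    print(e)   # Callbacks must return a token (returned 'hello')
```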
@@ -5,8 +5,9 @@ import sys
 from copy import copy, deepcopy
 from io import open
 import pkgutil
+from ast import literal_eval
 
-from .utils import bfs, eval_escaping, Py36, logger, classify_bool
+from .utils import bfs, Py36, logger, classify_bool
 from .lexer import Token, TerminalDef, PatternStr, PatternRE
 
 from .parse_tree_builder import ParseTreeBuilder
@@ -405,6 +406,32 @@ def _rfind(s, choices):
     return max(s.rfind(c) for c in choices)
 
 
+def eval_escaping(s):
+    w = ''
+    i = iter(s)
+    for n in i:
+        w += n
+        if n == '\\':
+            try:
+                n2 = next(i)
+            except StopIteration:
+                raise GrammarError("Literal ended unexpectedly (bad escaping): `%r`" % s)
+            if n2 == '\\':
+                w += '\\\\'
+            elif n2 not in 'uxnftr':
+                w += '\\'
+            w += n2
+
+    w = w.replace('\\"', '"').replace("'", "\\'")
+    to_eval = "u'''%s'''" % w
+    try:
+        s = literal_eval(to_eval)
+    except SyntaxError as e:
+        raise GrammarError(s, e)
+
+    return s
+
+
 def _literal_to_pattern(literal):
     v = literal.value
     flag_start = _rfind(v, '/"')+1
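For reference, `eval_escaping` resolves the escape sequences that grammar literals understand (`\n`, `\t`, `\x..`, `\u....`) while preserving unknown escapes such as `\d` for the regexp engine; after this move, its failures surface as `GrammarError`. A quick sketch of its behavior, assuming the function above is in scope:

```python
print(repr(eval_escaping(r'a\nb')))   # 'a\nb': the two-character \n becomes a real newline
print(repr(eval_escaping(r'\d+')))    # '\\d+': unknown escape kept for the regexp engine
eval_escaping('oops\\')               # GrammarError: Literal ended unexpectedly (bad escaping)
```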
@@ -1,3 +1,4 @@
+from .exceptions import ConfigurationError, GrammarError
 from .utils import get_regexp_width, Serialize
 from .parsers.grammar_analysis import GrammarAnalyzer
 from .lexer import LexerThread, TraditionalLexer, ContextualLexer, Lexer, Token, TerminalDef
@@ -29,7 +30,7 @@ def _wrap_lexer(lexer_class):
 def get_frontend(parser, lexer):
     if parser=='lalr':
         if lexer is None:
-            raise ValueError('The LALR parser requires use of a lexer')
+            raise ConfigurationError('The LALR parser requires use of a lexer')
         elif lexer == 'standard':
             return LALR_TraditionalLexer
         elif lexer == 'contextual':
@@ -41,7 +42,7 @@ def get_frontend(parser, lexer):
                     self.lexer = wrapped(self.lexer_conf)
             return LALR_CustomLexerWrapper
         else:
-            raise ValueError('Unknown lexer: %s' % lexer)
+            raise ConfigurationError('Unknown lexer: %s' % lexer)
     elif parser=='earley':
         if lexer=='standard':
             return Earley_Traditional
@@ -50,7 +51,7 @@ def get_frontend(parser, lexer):
         elif lexer=='dynamic_complete':
             return XEarley_CompleteLex
         elif lexer=='contextual':
-            raise ValueError('The Earley parser does not support the contextual parser')
+            raise ConfigurationError('The Earley parser does not support the contextual parser')
         elif issubclass(lexer, Lexer):
             wrapped = _wrap_lexer(lexer)
             class Earley_CustomLexerWrapper(Earley_WithLexer):
@@ -58,14 +59,14 @@ def get_frontend(parser, lexer):
                     self.lexer = wrapped(self.lexer_conf)
             return Earley_CustomLexerWrapper
         else:
-            raise ValueError('Unknown lexer: %s' % lexer)
+            raise ConfigurationError('Unknown lexer: %s' % lexer)
     elif parser == 'cyk':
         if lexer == 'standard':
             return CYK
         else:
-            raise ValueError('CYK parser requires using standard parser.')
+            raise ConfigurationError('CYK parser requires using standard parser.')
     else:
-        raise ValueError('Unknown parser: %s' % parser)
+        raise ConfigurationError('Unknown parser: %s' % parser)
 
 
 class _ParserFrontend(Serialize):
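`get_frontend` is where parser/lexer combinations are validated, so all of its rejections are configuration problems. A hedged sketch of how they now surface (assuming the usual `lark.parser_frontends` module path for the function shown above):

```python
from lark.parser_frontends import get_frontend
from lark.exceptions import ConfigurationError

try:
    get_frontend('lalr', None)   # LALR cannot run without a lexer
except ConfigurationError as e:
    print(e)   # The LALR parser requires use of a lexer
```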
@@ -73,7 +74,7 @@ class _ParserFrontend(Serialize):
         if start is None:
             start = self.start
             if len(start) > 1:
-                raise ValueError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start)
+                raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start)
             start ,= start
         return self.parser.parse(input, start, *args)
@@ -215,15 +216,15 @@ class XEarley(_ParserFrontend):
         self.regexps = {}
         for t in lexer_conf.tokens:
             if t.priority != 1:
-                raise ValueError("Dynamic Earley doesn't support weights on terminals", t, t.priority)
+                raise GrammarError("Dynamic Earley doesn't support weights on terminals", t, t.priority)
             regexp = t.pattern.to_regexp()
             try:
                 width = get_regexp_width(regexp)[0]
             except ValueError:
-                raise ValueError("Bad regexp in token %s: %s" % (t.name, regexp))
+                raise GrammarError("Bad regexp in token %s: %s" % (t.name, regexp))
             else:
                 if width == 0:
-                    raise ValueError("Dynamic Earley doesn't allow zero-width regexps", t)
+                    raise GrammarError("Dynamic Earley doesn't allow zero-width regexps", t)
             if lexer_conf.use_bytes:
                 regexp = regexp.encode('utf-8')
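Unlike the frontend checks, these three rejections describe problems with the grammar's terminals, hence `GrammarError` rather than `ConfigurationError`. A sketch of one way to hit the zero-width check (assumes this branch; `/x*/` can match the empty string, so its minimum width is 0):

```python
from lark import Lark
from lark.exceptions import GrammarError

try:
    Lark('start: A\nA: /x*/', parser='earley', lexer='dynamic')
except GrammarError as e:
    print(e)   # ("Dynamic Earley doesn't allow zero-width regexps", ...)
```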
@@ -1,6 +1,5 @@
 import os
 from functools import reduce
-from ast import literal_eval
 from collections import deque
 
 ###{standalone
@@ -225,31 +224,6 @@ class Enumerator(Serialize):
         return r
 
 
-def eval_escaping(s):
-    w = ''
-    i = iter(s)
-    for n in i:
-        w += n
-        if n == '\\':
-            try:
-                n2 = next(i)
-            except StopIteration:
-                raise ValueError("Literal ended unexpectedly (bad escaping): `%r`" % s)
-            if n2 == '\\':
-                w += '\\\\'
-            elif n2 not in 'uxnftr':
-                w += '\\'
-            w += n2
-
-    w = w.replace('\\"', '"').replace("'", "\\'")
-    to_eval = "u'''%s'''" % w
-    try:
-        s = literal_eval(to_eval)
-    except SyntaxError as e:
-        raise ValueError(s, e)
-
-    return s
-
-
 def combine_alternatives(lists):
     """