--- a/lark/exceptions.py
+++ b/lark/exceptions.py
@@ -7,9 +7,16 @@ class LarkError(Exception):
     pass
 
 
+class ConfigurationError(LarkError, ValueError):
+    pass
+
+
 class GrammarError(LarkError):
     pass
 
 
 class ParseError(LarkError):
     pass
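Because `ConfigurationError` derives from both `LarkError` and `ValueError`, code that caught the `ValueError`s raised before this change should keep working. A minimal sketch (the misspelled `parser` value is just for illustration):

```python
from lark import Lark
from lark.exceptions import ConfigurationError

try:
    Lark('start: "a"', parser='lalr2')  # typo: not a valid parser name
except ValueError as e:
    # Still caught: ConfigurationError subclasses ValueError,
    # so pre-existing error handling keeps working.
    assert isinstance(e, ConfigurationError)
```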
--- a/lark/lark.py
+++ b/lark/lark.py
@@ -1,5 +1,5 @@
 from __future__ import absolute_import
 
-from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken
+from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken, ConfigurationError
 import sys, os, pickle, hashlib
 from io import open
@@ -24,6 +24,10 @@ except ImportError:
 
 ###{standalone
 
+def assert_config(value, options, msg='Got %r, expected one of %s'):
+    if value not in options:
+        raise ConfigurationError(msg % (value, options))
+
 
 class LarkOptions(Serialize):
     """Specifies the options for Lark
@@ -155,14 +159,15 @@ class LarkOptions(Serialize):
         self.__dict__['options'] = options
 
-        assert self.parser in ('earley', 'lalr', 'cyk', None)
+        assert_config(self.parser, ('earley', 'lalr', 'cyk', None))
 
         if self.parser == 'earley' and self.transformer:
-            raise ValueError('Cannot specify an embedded transformer when using the Earley algorithm.'
+            raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm. '
                              'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)')
 
         if o:
-            raise ValueError("Unknown options: %s" % o.keys())
+            raise ConfigurationError("Unknown options: %s" % o.keys())
 
     def __getattr__(self, name):
         try:
@@ -171,7 +176,7 @@ class LarkOptions(Serialize):
             raise AttributeError(e)
 
     def __setattr__(self, name, value):
-        assert name in self.options
+        assert_config(name, self.options.keys(), "%r isn't a valid option. Expected one of: %s")
         self.options[name] = value
 
     def serialize(self, memo):
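With `__setattr__` routed through `assert_config`, assigning to a misspelled option now fails with a descriptive `ConfigurationError` instead of a bare `AssertionError`. A sketch of the new behavior (the option names are illustrative):

```python
from lark import Lark
from lark.exceptions import ConfigurationError

parser = Lark('start: "a"')
parser.options.debug = True  # valid option: accepted

try:
    parser.options.debgu = True  # misspelled
except ConfigurationError as e:
    print(e)  # 'debgu' isn't a valid option. Expected one of: ...
```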
@@ -237,20 +242,20 @@ class Lark(Serialize):
         self.source_grammar = grammar
         if self.options.use_bytes:
             if not isascii(grammar):
-                raise ValueError("Grammar must be ascii only, when use_bytes=True")
+                raise ConfigurationError("Grammar must be ascii only, when use_bytes=True")
             if sys.version_info[0] == 2 and self.options.use_bytes != 'force':
-                raise NotImplementedError("`use_bytes=True` may have issues on python2."
+                raise ConfigurationError("`use_bytes=True` may have issues on python2. "
                                           "Use `use_bytes='force'` to use it at your own risk.")
 
         cache_fn = None
         if self.options.cache:
             if self.options.parser != 'lalr':
-                raise NotImplementedError("cache only works with parser='lalr' for now")
+                raise ConfigurationError("cache only works with parser='lalr' for now")
             if isinstance(self.options.cache, STRING_TYPE):
                 cache_fn = self.options.cache
             else:
                 if self.options.cache is not True:
-                    raise ValueError("cache argument must be bool or str")
+                    raise ConfigurationError("cache argument must be bool or str")
 
             unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals')
             from . import __version__
             options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable)
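All three failure modes above (non-ASCII grammar with `use_bytes`, `cache` with a non-LALR parser, a bad `cache` value) now raise the same exception type, so callers can handle misconfiguration uniformly. For example:

```python
from lark import Lark
from lark.exceptions import ConfigurationError

try:
    Lark('start: "a"', parser='earley', cache=True)  # cache requires parser='lalr'
except ConfigurationError as e:
    print(e)  # cache only works with parser='lalr' for now
```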
@@ -277,24 +282,25 @@ class Lark(Serialize):
         else:
             assert False, self.options.parser
 
         lexer = self.options.lexer
-        assert lexer in ('standard', 'contextual', 'dynamic', 'dynamic_complete') or issubclass(lexer, Lexer)
+        if isinstance(lexer, type):
+            assert issubclass(lexer, Lexer)     # XXX Is this really important? Maybe just ensure interface compliance
+        else:
+            assert_config(lexer, ('standard', 'contextual', 'dynamic', 'dynamic_complete'))
 
         if self.options.ambiguity == 'auto':
             if self.options.parser == 'earley':
                 self.options.ambiguity = 'resolve'
         else:
-            disambig_parsers = ['earley', 'cyk']
-            assert self.options.parser in disambig_parsers, (
-                'Only %s supports disambiguation right now') % ', '.join(disambig_parsers)
+            assert_config(self.options.parser, ('earley', 'cyk'), "%r doesn't support disambiguation. Use one of these parsers instead: %s")
 
         if self.options.priority == 'auto':
             self.options.priority = 'normal'
 
         if self.options.priority not in _VALID_PRIORITY_OPTIONS:
-            raise ValueError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS))
+            raise ConfigurationError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS))
         assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"'
         if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS:
-            raise ValueError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))
+            raise ConfigurationError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))
 
         # Parse the grammar file and compose the grammars
         self.grammar = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens)
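The reworded disambiguation check now tells the user which parsers would work. A sketch:

```python
from lark import Lark
from lark.exceptions import ConfigurationError

try:
    Lark('start: "a"', parser='lalr', ambiguity='explicit')
except ConfigurationError as e:
    print(e)
    # 'lalr' doesn't support disambiguation. Use one of these parsers instead: ('earley', 'cyk')
```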
@@ -401,7 +407,7 @@ class Lark(Serialize):
         memo = SerializeMemoizer.deserialize(memo, {'Rule': Rule, 'TerminalDef': TerminalDef}, {})
         options = dict(data['options'])
         if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults):
-            raise ValueError("Some options are not allowed when loading a Parser: {}"
+            raise ConfigurationError("Some options are not allowed when loading a Parser: {}"
                              .format(set(kwargs) - _LOAD_ALLOWED_OPTIONS))
         options.update(kwargs)
         self.options = LarkOptions.deserialize(options, memo)
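This path runs when a serialized parser is reloaded (e.g. by the standalone tool) and rejects overrides that can only be applied at grammar-compilation time. A sketch against the internal `_load` keyword interface, assuming the usual save/load round-trip (the rejected option here is illustrative):

```python
import io
from lark import Lark
from lark.exceptions import ConfigurationError

parser = Lark('start: "a"', parser='lalr')
buf = io.BytesIO()
parser.save(buf)
buf.seek(0)

reloaded = Lark.__new__(Lark)
try:
    reloaded._load(buf, start=['b'])  # 'start' can't be overridden at load time
except ConfigurationError as e:
    print(e)  # Some options are not allowed when loading a Parser: {'start'}
```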
--- a/lark/lexer.py
+++ b/lark/lexer.py
@@ -366,7 +366,7 @@ class TraditionalLexer(Lexer):
             if t.type in self.callback:
                 t = self.callback[t.type](t)
                 if not isinstance(t, Token):
-                    raise ValueError("Callbacks must return a token (returned %r)" % t)
+                    raise LexError("Callbacks must return a token (returned %r)" % t)
             lex_state.last_token = t
             return t
         else:
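A callback returning something other than a `Token` is a lexing-stage bug, so `LexError` fits better than `ValueError` here. A sketch of a callback that trips the check (the grammar and callback are illustrative):

```python
from lark import Lark
from lark.exceptions import LexError

# Lexer callbacks must return a Token; returning a plain string is an error.
bad_callbacks = {'WORD': lambda tok: str(tok)}
parser = Lark('start: WORD\n%import common.WORD',
              parser='lalr', lexer_callbacks=bad_callbacks)

try:
    parser.parse('hello')
except LexError as e:
    print(e)  # Callbacks must return a token (returned 'hello')
```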
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -5,8 +5,9 @@ import sys
 from copy import copy, deepcopy
 from io import open
 import pkgutil
+from ast import literal_eval
 
-from .utils import bfs, eval_escaping, Py36, logger, classify_bool
+from .utils import bfs, Py36, logger, classify_bool
 from .lexer import Token, TerminalDef, PatternStr, PatternRE
 
 from .parse_tree_builder import ParseTreeBuilder
@@ -405,6 +406,32 @@ def _rfind(s, choices):
     return max(s.rfind(c) for c in choices)
 
 
+def eval_escaping(s):
+    w = ''
+    i = iter(s)
+    for n in i:
+        w += n
+        if n == '\\':
+            try:
+                n2 = next(i)
+            except StopIteration:
+                raise GrammarError("Literal ended unexpectedly (bad escaping): `%r`" % s)
+            if n2 == '\\':
+                w += '\\\\'
+            elif n2 not in 'uxnftr':
+                w += '\\'
+            w += n2
+    w = w.replace('\\"', '"').replace("'", "\\'")
+
+    to_eval = "u'''%s'''" % w
+    try:
+        s = literal_eval(to_eval)
+    except SyntaxError as e:
+        raise GrammarError(s, e)
+
+    return s
+
+
 def _literal_to_pattern(literal):
     v = literal.value
     flag_start = _rfind(v, '/"')+1
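`eval_escaping` moves here from `utils.py` (removed below) so that bad escape sequences in grammar literals raise `GrammarError`, a grammar-authoring problem, rather than a generic `ValueError`. The logic itself is unchanged; roughly:

```python
from lark.load_grammar import eval_escaping  # its new home, per this patch
from lark.exceptions import GrammarError

assert eval_escaping(r'\n') == '\n'   # recognized escape -> real newline
assert eval_escaping(r'\d') == r'\d'  # unrecognized escape keeps its backslash

try:
    eval_escaping('broken\\')  # trailing backslash with nothing to escape
except GrammarError as e:
    print(e)  # Literal ended unexpectedly (bad escaping): `'broken\\'`
```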
@@ -1,3 +1,4 @@ | |||
from .exceptions import ConfigurationError, GrammarError | |||
from .utils import get_regexp_width, Serialize | |||
from .parsers.grammar_analysis import GrammarAnalyzer | |||
from .lexer import LexerThread, TraditionalLexer, ContextualLexer, Lexer, Token, TerminalDef | |||
@@ -29,7 +30,7 @@ def _wrap_lexer(lexer_class):
 def get_frontend(parser, lexer):
     if parser=='lalr':
         if lexer is None:
-            raise ValueError('The LALR parser requires use of a lexer')
+            raise ConfigurationError('The LALR parser requires use of a lexer')
         elif lexer == 'standard':
             return LALR_TraditionalLexer
         elif lexer == 'contextual':
@@ -41,7 +42,7 @@ def get_frontend(parser, lexer):
                     self.lexer = wrapped(self.lexer_conf)
             return LALR_CustomLexerWrapper
         else:
-            raise ValueError('Unknown lexer: %s' % lexer)
+            raise ConfigurationError('Unknown lexer: %s' % lexer)
     elif parser=='earley':
         if lexer=='standard':
             return Earley_Traditional
@@ -50,7 +51,7 @@ def get_frontend(parser, lexer):
         elif lexer=='dynamic_complete':
             return XEarley_CompleteLex
         elif lexer=='contextual':
-            raise ValueError('The Earley parser does not support the contextual parser')
+            raise ConfigurationError('The Earley parser does not support the contextual lexer')
         elif issubclass(lexer, Lexer):
             wrapped = _wrap_lexer(lexer)
             class Earley_CustomLexerWrapper(Earley_WithLexer):
@@ -58,14 +59,14 @@ def get_frontend(parser, lexer):
                     self.lexer = wrapped(self.lexer_conf)
             return Earley_CustomLexerWrapper
         else:
-            raise ValueError('Unknown lexer: %s' % lexer)
+            raise ConfigurationError('Unknown lexer: %s' % lexer)
     elif parser == 'cyk':
         if lexer == 'standard':
             return CYK
         else:
-            raise ValueError('CYK parser requires using standard parser.')
+            raise ConfigurationError('The CYK parser requires using the standard lexer.')
     else:
-        raise ValueError('Unknown parser: %s' % parser)
+        raise ConfigurationError('Unknown parser: %s' % parser)
 
 
 class _ParserFrontend(Serialize):
@@ -73,7 +74,7 @@ class _ParserFrontend(Serialize):
         if start is None:
             start = self.start
             if len(start) > 1:
-                raise ValueError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start)
+                raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start)
             start ,= start
         return self.parser.parse(input, start, *args)
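In `get_frontend` and `_ParserFrontend._parse`, every invalid parser/lexer combination now raises `ConfigurationError`, making the frontend errors catchable alongside the `Lark.__init__` ones. For example (a sketch against the internal API):

```python
from lark.parser_frontends import get_frontend
from lark.exceptions import ConfigurationError

try:
    get_frontend('lalr', None)  # LALR always needs a lexer
except ConfigurationError as e:
    print(e)  # The LALR parser requires use of a lexer
```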
@@ -215,15 +216,15 @@ class XEarley(_ParserFrontend):
         self.regexps = {}
         for t in lexer_conf.tokens:
             if t.priority != 1:
-                raise ValueError("Dynamic Earley doesn't support weights on terminals", t, t.priority)
+                raise GrammarError("Dynamic Earley doesn't support weights on terminals", t, t.priority)
             regexp = t.pattern.to_regexp()
             try:
                 width = get_regexp_width(regexp)[0]
             except ValueError:
-                raise ValueError("Bad regexp in token %s: %s" % (t.name, regexp))
+                raise GrammarError("Bad regexp in token %s: %s" % (t.name, regexp))
             else:
                 if width == 0:
-                    raise ValueError("Dynamic Earley doesn't allow zero-width regexps", t)
+                    raise GrammarError("Dynamic Earley doesn't allow zero-width regexps", t)
             if lexer_conf.use_bytes:
                 regexp = regexp.encode('utf-8')
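The `XEarley` checks report problems with the grammar itself, so they switch to `GrammarError` rather than `ConfigurationError`. A zero-width terminal illustrates (the grammar is illustrative):

```python
from lark import Lark
from lark.exceptions import GrammarError

try:
    # A: /x*/ can match the empty string, which the dynamic lexer rejects.
    Lark('start: A\nA: /x*/', parser='earley', lexer='dynamic')
except GrammarError as e:
    print(e.args[0])  # Dynamic Earley doesn't allow zero-width regexps
```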
--- a/lark/utils.py
+++ b/lark/utils.py
@@ -1,6 +1,5 @@
 import os
 from functools import reduce
-from ast import literal_eval
 from collections import deque
 
 ###{standalone
@@ -225,31 +224,6 @@ class Enumerator(Serialize):
         return r
 
 
-def eval_escaping(s):
-    w = ''
-    i = iter(s)
-    for n in i:
-        w += n
-        if n == '\\':
-            try:
-                n2 = next(i)
-            except StopIteration:
-                raise ValueError("Literal ended unexpectedly (bad escaping): `%r`" % s)
-            if n2 == '\\':
-                w += '\\\\'
-            elif n2 not in 'uxnftr':
-                w += '\\'
-            w += n2
-    w = w.replace('\\"', '"').replace("'", "\\'")
-
-    to_eval = "u'''%s'''" % w
-    try:
-        s = literal_eval(to_eval)
-    except SyntaxError as e:
-        raise ValueError(s, e)
-
-    return s
-
-
 def combine_alternatives(lists):
     """