@@ -7,9 +7,16 @@ class LarkError(Exception): | |||||
pass | pass | ||||
class ConfigurationError(LarkError, ValueError):
    """Raised when an option value, or a combination of options, is invalid.

    It also derives from ValueError, so handlers written as
    ``except ValueError`` still catch it.
    """
    pass
class GrammarError(LarkError):
    """Raised for problems in a grammar definition (e.g. a badly escaped literal)."""
    pass
class GrammarError_Value(LarkError):
    # NOTE(review): despite the name, this derives from LarkError directly and
    # not from GrammarError, so ``except GrammarError`` will not catch it.
    # No raise site is visible here -- confirm the intended use.
    pass
class ParseError(LarkError):
    # NOTE(review): presumably the base class for failures while parsing
    # input; no raise site is visible here -- confirm.
    pass
@@ -1,5 +1,5 @@ | |||||
from __future__ import absolute_import | from __future__ import absolute_import | ||||
from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken | |||||
from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken, ConfigurationError | |||||
import sys, os, pickle, hashlib | import sys, os, pickle, hashlib | ||||
from io import open | from io import open | ||||
@@ -24,6 +24,10 @@ except ImportError: | |||||
###{standalone | ###{standalone | ||||
def assert_config(value, options, msg='Got %r, expected one of %s'):
    """Check that ``value`` is one of the allowed ``options``.

    Parameters:
        value: the configuration value to validate.
        options: container of accepted values (anything supporting ``in``).
        msg: %-style template taking two arguments, the offending value and
            the accepted options, in that order.

    Raises:
        ConfigurationError: if ``value`` is not found in ``options``.
    """
    if value in options:
        return
    raise ConfigurationError(msg % (value, options))
class LarkOptions(Serialize): | class LarkOptions(Serialize): | ||||
"""Specifies the options for Lark | """Specifies the options for Lark | ||||
@@ -155,14 +159,15 @@ class LarkOptions(Serialize): | |||||
self.__dict__['options'] = options | self.__dict__['options'] = options | ||||
assert self.parser in ('earley', 'lalr', 'cyk', None) | |||||
assert_config(self.parser, ('earley', 'lalr', 'cyk', None)) | |||||
if self.parser == 'earley' and self.transformer: | if self.parser == 'earley' and self.transformer: | ||||
raise ValueError('Cannot specify an embedded transformer when using the Earley algorithm.' | |||||
raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm.' | |||||
'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)') | 'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)') | ||||
if o: | if o: | ||||
raise ValueError("Unknown options: %s" % o.keys()) | |||||
raise ConfigurationError("Unknown options: %s" % o.keys()) | |||||
def __getattr__(self, name): | def __getattr__(self, name): | ||||
try: | try: | ||||
@@ -171,7 +176,7 @@ class LarkOptions(Serialize): | |||||
raise AttributeError(e) | raise AttributeError(e) | ||||
def __setattr__(self, name, value): | def __setattr__(self, name, value): | ||||
assert name in self.options | |||||
assert_config(name, self.options.keys(), "%r isn't a valid option. Expected one of: %s") | |||||
self.options[name] = value | self.options[name] = value | ||||
def serialize(self, memo): | def serialize(self, memo): | ||||
@@ -237,20 +242,20 @@ class Lark(Serialize): | |||||
self.source_grammar = grammar | self.source_grammar = grammar | ||||
if self.options.use_bytes: | if self.options.use_bytes: | ||||
if not isascii(grammar): | if not isascii(grammar): | ||||
raise ValueError("Grammar must be ascii only, when use_bytes=True") | |||||
raise ConfigurationError("Grammar must be ascii only, when use_bytes=True") | |||||
if sys.version_info[0] == 2 and self.options.use_bytes != 'force': | if sys.version_info[0] == 2 and self.options.use_bytes != 'force': | ||||
raise NotImplementedError("`use_bytes=True` may have issues on python2." | |||||
raise ConfigurationError("`use_bytes=True` may have issues on python2." | |||||
"Use `use_bytes='force'` to use it at your own risk.") | "Use `use_bytes='force'` to use it at your own risk.") | ||||
cache_fn = None | cache_fn = None | ||||
if self.options.cache: | if self.options.cache: | ||||
if self.options.parser != 'lalr': | if self.options.parser != 'lalr': | ||||
raise NotImplementedError("cache only works with parser='lalr' for now") | |||||
raise ConfigurationError("cache only works with parser='lalr' for now") | |||||
if isinstance(self.options.cache, STRING_TYPE): | if isinstance(self.options.cache, STRING_TYPE): | ||||
cache_fn = self.options.cache | cache_fn = self.options.cache | ||||
else: | else: | ||||
if self.options.cache is not True: | if self.options.cache is not True: | ||||
raise ValueError("cache argument must be bool or str") | |||||
raise ConfigurationError("cache argument must be bool or str") | |||||
unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals') | unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals') | ||||
from . import __version__ | from . import __version__ | ||||
options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable) | options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable) | ||||
@@ -277,24 +282,25 @@ class Lark(Serialize): | |||||
else: | else: | ||||
assert False, self.options.parser | assert False, self.options.parser | ||||
lexer = self.options.lexer | lexer = self.options.lexer | ||||
assert lexer in ('standard', 'contextual', 'dynamic', 'dynamic_complete') or issubclass(lexer, Lexer) | |||||
if isinstance(lexer, type): | |||||
assert issubclass(lexer, Lexer) # XXX Is this really important? Maybe just ensure interface compliance | |||||
else: | |||||
assert_config(lexer, ('standard', 'contextual', 'dynamic', 'dynamic_complete')) | |||||
if self.options.ambiguity == 'auto': | if self.options.ambiguity == 'auto': | ||||
if self.options.parser == 'earley': | if self.options.parser == 'earley': | ||||
self.options.ambiguity = 'resolve' | self.options.ambiguity = 'resolve' | ||||
else: | else: | ||||
disambig_parsers = ['earley', 'cyk'] | |||||
assert self.options.parser in disambig_parsers, ( | |||||
'Only %s supports disambiguation right now') % ', '.join(disambig_parsers) | |||||
assert_config(self.options.parser, ('earley', 'cyk'), "%r doesn't support disambiguation. Use one of these parsers instead: %s") | |||||
if self.options.priority == 'auto': | if self.options.priority == 'auto': | ||||
self.options.priority = 'normal' | self.options.priority = 'normal' | ||||
if self.options.priority not in _VALID_PRIORITY_OPTIONS: | if self.options.priority not in _VALID_PRIORITY_OPTIONS: | ||||
raise ValueError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS)) | |||||
raise ConfigurationError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS)) | |||||
assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"' | assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"' | ||||
if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS: | if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS: | ||||
raise ValueError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS)) | |||||
raise ConfigurationError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS)) | |||||
# Parse the grammar file and compose the grammars | # Parse the grammar file and compose the grammars | ||||
self.grammar = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens) | self.grammar = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens) | ||||
@@ -401,7 +407,7 @@ class Lark(Serialize): | |||||
memo = SerializeMemoizer.deserialize(memo, {'Rule': Rule, 'TerminalDef': TerminalDef}, {}) | memo = SerializeMemoizer.deserialize(memo, {'Rule': Rule, 'TerminalDef': TerminalDef}, {}) | ||||
options = dict(data['options']) | options = dict(data['options']) | ||||
if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults): | if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults): | ||||
raise ValueError("Some options are not allowed when loading a Parser: {}" | |||||
raise ConfigurationError("Some options are not allowed when loading a Parser: {}" | |||||
.format(set(kwargs) - _LOAD_ALLOWED_OPTIONS)) | .format(set(kwargs) - _LOAD_ALLOWED_OPTIONS)) | ||||
options.update(kwargs) | options.update(kwargs) | ||||
self.options = LarkOptions.deserialize(options, memo) | self.options = LarkOptions.deserialize(options, memo) | ||||
@@ -366,7 +366,7 @@ class TraditionalLexer(Lexer): | |||||
if t.type in self.callback: | if t.type in self.callback: | ||||
t = self.callback[t.type](t) | t = self.callback[t.type](t) | ||||
if not isinstance(t, Token): | if not isinstance(t, Token): | ||||
raise ValueError("Callbacks must return a token (returned %r)" % t) | |||||
raise LexError("Callbacks must return a token (returned %r)" % t) | |||||
lex_state.last_token = t | lex_state.last_token = t | ||||
return t | return t | ||||
else: | else: | ||||
@@ -5,8 +5,9 @@ import sys | |||||
from copy import copy, deepcopy | from copy import copy, deepcopy | ||||
from io import open | from io import open | ||||
import pkgutil | import pkgutil | ||||
from ast import literal_eval | |||||
from .utils import bfs, eval_escaping, Py36, logger, classify_bool | |||||
from .utils import bfs, Py36, logger, classify_bool | |||||
from .lexer import Token, TerminalDef, PatternStr, PatternRE | from .lexer import Token, TerminalDef, PatternStr, PatternRE | ||||
from .parse_tree_builder import ParseTreeBuilder | from .parse_tree_builder import ParseTreeBuilder | ||||
@@ -405,6 +406,32 @@ def _rfind(s, choices): | |||||
return max(s.rfind(c) for c in choices) | return max(s.rfind(c) for c in choices) | ||||
def eval_escaping(s):
    """Evaluate the escape sequences in the body of a grammar literal.

    Escapes followed by one of ``u x n f t r`` are interpreted as in a Python
    unicode string literal. An escaped backslash is kept as two backslashes,
    and any other escaped character keeps its backslash in the result (so
    ``\\d`` stays ``\\d``). ``\\"`` becomes a plain double quote.

    Parameters:
        s: the literal's text, without its enclosing quotes.

    Returns:
        The unescaped string.

    Raises:
        GrammarError: if ``s`` ends in the middle of an escape sequence, or
            if the resulting text is not a valid Python string literal.
    """
    pieces = []
    chars = iter(s)
    for ch in chars:
        pieces.append(ch)
        if ch != '\\':
            continue
        try:
            escaped = next(chars)
        except StopIteration:
            raise GrammarError("Literal ended unexpectedly (bad escaping): `%r`" % s)
        if escaped == '\\':
            # Double the pair so literal_eval leaves two backslashes behind.
            pieces.append('\\\\')
        elif escaped not in 'uxnftr':
            # Not an escape we interpret: protect the backslash itself.
            pieces.append('\\')
        pieces.append(escaped)

    # Single quotes must be escaped because the text is wrapped in '''...'''.
    body = ''.join(pieces).replace('\\"', '"').replace("'", "\\'")

    try:
        return literal_eval("u'''%s'''" % body)
    except SyntaxError as e:
        raise GrammarError(s, e)
def _literal_to_pattern(literal): | def _literal_to_pattern(literal): | ||||
v = literal.value | v = literal.value | ||||
flag_start = _rfind(v, '/"')+1 | flag_start = _rfind(v, '/"')+1 | ||||
@@ -1,3 +1,4 @@ | |||||
from .exceptions import ConfigurationError, GrammarError | |||||
from .utils import get_regexp_width, Serialize | from .utils import get_regexp_width, Serialize | ||||
from .parsers.grammar_analysis import GrammarAnalyzer | from .parsers.grammar_analysis import GrammarAnalyzer | ||||
from .lexer import LexerThread, TraditionalLexer, ContextualLexer, Lexer, Token, TerminalDef | from .lexer import LexerThread, TraditionalLexer, ContextualLexer, Lexer, Token, TerminalDef | ||||
@@ -29,7 +30,7 @@ def _wrap_lexer(lexer_class): | |||||
def get_frontend(parser, lexer): | def get_frontend(parser, lexer): | ||||
if parser=='lalr': | if parser=='lalr': | ||||
if lexer is None: | if lexer is None: | ||||
raise ValueError('The LALR parser requires use of a lexer') | |||||
raise ConfigurationError('The LALR parser requires use of a lexer') | |||||
elif lexer == 'standard': | elif lexer == 'standard': | ||||
return LALR_TraditionalLexer | return LALR_TraditionalLexer | ||||
elif lexer == 'contextual': | elif lexer == 'contextual': | ||||
@@ -41,7 +42,7 @@ def get_frontend(parser, lexer): | |||||
self.lexer = wrapped(self.lexer_conf) | self.lexer = wrapped(self.lexer_conf) | ||||
return LALR_CustomLexerWrapper | return LALR_CustomLexerWrapper | ||||
else: | else: | ||||
raise ValueError('Unknown lexer: %s' % lexer) | |||||
raise ConfigurationError('Unknown lexer: %s' % lexer) | |||||
elif parser=='earley': | elif parser=='earley': | ||||
if lexer=='standard': | if lexer=='standard': | ||||
return Earley_Traditional | return Earley_Traditional | ||||
@@ -50,7 +51,7 @@ def get_frontend(parser, lexer): | |||||
elif lexer=='dynamic_complete': | elif lexer=='dynamic_complete': | ||||
return XEarley_CompleteLex | return XEarley_CompleteLex | ||||
elif lexer=='contextual': | elif lexer=='contextual': | ||||
raise ValueError('The Earley parser does not support the contextual parser') | |||||
raise ConfigurationError('The Earley parser does not support the contextual parser') | |||||
elif issubclass(lexer, Lexer): | elif issubclass(lexer, Lexer): | ||||
wrapped = _wrap_lexer(lexer) | wrapped = _wrap_lexer(lexer) | ||||
class Earley_CustomLexerWrapper(Earley_WithLexer): | class Earley_CustomLexerWrapper(Earley_WithLexer): | ||||
@@ -58,14 +59,14 @@ def get_frontend(parser, lexer): | |||||
self.lexer = wrapped(self.lexer_conf) | self.lexer = wrapped(self.lexer_conf) | ||||
return Earley_CustomLexerWrapper | return Earley_CustomLexerWrapper | ||||
else: | else: | ||||
raise ValueError('Unknown lexer: %s' % lexer) | |||||
raise ConfigurationError('Unknown lexer: %s' % lexer) | |||||
elif parser == 'cyk': | elif parser == 'cyk': | ||||
if lexer == 'standard': | if lexer == 'standard': | ||||
return CYK | return CYK | ||||
else: | else: | ||||
raise ValueError('CYK parser requires using standard parser.') | |||||
raise ConfigurationError('CYK parser requires using standard parser.') | |||||
else: | else: | ||||
raise ValueError('Unknown parser: %s' % parser) | |||||
raise ConfigurationError('Unknown parser: %s' % parser) | |||||
class _ParserFrontend(Serialize): | class _ParserFrontend(Serialize): | ||||
@@ -73,7 +74,7 @@ class _ParserFrontend(Serialize): | |||||
if start is None: | if start is None: | ||||
start = self.start | start = self.start | ||||
if len(start) > 1: | if len(start) > 1: | ||||
raise ValueError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start) | |||||
raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start) | |||||
start ,= start | start ,= start | ||||
return self.parser.parse(input, start, *args) | return self.parser.parse(input, start, *args) | ||||
@@ -215,15 +216,15 @@ class XEarley(_ParserFrontend): | |||||
self.regexps = {} | self.regexps = {} | ||||
for t in lexer_conf.tokens: | for t in lexer_conf.tokens: | ||||
if t.priority != 1: | if t.priority != 1: | ||||
raise ValueError("Dynamic Earley doesn't support weights on terminals", t, t.priority) | |||||
raise GrammarError("Dynamic Earley doesn't support weights on terminals", t, t.priority) | |||||
regexp = t.pattern.to_regexp() | regexp = t.pattern.to_regexp() | ||||
try: | try: | ||||
width = get_regexp_width(regexp)[0] | width = get_regexp_width(regexp)[0] | ||||
except ValueError: | except ValueError: | ||||
raise ValueError("Bad regexp in token %s: %s" % (t.name, regexp)) | |||||
raise GrammarError("Bad regexp in token %s: %s" % (t.name, regexp)) | |||||
else: | else: | ||||
if width == 0: | if width == 0: | ||||
raise ValueError("Dynamic Earley doesn't allow zero-width regexps", t) | |||||
raise GrammarError("Dynamic Earley doesn't allow zero-width regexps", t) | |||||
if lexer_conf.use_bytes: | if lexer_conf.use_bytes: | ||||
regexp = regexp.encode('utf-8') | regexp = regexp.encode('utf-8') | ||||
@@ -1,6 +1,5 @@ | |||||
import os | import os | ||||
from functools import reduce | from functools import reduce | ||||
from ast import literal_eval | |||||
from collections import deque | from collections import deque | ||||
###{standalone | ###{standalone | ||||
@@ -225,31 +224,6 @@ class Enumerator(Serialize): | |||||
return r | return r | ||||
def eval_escaping(s):
    """Evaluate the escape sequences in the body of a grammar literal.

    Escapes followed by one of ``u x n f t r`` are interpreted as in a Python
    unicode string literal. An escaped backslash is kept as two backslashes,
    and any other escaped character keeps its backslash in the result.
    ``\\"`` becomes a plain double quote.

    Parameters:
        s: the literal's text, without its enclosing quotes.

    Returns:
        The unescaped string.

    Raises:
        ValueError: if ``s`` ends in the middle of an escape sequence, or if
            the resulting text is not a valid Python string literal.
    """
    out = []
    stream = iter(s)
    for cur in stream:
        out.append(cur)
        if cur != '\\':
            continue
        try:
            following = next(stream)
        except StopIteration:
            raise ValueError("Literal ended unexpectedly (bad escaping): `%r`" % s)
        if following == '\\':
            # Double the pair so literal_eval leaves two backslashes behind.
            out.append('\\\\')
        elif following not in 'uxnftr':
            # Not an escape we interpret: protect the backslash itself.
            out.append('\\')
        out.append(following)

    # Single quotes must be escaped because the text is wrapped in '''...'''.
    text = ''.join(out).replace('\\"', '"').replace("'", "\\'")

    try:
        return literal_eval("u'''%s'''" % text)
    except SyntaxError as e:
        raise ValueError(s, e)
def combine_alternatives(lists): | def combine_alternatives(lists): | ||||
""" | """ | ||||