# Patch summary (three files):
#
# lark/lark.py
#   * Adds the module-level set _LOAD_ALLOWED_OPTIONS, naming the options that
#     may be supplied when a parser is loaded rather than built.
#   * On a cache hit (FS.exists(cache_fn)), every LarkOptions._defaults key that
#     is not in _LOAD_ALLOWED_OPTIONS is popped from `options`, and the rest is
#     forwarded to _load() as keyword arguments.
#   * Lark._load() and Lark._load_from_dict() take **kwargs instead of the
#     explicit transformer/postlex parameters.  _load() raises ValueError when a
#     kwarg is a known LarkOptions default but is not in _LOAD_ALLOWED_OPTIONS;
#     otherwise the kwargs are merged over the serialized options.
#   * The parser frontend's deserialize() now receives the whole options object
#     instead of postlex, transformer, re_module and debug separately.
#
# lark/parser_frontends.py
#   * Imports `regex` (falling back to None on ImportError) and `re`.
#   * WithLexer.deserialize() takes `options`, chooses the regex module itself,
#     and additionally copies use_bytes and g_regex_flags onto lexer_conf.
#
# lark/tools/standalone.py
#   * The generated Lark_StandAlone() accepts **kwargs and forwards them to
#     Lark._load_from_dict().
#
# NOTE(review): this patch arrived with all line breaks and indentation
# collapsed.  The layout below was reconstructed from the hunk headers' line
# counts and from Python syntax; the absolute indentation depth and the
# position of blank context lines (including the blank lines closing the last
# hunk) are assumptions -- verify against the upstream commit before applying.
# NOTE(review): the ValueError message formats set(kwargs) - _LOAD_ALLOWED_OPTIONS,
# which would also list keys that are not LarkOptions defaults -- confirm this
# is intended.
diff --git a/lark/lark.py b/lark/lark.py
index b28c4e6..f0f3435 100644
--- a/lark/lark.py
+++ b/lark/lark.py
@@ -163,6 +163,10 @@ class LarkOptions(Serialize):
         return cls(data)
 
 
+_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'keep_all_tokens',
+                         'tree_class'}
+
+
 class Lark(Serialize):
     """Main interface for the library.
 
@@ -230,8 +234,10 @@ class Lark(Serialize):
 
             if FS.exists(cache_fn):
                 logger.debug('Loading grammar from cache: %s', cache_fn)
+                for name in (set(LarkOptions._defaults) - _LOAD_ALLOWED_OPTIONS):
+                    options.pop(name, None)
                 with FS.open(cache_fn, 'rb') as f:
-                    self._load(f, self.options.transformer, self.options.postlex)
+                    self._load(f, **options)
                 return
 
         if self.options.lexer == 'auto':
@@ -353,7 +359,7 @@ class Lark(Serialize):
         inst = cls.__new__(cls)
         return inst._load(f)
 
-    def _load(self, f, transformer=None, postlex=None):
+    def _load(self, f, **kwargs):
         if isinstance(f, dict):
             d = f
         else:
@@ -364,12 +370,11 @@ class Lark(Serialize):
         assert memo
         memo = SerializeMemoizer.deserialize(memo, {'Rule': Rule, 'TerminalDef': TerminalDef}, {})
         options = dict(data['options'])
-        if transformer is not None:
-            options['transformer'] = transformer
-        if postlex is not None:
-            options['postlex'] = postlex
+        if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults):
+            raise ValueError("Some options are not allowed when loading a Parser: {}"
+                             .format(set(kwargs) - _LOAD_ALLOWED_OPTIONS))
+        options.update(kwargs)
         self.options = LarkOptions.deserialize(options, memo)
-        re_module = regex if self.options.regex else re
         self.rules = [Rule.deserialize(r, memo) for r in data['rules']]
         self.source = ''
         self._prepare_callbacks()
@@ -377,19 +382,16 @@ class Lark(Serialize):
             data['parser'],
             memo,
             self._callbacks,
-            self.options.postlex,
-            self.options.transformer,
-            re_module,
-            self.options.debug
+            self.options,  # Not all, but multiple attributes are used
         )
         self.terminals = self.parser.lexer_conf.tokens
         self._terminals_dict = {t.name: t for t in self.terminals}
         return self
 
     @classmethod
-    def _load_from_dict(cls, data, memo, transformer=None, postlex=None):
+    def _load_from_dict(cls, data, memo, **kwargs):
         inst = cls.__new__(cls)
-        return inst._load({'data': data, 'memo': memo}, transformer, postlex)
+        return inst._load({'data': data, 'memo': memo}, **kwargs)
 
     @classmethod
     def open(cls, grammar_filename, rel_to=None, **options):
diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py
index d433d09..325315e 100644
--- a/lark/parser_frontends.py
+++ b/lark/parser_frontends.py
@@ -6,6 +6,11 @@ from .parsers.lalr_parser import LALR_Parser
 from .grammar import Rule
 from .tree import Tree
 from .common import LexerConf
+try:
+    import regex
+except ImportError:
+    regex = None
+import re
 
 ###{standalone
 
@@ -82,16 +87,18 @@ class WithLexer(_ParserFrontend):
         self.postlex = lexer_conf.postlex
 
     @classmethod
-    def deserialize(cls, data, memo, callbacks, postlex, transformer, re_module, debug):
+    def deserialize(cls, data, memo, callbacks, options):
         inst = super(WithLexer, cls).deserialize(data, memo)
 
-        inst.postlex = postlex
-        inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks, debug)
+        inst.postlex = options.postlex
+        inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks, options.debug)
 
         terminals = [item for item in memo.values() if isinstance(item, TerminalDef)]
-        inst.lexer_conf.callbacks = _get_lexer_callbacks(transformer, terminals)
-        inst.lexer_conf.re_module = re_module
-        inst.lexer_conf.skip_validation=True
+        inst.lexer_conf.callbacks = _get_lexer_callbacks(options.transformer, terminals)
+        inst.lexer_conf.re_module = regex if options.regex else re
+        inst.lexer_conf.skip_validation = True
+        inst.lexer_conf.use_bytes = options.use_bytes
+        inst.lexer_conf.g_regex_flags = options.g_regex_flags
         inst.init_lexer()
 
         return inst
diff --git a/lark/tools/standalone.py b/lark/tools/standalone.py
index 7993be0..f2af015 100644
--- a/lark/tools/standalone.py
+++ b/lark/tools/standalone.py
@@ -145,8 +145,8 @@ def main(fobj, start, print=print):
 
     print('Shift = 0')
     print('Reduce = 1')
-    print("def Lark_StandAlone(transformer=None, postlex=None):")
-    print(" return Lark._load_from_dict(DATA, MEMO, transformer=transformer, postlex=postlex)")
+    print("def Lark_StandAlone(**kwargs):")
+    print(" return Lark._load_from_dict(DATA, MEMO, **kwargs)")
 
 
 