From e1b572c3a92f05d113f1e748885285e803811541 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Sun, 4 Oct 2020 01:08:50 +0200 Subject: [PATCH 1/3] Allows the standalone parser to take more options. --- lark/lark.py | 23 +++++++++++++---------- lark/parser_frontends.py | 12 +++++++----- lark/tools/standalone.py | 4 ++-- 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/lark/lark.py b/lark/lark.py index e749138..d18bf47 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -163,6 +163,11 @@ class LarkOptions(Serialize): return cls(data) +_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'keep_all_tokens', + 'tree_class'} +_LOAD_BLOCKED_OPTIONS = set(LarkOptions._defaults.keys()) - _LOAD_ALLOWED_OPTIONS + + class Lark(Serialize): """Main interface for the library. @@ -347,7 +352,7 @@ class Lark(Serialize): inst = cls.__new__(cls) return inst._load(f) - def _load(self, f, transformer=None, postlex=None): + def _load(self, f, **kwargs): if isinstance(f, dict): d = f else: @@ -358,10 +363,10 @@ class Lark(Serialize): assert memo memo = SerializeMemoizer.deserialize(memo, {'Rule': Rule, 'TerminalDef': TerminalDef}, {}) options = dict(data['options']) - if transformer is not None: - options['transformer'] = transformer - if postlex is not None: - options['postlex'] = postlex + if _LOAD_BLOCKED_OPTIONS.intersection(kwargs.keys()): + raise ValueError("Some options are not allowed when loading a Parser: {}" + .format(_LOAD_BLOCKED_OPTIONS.intersection(kwargs.keys()))) + options.update(kwargs) self.options = LarkOptions.deserialize(options, memo) re_module = regex if self.options.regex else re self.rules = [Rule.deserialize(r, memo) for r in data['rules']] @@ -371,19 +376,17 @@ class Lark(Serialize): data['parser'], memo, self._callbacks, - self.options.postlex, - self.options.transformer, re_module, - self.options.debug + self.options, # Not all, but multiple attributes are used ) self.terminals = self.parser.lexer_conf.tokens self._terminals_dict = {t.name: t for t in self.terminals} return self @classmethod - def _load_from_dict(cls, data, memo, transformer=None, postlex=None): + def _load_from_dict(cls, data, memo, **kwargs): inst = cls.__new__(cls) - return inst._load({'data': data, 'memo': memo}, transformer, postlex) + return inst._load({'data': data, 'memo': memo}, **kwargs) @classmethod def open(cls, grammar_filename, rel_to=None, **options): diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index d433d09..6b12ef2 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -82,16 +82,18 @@ class WithLexer(_ParserFrontend): self.postlex = lexer_conf.postlex @classmethod - def deserialize(cls, data, memo, callbacks, postlex, transformer, re_module, debug): + def deserialize(cls, data, memo, callbacks, re_module, options): inst = super(WithLexer, cls).deserialize(data, memo) - inst.postlex = postlex - inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks, debug) + inst.postlex = options.postlex + inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks, options.debug) terminals = [item for item in memo.values() if isinstance(item, TerminalDef)] - inst.lexer_conf.callbacks = _get_lexer_callbacks(transformer, terminals) + inst.lexer_conf.callbacks = _get_lexer_callbacks(options.transformer, terminals) inst.lexer_conf.re_module = re_module - inst.lexer_conf.skip_validation=True + inst.lexer_conf.skip_validation = True + inst.lexer_conf.use_bytes = options.use_bytes + inst.lexer_conf.g_regex_flags = options.g_regex_flags inst.init_lexer() return inst diff --git a/lark/tools/standalone.py b/lark/tools/standalone.py index 7993be0..f2af015 100644 --- a/lark/tools/standalone.py +++ b/lark/tools/standalone.py @@ -145,8 +145,8 @@ def main(fobj, start, print=print): print('Shift = 0') print('Reduce = 1') - print("def Lark_StandAlone(transformer=None, postlex=None):") - print(" return Lark._load_from_dict(DATA, MEMO, transformer=transformer, postlex=postlex)") + print("def Lark_StandAlone(**kwargs):") + print(" return Lark._load_from_dict(DATA, MEMO, **kwargs)") From acd04442bc30a625102a85b51b9b82ebd04891a7 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Sun, 4 Oct 2020 01:42:58 +0200 Subject: [PATCH 2/3] Fix for cache --- lark/lark.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lark/lark.py b/lark/lark.py index d18bf47..42bff12 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -236,7 +236,7 @@ class Lark(Serialize): if FS.exists(cache_fn): logger.debug('Loading grammar from cache: %s', cache_fn) with FS.open(cache_fn, 'rb') as f: - self._load(f, self.options.transformer, self.options.postlex) + self._load(f, self.options) return if self.options.lexer == 'auto': @@ -352,7 +352,7 @@ class Lark(Serialize): inst = cls.__new__(cls) return inst._load(f) - def _load(self, f, **kwargs): + def _load(self, f, temp_options=None, **kwargs): if isinstance(f, dict): d = f else: @@ -367,6 +367,9 @@ class Lark(Serialize): raise ValueError("Some options are not allowed when loading a Parser: {}" .format(_LOAD_BLOCKED_OPTIONS.intersection(kwargs.keys()))) options.update(kwargs) + if temp_options is not None: + for o in _LOAD_ALLOWED_OPTIONS: + options[o] = temp_options.options[o] self.options = LarkOptions.deserialize(options, memo) re_module = regex if self.options.regex else re self.rules = [Rule.deserialize(r, memo) for r in data['rules']] From bd22a2f79e3b86ef7b2873b13ef6986142cc8e46 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Mon, 5 Oct 2020 14:00:50 +0200 Subject: [PATCH 3/3] Removed _LOAD_BLOCKED_OPTIONS --- lark/lark.py | 16 ++++++---------- lark/parser_frontends.py | 9 +++++++-- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/lark/lark.py b/lark/lark.py index 42bff12..f6d2c65 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -165,7 +165,6 @@ class LarkOptions(Serialize): _LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'keep_all_tokens', 'tree_class'} -_LOAD_BLOCKED_OPTIONS = set(LarkOptions._defaults.keys()) - _LOAD_ALLOWED_OPTIONS class Lark(Serialize): @@ -235,8 +234,10 @@ class Lark(Serialize): if FS.exists(cache_fn): logger.debug('Loading grammar from cache: %s', cache_fn) + for name in (set(LarkOptions._defaults) - _LOAD_ALLOWED_OPTIONS): + options.pop(name, None) with FS.open(cache_fn, 'rb') as f: - self._load(f, self.options) + self._load(f, **options) return if self.options.lexer == 'auto': @@ -352,7 +353,7 @@ class Lark(Serialize): inst = cls.__new__(cls) return inst._load(f) - def _load(self, f, temp_options=None, **kwargs): + def _load(self, f, **kwargs): if isinstance(f, dict): d = f else: @@ -363,15 +364,11 @@ class Lark(Serialize): assert memo memo = SerializeMemoizer.deserialize(memo, {'Rule': Rule, 'TerminalDef': TerminalDef}, {}) options = dict(data['options']) - if _LOAD_BLOCKED_OPTIONS.intersection(kwargs.keys()): + if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults): raise ValueError("Some options are not allowed when loading a Parser: {}" - .format(_LOAD_BLOCKED_OPTIONS.intersection(kwargs.keys()))) + .format(set(kwargs) - _LOAD_ALLOWED_OPTIONS)) options.update(kwargs) - if temp_options is not None: - for o in _LOAD_ALLOWED_OPTIONS: - options[o] = temp_options.options[o] self.options = LarkOptions.deserialize(options, memo) - re_module = regex if self.options.regex else re self.rules = [Rule.deserialize(r, memo) for r in data['rules']] self.source = '' self._prepare_callbacks() @@ -379,7 +376,6 @@ class Lark(Serialize): data['parser'], memo, self._callbacks, - re_module, self.options, # Not all, but multiple attributes are used ) self.terminals = self.parser.lexer_conf.tokens diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index 6b12ef2..325315e 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -6,6 +6,11 @@ from .parsers.lalr_parser import LALR_Parser from .grammar import Rule from .tree import Tree from .common import LexerConf +try: + import regex +except ImportError: + regex = None +import re ###{standalone @@ -82,7 +87,7 @@ class WithLexer(_ParserFrontend): self.postlex = lexer_conf.postlex @classmethod - def deserialize(cls, data, memo, callbacks, re_module, options): + def deserialize(cls, data, memo, callbacks, options): inst = super(WithLexer, cls).deserialize(data, memo) inst.postlex = options.postlex @@ -90,7 +95,7 @@ class WithLexer(_ParserFrontend): terminals = [item for item in memo.values() if isinstance(item, TerminalDef)] inst.lexer_conf.callbacks = _get_lexer_callbacks(options.transformer, terminals) - inst.lexer_conf.re_module = re_module + inst.lexer_conf.re_module = regex if options.regex else re inst.lexer_conf.skip_validation = True inst.lexer_conf.use_bytes = options.use_bytes inst.lexer_conf.g_regex_flags = options.g_regex_flags