@@ -63,7 +63,10 @@ Useful for caching and multiprocessing. | |||
**keep_all_tokens** - Prevent the tree builder from automagically removing "punctuation" tokens (default: False) | |||
**cache_grammar** - Cache the Lark grammar (Default: False) | |||
**cache** - Cache the results of the Lark grammar analysis, for x2 to x3 faster loading. LALR only for now. | |||
- When `False`, does nothing (default) | |||
- When `True`, caches to a temporary file in the local directory | |||
- When given a string, caches to the path pointed by the string | |||
#### Algorithm | |||
@@ -1809,7 +1809,7 @@ class LarkOptions(Serialize): | |||
'debug': False, | |||
'keep_all_tokens': False, | |||
'tree_class': None, | |||
'cache_grammar': False, | |||
'cache': False, | |||
'postlex': None, | |||
'parser': 'earley', | |||
'lexer': 'auto', | |||
@@ -1,11 +1,10 @@ | |||
from __future__ import absolute_import | |||
import os | |||
import sys, os, pickle, hashlib, logging | |||
from io import open | |||
import pickle | |||
from .utils import STRING_TYPE, Serialize, SerializeMemoizer | |||
from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS | |||
from .load_grammar import load_grammar | |||
from .tree import Tree | |||
from .common import LexerConf, ParserConf | |||
@@ -35,7 +34,12 @@ class LarkOptions(Serialize): | |||
When `False`, `[]` behaves like the `?` operator, | |||
and returns no value at all. | |||
(default=`False`. Recommended to set to `True`) | |||
cache_grammar - Cache the Lark grammar (Default: False) | |||
cache - Cache the results of the Lark grammar analysis, for x2 to x3 faster loading. | |||
LALR only for now. | |||
When `False`, does nothing (default) | |||
When `True`, caches to a temporary file in the local directory | |||
When given a string, caches to the path pointed by the string | |||
g_regex_flags - Flags that are applied to all terminals | |||
(both regex and strings) | |||
keep_all_tokens - Prevent the tree builder from automagically | |||
@@ -80,7 +84,7 @@ class LarkOptions(Serialize): | |||
'debug': False, | |||
'keep_all_tokens': False, | |||
'tree_class': None, | |||
'cache_grammar': False, | |||
'cache': False, | |||
'postlex': None, | |||
'parser': 'earley', | |||
'lexer': 'auto', | |||
@@ -102,7 +106,7 @@ class LarkOptions(Serialize): | |||
for name, default in self._defaults.items(): | |||
if name in o: | |||
value = o.pop(name) | |||
if isinstance(default, bool): | |||
if isinstance(default, bool) and name != 'cache': | |||
value = bool(value) | |||
else: | |||
value = default | |||
@@ -147,6 +151,7 @@ class Lark(Serialize): | |||
grammar : a string or file-object containing the grammar spec (using Lark's ebnf syntax) | |||
options : a dictionary controlling various aspects of Lark. | |||
""" | |||
self.options = LarkOptions(options) | |||
# Some, but not all file-like objects have a 'name' attribute | |||
@@ -165,8 +170,24 @@ class Lark(Serialize): | |||
assert isinstance(grammar, STRING_TYPE) | |||
if self.options.cache_grammar: | |||
raise NotImplementedError("Not available yet") | |||
cache_fn = None | |||
if self.options.cache: | |||
if isinstance(self.options.cache, STRING_TYPE): | |||
cache_fn = self.options.cache | |||
else: | |||
if self.options.cache is not True: | |||
raise ValueError("cache must be bool or str") | |||
unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals') | |||
options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable) | |||
s = grammar + options_str | |||
md5 = hashlib.md5(s.encode()).hexdigest() | |||
cache_fn = '.lark_cache_%s.tmp' % md5 | |||
if FS.exists(cache_fn): | |||
logging.debug('Loading grammar from cache: %s', cache_fn) | |||
with FS.open(cache_fn, 'rb') as f: | |||
self._load(f, self.options.transformer, self.options.postlex) | |||
return | |||
if self.options.lexer == 'auto': | |||
if self.options.parser == 'lalr': | |||
@@ -241,6 +262,11 @@ class Lark(Serialize): | |||
elif lexer: | |||
self.lexer = self._build_lexer() | |||
if cache_fn: | |||
logging.debug('Saving grammar to cache: %s', cache_fn) | |||
with FS.open(cache_fn, 'wb') as f: | |||
self.save(f) | |||
if __init__.__doc__: | |||
__init__.__doc__ += "\nOptions:\n" + LarkOptions.OPTIONS_DOC | |||
@@ -259,34 +285,41 @@ class Lark(Serialize): | |||
parser_conf = ParserConf(self.rules, self._callbacks, self.options.start) | |||
return self.parser_class(self.lexer_conf, parser_conf, options=self.options) | |||
def save(self, f): | |||
data, m = self.memo_serialize([TerminalDef, Rule]) | |||
pickle.dump({'data': data, 'memo': m}, f) | |||
@classmethod | |||
def deserialize(cls, data, namespace, memo, transformer=None, postlex=None): | |||
if memo: | |||
memo = SerializeMemoizer.deserialize(memo, namespace, {}) | |||
def load(cls, f): | |||
inst = cls.__new__(cls) | |||
return inst._load(f) | |||
def _load(self, f, transformer=None, postlex=None): | |||
if isinstance(f, dict): | |||
d = f | |||
else: | |||
d = pickle.load(f) | |||
memo = d['memo'] | |||
data = d['data'] | |||
assert memo | |||
memo = SerializeMemoizer.deserialize(memo, {'Rule': Rule, 'TerminalDef': TerminalDef}, {}) | |||
options = dict(data['options']) | |||
if transformer is not None: | |||
options['transformer'] = transformer | |||
if postlex is not None: | |||
options['postlex'] = postlex | |||
inst.options = LarkOptions.deserialize(options, memo) | |||
inst.rules = [Rule.deserialize(r, memo) for r in data['rules']] | |||
inst.source = '<deserialized>' | |||
inst._prepare_callbacks() | |||
inst.parser = inst.parser_class.deserialize(data['parser'], memo, inst._callbacks, inst.options.postlex) | |||
return inst | |||
def save(self, f): | |||
data, m = self.memo_serialize([TerminalDef, Rule]) | |||
pickle.dump({'data': data, 'memo': m}, f) | |||
self.options = LarkOptions.deserialize(options, memo) | |||
self.rules = [Rule.deserialize(r, memo) for r in data['rules']] | |||
self.source = '<deserialized>' | |||
self._prepare_callbacks() | |||
self.parser = self.parser_class.deserialize(data['parser'], memo, self._callbacks, self.options.postlex) | |||
return self | |||
@classmethod | |||
def load(cls, f): | |||
d = pickle.load(f) | |||
namespace = {'Rule': Rule, 'TerminalDef': TerminalDef} | |||
memo = d['memo'] | |||
return Lark.deserialize(d['data'], namespace, memo) | |||
def _load_from_dict(cls, data, memo, transformer=None, postlex=None): | |||
inst = cls.__new__(cls) | |||
return inst._load({'data': data, 'memo': memo}, transformer, postlex) | |||
@classmethod | |||
def open(cls, grammar_filename, rel_to=None, **options): | |||
@@ -106,8 +106,7 @@ def main(fobj, start): | |||
print('Shift = 0') | |||
print('Reduce = 1') | |||
print("def Lark_StandAlone(transformer=None, postlex=None):") | |||
print(" namespace = {'Rule': Rule, 'TerminalDef': TerminalDef}") | |||
print(" return Lark.deserialize(DATA, namespace, MEMO, transformer=transformer, postlex=postlex)") | |||
print(" return Lark._load_from_dict(DATA, MEMO, transformer=transformer, postlex=postlex)") | |||
@@ -1,4 +1,5 @@ | |||
import sys | |||
import os | |||
from functools import reduce | |||
from ast import literal_eval | |||
from collections import deque | |||
@@ -37,9 +38,6 @@ def bfs(initial, expand): | |||
def _serialize(value, memo): | |||
# if memo and memo.in_types(value): | |||
# return {'__memo__': memo.memoized.get(value)} | |||
if isinstance(value, Serialize): | |||
return value.serialize(memo) | |||
elif isinstance(value, list): | |||
@@ -287,3 +285,9 @@ def combine_alternatives(lists): | |||
assert all(l for l in lists), lists | |||
init = [[x] for x in lists[0]] | |||
return reduce(lambda a,b: [i+[j] for i in a for j in b], lists[1:], init) | |||
class FS: | |||
open = open | |||
exists = os.path.exists |
@@ -33,7 +33,7 @@ class LarkOptions: | |||
propagate_positions: bool | |||
maybe_placeholders: bool | |||
lexer_callbacks: Dict[str, Callable[[Token], Token]] | |||
cache_grammar: bool | |||
cache: Union[bool, str] | |||
g_regex_flags: int | |||
@@ -5,6 +5,7 @@ import logging | |||
from .test_trees import TestTrees | |||
from .test_tools import TestStandalone | |||
from .test_cache import TestCache | |||
from .test_reconstructor import TestReconstructor | |||
try: | |||
@@ -0,0 +1,82 @@ | |||
from __future__ import absolute_import | |||
import sys | |||
from unittest import TestCase, main | |||
from lark import Lark, Tree | |||
import lark.lark as lark_module | |||
try: | |||
from StringIO import StringIO | |||
except ImportError: | |||
from io import BytesIO as StringIO | |||
import tempfile, os | |||
class MockFile(StringIO): | |||
def close(self): | |||
pass | |||
def __enter__(self): | |||
return self | |||
def __exit__(self, *args): | |||
pass | |||
class MockFS: | |||
def __init__(self): | |||
self.files = {} | |||
def open(self, name, mode=None): | |||
if name not in self.files: | |||
f = self.files[name] = MockFile() | |||
else: | |||
f = self.files[name] | |||
f.seek(0) | |||
return f | |||
def exists(self, name): | |||
return name in self.files | |||
class TestCache(TestCase): | |||
def setUp(self): | |||
pass | |||
def test_simple(self): | |||
g = '''start: "a"''' | |||
fn = "bla" | |||
fs = lark_module.FS | |||
mock_fs = MockFS() | |||
try: | |||
lark_module.FS = mock_fs | |||
Lark(g, parser='lalr', cache=fn) | |||
assert fn in mock_fs.files | |||
parser = Lark(g, parser='lalr', cache=fn) | |||
assert parser.parse('a') == Tree('start', []) | |||
mock_fs.files = {} | |||
assert len(mock_fs.files) == 0 | |||
Lark(g, parser='lalr', cache=True) | |||
assert len(mock_fs.files) == 1 | |||
parser = Lark(g, parser='lalr', cache=True) | |||
assert parser.parse('a') == Tree('start', []) | |||
parser = Lark(g+' "b"', parser='lalr', cache=True) | |||
assert len(mock_fs.files) == 2 | |||
assert parser.parse('ab') == Tree('start', []) | |||
parser = Lark(g, parser='lalr', cache=True) | |||
assert parser.parse('a') == Tree('start', []) | |||
finally: | |||
lark_module.FS = fs | |||
if __name__ == '__main__': | |||
main() | |||
@@ -14,6 +14,7 @@ except ImportError: | |||
cStringIO = None | |||
from io import ( | |||
StringIO as uStringIO, | |||
BytesIO, | |||
open, | |||
) | |||
@@ -26,6 +27,8 @@ from lark.visitors import Transformer, Transformer_InPlace, v_args | |||
from lark.grammar import Rule | |||
from lark.lexer import TerminalDef, Lexer, TraditionalLexer | |||
__path__ = os.path.dirname(__file__) | |||
def _read(n, *args): | |||
with open(os.path.join(__path__, n), *args) as f: | |||
@@ -873,7 +876,7 @@ def _make_parser_test(LEXER, PARSER): | |||
self.assertSequenceEqual(x.children, [Tree('expr', [])]) | |||
x = g.parse("BC") | |||
self.assertSequenceEqual(x.children, [Tree('b', [])]) | |||
def test_templates_modifiers(self): | |||
g = _Lark(r""" | |||
start: expr{"B"} | |||
@@ -1736,15 +1739,12 @@ def _make_parser_test(LEXER, PARSER): | |||
b: "B" | |||
""" | |||
parser = _Lark(grammar) | |||
d = parser.serialize() | |||
parser2 = Lark.deserialize(d, {}, {}) | |||
s = BytesIO() | |||
parser.save(s) | |||
s.seek(0) | |||
parser2 = Lark.load(s) | |||
self.assertEqual(parser2.parse('ABC'), Tree('start', [Tree('b', [])]) ) | |||
namespace = {'Rule': Rule, 'TerminalDef': TerminalDef} | |||
d, m = parser.memo_serialize(namespace.values()) | |||
parser3 = Lark.deserialize(d, namespace, m) | |||
self.assertEqual(parser3.parse('ABC'), Tree('start', [Tree('b', [])]) ) | |||
def test_multi_start(self): | |||
parser = _Lark(''' | |||
a: "x" "a"? | |||