@@ -63,7 +63,10 @@ Useful for caching and multiprocessing. | |||||
**keep_all_tokens** - Prevent the tree builder from automagically removing "punctuation" tokens (default: False) | **keep_all_tokens** - Prevent the tree builder from automagically removing "punctuation" tokens (default: False) | ||||
**cache_grammar** - Cache the Lark grammar (Default: False) | |||||
**cache** - Cache the results of the Lark grammar analysis, for x2 to x3 faster loading. LALR only for now. | |||||
- When `False`, does nothing (default) | |||||
- When `True`, caches to a temporary file in the local directory | |||||
- When given a string, caches to the path pointed by the string | |||||
#### Algorithm | #### Algorithm | ||||
@@ -1809,7 +1809,7 @@ class LarkOptions(Serialize): | |||||
'debug': False, | 'debug': False, | ||||
'keep_all_tokens': False, | 'keep_all_tokens': False, | ||||
'tree_class': None, | 'tree_class': None, | ||||
'cache_grammar': False, | |||||
'cache': False, | |||||
'postlex': None, | 'postlex': None, | ||||
'parser': 'earley', | 'parser': 'earley', | ||||
'lexer': 'auto', | 'lexer': 'auto', | ||||
@@ -1,11 +1,10 @@ | |||||
from __future__ import absolute_import | from __future__ import absolute_import | ||||
import os | |||||
import sys, os, pickle, hashlib, logging | |||||
from io import open | from io import open | ||||
import pickle | |||||
from .utils import STRING_TYPE, Serialize, SerializeMemoizer | |||||
from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS | |||||
from .load_grammar import load_grammar | from .load_grammar import load_grammar | ||||
from .tree import Tree | from .tree import Tree | ||||
from .common import LexerConf, ParserConf | from .common import LexerConf, ParserConf | ||||
@@ -35,7 +34,12 @@ class LarkOptions(Serialize): | |||||
When `False`, `[]` behaves like the `?` operator, | When `False`, `[]` behaves like the `?` operator, | ||||
and returns no value at all. | and returns no value at all. | ||||
(default=`False`. Recommended to set to `True`) | (default=`False`. Recommended to set to `True`) | ||||
cache_grammar - Cache the Lark grammar (Default: False) | |||||
cache - Cache the results of the Lark grammar analysis, for x2 to x3 faster loading. | |||||
LALR only for now. | |||||
When `False`, does nothing (default) | |||||
When `True`, caches to a temporary file in the local directory | |||||
When given a string, caches to the path pointed by the string | |||||
g_regex_flags - Flags that are applied to all terminals | g_regex_flags - Flags that are applied to all terminals | ||||
(both regex and strings) | (both regex and strings) | ||||
keep_all_tokens - Prevent the tree builder from automagically | keep_all_tokens - Prevent the tree builder from automagically | ||||
@@ -80,7 +84,7 @@ class LarkOptions(Serialize): | |||||
'debug': False, | 'debug': False, | ||||
'keep_all_tokens': False, | 'keep_all_tokens': False, | ||||
'tree_class': None, | 'tree_class': None, | ||||
'cache_grammar': False, | |||||
'cache': False, | |||||
'postlex': None, | 'postlex': None, | ||||
'parser': 'earley', | 'parser': 'earley', | ||||
'lexer': 'auto', | 'lexer': 'auto', | ||||
@@ -102,7 +106,7 @@ class LarkOptions(Serialize): | |||||
for name, default in self._defaults.items(): | for name, default in self._defaults.items(): | ||||
if name in o: | if name in o: | ||||
value = o.pop(name) | value = o.pop(name) | ||||
if isinstance(default, bool): | |||||
if isinstance(default, bool) and name != 'cache': | |||||
value = bool(value) | value = bool(value) | ||||
else: | else: | ||||
value = default | value = default | ||||
@@ -147,6 +151,7 @@ class Lark(Serialize): | |||||
grammar : a string or file-object containing the grammar spec (using Lark's ebnf syntax) | grammar : a string or file-object containing the grammar spec (using Lark's ebnf syntax) | ||||
options : a dictionary controlling various aspects of Lark. | options : a dictionary controlling various aspects of Lark. | ||||
""" | """ | ||||
self.options = LarkOptions(options) | self.options = LarkOptions(options) | ||||
# Some, but not all file-like objects have a 'name' attribute | # Some, but not all file-like objects have a 'name' attribute | ||||
@@ -165,8 +170,24 @@ class Lark(Serialize): | |||||
assert isinstance(grammar, STRING_TYPE) | assert isinstance(grammar, STRING_TYPE) | ||||
if self.options.cache_grammar: | |||||
raise NotImplementedError("Not available yet") | |||||
cache_fn = None | |||||
if self.options.cache: | |||||
if isinstance(self.options.cache, STRING_TYPE): | |||||
cache_fn = self.options.cache | |||||
else: | |||||
if self.options.cache is not True: | |||||
raise ValueError("cache must be bool or str") | |||||
unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals') | |||||
options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable) | |||||
s = grammar + options_str | |||||
md5 = hashlib.md5(s.encode()).hexdigest() | |||||
cache_fn = '.lark_cache_%s.tmp' % md5 | |||||
if FS.exists(cache_fn): | |||||
logging.debug('Loading grammar from cache: %s', cache_fn) | |||||
with FS.open(cache_fn, 'rb') as f: | |||||
self._load(f, self.options.transformer, self.options.postlex) | |||||
return | |||||
if self.options.lexer == 'auto': | if self.options.lexer == 'auto': | ||||
if self.options.parser == 'lalr': | if self.options.parser == 'lalr': | ||||
@@ -241,6 +262,11 @@ class Lark(Serialize): | |||||
elif lexer: | elif lexer: | ||||
self.lexer = self._build_lexer() | self.lexer = self._build_lexer() | ||||
if cache_fn: | |||||
logging.debug('Saving grammar to cache: %s', cache_fn) | |||||
with FS.open(cache_fn, 'wb') as f: | |||||
self.save(f) | |||||
if __init__.__doc__: | if __init__.__doc__: | ||||
__init__.__doc__ += "\nOptions:\n" + LarkOptions.OPTIONS_DOC | __init__.__doc__ += "\nOptions:\n" + LarkOptions.OPTIONS_DOC | ||||
@@ -259,34 +285,41 @@ class Lark(Serialize): | |||||
parser_conf = ParserConf(self.rules, self._callbacks, self.options.start) | parser_conf = ParserConf(self.rules, self._callbacks, self.options.start) | ||||
return self.parser_class(self.lexer_conf, parser_conf, options=self.options) | return self.parser_class(self.lexer_conf, parser_conf, options=self.options) | ||||
def save(self, f): | |||||
data, m = self.memo_serialize([TerminalDef, Rule]) | |||||
pickle.dump({'data': data, 'memo': m}, f) | |||||
@classmethod | @classmethod | ||||
def deserialize(cls, data, namespace, memo, transformer=None, postlex=None): | |||||
if memo: | |||||
memo = SerializeMemoizer.deserialize(memo, namespace, {}) | |||||
def load(cls, f): | |||||
inst = cls.__new__(cls) | inst = cls.__new__(cls) | ||||
return inst._load(f) | |||||
def _load(self, f, transformer=None, postlex=None): | |||||
if isinstance(f, dict): | |||||
d = f | |||||
else: | |||||
d = pickle.load(f) | |||||
memo = d['memo'] | |||||
data = d['data'] | |||||
assert memo | |||||
memo = SerializeMemoizer.deserialize(memo, {'Rule': Rule, 'TerminalDef': TerminalDef}, {}) | |||||
options = dict(data['options']) | options = dict(data['options']) | ||||
if transformer is not None: | if transformer is not None: | ||||
options['transformer'] = transformer | options['transformer'] = transformer | ||||
if postlex is not None: | if postlex is not None: | ||||
options['postlex'] = postlex | options['postlex'] = postlex | ||||
inst.options = LarkOptions.deserialize(options, memo) | |||||
inst.rules = [Rule.deserialize(r, memo) for r in data['rules']] | |||||
inst.source = '<deserialized>' | |||||
inst._prepare_callbacks() | |||||
inst.parser = inst.parser_class.deserialize(data['parser'], memo, inst._callbacks, inst.options.postlex) | |||||
return inst | |||||
def save(self, f): | |||||
data, m = self.memo_serialize([TerminalDef, Rule]) | |||||
pickle.dump({'data': data, 'memo': m}, f) | |||||
self.options = LarkOptions.deserialize(options, memo) | |||||
self.rules = [Rule.deserialize(r, memo) for r in data['rules']] | |||||
self.source = '<deserialized>' | |||||
self._prepare_callbacks() | |||||
self.parser = self.parser_class.deserialize(data['parser'], memo, self._callbacks, self.options.postlex) | |||||
return self | |||||
@classmethod | @classmethod | ||||
def load(cls, f): | |||||
d = pickle.load(f) | |||||
namespace = {'Rule': Rule, 'TerminalDef': TerminalDef} | |||||
memo = d['memo'] | |||||
return Lark.deserialize(d['data'], namespace, memo) | |||||
def _load_from_dict(cls, data, memo, transformer=None, postlex=None): | |||||
inst = cls.__new__(cls) | |||||
return inst._load({'data': data, 'memo': memo}, transformer, postlex) | |||||
@classmethod | @classmethod | ||||
def open(cls, grammar_filename, rel_to=None, **options): | def open(cls, grammar_filename, rel_to=None, **options): | ||||
@@ -106,8 +106,7 @@ def main(fobj, start): | |||||
print('Shift = 0') | print('Shift = 0') | ||||
print('Reduce = 1') | print('Reduce = 1') | ||||
print("def Lark_StandAlone(transformer=None, postlex=None):") | print("def Lark_StandAlone(transformer=None, postlex=None):") | ||||
print(" namespace = {'Rule': Rule, 'TerminalDef': TerminalDef}") | |||||
print(" return Lark.deserialize(DATA, namespace, MEMO, transformer=transformer, postlex=postlex)") | |||||
print(" return Lark._load_from_dict(DATA, MEMO, transformer=transformer, postlex=postlex)") | |||||
@@ -1,4 +1,5 @@ | |||||
import sys | import sys | ||||
import os | |||||
from functools import reduce | from functools import reduce | ||||
from ast import literal_eval | from ast import literal_eval | ||||
from collections import deque | from collections import deque | ||||
@@ -37,9 +38,6 @@ def bfs(initial, expand): | |||||
def _serialize(value, memo): | def _serialize(value, memo): | ||||
# if memo and memo.in_types(value): | |||||
# return {'__memo__': memo.memoized.get(value)} | |||||
if isinstance(value, Serialize): | if isinstance(value, Serialize): | ||||
return value.serialize(memo) | return value.serialize(memo) | ||||
elif isinstance(value, list): | elif isinstance(value, list): | ||||
@@ -287,3 +285,9 @@ def combine_alternatives(lists): | |||||
assert all(l for l in lists), lists | assert all(l for l in lists), lists | ||||
init = [[x] for x in lists[0]] | init = [[x] for x in lists[0]] | ||||
return reduce(lambda a,b: [i+[j] for i in a for j in b], lists[1:], init) | return reduce(lambda a,b: [i+[j] for i in a for j in b], lists[1:], init) | ||||
class FS: | |||||
open = open | |||||
exists = os.path.exists |
@@ -33,7 +33,7 @@ class LarkOptions: | |||||
propagate_positions: bool | propagate_positions: bool | ||||
maybe_placeholders: bool | maybe_placeholders: bool | ||||
lexer_callbacks: Dict[str, Callable[[Token], Token]] | lexer_callbacks: Dict[str, Callable[[Token], Token]] | ||||
cache_grammar: bool | |||||
cache: Union[bool, str] | |||||
g_regex_flags: int | g_regex_flags: int | ||||
@@ -5,6 +5,7 @@ import logging | |||||
from .test_trees import TestTrees | from .test_trees import TestTrees | ||||
from .test_tools import TestStandalone | from .test_tools import TestStandalone | ||||
from .test_cache import TestCache | |||||
from .test_reconstructor import TestReconstructor | from .test_reconstructor import TestReconstructor | ||||
try: | try: | ||||
@@ -0,0 +1,82 @@ | |||||
from __future__ import absolute_import | |||||
import sys | |||||
from unittest import TestCase, main | |||||
from lark import Lark, Tree | |||||
import lark.lark as lark_module | |||||
try: | |||||
from StringIO import StringIO | |||||
except ImportError: | |||||
from io import BytesIO as StringIO | |||||
import tempfile, os | |||||
class MockFile(StringIO): | |||||
def close(self): | |||||
pass | |||||
def __enter__(self): | |||||
return self | |||||
def __exit__(self, *args): | |||||
pass | |||||
class MockFS: | |||||
def __init__(self): | |||||
self.files = {} | |||||
def open(self, name, mode=None): | |||||
if name not in self.files: | |||||
f = self.files[name] = MockFile() | |||||
else: | |||||
f = self.files[name] | |||||
f.seek(0) | |||||
return f | |||||
def exists(self, name): | |||||
return name in self.files | |||||
class TestCache(TestCase): | |||||
def setUp(self): | |||||
pass | |||||
def test_simple(self): | |||||
g = '''start: "a"''' | |||||
fn = "bla" | |||||
fs = lark_module.FS | |||||
mock_fs = MockFS() | |||||
try: | |||||
lark_module.FS = mock_fs | |||||
Lark(g, parser='lalr', cache=fn) | |||||
assert fn in mock_fs.files | |||||
parser = Lark(g, parser='lalr', cache=fn) | |||||
assert parser.parse('a') == Tree('start', []) | |||||
mock_fs.files = {} | |||||
assert len(mock_fs.files) == 0 | |||||
Lark(g, parser='lalr', cache=True) | |||||
assert len(mock_fs.files) == 1 | |||||
parser = Lark(g, parser='lalr', cache=True) | |||||
assert parser.parse('a') == Tree('start', []) | |||||
parser = Lark(g+' "b"', parser='lalr', cache=True) | |||||
assert len(mock_fs.files) == 2 | |||||
assert parser.parse('ab') == Tree('start', []) | |||||
parser = Lark(g, parser='lalr', cache=True) | |||||
assert parser.parse('a') == Tree('start', []) | |||||
finally: | |||||
lark_module.FS = fs | |||||
if __name__ == '__main__': | |||||
main() | |||||
@@ -14,6 +14,7 @@ except ImportError: | |||||
cStringIO = None | cStringIO = None | ||||
from io import ( | from io import ( | ||||
StringIO as uStringIO, | StringIO as uStringIO, | ||||
BytesIO, | |||||
open, | open, | ||||
) | ) | ||||
@@ -26,6 +27,8 @@ from lark.visitors import Transformer, Transformer_InPlace, v_args | |||||
from lark.grammar import Rule | from lark.grammar import Rule | ||||
from lark.lexer import TerminalDef, Lexer, TraditionalLexer | from lark.lexer import TerminalDef, Lexer, TraditionalLexer | ||||
__path__ = os.path.dirname(__file__) | __path__ = os.path.dirname(__file__) | ||||
def _read(n, *args): | def _read(n, *args): | ||||
with open(os.path.join(__path__, n), *args) as f: | with open(os.path.join(__path__, n), *args) as f: | ||||
@@ -873,7 +876,7 @@ def _make_parser_test(LEXER, PARSER): | |||||
self.assertSequenceEqual(x.children, [Tree('expr', [])]) | self.assertSequenceEqual(x.children, [Tree('expr', [])]) | ||||
x = g.parse("BC") | x = g.parse("BC") | ||||
self.assertSequenceEqual(x.children, [Tree('b', [])]) | self.assertSequenceEqual(x.children, [Tree('b', [])]) | ||||
def test_templates_modifiers(self): | def test_templates_modifiers(self): | ||||
g = _Lark(r""" | g = _Lark(r""" | ||||
start: expr{"B"} | start: expr{"B"} | ||||
@@ -1736,15 +1739,12 @@ def _make_parser_test(LEXER, PARSER): | |||||
b: "B" | b: "B" | ||||
""" | """ | ||||
parser = _Lark(grammar) | parser = _Lark(grammar) | ||||
d = parser.serialize() | |||||
parser2 = Lark.deserialize(d, {}, {}) | |||||
s = BytesIO() | |||||
parser.save(s) | |||||
s.seek(0) | |||||
parser2 = Lark.load(s) | |||||
self.assertEqual(parser2.parse('ABC'), Tree('start', [Tree('b', [])]) ) | self.assertEqual(parser2.parse('ABC'), Tree('start', [Tree('b', [])]) ) | ||||
namespace = {'Rule': Rule, 'TerminalDef': TerminalDef} | |||||
d, m = parser.memo_serialize(namespace.values()) | |||||
parser3 = Lark.deserialize(d, namespace, m) | |||||
self.assertEqual(parser3.parse('ABC'), Tree('start', [Tree('b', [])]) ) | |||||
def test_multi_start(self): | def test_multi_start(self): | ||||
parser = _Lark(''' | parser = _Lark(''' | ||||
a: "x" "a"? | a: "x" "a"? | ||||