From 704e7552bd23a71b06ba087b056d874db1649891 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Fri, 9 Apr 2021 17:23:19 +0200 Subject: [PATCH 1/4] Added support for atomicwrites --- lark-stubs/lark.pyi | 2 ++ lark/lark.py | 22 ++++++++++++++++++---- lark/utils.py | 13 ++++++++++++- setup.py | 3 ++- 4 files changed, 34 insertions(+), 6 deletions(-) diff --git a/lark-stubs/lark.pyi b/lark-stubs/lark.pyi index fba567b..69c7375 100644 --- a/lark-stubs/lark.pyi +++ b/lark-stubs/lark.pyi @@ -38,6 +38,7 @@ class LarkOptions: use_bytes: bool import_paths: List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]] source_path: Optional[str] + safe_cache: Literal[False, True, "atomic"] class PackageResource(object): @@ -81,6 +82,7 @@ class Lark: use_bytes: bool = False, import_paths: List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]] = ..., source_path: Optional[str]=None, + safe_cache: Literal[False, True, "atomic"]=True, ): ... diff --git a/lark/lark.py b/lark/lark.py index ba98d16..238d77a 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -23,6 +23,11 @@ try: import regex except ImportError: regex = None +try: + import atomicwrites +except ImportError: + atomicwrites = None + ###{standalone @@ -100,7 +105,11 @@ class LarkOptions(Serialize): A List of either paths or loader functions to specify from where grammars are imported source_path Override the source of from where the grammar was loaded. 
Useful for relative imports and unconventional grammar loading - + safe_cache + Controls how exactly the cache is saved & verified + - False: simple read/write, no extend file checking + - True: use atomicwrites if available and check if any of the imported files were modified + - "atomic": same as True, but require atomicwrites to be installed **=== End Options ===** """ if __doc__: @@ -136,6 +145,7 @@ class LarkOptions(Serialize): 'use_bytes': False, 'import_paths': [], 'source_path': None, + 'safe_cache': True, } def __init__(self, options_dict): @@ -145,7 +155,7 @@ class LarkOptions(Serialize): for name, default in self._defaults.items(): if name in o: value = o.pop(name) - if isinstance(default, bool) and name not in ('cache', 'use_bytes'): + if isinstance(default, bool) and name not in ('cache', 'use_bytes', 'safe_cache'): value = bool(value) else: value = default @@ -258,11 +268,15 @@ class Lark(Serialize): if self.options.cache: if self.options.parser != 'lalr': raise ConfigurationError("cache only works with parser='lalr' for now") + + if self.options.safe_cache == "atomic": + if not atomicwrites: + raise ConfigurationError("safe_cache='atomic' requires atomicwrites to be installed") unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals') options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable) from . 
import __version__ - s = grammar + options_str + __version__ + s = grammar + options_str + __version__ + str(sys.version_info[:2]) cache_md5 = hashlib.md5(s.encode()).hexdigest() if isinstance(self.options.cache, STRING_TYPE): @@ -270,7 +284,7 @@ class Lark(Serialize): else: if self.options.cache is not True: raise ConfigurationError("cache argument must be bool or str") - cache_fn = tempfile.gettempdir() + '/.lark_cache_%s.tmp' % cache_md5 + cache_fn = tempfile.gettempdir() + '/.lark_cache_%s_%s_%s.tmp' % (cache_md5, *sys.version_info[:2]) if FS.exists(cache_fn): logger.debug('Loading grammar from cache: %s', cache_fn) diff --git a/lark/utils.py b/lark/utils.py index f3bd957..1cd8bac 100644 --- a/lark/utils.py +++ b/lark/utils.py @@ -281,9 +281,20 @@ def combine_alternatives(lists): return reduce(lambda a,b: [i+[j] for i in a for j in b], lists[1:], init) +try: + import atomicwrites +except ImportError: + atomicwrites = None + class FS: - open = open exists = os.path.exists + + @staticmethod + def open(name, mode="r", **kwargs): + if atomicwrites and "w" in mode: + return atomicwrites.atomic_write(name, mode=mode, override=True, **kwargs) + else: + return open(name, mode, **kwargs) def isascii(s): diff --git a/setup.py b/setup.py index b3897c5..0dc3784 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,8 @@ setup( extras_require = { "regex": ["regex"], - "nearley": ["js2py"] + "nearley": ["js2py"], + "atomicwrites": ["atomicwrites"], }, package_data = {'': ['*.md', '*.lark'], 'lark-stubs': ['*.pyi']}, From 0f4ca60d83beec511e766da64af725b2d6af49be Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Fri, 9 Apr 2021 18:15:33 +0200 Subject: [PATCH 2/4] Added support for verifying imported files --- lark-stubs/load_grammar.pyi | 3 +- lark/lark.py | 32 ++++++--- lark/load_grammar.py | 13 ++-- lark/utils.py | 12 ++++ tests/test_cache.py | 133 ++++++++++++++++++++---------------- 5 files changed, 117 insertions(+), 76 deletions(-) diff --git a/lark-stubs/load_grammar.pyi 
b/lark-stubs/load_grammar.pyi index 7202fa8..86a6341 100644 --- a/lark-stubs/load_grammar.pyi +++ b/lark-stubs/load_grammar.pyi @@ -14,8 +14,9 @@ class Grammar: class GrammarBuilder: global_keep_all_tokens: bool import_paths: List[Union[str, Callable]] + used_files: Dict[str, str] - def __init__(self, global_keep_all_tokens: bool = False, import_paths: List[Union[str, Callable]] = None) -> None: ... + def __init__(self, global_keep_all_tokens: bool = False, import_paths: List[Union[str, Callable]] = None, used_files: Dict[str, str]=None) -> None: ... def load_grammar(self, grammar_text: str, grammar_name: str = ..., mangle: Callable[[str], str] = None) -> None: ... diff --git a/lark/lark.py b/lark/lark.py index 238d77a..f5407db 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -8,7 +8,7 @@ from io import open import tempfile from warnings import warn -from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger, ABC, abstractmethod +from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger, ABC, abstractmethod, verify_used_files from .load_grammar import load_grammar, FromPackageLoader, Grammar from .tree import Tree from .common import LexerConf, ParserConf @@ -277,14 +277,15 @@ class Lark(Serialize): options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable) from . 
import __version__ s = grammar + options_str + __version__ + str(sys.version_info[:2]) - cache_md5 = hashlib.md5(s.encode()).hexdigest() + cache_md5 = hashlib.md5(s.encode('utf8')).hexdigest() if isinstance(self.options.cache, STRING_TYPE): cache_fn = self.options.cache else: if self.options.cache is not True: raise ConfigurationError("cache argument must be bool or str") - cache_fn = tempfile.gettempdir() + '/.lark_cache_%s_%s_%s.tmp' % (cache_md5, *sys.version_info[:2]) + # Python2.7 doesn't support * syntax in tuples + cache_fn = tempfile.gettempdir() + '/.lark_cache_%s_%s_%s.tmp' % ((cache_md5,) + sys.version_info[:2]) if FS.exists(cache_fn): logger.debug('Loading grammar from cache: %s', cache_fn) @@ -293,16 +294,23 @@ class Lark(Serialize): del options[name] with FS.open(cache_fn, 'rb') as f: file_md5 = f.readline().rstrip(b'\n') - if file_md5 == cache_md5.encode(): - try: - self._load(f, **options) - except Exception: - raise RuntimeError("Failed to load Lark from cache: %r. Try to delete the file and run again." % cache_fn) - return + if file_md5 == cache_md5.encode('utf8'): + if (not self.options.safe_cache) or verify_used_files(pickle.load(f)): + old_options = self.options + try: + self._load(f, **options) + except Exception: # We should probably narrow done which errors we catch here. + logger.exception("Failed to load Lark from cache: %r. We will try to carry on." % cache_fn) + + # In theory, the Lark instance might have been messed up by the call to `_load`. 
+ # In practice the only relevant thing that might have been overriden should be `options` + self.options = old_options + else: + return # Parse the grammar file and compose the grammars - self.grammar = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens) + self.grammar, used_files = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens) else: assert isinstance(grammar, Grammar) self.grammar = grammar @@ -387,7 +395,9 @@ class Lark(Serialize): if cache_fn: logger.debug('Saving grammar to cache: %s', cache_fn) with FS.open(cache_fn, 'wb') as f: - f.write(b'%s\n' % cache_md5.encode()) + f.write(b'%s\n' % cache_md5.encode('utf8')) + if self.options.safe_cache: + pickle.dump(used_files, f) self.save(f) if __doc__: diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 63369a9..830fc02 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -1,5 +1,5 @@ """Parses and creates Grammar objects""" - +import hashlib import os.path import sys from copy import copy, deepcopy @@ -931,9 +931,10 @@ def _mangle_exp(exp, mangle): class GrammarBuilder: - def __init__(self, global_keep_all_tokens=False, import_paths=None): + def __init__(self, global_keep_all_tokens=False, import_paths=None, used_files=None): self.global_keep_all_tokens = global_keep_all_tokens self.import_paths = import_paths or [] + self.used_files = used_files or {} self._definitions = {} self._ignore_names = [] @@ -1150,10 +1151,14 @@ class GrammarBuilder: joined_path = os.path.join(source, grammar_path) with open(joined_path, encoding='utf8') as f: text = f.read() + h = hashlib.md5(text.encode('utf8')).hexdigest() + if self.used_files.get(joined_path, h) != h: + raise RuntimeError("Grammar file was changed during importing") + self.used_files[joined_path] = h except IOError: continue else: - gb = GrammarBuilder(self.global_keep_all_tokens, self.import_paths) + gb = GrammarBuilder(self.global_keep_all_tokens, 
self.import_paths, self.used_files) gb.load_grammar(text, joined_path, mangle) gb._remove_unused(map(mangle, aliases)) for name in gb._definitions: @@ -1213,4 +1218,4 @@ class GrammarBuilder: def load_grammar(grammar, source, import_paths, global_keep_all_tokens): builder = GrammarBuilder(global_keep_all_tokens, import_paths) builder.load_grammar(grammar, source) - return builder.build() + return builder.build(), builder.used_files diff --git a/lark/utils.py b/lark/utils.py index 1cd8bac..f399b08 100644 --- a/lark/utils.py +++ b/lark/utils.py @@ -1,3 +1,4 @@ +import hashlib import unicodedata import os from functools import reduce @@ -6,6 +7,7 @@ from collections import deque ###{standalone import sys, re import logging +from io import open logger = logging.getLogger("lark") logger.addHandler(logging.StreamHandler()) # Set to highest level, since we have some warnings amongst the code @@ -296,6 +298,16 @@ class FS: else: return open(name, mode, **kwargs) +def verify_used_files(file_hashes): + for path, old in file_hashes.items(): + with open(path, encoding='utf8') as f: + text = f.read() + current = hashlib.md5(text.encode()).hexdigest() + if old != current: + logger.info("File %r changed, rebuilding Parser" % path) + return False + return True + def isascii(s): """ str.isascii only exists in python3.7+ """ diff --git a/tests/test_cache.py b/tests/test_cache.py index af0d295..3b6f202 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -58,71 +58,84 @@ def append_zero(t): class TestCache(TestCase): + g = '''start: "a"''' + + def setUp(self): - pass + self.fs = lark_module.FS + self.mock_fs = MockFS() + lark_module.FS = self.mock_fs + + def tearDown(self): + self.mock_fs.files = {} + lark_module.FS = self.fs def test_simple(self): - g = '''start: "a"''' - fn = "bla" - fs = lark_module.FS - mock_fs = MockFS() - try: - lark_module.FS = mock_fs - Lark(g, parser='lalr', cache=fn) - assert fn in mock_fs.files - parser = Lark(g, parser='lalr', cache=fn) - assert 
parser.parse('a') == Tree('start', []) - - mock_fs.files = {} - assert len(mock_fs.files) == 0 - Lark(g, parser='lalr', cache=True) - assert len(mock_fs.files) == 1 - parser = Lark(g, parser='lalr', cache=True) - assert parser.parse('a') == Tree('start', []) - - parser = Lark(g + ' "b"', parser='lalr', cache=True) - assert len(mock_fs.files) == 2 - assert parser.parse('ab') == Tree('start', []) - - parser = Lark(g, parser='lalr', cache=True) - assert parser.parse('a') == Tree('start', []) - - # Test with custom lexer - mock_fs.files = {} - parser = Lark(g, parser='lalr', lexer=CustomLexer, cache=True) - parser = Lark(g, parser='lalr', lexer=CustomLexer, cache=True) - assert len(mock_fs.files) == 1 - assert parser.parse('a') == Tree('start', []) - - # Test options persistence - mock_fs.files = {} - Lark(g, parser="lalr", debug=True, cache=True) - parser = Lark(g, parser="lalr", debug=True, cache=True) - assert parser.options.options['debug'] - - # Test inline transformer (tree-less) & lexer_callbacks - mock_fs.files = {} - g = """ - start: add+ - add: NUM "+" NUM - NUM: /\d+/ - %ignore " " - """ - text = "1+2 3+4" - expected = Tree('start', [30, 70]) - - parser = Lark(g, parser='lalr', transformer=TestT(), cache=True, lexer_callbacks={'NUM': append_zero}) - res0 = parser.parse(text) - parser = Lark(g, parser='lalr', transformer=TestT(), cache=True, lexer_callbacks={'NUM': append_zero}) - assert len(mock_fs.files) == 1 - res1 = parser.parse(text) - res2 = TestT().transform(Lark(g, parser="lalr", cache=True, lexer_callbacks={'NUM': append_zero}).parse(text)) - assert res0 == res1 == res2 == expected - - finally: - lark_module.FS = fs + Lark(self.g, parser='lalr', cache=fn) + assert fn in self.mock_fs.files + parser = Lark(self.g, parser='lalr', cache=fn) + assert parser.parse('a') == Tree('start', []) + + def test_automatic_naming(self): + assert len(self.mock_fs.files) == 0 + Lark(self.g, parser='lalr', cache=True) + assert len(self.mock_fs.files) == 1 + parser = 
Lark(self.g, parser='lalr', cache=True) + assert parser.parse('a') == Tree('start', []) + + parser = Lark(self.g + ' "b"', parser='lalr', cache=True) + assert len(self.mock_fs.files) == 2 + assert parser.parse('ab') == Tree('start', []) + + parser = Lark(self.g, parser='lalr', cache=True) + assert parser.parse('a') == Tree('start', []) + + def test_custom_lexer(self): + + parser = Lark(self.g, parser='lalr', lexer=CustomLexer, cache=True) + parser = Lark(self.g, parser='lalr', lexer=CustomLexer, cache=True) + assert len(self.mock_fs.files) == 1 + assert parser.parse('a') == Tree('start', []) + + def test_options(self): + # Test options persistence + Lark(self.g, parser="lalr", debug=True, cache=True) + parser = Lark(self.g, parser="lalr", debug=True, cache=True) + assert parser.options.options['debug'] + + def test_inline(self): + # Test inline transformer (tree-less) & lexer_callbacks + g = """ + start: add+ + add: NUM "+" NUM + NUM: /\d+/ + %ignore " " + """ + text = "1+2 3+4" + expected = Tree('start', [30, 70]) + + parser = Lark(g, parser='lalr', transformer=TestT(), cache=True, lexer_callbacks={'NUM': append_zero}) + res0 = parser.parse(text) + parser = Lark(g, parser='lalr', transformer=TestT(), cache=True, lexer_callbacks={'NUM': append_zero}) + assert len(self.mock_fs.files) == 1 + res1 = parser.parse(text) + res2 = TestT().transform(Lark(g, parser="lalr", cache=True, lexer_callbacks={'NUM': append_zero}).parse(text)) + assert res0 == res1 == res2 == expected + + def test_imports(self): + g = """ + %import .grammars.ab (startab, expr) + """ + parser = Lark(g, parser='lalr', start='startab', cache=True) + assert len(self.mock_fs.files) == 1 + parser = Lark(g, parser='lalr', start='startab', cache=True) + assert len(self.mock_fs.files) == 1 + res = parser.parse("ab") + self.assertEqual(res, Tree('startab', [Tree('expr', ['a', 'b'])])) + + if __name__ == '__main__': From 2eb5c746ffd61c078c2a061611eba95e9b394267 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: 
Fri, 16 Apr 2021 13:24:07 +0200 Subject: [PATCH 3/4] Removed safe_cache option --- lark-stubs/lark.pyi | 2 -- lark/lark.py | 40 ++++++++++++++-------------------------- 2 files changed, 14 insertions(+), 28 deletions(-) diff --git a/lark-stubs/lark.pyi b/lark-stubs/lark.pyi index 69c7375..fba567b 100644 --- a/lark-stubs/lark.pyi +++ b/lark-stubs/lark.pyi @@ -38,7 +38,6 @@ class LarkOptions: use_bytes: bool import_paths: List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]] source_path: Optional[str] - safe_cache: Literal[False, True, "atomic"] class PackageResource(object): @@ -82,7 +81,6 @@ class Lark: use_bytes: bool = False, import_paths: List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]] = ..., source_path: Optional[str]=None, - safe_cache: Literal[False, True, "atomic"]=True, ): ... diff --git a/lark/lark.py b/lark/lark.py index f5407db..1839a87 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -105,11 +105,6 @@ class LarkOptions(Serialize): A List of either paths or loader functions to specify from where grammars are imported source_path Override the source of from where the grammar was loaded. 
Useful for relative imports and unconventional grammar loading - safe_cache - Controls how exactly the cache is saved & verified - - False: simple read/write, no extend file checking - - True: use atomicwrites if available and check if any of the imported files were modified - - "atomic": same as True, but require atomicwrites to be installed **=== End Options ===** """ if __doc__: @@ -145,7 +140,6 @@ class LarkOptions(Serialize): 'use_bytes': False, 'import_paths': [], 'source_path': None, - 'safe_cache': True, } def __init__(self, options_dict): @@ -155,7 +149,7 @@ class LarkOptions(Serialize): for name, default in self._defaults.items(): if name in o: value = o.pop(name) - if isinstance(default, bool) and name not in ('cache', 'use_bytes', 'safe_cache'): + if isinstance(default, bool) and name not in ('cache', 'use_bytes'): value = bool(value) else: value = default @@ -268,10 +262,6 @@ class Lark(Serialize): if self.options.cache: if self.options.parser != 'lalr': raise ConfigurationError("cache only works with parser='lalr' for now") - - if self.options.safe_cache == "atomic": - if not atomicwrites: - raise ConfigurationError("safe_cache='atomic' requires atomicwrites to be installed") unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals') options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable) @@ -294,19 +284,18 @@ class Lark(Serialize): del options[name] with FS.open(cache_fn, 'rb') as f: file_md5 = f.readline().rstrip(b'\n') - if file_md5 == cache_md5.encode('utf8'): - if (not self.options.safe_cache) or verify_used_files(pickle.load(f)): - old_options = self.options - try: - self._load(f, **options) - except Exception: # We should probably narrow done which errors we catch here. - logger.exception("Failed to load Lark from cache: %r. We will try to carry on." % cache_fn) - - # In theory, the Lark instance might have been messed up by the call to `_load`. 
- # In practice the only relevant thing that might have been overriden should be `options` - self.options = old_options - else: - return + if file_md5 == cache_md5.encode('utf8') and verify_used_files(pickle.load(f)): + old_options = self.options + try: + self._load(f, **options) + except Exception: # We should probably narrow down which errors we catch here. + logger.exception("Failed to load Lark from cache: %r. We will try to carry on." % cache_fn) + + # In theory, the Lark instance might have been messed up by the call to `_load`. + # In practice the only relevant thing that might have been overridden should be `options` + self.options = old_options + else: + return # Parse the grammar file and compose the grammars @@ -396,8 +385,7 @@ class Lark(Serialize): logger.debug('Saving grammar to cache: %s', cache_fn) with FS.open(cache_fn, 'wb') as f: f.write(b'%s\n' % cache_md5.encode('utf8')) - if self.options.safe_cache: - pickle.dump(used_files, f) + pickle.dump(used_files, f) self.save(f) if __doc__: From 97c3202973df6b3a414a92b4f4e662e24453c1fa Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Fri, 16 Apr 2021 13:50:25 +0200 Subject: [PATCH 4/4] Make verify_used_files work with stdlib.
--- lark/lark.py | 4 ++-- lark/load_grammar.py | 43 ++++++++++++++++++++++++++----------------- lark/utils.py | 9 --------- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/lark/lark.py b/lark/lark.py index 1839a87..d55591a 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -8,8 +8,8 @@ from io import open import tempfile from warnings import warn -from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger, ABC, abstractmethod, verify_used_files -from .load_grammar import load_grammar, FromPackageLoader, Grammar +from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger, ABC, abstractmethod +from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files from .tree import Tree from .common import LexerConf, ParserConf diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 830fc02..ea5e390 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -2,6 +2,7 @@ import hashlib import os.path import sys +from collections import namedtuple from copy import copy, deepcopy from io import open import pkgutil @@ -673,19 +674,7 @@ class Grammar: return terminals, compiled_rules, self.ignore -class PackageResource(object): - """ - Represents a path inside a Package. 
Used by `FromPackageLoader` - """ - def __init__(self, pkg_name, path): - self.pkg_name = pkg_name - self.path = path - - def __str__(self): - return "<%s: %s>" % (self.pkg_name, self.path) - - def __repr__(self): - return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.path) +PackageResource = namedtuple('PackageResource', 'pkg_name path') class FromPackageLoader(object): @@ -1151,13 +1140,14 @@ class GrammarBuilder: joined_path = os.path.join(source, grammar_path) with open(joined_path, encoding='utf8') as f: text = f.read() - h = hashlib.md5(text.encode('utf8')).hexdigest() - if self.used_files.get(joined_path, h) != h: - raise RuntimeError("Grammar file was changed during importing") - self.used_files[joined_path] = h except IOError: continue else: + h = hashlib.md5(text.encode('utf8')).hexdigest() + if self.used_files.get(joined_path, h) != h: + raise RuntimeError("Grammar file was changed during importing") + self.used_files[joined_path] = h + gb = GrammarBuilder(self.global_keep_all_tokens, self.import_paths, self.used_files) gb.load_grammar(text, joined_path, mangle) gb._remove_unused(map(mangle, aliases)) @@ -1215,6 +1205,25 @@ class GrammarBuilder: # resolve_term_references(term_defs) return Grammar(rule_defs, term_defs, self._ignore_names) + +def verify_used_files(file_hashes): + for path, old in file_hashes.items(): + text = None + if isinstance(path, str) and os.path.exists(path): + with open(path, encoding='utf8') as f: + text = f.read() + elif isinstance(path, PackageResource): + with suppress(IOError): + text = pkgutil.get_data(*path).decode('utf-8') + if text is None: # We don't know how to load the path. ignore it. 
+ continue + + current = hashlib.md5(text.encode()).hexdigest() + if old != current: + logger.info("File %r changed, rebuilding Parser" % path) + return False + return True + def load_grammar(grammar, source, import_paths, global_keep_all_tokens): builder = GrammarBuilder(global_keep_all_tokens, import_paths) builder.load_grammar(grammar, source) diff --git a/lark/utils.py b/lark/utils.py index f399b08..c9bdf88 100644 --- a/lark/utils.py +++ b/lark/utils.py @@ -298,15 +298,6 @@ class FS: else: return open(name, mode, **kwargs) -def verify_used_files(file_hashes): - for path, old in file_hashes.items(): - with open(path, encoding='utf8') as f: - text = f.read() - current = hashlib.md5(text.encode()).hexdigest() - if old != current: - logger.info("File %r changed, rebuilding Parser" % path) - return False - return True def isascii(s):