From a50fc107737f7a89e60711d7be1f186e1c24b1fb Mon Sep 17 00:00:00 2001
From: MegaIng1
Date: Tue, 29 Sep 2020 15:24:00 +0200
Subject: [PATCH] import_sources->import_paths, source->source_path, various
 implementation changes

---
 lark-stubs/lark.pyi  | 14 ++++-----
 lark/lark.py         | 49 ++++++++++++++++-------------
 lark/load_grammar.py | 75 +++++++++++++++++++++++++-------------------
 tests/test_parser.py |  6 ++--
 4 files changed, 81 insertions(+), 63 deletions(-)

diff --git a/lark-stubs/lark.pyi b/lark-stubs/lark.pyi
index 5cb94b2..96eeda4 100644
--- a/lark-stubs/lark.pyi
+++ b/lark-stubs/lark.pyi
@@ -32,19 +32,19 @@ class LarkOptions:
     cache: Union[bool, str]
     g_regex_flags: int
     use_bytes: bool
-    import_sources: List[Union[str, Callable[[str, str], str]]]
-    source: Optional[str]
+    import_paths: List[Union[str, Callable[[Optional[str], str], Tuple[str, str]]]]
+    source_path: Optional[str]
 
 
 class FromPackageLoader:
     def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...): ...
 
-    def __call__(self, base_paths: List[str], grammar_path: str) -> Tuple[str, str]: ...
+    def __call__(self, base_path: Optional[str], grammar_path: str) -> Tuple[str, str]: ...
 
 
 class Lark:
-    source: str
-    grammar_source: str
+    source_path: str
+    source_code: str
     options: LarkOptions
     lexer: Lexer
     terminals: List[TerminalDef]
@@ -68,8 +68,8 @@ class Lark:
         cache: Union[bool, str] = False,
         g_regex_flags: int = ...,
         use_bytes: bool = False,
-        import_sources: List[Union[str, Callable[[List[str], str], Tuple[str, str]]]] = ...,
-        source: Optional[str],
+        import_paths: List[Union[str, Callable[[Optional[str], str], Tuple[str, str]]]] = ...,
+        source_path: Optional[str] = None,
     ): ...


diff --git a/lark/lark.py b/lark/lark.py
index 9f53841..8107e34 100644
--- a/lark/lark.py
+++ b/lark/lark.py
@@ -2,7 +2,7 @@ from __future__ import absolute_import
 
 import sys, os, pickle, hashlib
 from io import open
-
+from warnings import warn
 from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger
 
 from .load_grammar import load_grammar, FromPackageLoader
@@ -90,10 +90,10 @@ class LarkOptions(Serialize):
         Accept an input of type ``bytes`` instead of ``str`` (Python 3 only).
     edit_terminals
         A callback for editing the terminals before parse.
-    import_sources
+    import_paths
         A List of either paths or loader functions to specify from where grammars are imported
-    source
-        Override the source of from where the grammar was loaded. Usefull for relative imports and unconventional grammar loading
+    source_path
+        Override the source path from which the grammar was loaded. Useful for relative imports and unconventional grammar loading
 
     **=== End Options ===**
     """
@@ -119,8 +119,8 @@ class LarkOptions(Serialize):
         'edit_terminals': None,
         'g_regex_flags': 0,
         'use_bytes': False,
-        'import_sources': [],
-        'source': None,
+        'import_paths': [],
+        'source_path': None,
     }
 
     def __init__(self, options_dict):
@@ -196,13 +196,13 @@ class Lark(Serialize):
             re_module = re
 
         # Some, but not all file-like objects have a 'name' attribute
-        if self.options.source is None:
+        if self.options.source_path is None:
             try:
-                self.source = grammar.name
+                self.source_path = grammar.name
             except AttributeError:
-                self.source = '<string>'
+                self.source_path = '<string>'
         else:
-            self.source = self.options.source
+            self.source_path = self.options.source_path
 
         # Drain file-like objects to get their contents
         try:
@@ -213,7 +213,7 @@ class Lark(Serialize):
             grammar = read()
 
         assert isinstance(grammar, STRING_TYPE)
-        self.grammar_source = grammar
+        self.source_code = grammar
         if self.options.use_bytes:
             if not isascii(grammar):
                 raise ValueError("Grammar must be ascii only, when use_bytes=True")
@@ -276,7 +276,7 @@ class Lark(Serialize):
             assert self.options.ambiguity in ('resolve', 'explicit', 'forest', 'auto', )
 
         # Parse the grammar file and compose the grammars (TODO)
-        self.grammar = load_grammar(grammar, self.source, re_module, self.options.import_sources)
+        self.grammar = load_grammar(grammar, self.source_path, re_module, self.options.import_paths)
 
         # Compile the EBNF grammar into BNF
         self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start)
@@ -374,7 +374,7 @@ class Lark(Serialize):
         self.options = LarkOptions.deserialize(options, memo)
         re_module = regex if self.options.regex else re
         self.rules = [Rule.deserialize(r, memo) for r in data['rules']]
-        self.source = '<deserialized>'
+        self.source_path = '<deserialized>'
         self._prepare_callbacks()
         self.parser = self.parser_class.deserialize(
             data['parser'],
@@ -416,9 +416,7 @@ class Lark(Serialize):
         """Create an instance of Lark with the grammar loaded from within the package `package`.
         This allows grammar loading from zipapps.
 
-        Will also create a `FromPackageLoader` instance and add it to the `import_sources` to simplify importing
-
-        ``search_paths`` is passed to `FromPackageLoader`
+        Imports in the grammar will use the `package` and `search_paths` provided, through `FromPackageLoader`
 
         Example:
 
         """
         package = FromPackageLoader(package, search_paths)
-        full_path, text = package([], grammar_path)
-        options.setdefault('source', full_path)
-        if 'import_sources' in options:
-            options['import_sources'].append(package)
+        full_path, text = package(None, grammar_path)
+        options.setdefault('source_path', full_path)
+        if 'import_paths' in options:
+            options['import_paths'].append(package)
         else:
-            options['import_sources'] = [package]
+            options['import_paths'] = [package]
         return cls(text, **options)
 
     def __repr__(self):
-        return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source, self.options.parser, self.options.lexer)
+        return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer)
 
 
     def lex(self, text):
@@ -481,6 +479,15 @@ class Lark(Serialize):
                 # Prevent infinite loop
                 raise e2
             e = e2
+
+    @property
+    def source(self):
+        warn("Lark.source attribute has been renamed to Lark.source_path", DeprecationWarning)
+        return self.source_path
+
+    @source.setter
+    def source(self, value):
+        self.source_path = value
 
 ###}
diff --git a/lark/load_grammar.py b/lark/load_grammar.py
index ba1f0f2..022e024 100644
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -666,50 +666,61 @@ class FromPackageLoader(object):
     def __repr__(self):
         return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths)
 
-    def __call__(self, base_paths, grammar_path):
-        if len(base_paths) == 0:
+    def __call__(self, base_path, grammar_path):
+        if base_path is None:
             to_try = self.search_paths
         else:
-            assert len(base_paths) == 1
-            if not base_paths[0].startswith('<%s:' % (self.pkg_name,)):
+            # Check whether or not the importing grammar was loaded by this module.
+            if not base_path.startswith('<%s:' % (self.pkg_name,)):
                 # Technically false, but FileNotFound doesn't exist in python2.7, and this message should never reach the end user anyway
                 raise IOError()
-            base_path = base_paths[0].partition(':')[2]
-            if base_path and base_path[0] == '/':
-                base_path = base_path[1:]
+            # Separate the path from the pkg_name and strip the leading slash; `pkgutil.get_data` doesn't like it. (see below)
+            base_path = base_path.partition(':')[2].lstrip('/')
             to_try = [base_path]
         for path in to_try:
             full_path = os.path.join(path, grammar_path)
-            text = None
-            with suppress(IOError):
+            try:
                 text = pkgutil.get_data(self.pkg_name, full_path)
-            if text is None:
+            except IOError:
                 continue
-            return '<%s:/%s>' % (self.pkg_name, full_path), text.decode()
+            else:
+                # Custom format `<{pkg_name}:/{full_path}>`
+                # These are the arguments to `pkgutil.get_data(pkg_name, full_path)`
+                # Required since we cannot easily provide an actual file path for all package data (e.g. from inside a zip)
+
+                # The additional slash after the `:` is to allow `os.path.split` to work on this without accidentally
+                # throwing away the `pkg_name`. (As it would otherwise inside of `GrammarLoader.load_grammar` when relative imports
+                # are resolved.)
+                # Without the slash, `os.path.split` would turn `"<pkg_name:grammar.lark>"` into `("", ...)`, losing the package information
+                # With the slash, `"<pkg_name:/grammar.lark>"` turns into `("<pkg_name:", ...)`, keeping it
+                return '<%s:/%s>' % (self.pkg_name, full_path), text.decode()
         raise IOError()
 
 
 stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)
 
 _imported_grammars = {}
-def import_grammar(grammar_path, re_, base_paths=[], import_sources=[]):
+def import_grammar(grammar_path, re_, base_path=None, import_paths=[]):
     if grammar_path not in _imported_grammars:
-        # import_sources take priority over base_paths since they should handle relative imports and ignore everthing else.
+        # import_paths take priority over base_path since they should handle relative imports and ignore everything else.
         # Question: should the stdlib_loader really be pushed to the end?
-        import_paths = import_sources + base_paths + [stdlib_loader]
-        for source in import_paths:
-            text = None
-            with suppress(IOError):
+        to_try = import_paths + ([base_path] if base_path is not None else []) + [stdlib_loader]
+        for source in to_try:
+            try:
                 if callable(source):
-                    joined_path, text = source(base_paths, grammar_path)
+                    joined_path, text = source(base_path, grammar_path)
                 else:
                     joined_path = os.path.join(source, grammar_path)
                     with open(joined_path, encoding='utf8') as f:
                         text = f.read()
-            if text is not None:
-                # Don't load the grammar from within the suppress statement. Otherwise the underlying error message will be swallowed
+            except IOError:
+                continue
+            else:
+                # Don't load the grammar from within the try statement. Otherwise the underlying error message will be swallowed
                 # and the wrong file will be reported as missing
-                grammar = load_grammar(text, joined_path, re_, import_sources)
+                grammar = load_grammar(text, joined_path, re_, import_paths)
                 _imported_grammars[grammar_path] = grammar
                 break
         else:
@@ -868,7 +879,7 @@ class GrammarLoader:
         self.canonize_tree = CanonizeTree()
         self.re_module = re_module
 
-    def load_grammar(self, grammar_text, grammar_name='<?>', import_sources=[]):
+    def load_grammar(self, grammar_text, grammar_name='<?>', import_paths=[]):
         "Parse grammar_text, verify, and create Grammar object. Display nice messages on error."
 
         try:
@@ -922,7 +933,7 @@ class GrammarLoader:
                 aliases = {name: arg1 or name}  # Aliases if exist
                 if path_node.data == 'import_lib':  # Import from library
-                    base_paths = []
+                    base_path = None
                 else:  # Relative import
                     if grammar_name == '<string>':  # Import relative to script file path if grammar is coded in script
                         try:
                             base_file = os.path.abspath(sys.modules['__main__'].__file__)
                         except AttributeError:
                             base_file = None
                     else:
                         base_file = grammar_name  # Import relative to grammar file path if external grammar file
                     if base_file:
-                        base_paths = [os.path.split(base_file)[0]]
+                        base_path = os.path.split(base_file)[0]
                     else:
-                        base_paths = [os.path.abspath(os.path.curdir)]
+                        base_path = os.path.abspath(os.path.curdir)
 
                 try:
-                    import_base_paths, import_aliases = imports[dotted_path]
-                    assert base_paths == import_base_paths, 'Inconsistent base_paths for %s.' % '.'.join(dotted_path)
+                    import_base_path, import_aliases = imports[dotted_path]
+                    assert base_path == import_base_path, 'Inconsistent base_path for %s.' % '.'.join(dotted_path)
                     import_aliases.update(aliases)
                 except KeyError:
-                    imports[dotted_path] = base_paths, aliases
+                    imports[dotted_path] = base_path, aliases
 
             elif stmt.data == 'declare':
                 for t in stmt.children:
                     term_defs.append([t.value, (None, None)])
             else:
                 assert False, stmt
 
         # import grammars
-        for dotted_path, (base_paths, aliases) in imports.items():
+        for dotted_path, (base_path, aliases) in imports.items():
             grammar_path = os.path.join(*dotted_path) + EXT
-            g = import_grammar(grammar_path, self.re_module, base_paths=base_paths, import_sources=import_sources)
+            g = import_grammar(grammar_path, self.re_module, base_path=base_path, import_paths=import_paths)
             new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases)
 
             term_defs += new_td
@@ -1032,5 +1043,5 @@
 
 
-def load_grammar(grammar, source, re_, import_sources):
-    return GrammarLoader(re_).load_grammar(grammar, source, import_sources)
+def load_grammar(grammar, source, re_, import_paths):
+    return GrammarLoader(re_).load_grammar(grammar, source, import_paths)
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 0406f46..6aaee4d 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -1792,7 +1792,7 @@ def _make_parser_test(LEXER, PARSER):
             %import ab.startab
         """
 
-        p = _Lark(grammar, import_sources=[custom_loader])
+        p = _Lark(grammar, import_paths=[custom_loader])
         self.assertEqual(p.parse('ab'),
                          Tree('start', [Tree('startab', [Tree('ab__expr', [Token('ab__A', 'a'), Token('ab__B', 'b')])])]))
 
@@ -1801,7 +1801,7 @@ def _make_parser_test(LEXER, PARSER):
             %import test_relative_import_of_nested_grammar__grammar_to_import.rule_to_import
         """
 
-        p = _Lark(grammar, import_sources=[custom_loader])
+        p = _Lark(grammar, import_paths=[custom_loader])
         x = p.parse('N')
         self.assertEqual(next(x.find_data('rule_to_import')).children, ['N'])
 
@@ -1810,7 +1810,7 @@ def _make_parser_test(LEXER, PARSER):
             %import .test_relative_import (start, WS)
             %ignore WS
         """
-        p = _Lark(grammar, import_sources=[custom_loader2])
+        p = _Lark(grammar, import_paths=[custom_loader2])
         x = p.parse('12 capybaras')
         self.assertEqual(x.children, ['12', 'capybaras'])
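
Usage sketch (not part of the patch): a minimal example of the renamed API as this commit defines it. The names `dict_loader`, `FAKE_FS`, and the `'<in-memory>'` label are hypothetical, chosen for illustration. A loader callable in `import_paths` receives `(base_path, grammar_path)`, where `base_path` is now a single `Optional[str]` (None for library-style imports) instead of a list, and either returns a `(joined_path, text)` tuple or raises `IOError` so that `import_grammar` moves on to the next loader.

    from lark import Lark

    # Hypothetical in-memory "filesystem", keyed by the grammar_path that
    # import_grammar() computes for `%import ab.startab` (here: 'ab.lark').
    FAKE_FS = {
        'ab.lark': 'startab: expr\n\nexpr: A B\nA: "a"\nB: "b"\n',
    }

    def dict_loader(base_path, grammar_path):
        # base_path is None for absolute imports such as `%import ab.startab`;
        # for relative imports it is the importing grammar's directory.
        try:
            return grammar_path, FAKE_FS[grammar_path]
        except KeyError:
            raise IOError()  # "not found here"; the next loader gets a turn

    grammar = """
    start: startab

    %import ab.startab
    """

    parser = Lark(grammar, import_paths=[dict_loader], source_path='<in-memory>')
    print(parser.parse('ab'))  # Tree('start', [Tree('startab', ...)])

For package-bundled grammars the same mechanism is wrapped by `Lark.open_from_package`, which appends a `FromPackageLoader` to `import_paths` and sets `source_path` to the resolved location, so relative `%import`s keep working even from inside a zipapp.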