Resolved conflicts manually /ereztags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.0
| @@ -2,7 +2,7 @@ | |||||
| from typing import ( | from typing import ( | ||||
| TypeVar, Type, List, Dict, IO, Iterator, Callable, Union, Optional, | TypeVar, Type, List, Dict, IO, Iterator, Callable, Union, Optional, | ||||
| Literal, Protocol, Iterable, | |||||
| Literal, Protocol, Tuple, Iterable, | |||||
| ) | ) | ||||
| from .visitors import Transformer | from .visitors import Transformer | ||||
| from .lexer import Token, Lexer, TerminalDef | from .lexer import Token, Lexer, TerminalDef | ||||
| @@ -34,11 +34,25 @@ class LarkOptions: | |||||
| cache: Union[bool, str] | cache: Union[bool, str] | ||||
| g_regex_flags: int | g_regex_flags: int | ||||
| use_bytes: bool | use_bytes: bool | ||||
| import_paths: List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]] | |||||
| source_path: Optional[str] | |||||
class PackageResource(object):
    """Type stub: a path inside a package, identified by package name and path."""
    pkg_name: str
    path: str

    def __init__(self, pkg_name: str, path: str) -> None:
        ...
class FromPackageLoader:
    """Type stub: a callable import loader bound to a package name and search paths."""

    def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...):
        ...

    def __call__(
        self,
        base_path: Union[None, str, PackageResource],
        grammar_path: str,
    ) -> Tuple[PackageResource, str]:
        ...
| class Lark: | class Lark: | ||||
| source: str | |||||
| grammar_source: str | |||||
| source_path: str | |||||
| source_grammar: str | |||||
| options: LarkOptions | options: LarkOptions | ||||
| lexer: Lexer | lexer: Lexer | ||||
| terminals: List[TerminalDef] | terminals: List[TerminalDef] | ||||
| @@ -62,6 +76,8 @@ class Lark: | |||||
| cache: Union[bool, str] = False, | cache: Union[bool, str] = False, | ||||
| g_regex_flags: int = ..., | g_regex_flags: int = ..., | ||||
| use_bytes: bool = False, | use_bytes: bool = False, | ||||
| import_paths: List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]] = ..., | |||||
| source_path: Optional[str]=None, | |||||
| ): | ): | ||||
| ... | ... | ||||
| @@ -71,6 +87,10 @@ class Lark: | |||||
| @classmethod | @classmethod | ||||
| def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options) -> _T: | def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options) -> _T: | ||||
| ... | ... | ||||
| @classmethod | |||||
| def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...] = ..., **options) -> _T: | |||||
| ... | |||||
| def lex(self, text: str) -> Iterator[Token]: | def lex(self, text: str) -> Iterator[Token]: | ||||
| ... | ... | ||||
| @@ -4,10 +4,10 @@ from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedTok | |||||
| import sys, os, pickle, hashlib | import sys, os, pickle, hashlib | ||||
| from io import open | from io import open | ||||
| import tempfile | import tempfile | ||||
| from warnings import warn | |||||
| from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger | from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger | ||||
| from .load_grammar import load_grammar | |||||
| from .load_grammar import load_grammar, FromPackageLoader | |||||
| from .tree import Tree | from .tree import Tree | ||||
| from .common import LexerConf, ParserConf | from .common import LexerConf, ParserConf | ||||
| @@ -92,6 +92,10 @@ class LarkOptions(Serialize): | |||||
| Accept an input of type ``bytes`` instead of ``str`` (Python 3 only). | Accept an input of type ``bytes`` instead of ``str`` (Python 3 only). | ||||
| edit_terminals | edit_terminals | ||||
| A callback for editing the terminals before parse. | A callback for editing the terminals before parse. | ||||
| import_paths | |||||
| A list of paths and/or loader functions specifying where grammars are imported from | |||||
| source_path | |||||
| Override the path from which the grammar was loaded. Useful for relative imports and unconventional grammar loading | |||||
| **=== End Options ===** | **=== End Options ===** | ||||
| """ | """ | ||||
| @@ -126,6 +130,8 @@ class LarkOptions(Serialize): | |||||
| 'edit_terminals': None, | 'edit_terminals': None, | ||||
| 'g_regex_flags': 0, | 'g_regex_flags': 0, | ||||
| 'use_bytes': False, | 'use_bytes': False, | ||||
| 'import_paths': [], | |||||
| 'source_path': None, | |||||
| } | } | ||||
| def __init__(self, options_dict): | def __init__(self, options_dict): | ||||
| @@ -209,10 +215,13 @@ class Lark(Serialize): | |||||
| re_module = re | re_module = re | ||||
| # Some, but not all file-like objects have a 'name' attribute | # Some, but not all file-like objects have a 'name' attribute | ||||
| try: | |||||
| self.source = grammar.name | |||||
| except AttributeError: | |||||
| self.source = '<string>' | |||||
| if self.options.source_path is None: | |||||
| try: | |||||
| self.source_path = grammar.name | |||||
| except AttributeError: | |||||
| self.source_path = '<string>' | |||||
| else: | |||||
| self.source_path = self.options.source_path | |||||
| # Drain file-like objects to get their contents | # Drain file-like objects to get their contents | ||||
| try: | try: | ||||
| @@ -223,7 +232,7 @@ class Lark(Serialize): | |||||
| grammar = read() | grammar = read() | ||||
| assert isinstance(grammar, STRING_TYPE) | assert isinstance(grammar, STRING_TYPE) | ||||
| self.grammar_source = grammar | |||||
| self.source_grammar = grammar | |||||
| if self.options.use_bytes: | if self.options.use_bytes: | ||||
| if not isascii(grammar): | if not isascii(grammar): | ||||
| raise ValueError("Grammar must be ascii only, when use_bytes=True") | raise ValueError("Grammar must be ascii only, when use_bytes=True") | ||||
| @@ -286,7 +295,7 @@ class Lark(Serialize): | |||||
| raise ValueError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS)) | raise ValueError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS)) | ||||
| # Parse the grammar file and compose the grammars (TODO) | # Parse the grammar file and compose the grammars (TODO) | ||||
| self.grammar = load_grammar(grammar, self.source, re_module, self.options.keep_all_tokens) | |||||
| self.grammar = load_grammar(grammar, self.source, self.options.import_paths, self.options.keep_all_tokens) | |||||
| if self.options.postlex is not None: | if self.options.postlex is not None: | ||||
| terminals_to_keep = set(self.options.postlex.always_accept) | terminals_to_keep = set(self.options.postlex.always_accept) | ||||
| @@ -395,7 +404,7 @@ class Lark(Serialize): | |||||
| options.update(kwargs) | options.update(kwargs) | ||||
| self.options = LarkOptions.deserialize(options, memo) | self.options = LarkOptions.deserialize(options, memo) | ||||
| self.rules = [Rule.deserialize(r, memo) for r in data['rules']] | self.rules = [Rule.deserialize(r, memo) for r in data['rules']] | ||||
| self.source = '<deserialized>' | |||||
| self.source_path = '<deserialized>' | |||||
| self._prepare_callbacks() | self._prepare_callbacks() | ||||
| self.parser = self.parser_class.deserialize( | self.parser = self.parser_class.deserialize( | ||||
| data['parser'], | data['parser'], | ||||
| @@ -430,8 +439,26 @@ class Lark(Serialize): | |||||
| with open(grammar_filename, encoding='utf8') as f: | with open(grammar_filename, encoding='utf8') as f: | ||||
| return cls(f, **options) | return cls(f, **options) | ||||
@classmethod
def open_from_package(cls, package, grammar_path, search_paths=("",), **options):
    """Create an instance of Lark with the grammar loaded from within the package `package`.

    This allows grammar loading from zipapps.

    Imports in the grammar will use the `package` and `search_paths` provided, through `FromPackageLoader`

    The loader is appended to a copy of any ``import_paths`` option, so a list
    passed in by the caller is left untouched. ``source_path`` defaults to the
    resource the grammar was loaded from unless given explicitly.

    Example:

        Lark.open_from_package(__name__, "example.lark", ("grammars",), parser=...)
    """
    # Keep `package` as the caller passed it; the loader gets its own name.
    loader = FromPackageLoader(package, search_paths)
    full_path, text = loader(None, grammar_path)
    options.setdefault('source_path', full_path)
    # Build a new list instead of appending in place: appending would mutate
    # the caller's list and accumulate loaders across repeated calls.
    options['import_paths'] = list(options.get('import_paths') or ()) + [loader]
    return cls(text, **options)
| def __repr__(self): | def __repr__(self): | ||||
| return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source, self.options.parser, self.options.lexer) | |||||
| return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer) | |||||
| def lex(self, text): | def lex(self, text): | ||||
| @@ -491,5 +518,23 @@ class Lark(Serialize): | |||||
| except UnexpectedCharacters as e2: | except UnexpectedCharacters as e2: | ||||
| e = e2 | e = e2 | ||||
@property
def source(self):
    """Deprecated alias of ``source_path``; reading it emits a DeprecationWarning."""
    # stacklevel=2 attributes the warning to the code that read the attribute,
    # not to this accessor.
    warn("Lark.source attribute has been renamed to Lark.source_path", DeprecationWarning, stacklevel=2)
    return self.source_path

@source.setter
def source(self, value):
    # Writes are forwarded to the renamed attribute without a warning.
    self.source_path = value
@property
def grammar_source(self):
    """Deprecated alias of ``source_grammar``; reading it emits a DeprecationWarning."""
    # stacklevel=2 attributes the warning to the code that read the attribute,
    # not to this accessor.
    warn("Lark.grammar_source attribute has been renamed to Lark.source_grammar", DeprecationWarning, stacklevel=2)
    return self.source_grammar

@grammar_source.setter
def grammar_source(self, value):
    # Writes are forwarded to the renamed attribute without a warning.
    self.source_grammar = value
| ###} | ###} | ||||
| @@ -4,6 +4,7 @@ import os.path | |||||
| import sys | import sys | ||||
| from copy import copy, deepcopy | from copy import copy, deepcopy | ||||
| from io import open | from io import open | ||||
| import pkgutil | |||||
| from .utils import bfs, eval_escaping, Py36, logger, classify_bool | from .utils import bfs, eval_escaping, Py36, logger, classify_bool | ||||
| from .lexer import Token, TerminalDef, PatternStr, PatternRE | from .lexer import Token, TerminalDef, PatternStr, PatternRE | ||||
| @@ -20,7 +21,7 @@ from .visitors import Transformer, Visitor, v_args, Transformer_InPlace, Transfo | |||||
| inline_args = v_args(inline=True) | inline_args = v_args(inline=True) | ||||
| __path__ = os.path.dirname(__file__) | __path__ = os.path.dirname(__file__) | ||||
| IMPORT_PATHS = [os.path.join(__path__, 'grammars')] | |||||
| IMPORT_PATHS = ['grammars'] | |||||
| EXT = '.lark' | EXT = '.lark' | ||||
| @@ -648,6 +649,58 @@ class Grammar: | |||||
| return terminals, compiled_rules, self.ignore | return terminals, compiled_rules, self.ignore | ||||
class PackageResource(object):
    """
    Represents a path inside a Package. Used by `FromPackageLoader`

    Instances compare and hash by ``(pkg_name, path)``. Value equality matters
    because resources naming the same location are constructed independently
    and then compared with ``==``.
    """
    def __init__(self, pkg_name, path):
        self.pkg_name = pkg_name
        self.path = path

    def __eq__(self, other):
        if not isinstance(other, PackageResource):
            return NotImplemented
        return (self.pkg_name, self.path) == (other.pkg_name, other.path)

    def __ne__(self, other):
        # Python 2 does not derive __ne__ from __eq__, so spell it out.
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            return outcome
        return not outcome

    def __hash__(self):
        # Defining __eq__ drops the default hash on Python 3; keep instances hashable.
        return hash((self.pkg_name, self.path))

    def __str__(self):
        return "<%s: %s>" % (self.pkg_name, self.path)

    def __repr__(self):
        return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.path)
class FromPackageLoader(object):
    """
    Provides a simple way of creating custom import loaders that load from packages via ``pkgutil.get_data`` instead of using `open`.
    This allows them to be compatible even from within zip files.

    Relative imports are handled, so you can just freely use them.

    pkg_name: The name of the package. You can probably provide `__name__` most of the time
    search_paths: All the paths that will be searched on absolute imports.
    """
    def __init__(self, pkg_name, search_paths=("", )):
        self.pkg_name = pkg_name
        self.search_paths = search_paths

    def __repr__(self):
        return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths)

    def __call__(self, base_path, grammar_path):
        """Locate `grammar_path` inside the package and return ``(PackageResource, text)``.

        base_path: ``None`` for an absolute import (every entry of `search_paths`
            is tried in order), otherwise the location of the importing grammar.
        grammar_path: Path of the grammar relative to the directory being tried.

        Raises IOError if `base_path` does not belong to this loader's package,
        or if the grammar is not found in any candidate directory.
        """
        if base_path is None:
            to_try = self.search_paths
        else:
            # Check whether or not the importing grammar was loaded by this module.
            if not isinstance(base_path, PackageResource) or base_path.pkg_name != self.pkg_name:
                # Technically false, but FileNotFound doesn't exist in python2.7, and this message should never reach the end user anyway
                raise IOError()
            to_try = [base_path.path]
        for path in to_try:
            full_path = os.path.join(path, grammar_path)
            try:
                text = pkgutil.get_data(self.pkg_name, full_path)
            except IOError:
                continue
            if text is None:
                # get_data returns None when the package cannot be located or its
                # loader has no get_data support; treat that as "not found" too.
                continue
            # Decode as UTF-8 explicitly rather than relying on the default codec.
            return PackageResource(self.pkg_name, full_path), text.decode('utf-8')
        raise IOError("Cannot find grammar %r in package %r" % (grammar_path, self.pkg_name))
| stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS) | |||||
| _imported_grammars = {} | _imported_grammars = {} | ||||
| @@ -787,39 +840,47 @@ class GrammarLoader: | |||||
| ('%ignore expects a value', ['%ignore %import\n']), | ('%ignore expects a value', ['%ignore %import\n']), | ||||
| ] | ] | ||||
| def __init__(self, re_module, global_keep_all_tokens): | |||||
| def __init__(self, global_keep_all_tokens): | |||||
| terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()] | terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()] | ||||
| rules = [options_from_rule(name, None, x) for name, x in RULES.items()] | rules = [options_from_rule(name, None, x) for name, x in RULES.items()] | ||||
| rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o) for r, _p, xs, o in rules for i, x in enumerate(xs)] | rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o) for r, _p, xs, o in rules for i, x in enumerate(xs)] | ||||
| callback = ParseTreeBuilder(rules, ST).create_callback() | callback = ParseTreeBuilder(rules, ST).create_callback() | ||||
| lexer_conf = LexerConf(terminals, re_module, ['WS', 'COMMENT']) | |||||
| import re | |||||
| lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT']) | |||||
| parser_conf = ParserConf(rules, callback, ['start']) | parser_conf = ParserConf(rules, callback, ['start']) | ||||
| self.parser = LALR_TraditionalLexer(lexer_conf, parser_conf) | self.parser = LALR_TraditionalLexer(lexer_conf, parser_conf) | ||||
| self.canonize_tree = CanonizeTree() | self.canonize_tree = CanonizeTree() | ||||
| self.re_module = re_module | |||||
| self.global_keep_all_tokens = global_keep_all_tokens | self.global_keep_all_tokens = global_keep_all_tokens | ||||
| def import_grammar(self, grammar_path, base_paths=[]): | |||||
| def import_grammar(self, grammar_path, base_path=None, import_paths=[]): | |||||
| if grammar_path not in _imported_grammars: | if grammar_path not in _imported_grammars: | ||||
| import_paths = base_paths + IMPORT_PATHS | |||||
| for import_path in import_paths: | |||||
| with suppress(IOError): | |||||
| joined_path = os.path.join(import_path, grammar_path) | |||||
| with open(joined_path, encoding='utf8') as f: | |||||
| text = f.read() | |||||
| grammar = self.load_grammar(text, joined_path) | |||||
| # import_paths take priority over base_path since they should handle relative imports and ignore everything else. | |||||
| to_try = import_paths + ([base_path] if base_path is not None else []) + [stdlib_loader] | |||||
| for source in to_try: | |||||
| try: | |||||
| if callable(source): | |||||
| joined_path, text = source(base_path, grammar_path) | |||||
| else: | |||||
| joined_path = os.path.join(source, grammar_path) | |||||
| with open(joined_path, encoding='utf8') as f: | |||||
| text = f.read() | |||||
| except IOError: | |||||
| continue | |||||
| else: | |||||
| grammar = self.load_grammar(text, joined_path, import_paths) | |||||
| _imported_grammars[grammar_path] = grammar | _imported_grammars[grammar_path] = grammar | ||||
| break | break | ||||
| else: | else: | ||||
| open(grammar_path, encoding='utf8') # Force a file not found error | |||||
| # Search failed. Make Python throw a nice error. | |||||
| open(grammar_path, encoding='utf8') | |||||
| assert False | assert False | ||||
| return _imported_grammars[grammar_path] | return _imported_grammars[grammar_path] | ||||
| def load_grammar(self, grammar_text, grammar_name='<?>'): | |||||
| def load_grammar(self, grammar_text, grammar_name='<?>', import_paths=[]): | |||||
| "Parse grammar_text, verify, and create Grammar object. Display nice messages on error." | "Parse grammar_text, verify, and create Grammar object. Display nice messages on error." | ||||
| try: | try: | ||||
| @@ -873,7 +934,7 @@ class GrammarLoader: | |||||
| aliases = {name: arg1 or name} # Aliases if exist | aliases = {name: arg1 or name} # Aliases if exist | ||||
| if path_node.data == 'import_lib': # Import from library | if path_node.data == 'import_lib': # Import from library | ||||
| base_paths = [] | |||||
| base_path = None | |||||
| else: # Relative import | else: # Relative import | ||||
| if grammar_name == '<string>': # Import relative to script file path if grammar is coded in script | if grammar_name == '<string>': # Import relative to script file path if grammar is coded in script | ||||
| try: | try: | ||||
| @@ -883,16 +944,19 @@ class GrammarLoader: | |||||
| else: | else: | ||||
| base_file = grammar_name # Import relative to grammar file path if external grammar file | base_file = grammar_name # Import relative to grammar file path if external grammar file | ||||
| if base_file: | if base_file: | ||||
| base_paths = [os.path.split(base_file)[0]] | |||||
| if isinstance(base_file, PackageResource): | |||||
| base_path = PackageResource(base_file.pkg_name, os.path.split(base_file.path)[0]) | |||||
| else: | |||||
| base_path = os.path.split(base_file)[0] | |||||
| else: | else: | ||||
| base_paths = [os.path.abspath(os.path.curdir)] | |||||
| base_path = os.path.abspath(os.path.curdir) | |||||
| try: | try: | ||||
| import_base_paths, import_aliases = imports[dotted_path] | |||||
| assert base_paths == import_base_paths, 'Inconsistent base_paths for %s.' % '.'.join(dotted_path) | |||||
| import_base_path, import_aliases = imports[dotted_path] | |||||
| assert base_path == import_base_path, 'Inconsistent base_path for %s.' % '.'.join(dotted_path) | |||||
| import_aliases.update(aliases) | import_aliases.update(aliases) | ||||
| except KeyError: | except KeyError: | ||||
| imports[dotted_path] = base_paths, aliases | |||||
| imports[dotted_path] = base_path, aliases | |||||
| elif stmt.data == 'declare': | elif stmt.data == 'declare': | ||||
| for t in stmt.children: | for t in stmt.children: | ||||
| @@ -901,9 +965,9 @@ class GrammarLoader: | |||||
| assert False, stmt | assert False, stmt | ||||
| # import grammars | # import grammars | ||||
| for dotted_path, (base_paths, aliases) in imports.items(): | |||||
| for dotted_path, (base_path, aliases) in imports.items(): | |||||
| grammar_path = os.path.join(*dotted_path) + EXT | grammar_path = os.path.join(*dotted_path) + EXT | ||||
| g = self.import_grammar(grammar_path, base_paths=base_paths) | |||||
| g = self.import_grammar(grammar_path, base_path=base_path, import_paths=import_paths) | |||||
| new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases) | new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases) | ||||
| term_defs += new_td | term_defs += new_td | ||||
| @@ -987,5 +1051,5 @@ class GrammarLoader: | |||||
| def load_grammar(grammar, source, re_, global_keep_all_tokens): | |||||
| return GrammarLoader(re_, global_keep_all_tokens).load_grammar(grammar, source) | |||||
def load_grammar(grammar, source, import_paths, global_keep_all_tokens):
    """Load `grammar` through a fresh GrammarLoader and return its result.

    `source` and `import_paths` are passed straight through to
    ``GrammarLoader.load_grammar``; `global_keep_all_tokens` configures the loader.
    """
    loader = GrammarLoader(global_keep_all_tokens)
    return loader.load_grammar(grammar, source, import_paths)
| @@ -11,6 +11,7 @@ from copy import copy, deepcopy | |||||
| from lark.utils import Py36, isascii | from lark.utils import Py36, isascii | ||||
| from lark import Token | from lark import Token | ||||
| from lark.load_grammar import FromPackageLoader | |||||
| try: | try: | ||||
| from cStringIO import StringIO as cStringIO | from cStringIO import StringIO as cStringIO | ||||
| @@ -1805,6 +1806,37 @@ def _make_parser_test(LEXER, PARSER): | |||||
| tree = parser.parse(test_file) | tree = parser.parse(test_file) | ||||
| self.assertEqual(tree.children, [Token('B', 'A')]) | self.assertEqual(tree.children, [Token('B', 'A')]) | ||||
| def test_import_custom_sources(self): | |||||
| custom_loader = FromPackageLoader('tests', ('grammars', )) | |||||
| grammar = """ | |||||
| start: startab | |||||
| %import ab.startab | |||||
| """ | |||||
| p = _Lark(grammar, import_paths=[custom_loader]) | |||||
| self.assertEqual(p.parse('ab'), | |||||
| Tree('start', [Tree('startab', [Tree('ab__expr', [Token('ab__A', 'a'), Token('ab__B', 'b')])])])) | |||||
| grammar = """ | |||||
| start: rule_to_import | |||||
| %import test_relative_import_of_nested_grammar__grammar_to_import.rule_to_import | |||||
| """ | |||||
| p = _Lark(grammar, import_paths=[custom_loader]) | |||||
| x = p.parse('N') | |||||
| self.assertEqual(next(x.find_data('rule_to_import')).children, ['N']) | |||||
| custom_loader2 = FromPackageLoader('tests') | |||||
| grammar = """ | |||||
| %import .test_relative_import (start, WS) | |||||
| %ignore WS | |||||
| """ | |||||
| p = _Lark(grammar, import_paths=[custom_loader2]) | |||||
| x = p.parse('12 capybaras') | |||||
| self.assertEqual(x.children, ['12', 'capybaras']) | |||||
| @unittest.skipIf(PARSER == 'cyk', "Doesn't work for CYK") | @unittest.skipIf(PARSER == 'cyk', "Doesn't work for CYK") | ||||
| def test_prioritization(self): | def test_prioritization(self): | ||||
| "Tests effect of priority on result" | "Tests effect of priority on result" | ||||