| @@ -33,6 +33,13 @@ class LarkOptions: | |||||
| g_regex_flags: int | g_regex_flags: int | ||||
| use_bytes: bool | use_bytes: bool | ||||
| import_sources: List[Union[str, Callable[[str, str], str]]] | import_sources: List[Union[str, Callable[[str, str], str]]] | ||||
| source: Optional[str] | |||||
| class FromPackageLoader: | |||||
| def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...): ... | |||||
| def __call__(self, base_paths: List[str], grammar_path: str) -> Tuple[str, str]: ... | |||||
| class Lark: | class Lark: | ||||
| @@ -62,6 +69,7 @@ class Lark: | |||||
| g_regex_flags: int = ..., | g_regex_flags: int = ..., | ||||
| use_bytes: bool = False, | use_bytes: bool = False, | ||||
| import_sources: List[Union[str, Callable[[List[str], str], Tuple[str, str]]]] = ..., | import_sources: List[Union[str, Callable[[List[str], str], Tuple[str, str]]]] = ..., | ||||
| source: Optional[str], | |||||
| ): | ): | ||||
| ... | ... | ||||
| @@ -71,6 +79,10 @@ class Lark: | |||||
| @classmethod | @classmethod | ||||
| def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options) -> _T: | def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options) -> _T: | ||||
| ... | ... | ||||
| @classmethod | |||||
| def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...] = ..., **options) -> _T: | |||||
| ... | |||||
| def lex(self, text: str) -> Iterator[Token]: | def lex(self, text: str) -> Iterator[Token]: | ||||
| ... | ... | ||||
| @@ -5,7 +5,7 @@ from io import open | |||||
| from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger | from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger | ||||
| from .load_grammar import load_grammar | |||||
| from .load_grammar import load_grammar, FromPackageLoader | |||||
| from .tree import Tree | from .tree import Tree | ||||
| from .common import LexerConf, ParserConf | from .common import LexerConf, ParserConf | ||||
| @@ -92,6 +92,8 @@ class LarkOptions(Serialize): | |||||
| A callback for editing the terminals before parse. | A callback for editing the terminals before parse. | ||||
| import_sources | import_sources | ||||
| A List of either paths or loader functions to specify from where grammars are imported | A List of either paths or loader functions to specify from where grammars are imported | ||||
| source | |||||
| Override the source from where the grammar was loaded. Useful for relative imports and unconventional grammar loading | |||||
| **=== End Options ===** | **=== End Options ===** | ||||
| """ | """ | ||||
| @@ -118,6 +120,7 @@ class LarkOptions(Serialize): | |||||
| 'g_regex_flags': 0, | 'g_regex_flags': 0, | ||||
| 'use_bytes': False, | 'use_bytes': False, | ||||
| 'import_sources': [], | 'import_sources': [], | ||||
| 'source': None, | |||||
| } | } | ||||
| def __init__(self, options_dict): | def __init__(self, options_dict): | ||||
| @@ -193,10 +196,13 @@ class Lark(Serialize): | |||||
| re_module = re | re_module = re | ||||
| # Some, but not all file-like objects have a 'name' attribute | # Some, but not all file-like objects have a 'name' attribute | ||||
| try: | |||||
| self.source = grammar.name | |||||
| except AttributeError: | |||||
| self.source = '<string>' | |||||
| if self.options.source is None: | |||||
| try: | |||||
| self.source = grammar.name | |||||
| except AttributeError: | |||||
| self.source = '<string>' | |||||
| else: | |||||
| self.source = self.options.source | |||||
| # Drain file-like objects to get their contents | # Drain file-like objects to get their contents | ||||
| try: | try: | ||||
| @@ -404,6 +410,28 @@ class Lark(Serialize): | |||||
| grammar_filename = os.path.join(basepath, grammar_filename) | grammar_filename = os.path.join(basepath, grammar_filename) | ||||
| with open(grammar_filename, encoding='utf8') as f: | with open(grammar_filename, encoding='utf8') as f: | ||||
| return cls(f, **options) | return cls(f, **options) | ||||
| @classmethod | |||||
| def open_from_package(cls, package, grammar_path, search_paths=("",), **options): | |||||
| """Create an instance of Lark with the grammar loaded from within the package `package`. | |||||
| This allows grammar loading from zipapps. | |||||
| Will also create a `FromPackageLoader` instance and add it to the `import_sources` to simplify importing | |||||
| ``search_paths`` is passed to `FromPackageLoader` | |||||
| Example: | |||||
| Lark.open_from_package(__name__, "example.lark", ("grammars",), parser=...) | |||||
| """ | |||||
| package = FromPackageLoader(package, search_paths) | |||||
| full_path, text = package([], grammar_path) | |||||
| options.setdefault('source', full_path) | |||||
| if 'import_sources' in options: | |||||
| options['import_sources'].append(package) | |||||
| else: | |||||
| options['import_sources'] = [package] | |||||
| return cls(text, **options) | |||||
| def __repr__(self): | def __repr__(self): | ||||
| return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source, self.options.parser, self.options.lexer) | return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source, self.options.parser, self.options.lexer) | ||||
| @@ -4,6 +4,7 @@ import os.path | |||||
| import sys | import sys | ||||
| from copy import copy, deepcopy | from copy import copy, deepcopy | ||||
| from io import open | from io import open | ||||
| import pkgutil | |||||
| from .utils import bfs, eval_escaping, Py36, logger, classify_bool | from .utils import bfs, eval_escaping, Py36, logger, classify_bool | ||||
| from .lexer import Token, TerminalDef, PatternStr, PatternRE | from .lexer import Token, TerminalDef, PatternStr, PatternRE | ||||
| @@ -648,35 +649,69 @@ class Grammar: | |||||
| return terminals, compiled_rules, self.ignore | return terminals, compiled_rules, self.ignore | ||||
| def stdlib_loader(base_paths, grammar_path): | |||||
| import pkgutil | |||||
| for path in IMPORT_PATHS: | |||||
| text = pkgutil.get_data('lark', path + '/' + grammar_path) | |||||
| if text is None: | |||||
| continue | |||||
| return '<stdlib:' + grammar_path + '>', text.decode() | |||||
| raise FileNotFoundError() | |||||
| class FromPackageLoader(object): | |||||
| """ | |||||
| Provides a simple way of creating custom import loaders that load from packages via ``pkgutil.get_data`` instead of using `open`. | |||||
| This allows them to be compatible even from within zip files. | |||||
| Relative imports are handled, so you can just freely use them. | |||||
| pkg_name: The name of the package. You can probably provide `__name__` most of the time | |||||
| search_paths: All the paths that will be searched on absolute imports. | |||||
| """ | |||||
| def __init__(self, pkg_name, search_paths=("", )): | |||||
| self.pkg_name = pkg_name | |||||
| self.search_paths = search_paths | |||||
| def __repr__(self): | |||||
| return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths) | |||||
| def __call__(self, base_paths, grammar_path): | |||||
| if len(base_paths) == 0: | |||||
| to_try = self.search_paths | |||||
| else: | |||||
| assert len(base_paths) == 1 | |||||
| if not base_paths[0].startswith('<%s:' % (self.pkg_name,)): | |||||
| # Technically false, but FileNotFound doesn't exist in python2.7, and this message should never reach the end user anyway | |||||
| raise IOError() | |||||
| base_path = base_paths[0].partition(':')[2] | |||||
| if base_path and base_path[0] == '/': | |||||
| base_path = base_path[1:] | |||||
| to_try = [base_path] | |||||
| for path in to_try: | |||||
| full_path = os.path.join(path, grammar_path) | |||||
| text = None | |||||
| with suppress(IOError): | |||||
| text = pkgutil.get_data(self.pkg_name, full_path) | |||||
| if text is None: | |||||
| continue | |||||
| return '<%s:/%s>' % (self.pkg_name, full_path), text.decode() | |||||
| raise IOError() | |||||
| stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS) | |||||
| _imported_grammars = {} | _imported_grammars = {} | ||||
| def import_grammar(grammar_path, re_, base_paths=(), import_sources=()): | |||||
| def import_grammar(grammar_path, re_, base_paths=[], import_sources=[]): | |||||
| if grammar_path not in _imported_grammars: | if grammar_path not in _imported_grammars: | ||||
| import_paths = import_sources + base_paths + [stdlib_loader] | |||||
| # import_sources take priority over base_paths since they should handle relative imports and ignore everything else. | |||||
| # Question: should the stdlib_loader really be pushed to the end? | |||||
| import_paths = import_sources + base_paths + [stdlib_loader] | |||||
| for source in import_paths: | for source in import_paths: | ||||
| if callable(source): | |||||
| with suppress(IOError): | |||||
| text = None | |||||
| with suppress(IOError): | |||||
| if callable(source): | |||||
| joined_path, text = source(base_paths, grammar_path) | joined_path, text = source(base_paths, grammar_path) | ||||
| grammar = load_grammar(text, joined_path, re_, import_sources) | |||||
| _imported_grammars[grammar_path] = grammar | |||||
| break | |||||
| else: | |||||
| with suppress(IOError): | |||||
| else: | |||||
| joined_path = os.path.join(source, grammar_path) | joined_path = os.path.join(source, grammar_path) | ||||
| with open(joined_path, encoding='utf8') as f: | with open(joined_path, encoding='utf8') as f: | ||||
| text = f.read() | text = f.read() | ||||
| grammar = load_grammar(text, joined_path, re_, import_sources) | |||||
| _imported_grammars[grammar_path] = grammar | |||||
| break | |||||
| if text is not None: | |||||
| # Don't load the grammar from within the suppress statement. Otherwise the underlying error message will be swallowed | |||||
| # and the wrong file will be reported as missing | |||||
| grammar = load_grammar(text, joined_path, re_, import_sources) | |||||
| _imported_grammars[grammar_path] = grammar | |||||
| break | |||||
| else: | else: | ||||
| open(grammar_path, encoding='utf8') | open(grammar_path, encoding='utf8') | ||||
| assert False | assert False | ||||
| @@ -11,6 +11,7 @@ from copy import copy, deepcopy | |||||
| from lark.utils import Py36, isascii | from lark.utils import Py36, isascii | ||||
| from lark import Token | from lark import Token | ||||
| from lark.load_grammar import FromPackageLoader | |||||
| try: | try: | ||||
| from cStringIO import StringIO as cStringIO | from cStringIO import StringIO as cStringIO | ||||
| @@ -1783,12 +1784,7 @@ def _make_parser_test(LEXER, PARSER): | |||||
| self.assertRaises(IOError, _Lark, grammar) | self.assertRaises(IOError, _Lark, grammar) | ||||
| def test_import_custom_sources(self): | def test_import_custom_sources(self): | ||||
| def custom_loader(base_paths, grammar_path): | |||||
| import pkgutil | |||||
| text = pkgutil.get_data('tests', 'grammars/' + grammar_path) | |||||
| if text is None: | |||||
| raise FileNotFoundError() | |||||
| return '<tests.grammars:' + grammar_path + '>', text.decode() | |||||
| custom_loader = FromPackageLoader('tests', ('grammars', )) | |||||
| grammar = """ | grammar = """ | ||||
| start: startab | start: startab | ||||
| @@ -1800,6 +1796,24 @@ def _make_parser_test(LEXER, PARSER): | |||||
| self.assertEqual(p.parse('ab'), | self.assertEqual(p.parse('ab'), | ||||
| Tree('start', [Tree('startab', [Tree('ab__expr', [Token('ab__A', 'a'), Token('ab__B', 'b')])])])) | Tree('start', [Tree('startab', [Tree('ab__expr', [Token('ab__A', 'a'), Token('ab__B', 'b')])])])) | ||||
| grammar = """ | |||||
| start: rule_to_import | |||||
| %import test_relative_import_of_nested_grammar__grammar_to_import.rule_to_import | |||||
| """ | |||||
| p = _Lark(grammar, import_sources=[custom_loader]) | |||||
| x = p.parse('N') | |||||
| self.assertEqual(next(x.find_data('rule_to_import')).children, ['N']) | |||||
| custom_loader2 = FromPackageLoader('tests') | |||||
| grammar = """ | |||||
| %import .test_relative_import (start, WS) | |||||
| %ignore WS | |||||
| """ | |||||
| p = _Lark(grammar, import_sources=[custom_loader2]) | |||||
| x = p.parse('12 capybaras') | |||||
| self.assertEqual(x.children, ['12', 'capybaras']) | |||||
| @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules") | @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules") | ||||
| def test_earley_prioritization(self): | def test_earley_prioritization(self): | ||||
| "Tests effect of priority on result" | "Tests effect of priority on result" | ||||