@@ -32,19 +32,19 @@ class LarkOptions:
     cache: Union[bool, str]
     g_regex_flags: int
     use_bytes: bool
-    import_sources: List[Union[str, Callable[[str, str], str]]]
-    source: Optional[str]
+    import_paths: List[Union[str, Callable[[Optional[str], str], Tuple[str, str]]]]
+    source_path: Optional[str]
 class FromPackageLoader:
     def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...): ...
-    def __call__(self, base_paths: List[str], grammar_path: str) -> Tuple[str, str]: ...
+    def __call__(self, base_paths: str, grammar_path: str) -> Tuple[str, str]: ...
 class Lark:
-    source: str
-    grammar_source: str
+    source_path: str
+    source_code: str
     options: LarkOptions
     lexer: Lexer
     terminals: List[TerminalDef]
@@ -68,8 +68,8 @@ class Lark:
         cache: Union[bool, str] = False,
         g_regex_flags: int = ...,
         use_bytes: bool = False,
-        import_sources: List[Union[str, Callable[[List[str], str], Tuple[str, str]]]] = ...,
-        source: Optional[str],
+        import_paths: List[Union[str, Callable[[Optional[str], str], Tuple[str, str]]]] = ...,
+        source_path: Optional[str],
     ):
         ...
@@ -2,7 +2,7 @@ from __future__ import absolute_import
 import sys, os, pickle, hashlib
 from io import open
+from warnings import warn
 from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger
 from .load_grammar import load_grammar, FromPackageLoader
@@ -90,10 +90,10 @@ class LarkOptions(Serialize):
         Accept an input of type ``bytes`` instead of ``str`` (Python 3 only).
     edit_terminals
         A callback for editing the terminals before parse.
-    import_sources
+    import_paths
         A List of either paths or loader functions to specify from where grammars are imported
-    source
-        Override the source of from where the grammar was loaded. Usefull for relative imports and unconventional grammar loading
+    source_path
+        Override the source of from where the grammar was loaded. Useful for relative imports and unconventional grammar loading
     **=== End Options ===**
     """
@@ -119,8 +119,8 @@ class LarkOptions(Serialize):
         'edit_terminals': None,
         'g_regex_flags': 0,
         'use_bytes': False,
-        'import_sources': [],
-        'source': None,
+        'import_paths': [],
+        'source_path': None,
     }
     def __init__(self, options_dict):
@@ -196,13 +196,13 @@ class Lark(Serialize):
             re_module = re
         # Some, but not all file-like objects have a 'name' attribute
-        if self.options.source is None:
+        if self.options.source_path is None:
             try:
-                self.source = grammar.name
+                self.source_path = grammar.name
             except AttributeError:
-                self.source = '<string>'
+                self.source_path = '<string>'
         else:
-            self.source = self.options.source
+            self.source_path = self.options.source_path
         # Drain file-like objects to get their contents
         try:
@@ -213,7 +213,7 @@ class Lark(Serialize):
             grammar = read()
         assert isinstance(grammar, STRING_TYPE)
-        self.grammar_source = grammar
+        self.source_code = grammar
         if self.options.use_bytes:
             if not isascii(grammar):
                 raise ValueError("Grammar must be ascii only, when use_bytes=True")
@@ -276,7 +276,7 @@ class Lark(Serialize):
         assert self.options.ambiguity in ('resolve', 'explicit', 'forest', 'auto', )
         # Parse the grammar file and compose the grammars (TODO)
-        self.grammar = load_grammar(grammar, self.source, re_module, self.options.import_sources)
+        self.grammar = load_grammar(grammar, self.source_path, re_module, self.options.import_paths)
         # Compile the EBNF grammar into BNF
         self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start)
@@ -374,7 +374,7 @@ class Lark(Serialize):
         self.options = LarkOptions.deserialize(options, memo)
         re_module = regex if self.options.regex else re
         self.rules = [Rule.deserialize(r, memo) for r in data['rules']]
-        self.source = '<deserialized>'
+        self.source_path = '<deserialized>'
         self._prepare_callbacks()
         self.parser = self.parser_class.deserialize(
             data['parser'],
@@ -416,9 +416,7 @@ class Lark(Serialize):
         """Create an instance of Lark with the grammar loaded from within the package `package`.
         This allows grammar loading from zipapps.
-        Will also create a `FromPackageLoader` instance and add it to the `import_sources` to simplify importing
-        ``search_paths`` is passed to `FromPackageLoader`
+        Imports in the grammar will use the `package` and `search_paths` provided, through `FromPackageLoader`
         Example:
@@ -426,15 +424,15 @@ class Lark(Serialize):
         """
         package = FromPackageLoader(package, search_paths)
         full_path, text = package([], grammar_path)
-        options.setdefault('source', full_path)
-        if 'import_sources' in options:
-            options['import_sources'].append(package)
+        options.setdefault('source_path', full_path)
+        if 'import_paths' in options:
+            options['import_paths'].append(package)
         else:
-            options['import_sources'] = [package]
+            options['import_paths'] = [package]
         return cls(text, **options)
     def __repr__(self):
-        return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source, self.options.parser, self.options.lexer)
+        return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer)
     def lex(self, text):
@@ -481,6 +479,15 @@ class Lark(Serialize):
                         # Prevent infinite loop
                         raise e2
                     e = e2
+    @property
+    def source(self):
+        warn("Lark.source attribute has been renamed to Lark.source_path", DeprecationWarning)
+        return self.source_path
+    @source.setter
+    def source(self, value):
+        self.source_path = value
 ###}
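
The lark.py hunks above amount to a rename of the public options and attributes: `source` becomes `source_path`, `grammar_source` becomes `source_code`, and `import_sources` becomes `import_paths`, with a deprecation property keeping `Lark.source` readable. A minimal sketch of the new call pattern (the grammar text, the `grammars/` directory, and the `hello.lark` label are made up for illustration):

    from lark import Lark

    grammar = '''
    start: "hello" NAME
    %import common.CNAME -> NAME
    %import common.WS
    %ignore WS
    '''

    # import_paths (formerly import_sources) may mix plain directories and loader
    # callables; source_path (formerly source) labels where the grammar came from,
    # which drives relative %import resolution and error messages.
    parser = Lark(grammar, parser='lalr',
                  import_paths=['grammars'],
                  source_path='hello.lark')

    print(parser.source_code[:20])   # formerly parser.grammar_source
    print(parser.source)             # still works, but emits a DeprecationWarning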
@@ -666,50 +666,61 @@ class FromPackageLoader(object):
     def __repr__(self):
         return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths)
-    def __call__(self, base_paths, grammar_path):
-        if len(base_paths) == 0:
+    def __call__(self, base_path, grammar_path):
+        if base_path is None:
             to_try = self.search_paths
         else:
-            assert len(base_paths) == 1
-            if not base_paths[0].startswith('<%s:' % (self.pkg_name,)):
+            # Check whether or not the importing grammar was loaded by this module.
+            if not base_path.startswith('<%s:' % (self.pkg_name,)):
                 # Technically false, but FileNotFound doesn't exist in python2.7, and this message should never reach the end user anyway
                 raise IOError()
-            base_path = base_paths[0].partition(':')[2]
-            if base_path and base_path[0] == '/':
-                base_path = base_path[1:]
+            # Separate the path and the pkg_name and throw away the slash. `pkgutil.get_data` doesn't like it. (see below)
+            base_path = base_path.partition(':')[2].lstrip('/')
             to_try = [base_path]
         for path in to_try:
             full_path = os.path.join(path, grammar_path)
-            text = None
-            with suppress(IOError):
+            try:
                 text = pkgutil.get_data(self.pkg_name, full_path)
-            if text is None:
+            except IOError:
                 continue
-            return '<%s:/%s>' % (self.pkg_name, full_path), text.decode()
+            else:
+                # Custom format `<{pkg_name}:/{full_path}>`
+                # These are the arguments to `pkgutil.get_data(pkg_name, full_path)`
+                # Required since we cannot easily provide an actual file path for all package data (e.g. from inside a zip)
+                # The additional slash after the `:` is to allow `os.path.split` to work on this without accidentally
+                # throwing away the `pkg_name`. (As it would inside of `GrammarLoader.load_grammar` otherwise when relative imports
+                # are resolved.)
+                # Without the slash, `"<lark:common.lark>"` would turn into `""`, losing the package information.
+                # With the slash, `"<lark:/common.lark>"` turns into `"<lark:"` (without the slash), but
+                # `"<lark:/grammars/common.lark>"` turns into `"<lark:/grammars"`, so we have to strip it away when we look at the path (see above)
+                return '<%s:/%s>' % (self.pkg_name, full_path), text.decode()
         raise IOError()
 stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)
 _imported_grammars = {}
-def import_grammar(grammar_path, re_, base_paths=[], import_sources=[]):
+def import_grammar(grammar_path, re_, base_path=None, import_paths=[]):
     if grammar_path not in _imported_grammars:
-        # import_sources take priority over base_paths since they should handle relative imports and ignore everthing else.
+        # import_paths take priority over base_path since they should handle relative imports and ignore everything else.
         # Question: should the stdlib_loader really be pushed to the end?
-        import_paths = import_sources + base_paths + [stdlib_loader]
-        for source in import_paths:
-            text = None
-            with suppress(IOError):
+        to_try = import_paths + ([base_path] if base_path is not None else []) + [stdlib_loader]
+        for source in to_try:
+            try:
                 if callable(source):
-                    joined_path, text = source(base_paths, grammar_path)
+                    joined_path, text = source(base_path, grammar_path)
                 else:
                     joined_path = os.path.join(source, grammar_path)
                     with open(joined_path, encoding='utf8') as f:
                         text = f.read()
-            if text is not None:
-                # Don't load the grammar from within the suppress statement. Otherwise the underlying error message will be swallowed
+            except IOError:
+                continue
+            else:
+                # Don't load the grammar from within the try statement. Otherwise the underlying error message will be swallowed
                 # and the wrong file will be reported as missing
-                grammar = load_grammar(text, joined_path, re_, import_sources)
+                grammar = load_grammar(text, joined_path, re_, import_paths)
                 _imported_grammars[grammar_path] = grammar
                 break
         else:
@@ -868,7 +879,7 @@ class GrammarLoader:
         self.canonize_tree = CanonizeTree()
         self.re_module = re_module
-    def load_grammar(self, grammar_text, grammar_name='<?>', import_sources=[]):
+    def load_grammar(self, grammar_text, grammar_name='<?>', import_paths=[]):
         "Parse grammar_text, verify, and create Grammar object. Display nice messages on error."
         try:
@@ -922,7 +933,7 @@ class GrammarLoader:
                     aliases = {name: arg1 or name}  # Aliases if exist
                 if path_node.data == 'import_lib':  # Import from library
-                    base_paths = []
+                    base_path = None
                 else:  # Relative import
                     if grammar_name == '<string>':  # Import relative to script file path if grammar is coded in script
                         try:
@@ -932,16 +943,16 @@ class GrammarLoader:
                     else:
                         base_file = grammar_name  # Import relative to grammar file path if external grammar file
                     if base_file:
-                        base_paths = [os.path.split(base_file)[0]]
+                        base_path = os.path.split(base_file)[0]
                     else:
-                        base_paths = [os.path.abspath(os.path.curdir)]
+                        base_path = os.path.abspath(os.path.curdir)
                 try:
-                    import_base_paths, import_aliases = imports[dotted_path]
-                    assert base_paths == import_base_paths, 'Inconsistent base_paths for %s.' % '.'.join(dotted_path)
+                    import_base_path, import_aliases = imports[dotted_path]
+                    assert base_path == import_base_path, 'Inconsistent base_path for %s.' % '.'.join(dotted_path)
                     import_aliases.update(aliases)
                 except KeyError:
-                    imports[dotted_path] = base_paths, aliases
+                    imports[dotted_path] = base_path, aliases
             elif stmt.data == 'declare':
                 for t in stmt.children:
@@ -950,9 +961,9 @@ class GrammarLoader:
                 assert False, stmt
         # import grammars
-        for dotted_path, (base_paths, aliases) in imports.items():
+        for dotted_path, (base_path, aliases) in imports.items():
             grammar_path = os.path.join(*dotted_path) + EXT
-            g = import_grammar(grammar_path, self.re_module, base_paths=base_paths, import_sources=import_sources)
+            g = import_grammar(grammar_path, self.re_module, base_path=base_path, import_paths=import_paths)
             new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases)
             term_defs += new_td
@@ -1032,5 +1043,5 @@ class GrammarLoader:
-def load_grammar(grammar, source, re_, import_sources):
-    return GrammarLoader(re_).load_grammar(grammar, source, import_sources)
+def load_grammar(grammar, source, re_, import_paths):
+    return GrammarLoader(re_).load_grammar(grammar, source, import_paths)
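
After this change, `FromPackageLoader.__call__` and `import_grammar` share one loader protocol: a callable receives `(base_path, grammar_path)`, where `base_path` is `None` for library-style imports and otherwise identifies the importing grammar's location, and it either returns a `(full_path, text)` pair or raises `IOError` so the next candidate in the search list is tried. A sketch of a custom loader written against that contract (the in-memory dict and its keys are invented for illustration, not part of Lark):

    import os

    def make_dict_loader(grammars):
        # Build a loader that serves grammar text from an in-memory dict.
        def load(base_path, grammar_path):
            # base_path is None for `%import lib.rule`; for relative imports it is
            # derived from the source path of the importing grammar.
            key = os.path.basename(grammar_path)
            if key not in grammars:
                raise IOError()   # defer to the next loader (e.g. stdlib_loader)
            # Mirror FromPackageLoader: return a display path plus the grammar text.
            return '<dict:/%s>' % key, grammars[key]
        return load

    # Usage: Lark(grammar, import_paths=[make_dict_loader({'ab.lark': ab_grammar_text})])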
@@ -1792,7 +1792,7 @@ def _make_parser_test(LEXER, PARSER):
            %import ab.startab
            """
-            p = _Lark(grammar, import_sources=[custom_loader])
+            p = _Lark(grammar, import_paths=[custom_loader])
             self.assertEqual(p.parse('ab'),
                              Tree('start', [Tree('startab', [Tree('ab__expr', [Token('ab__A', 'a'), Token('ab__B', 'b')])])]))
@@ -1801,7 +1801,7 @@ def _make_parser_test(LEXER, PARSER):
            %import test_relative_import_of_nested_grammar__grammar_to_import.rule_to_import
            """
-            p = _Lark(grammar, import_sources=[custom_loader])
+            p = _Lark(grammar, import_paths=[custom_loader])
             x = p.parse('N')
             self.assertEqual(next(x.find_data('rule_to_import')).children, ['N'])
@@ -1810,7 +1810,7 @@ def _make_parser_test(LEXER, PARSER):
            %import .test_relative_import (start, WS)
            %ignore WS
            """
-            p = _Lark(grammar, import_sources=[custom_loader2])
+            p = _Lark(grammar, import_paths=[custom_loader2])
             x = p.parse('12 capybaras')
             self.assertEqual(x.children, ['12', 'capybaras'])
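
These tests exercise the rename through custom loader callables; the same machinery backs `Lark.open_from_package`, which builds a `FromPackageLoader` and appends it to `import_paths` so that grammars shipped inside a package (including zipapps) and their relative imports resolve through `pkgutil.get_data`. A sketch, assuming a package `my_pkg` that ships `grammars/my_grammar.lark` (both hypothetical):

    from lark import Lark

    # Roughly equivalent to constructing FromPackageLoader('my_pkg', ('grammars',)),
    # loading the grammar through it, and passing the loader on via import_paths.
    parser = Lark.open_from_package('my_pkg', 'my_grammar.lark', ('grammars',),
                                    parser='lalr')
    tree = parser.parse('...')   # input text depends on the hypothetical grammar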