Commit a50fc10773: import_sources->import_paths, source->source_path, various implementation changes

Author: MegaIng, 4 years ago
Tag: tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.0

4 changed files with 81 additions and 63 deletions:
  1. lark-stubs/lark.pyi    +7  -7
  2. lark/lark.py           +28 -21
  3. lark/load_grammar.py   +43 -32
  4. tests/test_parser.py   +3  -3

lark-stubs/lark.pyi (+7 -7)

@@ -32,19 +32,19 @@ class LarkOptions:
     cache: Union[bool, str]
     g_regex_flags: int
     use_bytes: bool
-    import_sources: List[Union[str, Callable[[str, str], str]]]
-    source: Optional[str]
+    import_paths: List[Union[str, Callable[[Optional[str], str], Tuple[str, str]]]]
+    source_path: Optional[str]


 class FromPackageLoader:
     def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...): ...

-    def __call__(self, base_paths: List[str], grammar_path: str) -> Tuple[str, str]: ...
+    def __call__(self, base_paths: str, grammar_path: str) -> Tuple[str, str]: ...


 class Lark:
-    source: str
-    grammar_source: str
+    source_path: str
+    source_code: str
     options: LarkOptions
     lexer: Lexer
     terminals: List[TerminalDef]
@@ -68,8 +68,8 @@ class Lark:
         cache: Union[bool, str] = False,
         g_regex_flags: int = ...,
         use_bytes: bool = False,
-        import_sources: List[Union[str, Callable[[List[str], str], Tuple[str, str]]]] = ...,
-        source: Optional[str],
+        import_paths: List[Union[str, Callable[[Optional[str], str], Tuple[str, str]]]] = ...,
+        source_path: Optional[str],
     ):
         ...
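Note: per the updated stub, a custom loader in `import_paths` now receives the importing grammar's path (or None) plus the grammar path to resolve, and returns a `(resolved_path, text)` tuple. A minimal sketch of a conforming callable (the function name and directory are illustrative, not part of the commit):

    import os
    from typing import Optional, Tuple

    def my_loader(base_path: Optional[str], grammar_path: str) -> Tuple[str, str]:
        # Hypothetical loader: ignore base_path and resolve against a fixed
        # directory, returning (resolved_path, grammar_text).
        full_path = os.path.join('/my/grammars', grammar_path)
        with open(full_path, encoding='utf8') as f:
            return full_path, f.read()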




lark/lark.py (+28 -21)

@@ -2,7 +2,7 @@ from __future__ import absolute_import

 import sys, os, pickle, hashlib
 from io import open
+from warnings import warn

 from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger
 from .load_grammar import load_grammar, FromPackageLoader
@@ -90,10 +90,10 @@ class LarkOptions(Serialize):
             Accept an input of type ``bytes`` instead of ``str`` (Python 3 only).
         edit_terminals
             A callback for editing the terminals before parse.
-        import_sources
+        import_paths
             A List of either paths or loader functions to specify from where grammars are imported
-        source
-            Override the source of from where the grammar was loaded. Usefull for relative imports and unconventional grammar loading
+        source_path
+            Override the source path from where the grammar was loaded. Useful for relative imports and unconventional grammar loading

     **=== End Options ===**
     """
@@ -119,8 +119,8 @@ class LarkOptions(Serialize):
         'edit_terminals': None,
         'g_regex_flags': 0,
         'use_bytes': False,
-        'import_sources': [],
-        'source': None,
+        'import_paths': [],
+        'source_path': None,
     }

     def __init__(self, options_dict):
@@ -196,13 +196,13 @@ class Lark(Serialize):
             re_module = re

         # Some, but not all file-like objects have a 'name' attribute
-        if self.options.source is None:
+        if self.options.source_path is None:
             try:
-                self.source = grammar.name
+                self.source_path = grammar.name
             except AttributeError:
-                self.source = '<string>'
+                self.source_path = '<string>'
         else:
-            self.source = self.options.source
+            self.source_path = self.options.source_path

         # Drain file-like objects to get their contents
         try:
@@ -213,7 +213,7 @@ class Lark(Serialize):
             grammar = read()

         assert isinstance(grammar, STRING_TYPE)
-        self.grammar_source = grammar
+        self.source_code = grammar
         if self.options.use_bytes:
             if not isascii(grammar):
                 raise ValueError("Grammar must be ascii only, when use_bytes=True")
@@ -276,7 +276,7 @@ class Lark(Serialize):
         assert self.options.ambiguity in ('resolve', 'explicit', 'forest', 'auto', )

         # Parse the grammar file and compose the grammars (TODO)
-        self.grammar = load_grammar(grammar, self.source, re_module, self.options.import_sources)
+        self.grammar = load_grammar(grammar, self.source_path, re_module, self.options.import_paths)

         # Compile the EBNF grammar into BNF
         self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start)
@@ -374,7 +374,7 @@ class Lark(Serialize):
         self.options = LarkOptions.deserialize(options, memo)
         re_module = regex if self.options.regex else re
         self.rules = [Rule.deserialize(r, memo) for r in data['rules']]
-        self.source = '<deserialized>'
+        self.source_path = '<deserialized>'
         self._prepare_callbacks()
         self.parser = self.parser_class.deserialize(
             data['parser'],
@@ -416,9 +416,7 @@ class Lark(Serialize):
         """Create an instance of Lark with the grammar loaded from within the package `package`.
         This allows grammar loading from zipapps.

-        Will also create a `FromPackageLoader` instance and add it to the `import_sources` to simplify importing
-
-        ``search_paths`` is passed to `FromPackageLoader`
+        Imports in the grammar will use the `package` and `search_paths` provided, through `FromPackageLoader`

         Example:
@@ -426,15 +424,15 @@ class Lark(Serialize):
         """
         package = FromPackageLoader(package, search_paths)
         full_path, text = package([], grammar_path)
-        options.setdefault('source', full_path)
-        if 'import_sources' in options:
-            options['import_sources'].append(package)
+        options.setdefault('source_path', full_path)
+        if 'import_paths' in options:
+            options['import_paths'].append(package)
         else:
-            options['import_sources'] = [package]
+            options['import_paths'] = [package]
         return cls(text, **options)

     def __repr__(self):
-        return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source, self.options.parser, self.options.lexer)
+        return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer)

     def lex(self, text):
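Note: a usage sketch of `open_from_package` after the rename (the package layout is hypothetical):

    from lark import Lark

    # Assuming a package layout of my_package/grammars/my_grammar.lark.
    # The FromPackageLoader created internally is appended to import_paths,
    # so %import statements in the grammar resolve from the same package.
    parser = Lark.open_from_package('my_package', 'my_grammar.lark',
                                    search_paths=('grammars',))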
@@ -481,6 +479,15 @@ class Lark(Serialize):
                     # Prevent infinite loop
                     raise e2
                 e = e2

+    @property
+    def source(self):
+        warn("Lark.source attribute has been renamed to Lark.source_path", DeprecationWarning)
+        return self.source_path
+
+    @source.setter
+    def source(self, value):
+        self.source_path = value
+

 ###}
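Note: the new property keeps old code working while warning about the rename. A sketch of the expected behaviour:

    import warnings
    from lark import Lark

    parser = Lark('start: "a"')
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        path = parser.source  # deprecated alias, still readable
    assert path == parser.source_path
    assert caught[0].category is DeprecationWarning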

lark/load_grammar.py (+43 -32)

@@ -666,50 +666,61 @@ class FromPackageLoader(object):
     def __repr__(self):
         return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths)

-    def __call__(self, base_paths, grammar_path):
-        if len(base_paths) == 0:
+    def __call__(self, base_path, grammar_path):
+        if base_path is None:
             to_try = self.search_paths
         else:
-            assert len(base_paths) == 1
-            if not base_paths[0].startswith('<%s:' % (self.pkg_name,)):
+            # Check whether or not the importing grammar was loaded by this module.
+            if not base_path.startswith('<%s:' % (self.pkg_name,)):
                 # Technically false, but FileNotFound doesn't exist in python2.7, and this message should never reach the end user anyway
                 raise IOError()
-            base_path = base_paths[0].partition(':')[2]
-            if base_path and base_path[0] == '/':
-                base_path = base_path[1:]
+            # Separate the path and the pkg_name and throw away the slash. `pkgutil.get_data` doesn't like it. (see below)
+            base_path = base_path.partition(':')[2].lstrip('/')
             to_try = [base_path]
         for path in to_try:
             full_path = os.path.join(path, grammar_path)
-            text = None
-            with suppress(IOError):
+            try:
                 text = pkgutil.get_data(self.pkg_name, full_path)
-            if text is None:
+            except IOError:
                 continue
-            return '<%s:/%s>' % (self.pkg_name, full_path), text.decode()
+            else:
+                # Custom format `<{pkg_name}:/{full_path}>`
+                # These are the arguments to `pkgutil.get_data(pkg_name, full_path)`
+                # Required since we can not easily provide an actual file path for all package data (e.g. from inside a zip)
+                # The additional slash after the `:` is to allow `os.path.split` to work on this without accidentally
+                # throwing away the `pkg_name`. (As it would inside of `GrammarLoader.load_grammar` otherwise when relative imports
+                # are resolved.)
+                # Without the slash `"<lark:common.lark>"` would turn into `""`, losing the package information.
+                # With the slash, `"<lark:/common.lark>"` turns into `"<lark:"`, and
+                # `"<lark:/grammars/common.lark>"` into `"<lark:/grammars"`, so we have to strip it away when we look at the path (see above)
+                return '<%s:/%s>' % (self.pkg_name, full_path), text.decode()
         raise IOError()

 stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)


 _imported_grammars = {}
-def import_grammar(grammar_path, re_, base_paths=[], import_sources=[]):
+def import_grammar(grammar_path, re_, base_path=None, import_paths=[]):
     if grammar_path not in _imported_grammars:
-        # import_sources take priority over base_paths since they should handle relative imports and ignore everthing else.
+        # import_paths take priority over base_path since they should handle relative imports and ignore everything else.
         # Question: should the stdlib_loader really be pushed to the end?
-        import_paths = import_sources + base_paths + [stdlib_loader]
-        for source in import_paths:
-            text = None
-            with suppress(IOError):
+        to_try = import_paths + ([base_path] if base_path is not None else []) + [stdlib_loader]
+        for source in to_try:
+            try:
                 if callable(source):
-                    joined_path, text = source(base_paths, grammar_path)
+                    joined_path, text = source(base_path, grammar_path)
                 else:
                     joined_path = os.path.join(source, grammar_path)
                     with open(joined_path, encoding='utf8') as f:
                         text = f.read()
-            if text is not None:
-                # Don't load the grammar from within the suppress statement. Otherwise the underlying error message will be swallowed
+            except IOError:
+                continue
+            else:
+                # Don't load the grammar from within the try statement. Otherwise the underlying error message will be swallowed
                 # and the wrong file will be reported as missing
-                grammar = load_grammar(text, joined_path, re_, import_sources)
+                grammar = load_grammar(text, joined_path, re_, import_paths)
                 _imported_grammars[grammar_path] = grammar
                 break
         else:
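Note: the comment block above is easier to follow with concrete values; a quick sketch of how `os.path.split` treats the `<pkg_name:/path>` format:

    import os

    # The slash after ':' keeps the package prefix intact when splitting:
    assert os.path.split('<lark:/grammars/common.lark>')[0] == '<lark:/grammars'
    assert os.path.split('<lark:/common.lark>')[0] == '<lark:'
    # Without the slash, the package information would be lost entirely:
    assert os.path.split('<lark:common.lark>')[0] == ''

    # FromPackageLoader.__call__ then strips the marker and the slash:
    assert '<lark:/grammars'.partition(':')[2].lstrip('/') == 'grammars'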
@@ -868,7 +879,7 @@ class GrammarLoader:
         self.canonize_tree = CanonizeTree()
         self.re_module = re_module

-    def load_grammar(self, grammar_text, grammar_name='<?>', import_sources=[]):
+    def load_grammar(self, grammar_text, grammar_name='<?>', import_paths=[]):
         "Parse grammar_text, verify, and create Grammar object. Display nice messages on error."

         try:
@@ -922,7 +933,7 @@ class GrammarLoader:
                     aliases = {name: arg1 or name}  # Aliases if exist

                 if path_node.data == 'import_lib':  # Import from library
-                    base_paths = []
+                    base_path = None
                 else:  # Relative import
                     if grammar_name == '<string>':  # Import relative to script file path if grammar is coded in script
                         try:
@@ -932,16 +943,16 @@ class GrammarLoader:
                     else:
                         base_file = grammar_name  # Import relative to grammar file path if external grammar file
                     if base_file:
-                        base_paths = [os.path.split(base_file)[0]]
+                        base_path = os.path.split(base_file)[0]
                     else:
-                        base_paths = [os.path.abspath(os.path.curdir)]
+                        base_path = os.path.abspath(os.path.curdir)

                 try:
-                    import_base_paths, import_aliases = imports[dotted_path]
-                    assert base_paths == import_base_paths, 'Inconsistent base_paths for %s.' % '.'.join(dotted_path)
+                    import_base_path, import_aliases = imports[dotted_path]
+                    assert base_path == import_base_path, 'Inconsistent base_path for %s.' % '.'.join(dotted_path)
                     import_aliases.update(aliases)
                 except KeyError:
-                    imports[dotted_path] = base_paths, aliases
+                    imports[dotted_path] = base_path, aliases

             elif stmt.data == 'declare':
                 for t in stmt.children:
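Note: to make the relative-import resolution concrete, a small sketch (the file paths are illustrative):

    import os

    # A relative %import inside /project/grammars/my.lark resolves against
    # the directory of the importing grammar file:
    base_file = '/project/grammars/my.lark'
    assert os.path.split(base_file)[0] == '/project/grammars'

    # A library import (e.g. %import common.NUMBER) carries no base path:
    base_path = None  # handled by import_paths and the stdlib_loader instead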
@@ -950,9 +961,9 @@ class GrammarLoader:
                 assert False, stmt

         # import grammars
-        for dotted_path, (base_paths, aliases) in imports.items():
+        for dotted_path, (base_path, aliases) in imports.items():
             grammar_path = os.path.join(*dotted_path) + EXT
-            g = import_grammar(grammar_path, self.re_module, base_paths=base_paths, import_sources=import_sources)
+            g = import_grammar(grammar_path, self.re_module, base_path=base_path, import_paths=import_paths)
             new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases)

             term_defs += new_td
@@ -1032,5 +1043,5 @@ class GrammarLoader:


-def load_grammar(grammar, source, re_, import_sources):
-    return GrammarLoader(re_).load_grammar(grammar, source, import_sources)
+def load_grammar(grammar, source, re_, import_paths):
+    return GrammarLoader(re_).load_grammar(grammar, source, import_paths)

tests/test_parser.py (+3 -3)

@@ -1792,7 +1792,7 @@ def _make_parser_test(LEXER, PARSER):
                 %import ab.startab
                 """

-            p = _Lark(grammar, import_sources=[custom_loader])
+            p = _Lark(grammar, import_paths=[custom_loader])
             self.assertEqual(p.parse('ab'),
                              Tree('start', [Tree('startab', [Tree('ab__expr', [Token('ab__A', 'a'), Token('ab__B', 'b')])])]))

@@ -1801,7 +1801,7 @@ def _make_parser_test(LEXER, PARSER):

                 %import test_relative_import_of_nested_grammar__grammar_to_import.rule_to_import
                 """
-            p = _Lark(grammar, import_sources=[custom_loader])
+            p = _Lark(grammar, import_paths=[custom_loader])
             x = p.parse('N')
             self.assertEqual(next(x.find_data('rule_to_import')).children, ['N'])

@@ -1810,7 +1810,7 @@ def _make_parser_test(LEXER, PARSER):
                 %import .test_relative_import (start, WS)
                 %ignore WS
                 """
-            p = _Lark(grammar, import_sources=[custom_loader2])
+            p = _Lark(grammar, import_paths=[custom_loader2])
             x = p.parse('12 capybaras')
             self.assertEqual(x.children, ['12', 'capybaras'])
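Note: the `custom_loader` fixtures above are `FromPackageLoader` instances defined elsewhere in the test module; a hypothetical reconstruction (the exact arguments may differ):

    from lark.load_grammar import FromPackageLoader

    # A loader resolving %import statements from grammar files bundled
    # inside the `tests` package, under its grammars/ subdirectory.
    custom_loader = FromPackageLoader('tests', ('grammars',))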



