import_sources->import_paths, source->source_path, various implementation changes

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.0
MegaIng1 4 years ago
parent
commit a50fc10773
4 changed files with 81 additions and 63 deletions
1. lark-stubs/lark.pyi (+7 / -7)
2. lark/lark.py (+28 / -21)
3. lark/load_grammar.py (+43 / -32)
4. tests/test_parser.py (+3 / -3)

lark-stubs/lark.pyi (+7 / -7)

@@ -32,19 +32,19 @@ class LarkOptions:
     cache: Union[bool, str]
     g_regex_flags: int
     use_bytes: bool
-    import_sources: List[Union[str, Callable[[str, str], str]]]
-    source: Optional[str]
+    import_paths: List[Union[str, Callable[[Optional[str], str], Tuple[str, str]]]]
+    source_path: Optional[str]


 class FromPackageLoader:
     def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...): ...
-    def __call__(self, base_paths: List[str], grammar_path: str) -> Tuple[str, str]: ...
+    def __call__(self, base_paths: str, grammar_path: str) -> Tuple[str, str]: ...


 class Lark:
-    source: str
-    grammar_source: str
+    source_path: str
+    source_code: str
     options: LarkOptions
     lexer: Lexer
     terminals: List[TerminalDef]
@@ -68,8 +68,8 @@ class Lark:
         cache: Union[bool, str] = False,
         g_regex_flags: int = ...,
         use_bytes: bool = False,
-        import_sources: List[Union[str, Callable[[List[str], str], Tuple[str, str]]]] = ...,
-        source: Optional[str],
+        import_paths: List[Union[str, Callable[[Optional[str], str], Tuple[str, str]]]] = ...,
+        source_path: Optional[str],
     ):
         ...
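Taken together, the stub changes track the public rename: `import_sources` becomes `import_paths` (entries may be paths or loader callables, which now receive a single optional base path) and `source` becomes `source_path`. A minimal sketch of the new keyword arguments; the inline grammar and values are made-up examples, not from this commit:

    from lark import Lark

    # Before this commit: Lark(..., import_sources=[...], source=...)
    # After this commit:  Lark(..., import_paths=[...], source_path=...)
    parser = Lark('start: "a"', import_paths=[], source_path='<inline>')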



lark/lark.py (+28 / -21)

@@ -2,7 +2,7 @@ from __future__ import absolute_import

 import sys, os, pickle, hashlib
 from io import open
+from warnings import warn

 from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger
 from .load_grammar import load_grammar, FromPackageLoader
@@ -90,10 +90,10 @@ class LarkOptions(Serialize):
            Accept an input of type ``bytes`` instead of ``str`` (Python 3 only).
        edit_terminals
            A callback for editing the terminals before parse.
-       import_sources
+       import_paths
            A List of either paths or loader functions to specify from where grammars are imported
-       source
-           Override the source of from where the grammar was loaded. Usefull for relative imports and unconventional grammar loading
+       source_path
+           Override the source of from where the grammar was loaded. Useful for relative imports and unconventional grammar loading

     **=== End Options ===**
     """
@@ -119,8 +119,8 @@ class LarkOptions(Serialize):
         'edit_terminals': None,
         'g_regex_flags': 0,
         'use_bytes': False,
-        'import_sources': [],
-        'source': None,
+        'import_paths': [],
+        'source_path': None,
     }

     def __init__(self, options_dict):
@@ -196,13 +196,13 @@ class Lark(Serialize):
             re_module = re

         # Some, but not all file-like objects have a 'name' attribute
-        if self.options.source is None:
+        if self.options.source_path is None:
             try:
-                self.source = grammar.name
+                self.source_path = grammar.name
             except AttributeError:
-                self.source = '<string>'
+                self.source_path = '<string>'
         else:
-            self.source = self.options.source
+            self.source_path = self.options.source_path

         # Drain file-like objects to get their contents
         try:
@@ -213,7 +213,7 @@
             grammar = read()

         assert isinstance(grammar, STRING_TYPE)
-        self.grammar_source = grammar
+        self.source_code = grammar
         if self.options.use_bytes:
             if not isascii(grammar):
                 raise ValueError("Grammar must be ascii only, when use_bytes=True")
@@ -276,7 +276,7 @@
         assert self.options.ambiguity in ('resolve', 'explicit', 'forest', 'auto', )

         # Parse the grammar file and compose the grammars (TODO)
-        self.grammar = load_grammar(grammar, self.source, re_module, self.options.import_sources)
+        self.grammar = load_grammar(grammar, self.source_path, re_module, self.options.import_paths)

         # Compile the EBNF grammar into BNF
         self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start)
@@ -374,7 +374,7 @@
         self.options = LarkOptions.deserialize(options, memo)
         re_module = regex if self.options.regex else re
         self.rules = [Rule.deserialize(r, memo) for r in data['rules']]
-        self.source = '<deserialized>'
+        self.source_path = '<deserialized>'
         self._prepare_callbacks()
         self.parser = self.parser_class.deserialize(
             data['parser'],
@@ -416,9 +416,7 @@
         """Create an instance of Lark with the grammar loaded from within the package `package`.
         This allows grammar loading from zipapps.
-        Will also create a `FromPackageLoader` instance and add it to the `import_sources` to simplify importing
-        ``search_paths`` is passed to `FromPackageLoader`
+        Imports in the grammar will use the `package` and `search_paths` provided, through `FromPackageLoader`
         Example:
@@ -426,15 +424,15 @@
         """
         package = FromPackageLoader(package, search_paths)
         full_path, text = package([], grammar_path)
-        options.setdefault('source', full_path)
-        if 'import_sources' in options:
-            options['import_sources'].append(package)
+        options.setdefault('source_path', full_path)
+        if 'import_paths' in options:
+            options['import_paths'].append(package)
         else:
-            options['import_sources'] = [package]
+            options['import_paths'] = [package]
         return cls(text, **options)

     def __repr__(self):
-        return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source, self.options.parser, self.options.lexer)
+        return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer)


     def lex(self, text):
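A usage sketch for `open_from_package` after the rename; the package, file name, and search path are placeholders. Note that the `FromPackageLoader` it creates is appended to any `import_paths` the caller already passed, rather than replacing them:

    from lark import Lark

    # Loads "my_grammar.lark" from the "grammars/" directory of this package;
    # %imports inside it resolve through the same FromPackageLoader.
    parser = Lark.open_from_package(__name__, 'my_grammar.lark', ('grammars',), parser='lalr')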
@@ -481,6 +479,15 @@
                         # Prevent infinite loop
                         raise e2
                     e = e2

+    @property
+    def source(self):
+        warn("Lark.source attribute has been renamed to Lark.source_path", DeprecationWarning)
+        return self.source_path
+
+    @source.setter
+    def source(self, value):
+        self.source_path = value


 ###}
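The property added above keeps old code working while nudging it toward the new name. A quick sketch of the shim's behavior, assuming a trivial inline grammar:

    import warnings
    from lark import Lark

    parser = Lark('start: "a"')
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        assert parser.source == parser.source_path      # getter warns, then delegates
    assert issubclass(caught[-1].category, DeprecationWarning)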

lark/load_grammar.py (+43 / -32)

@@ -666,50 +666,61 @@ class FromPackageLoader(object):
     def __repr__(self):
         return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths)

-    def __call__(self, base_paths, grammar_path):
-        if len(base_paths) == 0:
+    def __call__(self, base_path, grammar_path):
+        if base_path is None:
             to_try = self.search_paths
         else:
-            assert len(base_paths) == 1
-            if not base_paths[0].startswith('<%s:' % (self.pkg_name,)):
+            # Check whether or not the importing grammar was loaded by this module.
+            if not base_path.startswith('<%s:' % (self.pkg_name,)):
                 # Technically false, but FileNotFound doesn't exist in python2.7, and this message should never reach the end user anyway
                 raise IOError()
-            base_path = base_paths[0].partition(':')[2]
-            if base_path and base_path[0] == '/':
-                base_path = base_path[1:]
+            # Separate the path and the pkg_name and throw away the slash. `pkgutil.get_data` doesn't like it. (see below)
+            base_path = base_path.partition(':')[2].lstrip('/')
             to_try = [base_path]
         for path in to_try:
             full_path = os.path.join(path, grammar_path)
-            text = None
-            with suppress(IOError):
+            try:
                 text = pkgutil.get_data(self.pkg_name, full_path)
-            if text is None:
+            except IOError:
                 continue
-            return '<%s:/%s>' % (self.pkg_name, full_path), text.decode()
+            else:
+                # Custom format `<{pkg_name}:/{full_path}>`
+                # These are the arguments to `pkgutil.get_data(pkg_name, full_path)`
+                # Required since we cannot easily provide an actual file path for all package data (e.g. from inside a zip)
+                # The additional slash after the `:` is to allow `os.path.split` to work on this without accidentally
+                # throwing away the `pkg_name` (as it otherwise would inside `GrammarLoader.load_grammar` when relative
+                # imports are resolved).
+                # Without the slash, `"<lark:common.lark>"` would turn into `""`, losing the package information.
+                # With the slash, `"<lark:/common.lark>"` turns into `"<lark:"`, and `"<lark:/grammars/common.lark>"`
+                # into `"<lark:/grammars"`, so we have to strip it away when we look at the path (see above)

+                return '<%s:/%s>' % (self.pkg_name, full_path), text.decode()
         raise IOError()

 stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)


 _imported_grammars = {}
-def import_grammar(grammar_path, re_, base_paths=[], import_sources=[]):
+def import_grammar(grammar_path, re_, base_path=None, import_paths=[]):
     if grammar_path not in _imported_grammars:
-        # import_sources take priority over base_paths since they should handle relative imports and ignore everthing else.
+        # import_paths take priority over base_path since they should handle relative imports and ignore everything else.
         # Question: should the stdlib_loader really be pushed to the end?
-        import_paths = import_sources + base_paths + [stdlib_loader]
-        for source in import_paths:
-            text = None
-            with suppress(IOError):
+        to_try = import_paths + ([base_path] if base_path is not None else []) + [stdlib_loader]
+        for source in to_try:
+            try:
                 if callable(source):
-                    joined_path, text = source(base_paths, grammar_path)
+                    joined_path, text = source(base_path, grammar_path)
                 else:
                     joined_path = os.path.join(source, grammar_path)
                     with open(joined_path, encoding='utf8') as f:
                         text = f.read()
-            if text is not None:
-                # Don't load the grammar from within the suppress statement. Otherwise the underlying error message will be swallowed
+            except IOError:
+                continue
+            else:
+                # Don't load the grammar from within the try statement. Otherwise the underlying error message will be swallowed
                 # and the wrong file will be reported as missing
-                grammar = load_grammar(text, joined_path, re_, import_sources)
+                grammar = load_grammar(text, joined_path, re_, import_paths)
                 _imported_grammars[grammar_path] = grammar
                 break
         else:
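The comment block in `__call__` is easiest to verify in a REPL: the slash after the colon is what keeps `os.path.split` from discarding the package part of the synthetic `<pkg:/path>` names. These values follow from `os.path` semantics, not from lark itself:

    >>> import os.path
    >>> os.path.split('<lark:common.lark>')            # no slash: everything lands in the tail
    ('', '<lark:common.lark>')
    >>> os.path.split('<lark:/common.lark>')           # with slash: the head keeps '<lark:'
    ('<lark:', 'common.lark')
    >>> os.path.split('<lark:/grammars/common.lark>')
    ('<lark:/grammars', 'common.lark')
    >>> '<lark:/grammars'.partition(':')[2].lstrip('/')  # how __call__ recovers the base path
    'grammars'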
@@ -868,7 +879,7 @@ class GrammarLoader:
         self.canonize_tree = CanonizeTree()
         self.re_module = re_module

-    def load_grammar(self, grammar_text, grammar_name='<?>', import_sources=[]):
+    def load_grammar(self, grammar_text, grammar_name='<?>', import_paths=[]):
         "Parse grammar_text, verify, and create Grammar object. Display nice messages on error."

         try:
@@ -922,7 +933,7 @@
                 aliases = {name: arg1 or name}  # Aliases if exist

                 if path_node.data == 'import_lib':  # Import from library
-                    base_paths = []
+                    base_path = None
                 else:  # Relative import
                     if grammar_name == '<string>':  # Import relative to script file path if grammar is coded in script
                         try:
@@ -932,16 +943,16 @@
                     else:
                         base_file = grammar_name  # Import relative to grammar file path if external grammar file
                     if base_file:
-                        base_paths = [os.path.split(base_file)[0]]
+                        base_path = os.path.split(base_file)[0]
                     else:
-                        base_paths = [os.path.abspath(os.path.curdir)]
+                        base_path = os.path.abspath(os.path.curdir)

                 try:
-                    import_base_paths, import_aliases = imports[dotted_path]
-                    assert base_paths == import_base_paths, 'Inconsistent base_paths for %s.' % '.'.join(dotted_path)
+                    import_base_path, import_aliases = imports[dotted_path]
+                    assert base_path == import_base_path, 'Inconsistent base_path for %s.' % '.'.join(dotted_path)
                     import_aliases.update(aliases)
                 except KeyError:
-                    imports[dotted_path] = base_paths, aliases
+                    imports[dotted_path] = base_path, aliases

             elif stmt.data == 'declare':
                 for t in stmt.children:
@@ -950,9 +961,9 @@
                 assert False, stmt

         # import grammars
-        for dotted_path, (base_paths, aliases) in imports.items():
+        for dotted_path, (base_path, aliases) in imports.items():
             grammar_path = os.path.join(*dotted_path) + EXT
-            g = import_grammar(grammar_path, self.re_module, base_paths=base_paths, import_sources=import_sources)
+            g = import_grammar(grammar_path, self.re_module, base_path=base_path, import_paths=import_paths)
             new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases)

             term_defs += new_td
@@ -1032,5 +1043,5 @@



-def load_grammar(grammar, source, re_, import_sources):
-    return GrammarLoader(re_).load_grammar(grammar, source, import_sources)
+def load_grammar(grammar, source, re_, import_paths):
+    return GrammarLoader(re_).load_grammar(grammar, source, import_paths)
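The resolution order after this change: every entry in `import_paths` (in order), then the single `base_path` when the import is relative, then `stdlib_loader` for lark's bundled grammars. A hedged sketch against the internal `import_grammar` helper; `my_grammars` is a hypothetical directory and this is not a public API:

    import re
    from lark.load_grammar import import_grammar

    # Tries './my_grammars/common.lark' first; if that raises IOError, falls back
    # to the copy bundled with lark, served by stdlib_loader.
    grammar = import_grammar('common.lark', re, base_path=None, import_paths=['my_grammars'])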

tests/test_parser.py (+3 / -3)

@@ -1792,7 +1792,7 @@ def _make_parser_test(LEXER, PARSER):
            %import ab.startab
            """

-            p = _Lark(grammar, import_sources=[custom_loader])
+            p = _Lark(grammar, import_paths=[custom_loader])
             self.assertEqual(p.parse('ab'),
                              Tree('start', [Tree('startab', [Tree('ab__expr', [Token('ab__A', 'a'), Token('ab__B', 'b')])])]))
@@ -1801,7 +1801,7 @@ def _make_parser_test(LEXER, PARSER):

            %import test_relative_import_of_nested_grammar__grammar_to_import.rule_to_import
            """
-            p = _Lark(grammar, import_sources=[custom_loader])
+            p = _Lark(grammar, import_paths=[custom_loader])
             x = p.parse('N')
             self.assertEqual(next(x.find_data('rule_to_import')).children, ['N'])
@@ -1810,7 +1810,7 @@
            %import .test_relative_import (start, WS)
            %ignore WS
            """
-            p = _Lark(grammar, import_sources=[custom_loader2])
+            p = _Lark(grammar, import_paths=[custom_loader2])
             x = p.parse('12 capybaras')
             self.assertEqual(x.children, ['12', 'capybaras'])


