Bläddra i källkod

Added `FromPackageLoader` and `open_from_package`

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.0
MegaIng1 4 år sedan
förälder
incheckning
009cc10590
4 ändrade filer med 120 tillägg och 31 borttagningar
  1. +12
    -0
      lark-stubs/lark.pyi
  2. +33
    -5
      lark/lark.py
  3. +55
    -20
      lark/load_grammar.py
  4. +20
    -6
      tests/test_parser.py

+ 12
- 0
lark-stubs/lark.pyi Visa fil

@@ -33,6 +33,13 @@ class LarkOptions:
g_regex_flags: int
use_bytes: bool
import_sources: List[Union[str, Callable[[str, str], str]]]
source: Optional[str]


class FromPackageLoader:
def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...): ...
def __call__(self, base_paths: List[str], grammar_path: str) -> Tuple[str, str]: ...


class Lark:
@@ -62,6 +69,7 @@ class Lark:
g_regex_flags: int = ...,
use_bytes: bool = False,
import_sources: List[Union[str, Callable[[List[str], str], Tuple[str, str]]]] = ...,
source: Optional[str],
):
...

@@ -71,6 +79,10 @@ class Lark:
@classmethod
def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options) -> _T:
...
@classmethod
def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...] = ..., **options) -> _T:
...

def lex(self, text: str) -> Iterator[Token]:
...


+ 33
- 5
lark/lark.py Visa fil

@@ -5,7 +5,7 @@ from io import open


from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger
from .load_grammar import load_grammar
from .load_grammar import load_grammar, FromPackageLoader
from .tree import Tree
from .common import LexerConf, ParserConf

@@ -92,6 +92,8 @@ class LarkOptions(Serialize):
A callback for editing the terminals before parse.
import_sources
A List of either paths or loader functions to specify from where grammars are imported
source
Override the source from which the grammar was loaded. Useful for relative imports and unconventional grammar loading.

**=== End Options ===**
"""
@@ -118,6 +120,7 @@ class LarkOptions(Serialize):
'g_regex_flags': 0,
'use_bytes': False,
'import_sources': [],
'source': None,
}

def __init__(self, options_dict):
@@ -193,10 +196,13 @@ class Lark(Serialize):
re_module = re

# Some, but not all file-like objects have a 'name' attribute
try:
self.source = grammar.name
except AttributeError:
self.source = '<string>'
if self.options.source is None:
try:
self.source = grammar.name
except AttributeError:
self.source = '<string>'
else:
self.source = self.options.source

# Drain file-like objects to get their contents
try:
@@ -404,6 +410,28 @@ class Lark(Serialize):
grammar_filename = os.path.join(basepath, grammar_filename)
with open(grammar_filename, encoding='utf8') as f:
return cls(f, **options)
@classmethod
def open_from_package(cls, package, grammar_path, search_paths=("",), **options):
    """Create an instance of Lark with the grammar loaded from within the package `package`.

    This allows grammar loading from zipapps.

    Also creates a `FromPackageLoader` instance and appends it to the
    `import_sources` option, so `%import` statements in the grammar resolve
    from within the same package.

    Parameters:
        package: name of the package the grammar lives in (usually ``__name__``).
        grammar_path: path of the grammar file relative to the package / search paths.
        search_paths: passed through to `FromPackageLoader`.
        **options: any other keyword options accepted by the constructor.

    Example:
        Lark.open_from_package(__name__, "example.lark", ("grammars",), parser=...)
    """
    package = FromPackageLoader(package, search_paths)
    full_path, text = package([], grammar_path)
    options.setdefault('source', full_path)
    # Build a new list instead of calling .append() on the caller's list:
    # mutating a user-supplied 'import_sources' would make their list grow
    # as a side effect of every open_from_package call.
    options['import_sources'] = list(options.get('import_sources', [])) + [package]
    return cls(text, **options)

def __repr__(self):
return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source, self.options.parser, self.options.lexer)


+ 55
- 20
lark/load_grammar.py Visa fil

@@ -4,6 +4,7 @@ import os.path
import sys
from copy import copy, deepcopy
from io import open
import pkgutil

from .utils import bfs, eval_escaping, Py36, logger, classify_bool
from .lexer import Token, TerminalDef, PatternStr, PatternRE
@@ -648,35 +649,69 @@ class Grammar:
return terminals, compiled_rules, self.ignore


def stdlib_loader(base_paths, grammar_path):
    """Resolve ``grammar_path`` against the grammars bundled with lark itself.

    Each directory listed in ``IMPORT_PATHS`` inside the ``lark`` package is
    probed via ``pkgutil.get_data``; the first hit is returned as a
    ``(virtual_path, text)`` pair. Raises ``FileNotFoundError`` when the
    grammar is not bundled.
    """
    import pkgutil
    for directory in IMPORT_PATHS:
        data = pkgutil.get_data('lark', '%s/%s' % (directory, grammar_path))
        if data is not None:
            return '<stdlib:%s>' % grammar_path, data.decode()
    raise FileNotFoundError()
class FromPackageLoader(object):
    """Import loader that reads grammars via ``pkgutil.get_data`` instead of ``open``.

    This keeps grammar imports working even when the package lives inside a
    zip file (zipapp). Relative imports are handled transparently.

    pkg_name: the name of the package; ``__name__`` is usually what you want.
    search_paths: all the paths that will be searched on absolute imports.
    """
    def __init__(self, pkg_name, search_paths=("", )):
        self.pkg_name = pkg_name
        self.search_paths = search_paths

    def __repr__(self):
        return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths)

    def __call__(self, base_paths, grammar_path):
        if not base_paths:
            # Absolute import: probe every configured search path in order.
            candidates = self.search_paths
        else:
            # Relative import: exactly one base path, which must be one of
            # the virtual '<pkg:/...>' paths this loader itself produced.
            assert len(base_paths) == 1
            if not base_paths[0].startswith('<%s:' % (self.pkg_name,)):
                # Technically false, but FileNotFound doesn't exist in python2.7, and this message should never reach the end user anyway
                raise IOError()
            relative_base = base_paths[0].partition(':')[2]
            if relative_base.startswith('/'):
                relative_base = relative_base[1:]
            candidates = [relative_base]
        for candidate in candidates:
            full_path = os.path.join(candidate, grammar_path)
            data = None
            with suppress(IOError):
                data = pkgutil.get_data(self.pkg_name, full_path)
            if data is not None:
                return '<%s:/%s>' % (self.pkg_name, full_path), data.decode()
        raise IOError()

# Package-based loader for lark's own bundled grammars; rebinds the name
# previously held by the stdlib_loader function above.
stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)


_imported_grammars = {}
def import_grammar(grammar_path, re_, base_paths=(), import_sources=()):
# NOTE(review): this span is a rendered diff hunk whose +/- markers were
# stripped, so removed (old) and added (new) lines are interleaved — e.g. the
# two identical `import_paths = ...` assignments and the duplicated
# `if callable(source):` / `else:` lines are before/after pairs, not real
# repetition. It is not runnable as-is; the comments below annotate intent only.
def import_grammar(grammar_path, re_, base_paths=[], import_sources=[]):
# NOTE(review): the defaults changed from tuples to mutable lists ([]); they
# appear to be only read here (list concatenation below) — confirm load_grammar
# does not mutate import_sources before relying on that.
if grammar_path not in _imported_grammars:
# Grammars are cached module-wide; each path is loaded at most once.
import_paths = import_sources + base_paths + [stdlib_loader]
# import_sources take priority over base_paths since they should handle relative imports and ignore everything else.
# Question: should the stdlib_loader really be pushed to the end?
import_paths = import_sources + base_paths + [stdlib_loader]
for source in import_paths:
if callable(source):
with suppress(IOError):
text = None
with suppress(IOError):
if callable(source):
# Custom loaders (e.g. FromPackageLoader) return (joined_path, text).
joined_path, text = source(base_paths, grammar_path)
grammar = load_grammar(text, joined_path, re_, import_sources)
_imported_grammars[grammar_path] = grammar
break
else:
with suppress(IOError):
else:
# Plain string source: treat it as a filesystem base directory.
joined_path = os.path.join(source, grammar_path)
with open(joined_path, encoding='utf8') as f:
text = f.read()
grammar = load_grammar(text, joined_path, re_, import_sources)
_imported_grammars[grammar_path] = grammar
break
if text is not None:
# Don't load the grammar from within the suppress statement. Otherwise the underlying error message will be swallowed
# and the wrong file will be reported as missing
grammar = load_grammar(text, joined_path, re_, import_sources)
_imported_grammars[grammar_path] = grammar
break
else:
# for/else: no source matched — re-open the bare path so the user gets a
# natural IOError naming the missing file; if open somehow succeeds,
# fail loudly.
open(grammar_path, encoding='utf8')
assert False


+ 20
- 6
tests/test_parser.py Visa fil

@@ -11,6 +11,7 @@ from copy import copy, deepcopy
from lark.utils import Py36, isascii

from lark import Token
from lark.load_grammar import FromPackageLoader

try:
from cStringIO import StringIO as cStringIO
@@ -1783,12 +1784,7 @@ def _make_parser_test(LEXER, PARSER):
self.assertRaises(IOError, _Lark, grammar)

def test_import_custom_sources(self):
def custom_loader(base_paths, grammar_path):
import pkgutil
text = pkgutil.get_data('tests', 'grammars/' + grammar_path)
if text is None:
raise FileNotFoundError()
return '<tests.grammars:' + grammar_path + '>', text.decode()
custom_loader = FromPackageLoader('tests', ('grammars', ))

grammar = """
start: startab
@@ -1800,6 +1796,24 @@ def _make_parser_test(LEXER, PARSER):
self.assertEqual(p.parse('ab'),
Tree('start', [Tree('startab', [Tree('ab__expr', [Token('ab__A', 'a'), Token('ab__B', 'b')])])]))

grammar = """
start: rule_to_import

%import test_relative_import_of_nested_grammar__grammar_to_import.rule_to_import
"""
p = _Lark(grammar, import_sources=[custom_loader])
x = p.parse('N')
self.assertEqual(next(x.find_data('rule_to_import')).children, ['N'])
custom_loader2 = FromPackageLoader('tests')
grammar = """
%import .test_relative_import (start, WS)
%ignore WS
"""
p = _Lark(grammar, import_sources=[custom_loader2])
x = p.parse('12 capybaras')
self.assertEqual(x.children, ['12', 'capybaras'])

@unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
def test_earley_prioritization(self):
"Tests effect of priority on result"


Laddar…
Avbryt
Spara