Browse Source

Added `FromPackageLoader` and `open_from_package`

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.0
MegaIng1 5 years ago
parent
commit
009cc10590
4 changed files with 120 additions and 31 deletions
  1. +12
    -0
      lark-stubs/lark.pyi
  2. +33
    -5
      lark/lark.py
  3. +55
    -20
      lark/load_grammar.py
  4. +20
    -6
      tests/test_parser.py

+ 12
- 0
lark-stubs/lark.pyi View File

@@ -33,6 +33,13 @@ class LarkOptions:
g_regex_flags: int g_regex_flags: int
use_bytes: bool use_bytes: bool
import_sources: List[Union[str, Callable[[str, str], str]]] import_sources: List[Union[str, Callable[[str, str], str]]]
source: Optional[str]


class FromPackageLoader:
    # Type stub for lark.load_grammar.FromPackageLoader: a callable import
    # loader that reads grammar files out of a package (zipapp-safe).
    def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...): ...
    # Returns a (resolved_path, grammar_text) pair or raises IOError.
    def __call__(self, base_paths: List[str], grammar_path: str) -> Tuple[str, str]: ...




class Lark: class Lark:
@@ -62,6 +69,7 @@ class Lark:
g_regex_flags: int = ..., g_regex_flags: int = ...,
use_bytes: bool = False, use_bytes: bool = False,
import_sources: List[Union[str, Callable[[List[str], str], Tuple[str, str]]]] = ..., import_sources: List[Union[str, Callable[[List[str], str], Tuple[str, str]]]] = ...,
source: Optional[str],
): ):
... ...


@@ -71,6 +79,10 @@ class Lark:
@classmethod @classmethod
def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options) -> _T: def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options) -> _T:
... ...
@classmethod
def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...] = ..., **options) -> _T:
    # Stub: constructs a Lark instance from a grammar stored inside `package`;
    # implementation lives in lark/lark.py.
    ...


def lex(self, text: str) -> Iterator[Token]: def lex(self, text: str) -> Iterator[Token]:
... ...


+ 33
- 5
lark/lark.py View File

@@ -5,7 +5,7 @@ from io import open




from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger
from .load_grammar import load_grammar
from .load_grammar import load_grammar, FromPackageLoader
from .tree import Tree from .tree import Tree
from .common import LexerConf, ParserConf from .common import LexerConf, ParserConf


@@ -92,6 +92,8 @@ class LarkOptions(Serialize):
A callback for editing the terminals before parse. A callback for editing the terminals before parse.
import_sources import_sources
A List of either paths or loader functions to specify from where grammars are imported A List of either paths or loader functions to specify from where grammars are imported
source
Override the source from which the grammar was loaded. Useful for relative imports and unconventional grammar loading


**=== End Options ===** **=== End Options ===**
""" """
@@ -118,6 +120,7 @@ class LarkOptions(Serialize):
'g_regex_flags': 0, 'g_regex_flags': 0,
'use_bytes': False, 'use_bytes': False,
'import_sources': [], 'import_sources': [],
'source': None,
} }


def __init__(self, options_dict): def __init__(self, options_dict):
@@ -193,10 +196,13 @@ class Lark(Serialize):
re_module = re re_module = re


# Some, but not all file-like objects have a 'name' attribute # Some, but not all file-like objects have a 'name' attribute
try:
self.source = grammar.name
except AttributeError:
self.source = '<string>'
if self.options.source is None:
try:
self.source = grammar.name
except AttributeError:
self.source = '<string>'
else:
self.source = self.options.source


# Drain file-like objects to get their contents # Drain file-like objects to get their contents
try: try:
@@ -404,6 +410,28 @@ class Lark(Serialize):
grammar_filename = os.path.join(basepath, grammar_filename) grammar_filename = os.path.join(basepath, grammar_filename)
with open(grammar_filename, encoding='utf8') as f: with open(grammar_filename, encoding='utf8') as f:
return cls(f, **options) return cls(f, **options)
@classmethod
def open_from_package(cls, package, grammar_path, search_paths=("",), **options):
    """Create an instance of Lark with the grammar loaded from within the package `package`.

    This allows grammar loading from zipapps.

    A `FromPackageLoader` instance is created from ``package`` and
    ``search_paths`` and appended to ``import_sources``, so ``%import``
    statements inside the grammar resolve against the same package.

    Example:
        Lark.open_from_package(__name__, "example.lark", ("grammars",), parser=...)
    """
    package_loader = FromPackageLoader(package, search_paths)
    full_path, text = package_loader([], grammar_path)
    # Keep any caller-supplied `source`; otherwise report the package-internal path.
    options.setdefault('source', full_path)
    # Build a new list instead of calling .append(): never mutate a list
    # object that the caller passed in and may still own/reuse.
    options['import_sources'] = list(options.get('import_sources', [])) + [package_loader]
    return cls(text, **options)


def __repr__(self): def __repr__(self):
return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source, self.options.parser, self.options.lexer) return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source, self.options.parser, self.options.lexer)


+ 55
- 20
lark/load_grammar.py View File

@@ -4,6 +4,7 @@ import os.path
import sys import sys
from copy import copy, deepcopy from copy import copy, deepcopy
from io import open from io import open
import pkgutil


from .utils import bfs, eval_escaping, Py36, logger, classify_bool from .utils import bfs, eval_escaping, Py36, logger, classify_bool
from .lexer import Token, TerminalDef, PatternStr, PatternRE from .lexer import Token, TerminalDef, PatternStr, PatternRE
@@ -648,35 +649,69 @@ class Grammar:
return terminals, compiled_rules, self.ignore return terminals, compiled_rules, self.ignore




def stdlib_loader(base_paths, grammar_path):
import pkgutil
for path in IMPORT_PATHS:
text = pkgutil.get_data('lark', path + '/' + grammar_path)
if text is None:
continue
return '<stdlib:' + grammar_path + '>', text.decode()
raise FileNotFoundError()
class FromPackageLoader(object):
    """Callable import loader that fetches grammar files from inside a Python
    package via ``pkgutil.get_data`` rather than ``open``, which keeps it
    working even when the package lives inside a zip file / zipapp.
    Relative imports are resolved against the package automatically.

    pkg_name: name of the package; ``__name__`` is usually what you want.
    search_paths: all the paths that are searched for absolute imports.
    """
    def __init__(self, pkg_name, search_paths=("", )):
        self.pkg_name = pkg_name
        self.search_paths = search_paths

    def __repr__(self):
        return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths)

    def __call__(self, base_paths, grammar_path):
        if not base_paths:
            # Absolute import: consider every configured search path.
            candidates = self.search_paths
        else:
            # Relative import: the single base path must have been produced by
            # this loader, i.e. it looks like '<pkg_name:/some/dir'.
            assert len(base_paths) == 1
            if not base_paths[0].startswith('<%s:' % (self.pkg_name,)):
                # Technically the wrong exception type, but FileNotFoundError doesn't
                # exist in python2.7 and this message should never reach the end user anyway.
                raise IOError()
            base_path = base_paths[0].partition(':')[2]
            if base_path.startswith('/'):
                base_path = base_path[1:]
            candidates = [base_path]
        for candidate in candidates:
            full_path = os.path.join(candidate, grammar_path)
            text = None
            with suppress(IOError):
                text = pkgutil.get_data(self.pkg_name, full_path)
            if text is None:
                continue
            # The returned path is re-parseable by this loader on relative imports.
            return '<%s:/%s>' % (self.pkg_name, full_path), text.decode()
        raise IOError()

# Loader for the grammars bundled with lark itself (used as the fallback of last resort).
stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)




_imported_grammars = {} _imported_grammars = {}
def import_grammar(grammar_path, re_, base_paths=(), import_sources=()):
def import_grammar(grammar_path, re_, base_paths=[], import_sources=[]):
if grammar_path not in _imported_grammars: if grammar_path not in _imported_grammars:
import_paths = import_sources + base_paths + [stdlib_loader]
# import_sources take priority over base_paths since they should handle relative imports and ignore everything else.
# Question: should the stdlib_loader really be pushed to the end?
import_paths = import_sources + base_paths + [stdlib_loader]
for source in import_paths: for source in import_paths:
if callable(source):
with suppress(IOError):
text = None
with suppress(IOError):
if callable(source):
joined_path, text = source(base_paths, grammar_path) joined_path, text = source(base_paths, grammar_path)
grammar = load_grammar(text, joined_path, re_, import_sources)
_imported_grammars[grammar_path] = grammar
break
else:
with suppress(IOError):
else:
joined_path = os.path.join(source, grammar_path) joined_path = os.path.join(source, grammar_path)
with open(joined_path, encoding='utf8') as f: with open(joined_path, encoding='utf8') as f:
text = f.read() text = f.read()
grammar = load_grammar(text, joined_path, re_, import_sources)
_imported_grammars[grammar_path] = grammar
break
if text is not None:
# Don't load the grammar from within the suppress statement. Otherwise the underlying error message will be swallowed
# and the wrong file will be reported as missing
grammar = load_grammar(text, joined_path, re_, import_sources)
_imported_grammars[grammar_path] = grammar
break
else: else:
open(grammar_path, encoding='utf8') open(grammar_path, encoding='utf8')
assert False assert False


+ 20
- 6
tests/test_parser.py View File

@@ -11,6 +11,7 @@ from copy import copy, deepcopy
from lark.utils import Py36, isascii from lark.utils import Py36, isascii


from lark import Token from lark import Token
from lark.load_grammar import FromPackageLoader


try: try:
from cStringIO import StringIO as cStringIO from cStringIO import StringIO as cStringIO
@@ -1783,12 +1784,7 @@ def _make_parser_test(LEXER, PARSER):
self.assertRaises(IOError, _Lark, grammar) self.assertRaises(IOError, _Lark, grammar)


def test_import_custom_sources(self): def test_import_custom_sources(self):
def custom_loader(base_paths, grammar_path):
import pkgutil
text = pkgutil.get_data('tests', 'grammars/' + grammar_path)
if text is None:
raise FileNotFoundError()
return '<tests.grammars:' + grammar_path + '>', text.decode()
custom_loader = FromPackageLoader('tests', ('grammars', ))


grammar = """ grammar = """
start: startab start: startab
@@ -1800,6 +1796,24 @@ def _make_parser_test(LEXER, PARSER):
self.assertEqual(p.parse('ab'), self.assertEqual(p.parse('ab'),
Tree('start', [Tree('startab', [Tree('ab__expr', [Token('ab__A', 'a'), Token('ab__B', 'b')])])])) Tree('start', [Tree('startab', [Tree('ab__expr', [Token('ab__A', 'a'), Token('ab__B', 'b')])])]))


grammar = """
start: rule_to_import

%import test_relative_import_of_nested_grammar__grammar_to_import.rule_to_import
"""
p = _Lark(grammar, import_sources=[custom_loader])
x = p.parse('N')
self.assertEqual(next(x.find_data('rule_to_import')).children, ['N'])
custom_loader2 = FromPackageLoader('tests')
grammar = """
%import .test_relative_import (start, WS)
%ignore WS
"""
p = _Lark(grammar, import_sources=[custom_loader2])
x = p.parse('12 capybaras')
self.assertEqual(x.children, ['12', 'capybaras'])

@unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules") @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
def test_earley_prioritization(self): def test_earley_prioritization(self):
"Tests effect of priority on result" "Tests effect of priority on result"


Loading…
Cancel
Save