Преглед на файлове

Added `import_sources`

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.0
MegaIng1 преди 4 години
родител
ревизия
53b3e12bba
променени са 4 файла, в които са добавени 56 реда и са изтрити 17 реда
  1. +3
    -1
      lark-stubs/lark.pyi
  2. +4
    -1
      lark/lark.py
  3. +31
    -15
      lark/load_grammar.py
  4. +18
    -0
      tests/test_parser.py

+ 3
- 1
lark-stubs/lark.pyi Целия файл

@@ -2,7 +2,7 @@

from typing import (
TypeVar, Type, List, Dict, IO, Iterator, Callable, Union, Optional,
Literal, Protocol,
Literal, Protocol, Tuple,
)
from .visitors import Transformer
from .lexer import Token, Lexer, TerminalDef
@@ -32,6 +32,7 @@ class LarkOptions:
cache: Union[bool, str]
g_regex_flags: int
use_bytes: bool
import_sources: List[Union[str, Callable[[str, str], str]]]


class Lark:
@@ -60,6 +61,7 @@ class Lark:
cache: Union[bool, str] = False,
g_regex_flags: int = ...,
use_bytes: bool = False,
import_sources: List[Union[str, Callable[[List[str], str], Tuple[str, str]]]] = ...,
):
...



+ 4
- 1
lark/lark.py Целия файл

@@ -90,6 +90,8 @@ class LarkOptions(Serialize):
Accept an input of type ``bytes`` instead of ``str`` (Python 3 only).
edit_terminals
A callback for editing the terminals before parse.
import_sources
A List of either paths or loader functions to specify from where grammars are imported

**=== End Options ===**
"""
@@ -115,6 +117,7 @@ class LarkOptions(Serialize):
'edit_terminals': None,
'g_regex_flags': 0,
'use_bytes': False,
'import_sources': [],
}

def __init__(self, options_dict):
@@ -267,7 +270,7 @@ class Lark(Serialize):
assert self.options.ambiguity in ('resolve', 'explicit', 'forest', 'auto', )

# Parse the grammar file and compose the grammars (TODO)
self.grammar = load_grammar(grammar, self.source, re_module)
self.grammar = load_grammar(grammar, self.source, re_module, self.options.import_sources)

# Compile the EBNF grammar into BNF
self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start)


+ 31
- 15
lark/load_grammar.py Целия файл

@@ -20,7 +20,7 @@ from .visitors import Transformer, Visitor, v_args, Transformer_InPlace, Transfo
inline_args = v_args(inline=True)

__path__ = os.path.dirname(__file__)
IMPORT_PATHS = [os.path.join(__path__, 'grammars')]
IMPORT_PATHS = ['grammars']

EXT = '.lark'

@@ -648,19 +648,35 @@ class Grammar:
return terminals, compiled_rules, self.ignore


def stdlib_loader(base_paths, grammar_path):
    """Default grammar loader: fetch *grammar_path* from the grammars bundled
    with the ``lark`` package itself.

    Follows the custom-loader protocol: takes ``(base_paths, grammar_path)``
    (``base_paths`` is unused here) and returns a ``(display_name, text)``
    tuple. Raises ``FileNotFoundError`` when no bundled grammar matches.
    """
    import pkgutil
    for path in IMPORT_PATHS:
        # pkgutil.get_data may return None (loader without data support) or
        # raise OSError/FileNotFoundError (resource missing) -- treat both as
        # "not found under this path, try the next one".
        with suppress(OSError):
            text = pkgutil.get_data('lark', path + '/' + grammar_path)
            if text is not None:
                return '<stdlib:' + grammar_path + '>', text.decode()
    # FileNotFoundError is an OSError/IOError subclass, so callers that
    # suppress IOError (see import_grammar) handle this as "not found".
    raise FileNotFoundError(grammar_path)


# Process-wide cache of already-imported grammars, keyed by grammar_path.
_imported_grammars = {}
def import_grammar(grammar_path, re_, base_paths=(), import_sources=()):
    """Import (and memoize) the grammar at *grammar_path*.

    Resolution order: user-supplied ``import_sources`` (directory-path strings
    or loader callables), then the importing grammar's ``base_paths``, and
    finally the stdlib grammars bundled with lark.

    A loader callable has the signature
    ``(base_paths, grammar_path) -> (display_name, grammar_text)`` and signals
    "not found" by raising IOError. Raises IOError when no source can provide
    the grammar.
    """
    if grammar_path not in _imported_grammars:
        # list(...) so the tuple defaults and caller-supplied lists can be
        # concatenated -- ``() + [stdlib_loader]`` would raise TypeError.
        import_paths = list(import_sources) + list(base_paths) + [stdlib_loader]
        for source in import_paths:
            if isinstance(source, str):
                # Plain directory path: read the grammar file from disk.
                with suppress(IOError):
                    joined_path = os.path.join(source, grammar_path)
                    with open(joined_path, encoding='utf8') as f:
                        text = f.read()
                    grammar = load_grammar(text, joined_path, re_, import_sources)
                    _imported_grammars[grammar_path] = grammar
                    break
            else:
                # Loader callable per the protocol documented above.
                with suppress(IOError):
                    joined_path, text = source(base_paths, grammar_path)
                    grammar = load_grammar(text, joined_path, re_, import_sources)
                    _imported_grammars[grammar_path] = grammar
                    break
        else:
            # No source matched: re-open to surface a meaningful IOError
            # (with the OS error message) instead of inventing our own.
            open(grammar_path, encoding='utf8')
            assert False
@@ -817,7 +833,7 @@ class GrammarLoader:
self.canonize_tree = CanonizeTree()
self.re_module = re_module

def load_grammar(self, grammar_text, grammar_name='<?>'):
def load_grammar(self, grammar_text, grammar_name='<?>', import_sources=[]):
"Parse grammar_text, verify, and create Grammar object. Display nice messages on error."

try:
@@ -901,7 +917,7 @@ class GrammarLoader:
# import grammars
for dotted_path, (base_paths, aliases) in imports.items():
grammar_path = os.path.join(*dotted_path) + EXT
g = import_grammar(grammar_path, self.re_module, base_paths=base_paths)
g = import_grammar(grammar_path, self.re_module, base_paths=base_paths, import_sources=import_sources)
new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases)

term_defs += new_td
@@ -981,5 +997,5 @@ class GrammarLoader:



def load_grammar(grammar, source, re_, import_sources=None):
    """Parse *grammar* text and return a verified Grammar object.

    ``import_sources`` optionally lists extra directory paths or loader
    callables consulted when resolving ``%import`` statements. It defaults to
    none so that pre-existing three-argument callers keep working.
    """
    return GrammarLoader(re_).load_grammar(grammar, source, import_sources or [])

+ 18
- 0
tests/test_parser.py Целия файл

@@ -1782,6 +1782,24 @@ def _make_parser_test(LEXER, PARSER):
"""
self.assertRaises(IOError, _Lark, grammar)

def test_import_custom_sources(self):
    """A loader callable passed via import_sources must resolve %import."""
    def package_loader(base_paths, grammar_path):
        # Resolve the grammar from the tests package data rather than the
        # filesystem; protocol: return (display_name, grammar_text).
        import pkgutil
        data = pkgutil.get_data('tests', 'grammars/' + grammar_path)
        if data is None:
            raise FileNotFoundError()
        return '<tests.grammars:' + grammar_path + '>', data.decode()

    grammar = """
start: startab

%import ab.startab
"""

    parser = _Lark(grammar, import_sources=[package_loader])
    expected = Tree('start', [
        Tree('startab', [
            Tree('ab__expr', [Token('ab__A', 'a'), Token('ab__B', 'b')]),
        ]),
    ])
    self.assertEqual(parser.parse('ab'), expected)

@unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
def test_earley_prioritization(self):
"Tests effect of priority on result"


Зареждане…
Отказ
Запис