Browse files

Merge branch 'master' into custom_import_sources

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.0
MegaIng, 4 years ago, committed by GitHub
commit 46339c112e
No known key found for this signature in database. GPG key ID: 4AEE18F83AFDEB23
14 changed files with 91 additions and 36 deletions
  1. lark-stubs/__init__.pyi        +2  -0
  2. lark-stubs/lark.pyi            +3  -1
  3. lark/__init__.py               +1  -1
  4. lark/lark.py                   +25 -15
  5. lark/load_grammar.py           +2  -2
  6. lark/parser_frontends.py       +13 -6
  7. lark/parsers/lalr_analysis.py  +8  -1
  8. lark/parsers/lalr_parser.py    +2  -2
  9. lark/tools/standalone.py       +2  -2
 10. lark/tree_matcher.py           +2  -1
 11. tests/test_cache.py            +6  -3
 12. tests/test_parser.py           +23 -0
 13. tests/test_tools.py            +1  -1
 14. tests/test_trees.py            +1  -1

lark-stubs/__init__.pyi  +2 -0

@@ -5,5 +5,7 @@ from .visitors import *
 from .exceptions import *
 from .lexer import *
 from .lark import *
+from logging import Logger as _Logger
 
+logger: _Logger
 __version__: str = ...
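
The stub now declares the `logger` object that lark itself exposes. A minimal usage sketch (the grammar string is illustrative) for turning on lark's debug logging:

    import logging
    from lark import Lark, logger

    # Lower lark's log level so that debug=True output (e.g. shift/reduce
    # resolution notes from the LALR analyzer) actually gets emitted.
    logger.setLevel(logging.DEBUG)
    parser = Lark('start: "a"', parser='lalr', debug=True)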

lark-stubs/lark.pyi  +3 -1

@@ -2,7 +2,7 @@
 
 from typing import (
     TypeVar, Type, List, Dict, IO, Iterator, Callable, Union, Optional,
-    Literal, Protocol, Tuple,
+    Literal, Protocol, Tuple, Iterable,
 )
 from .visitors import Transformer
 from .lexer import Token, Lexer, TerminalDef
@@ -14,6 +14,8 @@ class PostLex(Protocol):
 
     def process(self, stream: Iterator[Token]) -> Iterator[Token]:
         ...
+
+    always_accept: Iterable[str]
 
 
 class LarkOptions:
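
The new `always_accept` field is what the postlex handling added below keys on: terminals named there survive grammar compilation even if no rule uses them. A minimal conforming post-lexer might look like this (a sketch; the terminal name is illustrative, and the full flow is exercised by test_postlex_declare in tests/test_parser.py below):

    from typing import Iterator
    from lark.lexer import Token

    class CommentFilter:
        # Terminals this post-lexer consumes; without listing them here, a
        # terminal referenced by no rule would be pruned as unused.
        always_accept = ('COMMENT',)

        def process(self, stream: Iterator[Token]) -> Iterator[Token]:
            for tok in stream:
                if tok.type != 'COMMENT':  # drop comments, pass the rest through
                    yield tok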


lark/__init__.py  +1 -1

@@ -7,4 +7,4 @@ from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken,
 from .lexer import Token
 from .lark import Lark
 
-__version__ = "0.10.0"
+__version__ = "0.10.1"

lark/lark.py  +25 -15

@@ -169,6 +169,10 @@ class LarkOptions(Serialize):
         return cls(data)
 
 
+_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'use_bytes', 'debug', 'g_regex_flags',
+                         'regex', 'propagate_positions', 'keep_all_tokens', 'tree_class'}
+
+
 class Lark(Serialize):
     """Main interface for the library.
 
@@ -239,8 +243,11 @@ class Lark(Serialize):
 
             if FS.exists(cache_fn):
                 logger.debug('Loading grammar from cache: %s', cache_fn)
+                # Remove options that aren't relevant for loading from cache
+                for name in (set(options) - _LOAD_ALLOWED_OPTIONS):
+                    del options[name]
                 with FS.open(cache_fn, 'rb') as f:
-                    self._load(f, self.options.transformer, self.options.postlex)
+                    self._load(f, **options)
                 return
 
         if self.options.lexer == 'auto':
@@ -278,8 +285,13 @@ class Lark(Serialize):
         # Parse the grammar file and compose the grammars (TODO)
         self.grammar = load_grammar(grammar, self.source_path, re_module, self.options.import_paths)
 
+        if self.options.postlex is not None:
+            terminals_to_keep = set(self.options.postlex.always_accept)
+        else:
+            terminals_to_keep = set()
+
         # Compile the EBNF grammar into BNF
-        self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start)
+        self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start, terminals_to_keep)
 
         if self.options.edit_terminals:
             for t in self.terminals:
@@ -319,7 +331,8 @@ class Lark(Serialize):
             with FS.open(cache_fn, 'wb') as f:
                 self.save(f)
 
-    __doc__ += "\n\n" + LarkOptions.OPTIONS_DOC
+    if __doc__:
+        __doc__ += "\n\n" + LarkOptions.OPTIONS_DOC
 
     __serialize_fields__ = 'parser', 'rules', 'options'
 
@@ -345,7 +358,7 @@ class Lark(Serialize):
        Useful for caching and multiprocessing.
        """
        data, m = self.memo_serialize([TerminalDef, Rule])
-       pickle.dump({'data': data, 'memo': m}, f)
+       pickle.dump({'data': data, 'memo': m}, f, protocol=pickle.HIGHEST_PROTOCOL)
 
     @classmethod
     def load(cls, f):
@@ -356,7 +369,7 @@ class Lark(Serialize):
         inst = cls.__new__(cls)
         return inst._load(f)
 
-    def _load(self, f, transformer=None, postlex=None):
+    def _load(self, f, **kwargs):
         if isinstance(f, dict):
             d = f
         else:
@@ -367,12 +380,11 @@ class Lark(Serialize):
         assert memo
         memo = SerializeMemoizer.deserialize(memo, {'Rule': Rule, 'TerminalDef': TerminalDef}, {})
         options = dict(data['options'])
-        if transformer is not None:
-            options['transformer'] = transformer
-        if postlex is not None:
-            options['postlex'] = postlex
+        if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults):
+            raise ValueError("Some options are not allowed when loading a Parser: {}"
+                             .format(set(kwargs) - _LOAD_ALLOWED_OPTIONS))
+        options.update(kwargs)
         self.options = LarkOptions.deserialize(options, memo)
-        re_module = regex if self.options.regex else re
         self.rules = [Rule.deserialize(r, memo) for r in data['rules']]
         self.source_path = '<deserialized>'
         self._prepare_callbacks()
@@ -380,18 +392,16 @@ class Lark(Serialize):
             data['parser'],
             memo,
             self._callbacks,
-            self.options.postlex,
-            self.options.transformer,
-            re_module
+            self.options,  # Not all, but multiple attributes are used
         )
         self.terminals = self.parser.lexer_conf.tokens
         self._terminals_dict = {t.name: t for t in self.terminals}
         return self
 
     @classmethod
-    def _load_from_dict(cls, data, memo, transformer=None, postlex=None):
+    def _load_from_dict(cls, data, memo, **kwargs):
         inst = cls.__new__(cls)
-        return inst._load({'data': data, 'memo': memo}, transformer, postlex)
+        return inst._load({'data': data, 'memo': memo}, **kwargs)
 
     @classmethod
     def open(cls, grammar_filename, rel_to=None, **options):
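
Taken together, these hunks let every option in _LOAD_ALLOWED_OPTIONS survive a cache round-trip, instead of only transformer and postlex. A sketch of the resulting behavior (the grammar is illustrative; it mirrors the new assertion in tests/test_cache.py below):

    from lark import Lark

    g = 'start: "a"'
    Lark(g, parser='lalr', cache=True, debug=True)           # first run: builds the parser, writes the cache
    parser = Lark(g, parser='lalr', cache=True, debug=True)  # second run: loads from cache, debug re-applied
    assert parser.options.options['debug']

Options outside the whitelist are stripped before loading from cache, and passing one explicitly to Lark.load()/_load_from_dict() now raises ValueError.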


lark/load_grammar.py  +2 -2

@@ -527,7 +527,7 @@ class Grammar:
         self.rule_defs = rule_defs
         self.ignore = ignore
 
-    def compile(self, start):
+    def compile(self, start, terminals_to_keep):
         # We change the trees in-place (to support huge grammars)
         # So deepcopy allows calling compile more than once.
         term_defs = deepcopy(list(self.term_defs))
@@ -642,7 +642,7 @@
         used_terms = {t.name for r in compiled_rules
                       for t in r.expansion
                       if isinstance(t, Terminal)}
-        terminals, unused = classify_bool(terminals, lambda t: t.name in used_terms or t.name in self.ignore)
+        terminals, unused = classify_bool(terminals, lambda t: t.name in used_terms or t.name in self.ignore or t.name in terminals_to_keep)
         if unused:
             logger.debug("Unused terminals: %s", [t.name for t in unused])



lark/parser_frontends.py  +13 -6

@@ -6,6 +6,11 @@ from .parsers.lalr_parser import LALR_Parser
 from .grammar import Rule
 from .tree import Tree
 from .common import LexerConf
+try:
+    import regex
+except ImportError:
+    regex = None
+import re
 
 ###{standalone
 
@@ -82,16 +87,18 @@ class WithLexer(_ParserFrontend):
         self.postlex = lexer_conf.postlex
 
     @classmethod
-    def deserialize(cls, data, memo, callbacks, postlex, transformer, re_module):
+    def deserialize(cls, data, memo, callbacks, options):
         inst = super(WithLexer, cls).deserialize(data, memo)
 
-        inst.postlex = postlex
-        inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks)
+        inst.postlex = options.postlex
+        inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks, options.debug)
 
         terminals = [item for item in memo.values() if isinstance(item, TerminalDef)]
-        inst.lexer_conf.callbacks = _get_lexer_callbacks(transformer, terminals)
-        inst.lexer_conf.re_module = re_module
-        inst.lexer_conf.skip_validation=True
+        inst.lexer_conf.callbacks = _get_lexer_callbacks(options.transformer, terminals)
+        inst.lexer_conf.re_module = regex if options.regex else re
+        inst.lexer_conf.use_bytes = options.use_bytes
+        inst.lexer_conf.g_regex_flags = options.g_regex_flags
+        inst.lexer_conf.skip_validation = True
         inst.init_lexer()
 
         return inst


lark/parsers/lalr_analysis.py  +8 -1

@@ -246,13 +246,14 @@ class LALR_Analyzer(GrammarAnalyzer):
 
     def compute_lalr1_states(self):
         m = {}
+        reduce_reduce = []
         for state in self.lr0_states:
             actions = {}
             for la, next_state in state.transitions.items():
                 actions[la] = (Shift, next_state.closure)
             for la, rules in state.lookaheads.items():
                 if len(rules) > 1:
-                    raise GrammarError('Reduce/Reduce collision in %s between the following rules: %s' % (la, ''.join([ '\n\t\t- ' + str(r) for r in rules ])))
+                    reduce_reduce.append((la, rules))
                 if la in actions:
                     if self.debug:
                         logger.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
@@ -261,6 +262,12 @@
                 actions[la] = (Reduce, list(rules)[0])
             m[state] = { k.name: v for k, v in actions.items() }
 
+        if reduce_reduce:
+            msgs = [ 'Reduce/Reduce collision in %s between the following rules: %s'
+                     % (la, ''.join([ '\n\t\t- ' + str(r) for r in rules ]))
+                     for la, rules in reduce_reduce]
+            raise GrammarError('\n\n'.join(msgs))
+
         states = { k.closure: v for k, v in m.items() }
 
         # compute end states
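
With the collisions accumulated in reduce_reduce, a grammar is now rejected with a single GrammarError listing every Reduce/Reduce collision, rather than failing on the first one found. A sketch that triggers the error (the grammar is deliberately ambiguous):

    from lark import Lark, GrammarError

    # After shifting "x", LALR(1) cannot decide whether to reduce rule
    # `a` or rule `b` at end of input -- a classic reduce/reduce collision.
    grammar = """
    start: a | b
    a: "x"
    b: "x"
    """

    try:
        Lark(grammar, parser='lalr')
    except GrammarError as e:
        print(e)  # all colliding lookaheads, joined by blank lines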


lark/parsers/lalr_parser.py  +2 -2

@@ -23,10 +23,10 @@ class LALR_Parser(object):
         self.parser = _Parser(analysis.parse_table, callbacks, debug)
 
     @classmethod
-    def deserialize(cls, data, memo, callbacks):
+    def deserialize(cls, data, memo, callbacks, debug=False):
         inst = cls.__new__(cls)
         inst._parse_table = IntParseTable.deserialize(data, memo)
-        inst.parser = _Parser(inst._parse_table, callbacks)
+        inst.parser = _Parser(inst._parse_table, callbacks, debug)
         return inst
 
     def serialize(self, memo):


lark/tools/standalone.py  +2 -2

@@ -145,8 +145,8 @@ def main(fobj, start, print=print):
 
     print('Shift = 0')
     print('Reduce = 1')
-    print("def Lark_StandAlone(transformer=None, postlex=None):")
-    print("  return Lark._load_from_dict(DATA, MEMO, transformer=transformer, postlex=postlex)")
+    print("def Lark_StandAlone(**kwargs):")
+    print("  return Lark._load_from_dict(DATA, MEMO, **kwargs)")





lark/tree_matcher.py  +2 -1

@@ -81,7 +81,8 @@ class TreeMatcher:
     def __init__(self, parser):
         # XXX TODO calling compile twice returns different results!
         assert parser.options.maybe_placeholders == False
-        self.tokens, rules, _extra = parser.grammar.compile(parser.options.start)
+        # XXX TODO: we just ignore the potential existence of a postlexer
+        self.tokens, rules, _extra = parser.grammar.compile(parser.options.start, set())
 
         self.rules_for_root = defaultdict(list)



tests/test_cache.py  +6 -3

@@ -86,6 +86,12 @@ class TestCache(TestCase):
             parser = Lark(g, parser='lalr', lexer=CustomLexer, cache=True)
             assert len(mock_fs.files) == 1
             assert parser.parse('a') == Tree('start', [])
+
+            # Test options persistence
+            mock_fs.files = {}
+            Lark(g, parser="lalr", debug=True, cache=True)
+            parser = Lark(g, parser="lalr", debug=True, cache=True)
+            assert parser.options.options['debug']
         finally:
             lark_module.FS = fs
 
@@ -93,6 +99,3 @@
 
 if __name__ == '__main__':
     main()
-
-
-




tests/test_parser.py  +23 -0

@@ -1782,6 +1782,29 @@ def _make_parser_test(LEXER, PARSER):
                 %import bad_test.NUMBER
                 """
             self.assertRaises(IOError, _Lark, grammar)
+
+        @unittest.skipIf(LEXER=='dynamic', "%declare/postlex doesn't work with dynamic")
+        def test_postlex_declare(self): # Note: this test does a lot. maybe split it up?
+            class TestPostLexer:
+                def process(self, stream):
+                    for t in stream:
+                        if t.type == 'A':
+                            t.type = 'B'
+                            yield t
+                        else:
+                            yield t
+
+                always_accept = ('A',)
+
+            parser = _Lark("""
+            start: B
+            A: "A"
+            %declare B
+            """, postlex=TestPostLexer())
+
+            test_file = "A"
+            tree = parser.parse(test_file)
+            self.assertEqual(tree.children, [Token('B', 'A')])
 
         def test_import_custom_sources(self):
             custom_loader = FromPackageLoader('tests', ('grammars', ))


tests/test_tools.py  +1 -1

@@ -25,7 +25,7 @@ class TestStandalone(TestCase):
         standalone.main(StringIO(grammar), 'start', print=pr)
         code = code_buf.getvalue()
 
-        context = {}
+        context = {'__doc__': None}
         exec(code, context)
         return context
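
Pre-seeding `__doc__` makes the exec'd namespace behave like a module whose docstrings were stripped (what `python -OO` produces), which is exactly the case the new `if __doc__:` guard in lark/lark.py handles. A small demonstration of the guard's behavior under exec:

    code = "if __doc__:\n    __doc__ += ' extra'\nprint(__doc__)"
    exec(code, {'__doc__': None})    # prints None: the guard skips the +=
    exec(code, {'__doc__': 'Doc.'})  # prints 'Doc. extra'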



tests/test_trees.py  +1 -1

@@ -20,7 +20,7 @@ class TestTrees(TestCase):
 
     def test_pickle(self):
         s = copy.deepcopy(self.tree1)
-        data = pickle.dumps(s)
+        data = pickle.dumps(s, protocol=pickle.HIGHEST_PROTOCOL)
         assert pickle.loads(data) == s
 
     def test_repr_runnable(self):

def test_repr_runnable(self):

