Pārlūkot izejas kodu

Merge branch 'MegaIng-keep_all_maybe'

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.0
Erez Sh pirms 4 gadiem
vecāks
revīzija
131012b893
4 mainīti faili ar 41 papildinājumu un 26 dzēšanām
  1. +8
    -2
      lark/lark.py
  2. +27
    -21
      lark/load_grammar.py
  3. +2
    -3
      lark/parse_tree_builder.py
  4. +4
    -0
      tests/test_parser.py

+ 8
- 2
lark/lark.py Parādīt failu

@@ -274,7 +274,7 @@ class Lark(Serialize):
assert self.options.ambiguity in ('resolve', 'explicit', 'forest', 'auto', )

# Parse the grammar file and compose the grammars (TODO)
self.grammar = load_grammar(grammar, self.source, re_module)
self.grammar = load_grammar(grammar, self.source, re_module, self.options.keep_all_tokens)

if self.options.postlex is not None:
terminals_to_keep = set(self.options.postlex.always_accept)
@@ -335,7 +335,13 @@ class Lark(Serialize):
self._callbacks = None
# we don't need these callbacks if we aren't building a tree
if self.options.ambiguity != 'forest':
self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class or Tree, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr' and self.options.ambiguity=='explicit', self.options.maybe_placeholders)
self._parse_tree_builder = ParseTreeBuilder(
self.rules,
self.options.tree_class or Tree,
self.options.propagate_positions,
self.options.parser!='lalr' and self.options.ambiguity=='explicit',
self.options.maybe_placeholders
)
self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)

def _build_parser(self):


+ 27
- 21
lark/load_grammar.py Parādīt failu

@@ -650,22 +650,6 @@ class Grammar:


_imported_grammars = {}
def import_grammar(grammar_path, re_, base_paths=None):
    """Load, parse and cache the grammar file at *grammar_path*.

    The file is searched for in *base_paths* (if given) followed by the
    standard IMPORT_PATHS; the first hit is parsed with load_grammar()
    and memoized in the module-level ``_imported_grammars`` cache, so a
    grammar imported from several places is only read and parsed once.

    Parameters:
        grammar_path: path of the grammar file, relative to a search dir.
        re_: regex module (``re`` or ``regex``) forwarded to load_grammar.
        base_paths: optional sequence of directories searched before
            IMPORT_PATHS.  Defaults to no extra directories.

    Returns:
        The cached Grammar object for *grammar_path*.

    Raises:
        IOError/FileNotFoundError: if the file exists in none of the
            search directories.
    """
    # base_paths used to default to a mutable [] — use a None sentinel
    # so callers can never share (and accidentally mutate) the default.
    if grammar_path not in _imported_grammars:
        import_paths = (list(base_paths) if base_paths else []) + IMPORT_PATHS
        for import_path in import_paths:
            # IOError means "not in this directory" — try the next one.
            with suppress(IOError):
                joined_path = os.path.join(import_path, grammar_path)
                with open(joined_path, encoding='utf8') as f:
                    text = f.read()
                grammar = load_grammar(text, joined_path, re_)
                _imported_grammars[grammar_path] = grammar
                break
        else:
            # Not found anywhere: open the bare path so the caller gets the
            # standard "file not found" error carrying the path.  Close the
            # handle in the (unexpected) success case instead of leaking it.
            open(grammar_path, encoding='utf8').close()
            assert False, 'import_grammar: %r unexpectedly readable' % grammar_path

    return _imported_grammars[grammar_path]

def import_from_grammar_into_namespace(grammar, namespace, aliases):
"""Returns all rules and terminals of grammar, prepended
@@ -803,7 +787,7 @@ class GrammarLoader:
('%ignore expects a value', ['%ignore %import\n']),
]

def __init__(self, re_module):
def __init__(self, re_module, global_keep_all_tokens):
terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]

rules = [options_from_rule(name, None, x) for name, x in RULES.items()]
@@ -816,6 +800,24 @@ class GrammarLoader:

self.canonize_tree = CanonizeTree()
self.re_module = re_module
self.global_keep_all_tokens = global_keep_all_tokens

def import_grammar(self, grammar_path, base_paths=None):
    """Load, parse and cache the grammar file at *grammar_path*.

    The file is searched for in *base_paths* (if given) followed by the
    standard IMPORT_PATHS; the first hit is parsed with
    ``self.load_grammar`` (so loader settings such as
    ``global_keep_all_tokens`` apply) and memoized in the module-level
    ``_imported_grammars`` cache.

    Parameters:
        grammar_path: path of the grammar file, relative to a search dir.
        base_paths: optional sequence of directories searched before
            IMPORT_PATHS.  Defaults to no extra directories.

    Returns:
        The cached Grammar object for *grammar_path*.

    Raises:
        IOError/FileNotFoundError: if the file exists in none of the
            search directories.
    """
    # base_paths used to default to a mutable [] — use a None sentinel
    # so callers can never share (and accidentally mutate) the default.
    if grammar_path not in _imported_grammars:
        import_paths = (list(base_paths) if base_paths else []) + IMPORT_PATHS
        for import_path in import_paths:
            # IOError means "not in this directory" — try the next one.
            with suppress(IOError):
                joined_path = os.path.join(import_path, grammar_path)
                with open(joined_path, encoding='utf8') as f:
                    text = f.read()
                grammar = self.load_grammar(text, joined_path)
                _imported_grammars[grammar_path] = grammar
                break
        else:
            # Not found anywhere: open the bare path to force a standard
            # "file not found" error carrying the path.  Close the handle
            # in the (unexpected) success case instead of leaking it.
            open(grammar_path, encoding='utf8').close()
            assert False, 'import_grammar: %r unexpectedly readable' % grammar_path

    return _imported_grammars[grammar_path]

def load_grammar(self, grammar_text, grammar_name='<?>'):
"Parse grammar_text, verify, and create Grammar object. Display nice messages on error."
@@ -901,7 +903,7 @@ class GrammarLoader:
# import grammars
for dotted_path, (base_paths, aliases) in imports.items():
grammar_path = os.path.join(*dotted_path) + EXT
g = import_grammar(grammar_path, self.re_module, base_paths=base_paths)
g = self.import_grammar(grammar_path, base_paths=base_paths)
new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases)

term_defs += new_td
@@ -946,7 +948,11 @@ class GrammarLoader:
rules = rule_defs

rule_names = {}
for name, params, _x, _o in rules:
for name, params, _x, option in rules:
# We can't just simply not throw away the tokens later, we need option.keep_all_tokens to correctly generate maybe_placeholders
if self.global_keep_all_tokens:
option.keep_all_tokens = True

if name.startswith('__'):
raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name)
if name in rule_names:
@@ -981,5 +987,5 @@ class GrammarLoader:



def load_grammar(grammar, source, re_):
return GrammarLoader(re_).load_grammar(grammar, source)
def load_grammar(grammar, source, re_, global_keep_all_tokens):
    """Parse *grammar* text into a Grammar object.

    Convenience wrapper that builds a throwaway GrammarLoader configured
    with *re_* (the regex module to use) and *global_keep_all_tokens*,
    then delegates to its ``load_grammar`` method.  *source* names the
    grammar's origin for error messages.
    """
    loader = GrammarLoader(re_, global_keep_all_tokens)
    return loader.load_grammar(grammar, source)

+ 2
- 3
lark/parse_tree_builder.py Parādīt failu

@@ -299,10 +299,9 @@ def apply_visit_wrapper(func, name, wrapper):


class ParseTreeBuilder:
def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False, maybe_placeholders=False):
def __init__(self, rules, tree_class, propagate_positions=False, ambiguous=False, maybe_placeholders=False):
self.tree_class = tree_class
self.propagate_positions = propagate_positions
self.always_keep_all_tokens = keep_all_tokens
self.ambiguous = ambiguous
self.maybe_placeholders = maybe_placeholders

@@ -311,7 +310,7 @@ class ParseTreeBuilder:
def _init_builders(self, rules):
for rule in rules:
options = rule.options
keep_all_tokens = self.always_keep_all_tokens or options.keep_all_tokens
keep_all_tokens = options.keep_all_tokens
expand_single_child = options.expand1

wrapper_chain = list(filter(None, [


+ 4
- 0
tests/test_parser.py Parādīt failu

@@ -2064,6 +2064,10 @@ def _make_parser_test(LEXER, PARSER):
# Anonymous tokens shouldn't count
p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
self.assertEqual(p.parse("").children, [])
# Unless keep_all_tokens=True
p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True, keep_all_tokens=True)
self.assertEqual(p.parse("").children, [None, None, None])

# All invisible constructs shouldn't count
p = _Lark("""start: [A] ["b"] [_c] ["e" "f" _c]


Notiek ielāde…
Atcelt
Saglabāt