Browse Source

Expand rules reconstructor test passes

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.10.0
julienmalard 4 years ago
parent
commit
bca6cfa458
2 changed files with 39 additions and 9 deletions
  1. +38
    -8
      lark/reconstruct.py
  2. +1
    -1
      tests/test_nearley/nearley

+ 38
- 8
lark/reconstruct.py View File

@@ -86,6 +86,14 @@ def best_from_group(seq, group_key, cmp_key):
d[key] = item d[key] = item
return list(d.values()) return list(d.values())



def make_recons_rule(origin, expansion, old_expansion):
    """Build the reconstruction ``Rule`` for ``origin``.

    The returned rule expands ``origin`` into ``expansion``. Its alias is a
    ``MakeMatchTree`` created from ``origin.name`` and ``old_expansion``.
    """
    match_tree = MakeMatchTree(origin.name, old_expansion)
    return Rule(origin, expansion, alias=match_tree)

def make_recons_rule_to_term(origin, term):
    """Build a reconstruction rule for ``origin`` that expands to one terminal.

    The new expansion is a single ``Terminal`` carrying ``term.name``; the
    one-element list ``[term]`` is passed along as the old expansion.
    """
    terminal_expansion = [Terminal(term.name)]
    original_expansion = [term]
    return make_recons_rule(origin, terminal_expansion, original_expansion)


class Reconstructor: class Reconstructor:
def __init__(self, parser, term_subs={}): def __init__(self, parser, term_subs={}):
# XXX TODO calling compile twice returns different results! # XXX TODO calling compile twice returns different results!
@@ -93,6 +101,8 @@ class Reconstructor:
tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start) tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start)


self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens}, term_subs) self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens}, term_subs)
self.rules_for_root = defaultdict(list)

self.rules = list(self._build_recons_rules(rules)) self.rules = list(self._build_recons_rules(rules))
self.rules.reverse() self.rules.reverse()


@@ -100,9 +110,8 @@ class Reconstructor:
self.rules = best_from_group(self.rules, lambda r: r, lambda r: -len(r.expansion)) self.rules = best_from_group(self.rules, lambda r: r, lambda r: -len(r.expansion))


self.rules.sort(key=lambda r: len(r.expansion)) self.rules.sort(key=lambda r: len(r.expansion))
callbacks = {rule: rule.alias for rule in self.rules} # TODO pass callbacks through dict, instead of alias?
self.parser = earley.Parser(ParserConf(self.rules, callbacks, parser.options.start),
self._match, resolve_ambiguity=True)
self.parser = parser
self._parser_cache = {}


def _build_recons_rules(self, rules): def _build_recons_rules(self, rules):
expand1s = {r.origin for r in rules if r.options.expand1} expand1s = {r.origin for r in rules if r.options.expand1}
@@ -116,22 +125,35 @@ class Reconstructor:
nonterminals = {sym for sym in rule_names nonterminals = {sym for sym in rule_names
if sym.name.startswith('_') or sym in expand1s or sym in aliases } if sym.name.startswith('_') or sym in expand1s or sym in aliases }


seen = set()
for r in rules: for r in rules:
recons_exp = [sym if sym in nonterminals else Terminal(sym.name) recons_exp = [sym if sym in nonterminals else Terminal(sym.name)
for sym in r.expansion if not is_discarded_terminal(sym)] for sym in r.expansion if not is_discarded_terminal(sym)]


# Skip self-recursive constructs # Skip self-recursive constructs
if recons_exp == [r.origin]:
if recons_exp == [r.origin] and r.alias is None:
continue continue


sym = NonTerminal(r.alias) if r.alias else r.origin sym = NonTerminal(r.alias) if r.alias else r.origin
rule = make_recons_rule(sym, recons_exp, r.expansion)

if sym in expand1s and len(recons_exp) != 1:
self.rules_for_root[sym.name].append(rule)


yield Rule(sym, recons_exp, alias=MakeMatchTree(sym.name, r.expansion))
if sym.name not in seen:
yield make_recons_rule_to_term(sym, sym)
seen.add(sym.name)
else:
if sym.name.startswith('_') or sym in expand1s:
yield rule
else:
self.rules_for_root[sym.name].append(rule)
# yield rule # Rule(sym, recons_exp, alias=MakeMatchTree(sym.name, r.expansion))


for origin, rule_aliases in aliases.items(): for origin, rule_aliases in aliases.items():
for alias in rule_aliases: for alias in rule_aliases:
yield Rule(origin, [Terminal(alias)], alias=MakeMatchTree(origin.name, [NonTerminal(alias)]))
yield Rule(origin, [Terminal(origin.name)], alias=MakeMatchTree(origin.name, [origin]))
yield make_recons_rule_to_term(origin, NonTerminal(alias))
yield make_recons_rule_to_term(origin, origin)


def _match(self, term, token): def _match(self, term, token):
if isinstance(token, Tree): if isinstance(token, Tree):
@@ -142,7 +164,15 @@ class Reconstructor:


def _reconstruct(self, tree): def _reconstruct(self, tree):
# TODO: ambiguity? # TODO: ambiguity?
unreduced_tree = self.parser.parse(tree.children, tree.data) # find a full derivation
try:
parser = self._parser_cache[tree.data]
except KeyError:
rules = self.rules + self.rules_for_root[tree.data]
callbacks = {rule: rule.alias for rule in rules} # TODO pass callbacks through dict, instead of alias?
parser = earley.Parser(ParserConf(rules, callbacks, [tree.data]), self._match, resolve_ambiguity=True)
self._parser_cache[tree.data] = parser

unreduced_tree = parser.parse(tree.children, tree.data) # find a full derivation
assert unreduced_tree.data == tree.data assert unreduced_tree.data == tree.data
res = self.write_tokens.transform(unreduced_tree) res = self.write_tokens.transform(unreduced_tree)
for item in res: for item in res:


+ 1
- 1
tests/test_nearley/nearley

@@ -1 +1 @@
Subproject commit a46b37471db486db0f6e1ce6a2934fb238346b44
Subproject commit cf8925f729bde741a3076c5856c0c0862bc7f5de

Loading…
Cancel
Save