diff --git a/lark/reconstruct.py b/lark/reconstruct.py index 1e3adc7..d6eccf5 100644 --- a/lark/reconstruct.py +++ b/lark/reconstruct.py @@ -86,6 +86,14 @@ def best_from_group(seq, group_key, cmp_key): d[key] = item return list(d.values()) + +def make_recons_rule(origin, expansion, old_expansion): + return Rule(origin, expansion, alias=MakeMatchTree(origin.name, old_expansion)) + +def make_recons_rule_to_term(origin, term): + return make_recons_rule(origin, [Terminal(term.name)], [term]) + + class Reconstructor: def __init__(self, parser, term_subs={}): # XXX TODO calling compile twice returns different results! @@ -93,6 +101,8 @@ class Reconstructor: tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start) self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens}, term_subs) + self.rules_for_root = defaultdict(list) + self.rules = list(self._build_recons_rules(rules)) self.rules.reverse() @@ -100,9 +110,8 @@ class Reconstructor: self.rules = best_from_group(self.rules, lambda r: r, lambda r: -len(r.expansion)) self.rules.sort(key=lambda r: len(r.expansion)) - callbacks = {rule: rule.alias for rule in self.rules} # TODO pass callbacks through dict, instead of alias? - self.parser = earley.Parser(ParserConf(self.rules, callbacks, parser.options.start), - self._match, resolve_ambiguity=True) + self.parser = parser + self._parser_cache = {} def _build_recons_rules(self, rules): expand1s = {r.origin for r in rules if r.options.expand1} @@ -116,22 +125,35 @@ class Reconstructor: nonterminals = {sym for sym in rule_names if sym.name.startswith('_') or sym in expand1s or sym in aliases } + seen = set() for r in rules: recons_exp = [sym if sym in nonterminals else Terminal(sym.name) for sym in r.expansion if not is_discarded_terminal(sym)] # Skip self-recursive constructs - if recons_exp == [r.origin]: + if recons_exp == [r.origin] and r.alias is None: continue sym = NonTerminal(r.alias) if r.alias else r.origin + rule = make_recons_rule(sym, recons_exp, r.expansion) + + if sym in expand1s and len(recons_exp) != 1: + self.rules_for_root[sym.name].append(rule) - yield Rule(sym, recons_exp, alias=MakeMatchTree(sym.name, r.expansion)) + if sym.name not in seen: + yield make_recons_rule_to_term(sym, sym) + seen.add(sym.name) + else: + if sym.name.startswith('_') or sym in expand1s: + yield rule + else: + self.rules_for_root[sym.name].append(rule) + # yield rule # Rule(sym, recons_exp, alias=MakeMatchTree(sym.name, r.expansion)) for origin, rule_aliases in aliases.items(): for alias in rule_aliases: - yield Rule(origin, [Terminal(alias)], alias=MakeMatchTree(origin.name, [NonTerminal(alias)])) - yield Rule(origin, [Terminal(origin.name)], alias=MakeMatchTree(origin.name, [origin])) + yield make_recons_rule_to_term(origin, NonTerminal(alias)) + yield make_recons_rule_to_term(origin, origin) def _match(self, term, token): if isinstance(token, Tree): @@ -142,7 +164,15 @@ class Reconstructor: def _reconstruct(self, tree): # TODO: ambiguity? - unreduced_tree = self.parser.parse(tree.children, tree.data) # find a full derivation + try: + parser = self._parser_cache[tree.data] + except KeyError: + rules = self.rules + self.rules_for_root[tree.data] + callbacks = {rule: rule.alias for rule in rules} # TODO pass callbacks through dict, instead of alias? + parser = earley.Parser(ParserConf(rules, callbacks, [tree.data]), self._match, resolve_ambiguity=True) + self._parser_cache[tree.data] = parser + + unreduced_tree = parser.parse(tree.children, tree.data) # find a full derivation assert unreduced_tree.data == tree.data res = self.write_tokens.transform(unreduced_tree) for item in res: diff --git a/tests/test_nearley/nearley b/tests/test_nearley/nearley index a46b374..cf8925f 160000 --- a/tests/test_nearley/nearley +++ b/tests/test_nearley/nearley @@ -1 +1 @@ -Subproject commit a46b37471db486db0f6e1ce6a2934fb238346b44 +Subproject commit cf8925f729bde741a3076c5856c0c0862bc7f5de