diff --git a/lark/reconstruct.py b/lark/reconstruct.py
index be1a13a..08fcdc6 100644
--- a/lark/reconstruct.py
+++ b/lark/reconstruct.py
@@ -1,9 +1,9 @@
 from collections import defaultdict
 
-from .tree import Tree
+from .tree import Tree, Transformer_NoRecurse
 from .common import is_terminal, ParserConf, PatternStr
 from .lexer import Token
-from .parsers import earley
+from .parsers import earley, resolve_ambig
 from .grammar import Rule
 
 
@@ -18,57 +18,82 @@ def is_iter_empty(i):
     except StopIteration:
         return True
 
+class WriteTokensTransformer(Transformer_NoRecurse):
+    def __init__(self, tokens):
+        self.tokens = tokens
+
+    def __default__(self, t):
+        if not isinstance(t, MatchTree):
+            return t
+
+        iter_args = iter(t.children)
+        to_write = []
+        for sym in t.orig_expansion:
+            if is_discarded_terminal(sym):
+                t = self.tokens[sym]
+                assert isinstance(t.pattern, PatternStr)
+                to_write.append(t.pattern.value)
+            else:
+                x = next(iter_args)
+                if isinstance(x, list):
+                    to_write += x
+                else:
+                    if isinstance(x, Token):
+                        assert x.type == sym, x
+                    else:
+                        assert x.data == sym, (sym, x)
+                    to_write.append(x)
+
+        assert is_iter_empty(iter_args)
+        return to_write
+
+
+class MatchTree(Tree):
+    pass
+
+class MakeMatchTree:
+    def __init__(self, name, expansion):
+        self.name = name
+        self.expansion = expansion
+
+    def __call__(self, args):
+        t = MatchTree(self.name, args)
+        t.orig_expansion = self.expansion
+        return t
+
 class Reconstructor:
     def __init__(self, parser):
         # Recreate the rules to assume a standard lexer
         _tokens, rules, _grammar_extra = parser.grammar.compile(lexer='standard', start='whatever')
 
-        tokens = {t.name:t for t in _tokens}
-
-
-        class WriteTokens:
-            def __init__(self, name, expansion):
-                self.name = name
-                self.expansion = expansion
-
-            def f(self, args):
-                iter_args = iter(args)
-                to_write = []
-                for sym in self.expansion:
-                    if is_discarded_terminal(sym):
-                        t = tokens[sym]
-                        assert isinstance(t.pattern, PatternStr)
-                        to_write.append(t.pattern.value)
-                    else:
-                        x = next(iter_args)
-                        if isinstance(x, list):
-                            to_write += x
-                        else:
-                            if isinstance(x, Token):
-                                assert x.type == sym, x
-                            else:
-                                assert x.data == sym, x
-                            to_write.append(x)
-
-                assert is_iter_empty(iter_args)
-                return to_write
 
         expand1s = {r.origin for r in parser.rules if r.options and r.options.expand1}
 
         d = defaultdict(list)
         for r in rules:
+            # Rules can match their alias
             if r.alias:
                 d[r.alias].append(r.expansion)
                 d[r.origin].append([r.alias])
             else:
                 d[r.origin].append(r.expansion)
 
-        self.rules = []
+            # Expanded rules can match their own terminal
+            for sym in r.expansion:
+                if sym in expand1s:
+                    d[sym].append([sym.upper()])
+
+        reduced_rules = defaultdict(list)
         for name, expansions in d.items():
             for expansion in expansions:
                 reduced = [sym if sym.startswith('_') or sym in expand1s else sym.upper()
                            for sym in expansion if not is_discarded_terminal(sym)]
 
-                self.rules.append(Rule(name, reduced, WriteTokens(name, expansion).f, None))
+                reduced_rules[name, tuple(reduced)].append(expansion)
+
+        self.rules = [Rule(name, list(reduced), MakeMatchTree(name, expansions[0]), None)
+                      for (name, reduced), expansions in reduced_rules.items()]
+
+        self.write_tokens = WriteTokensTransformer({t.name:t for t in _tokens})
 
 
     def _match(self, term, token):
@@ -80,8 +105,10 @@ class Reconstructor:
     def _reconstruct(self, tree):
         # TODO: ambiguity?
-        parser = earley.Parser(ParserConf(self.rules, None, tree.data), self._match)
-        res = parser.parse(tree.children)
+        parser = earley.Parser(ParserConf(self.rules, None, tree.data), self._match, resolve_ambiguity=resolve_ambig.standard_resolve_ambig)
+        unreduced_tree = parser.parse(tree.children)   # find a full derivation
+        assert unreduced_tree.data == tree.data
+        res = self.write_tokens.transform(unreduced_tree)
 
         for item in res:
             if isinstance(item, Tree):
                 for x in self._reconstruct(item):
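
Usage sketch (not part of the patch): a minimal driver in the spirit of lark's JSON
reconstruction example, showing how the reworked Reconstructor is typically invoked.
The grammar string, the input text, and the assumption that Reconstructor exposes a
public reconstruct() wrapper around _reconstruct() are illustrative, not taken from
this diff; details may differ for the lark version this patch targets.

    # Hypothetical example, following the pattern of lark's reconstruct_json example.
    from lark import Lark
    from lark.reconstruct import Reconstructor

    json_grammar = r"""
        ?start: value

        ?value: object
              | array
              | string
              | SIGNED_NUMBER      -> number
              | "true"             -> true
              | "false"            -> false
              | "null"             -> null

        array  : "[" [value ("," value)*] "]"
        object : "{" [pair ("," pair)*] "}"
        pair   : string ":" value

        string : ESCAPED_STRING

        %import common.ESCAPED_STRING
        %import common.SIGNED_NUMBER
        %import common.WS
        %ignore WS
    """

    json_parser = Lark(json_grammar)
    tree = json_parser.parse('{"key": ["item0", 3.14, true]}')

    # _reconstruct() re-parses the shaped tree with Earley (resolving ambiguity via
    # standard_resolve_ambig), and WriteTokensTransformer re-inserts the discarded
    # literal terminals ("{", ":", ",", ...) from their PatternStr values.
    new_json = Reconstructor(json_parser).reconstruct(tree)
    print(new_json)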