from collections import defaultdict from .tree import Tree from .visitors import Transformer_InPlace from .common import ParserConf from .lexer import Token, PatternStr from .parsers import earley, resolve_ambig from .grammar import Rule, Terminal, NonTerminal def is_discarded_terminal(t): return t.is_term and t.filter_out def is_iter_empty(i): try: _ = next(i) return False except StopIteration: return True class WriteTokensTransformer(Transformer_InPlace): def __init__(self, tokens): self.tokens = tokens def __default__(self, data, children, meta): # if not isinstance(t, MatchTree): # return t if not getattr(meta, 'match_tree', False): return Tree(data, children) iter_args = iter(children) to_write = [] for sym in meta.orig_expansion: if is_discarded_terminal(sym): t = self.tokens[sym.name] assert isinstance(t.pattern, PatternStr) to_write.append(t.pattern.value) else: x = next(iter_args) if isinstance(x, list): to_write += x else: if isinstance(x, Token): assert Terminal(x.type) == sym, x else: assert NonTerminal(x.data) == sym, (sym, x) to_write.append(x) assert is_iter_empty(iter_args) return to_write class MatchTree(Tree): pass class MakeMatchTree: def __init__(self, name, expansion): self.name = name self.expansion = expansion def __call__(self, args): t = MatchTree(self.name, args) t.meta.match_tree = True t.meta.orig_expansion = self.expansion return t class Reconstructor: def __init__(self, parser): # XXX TODO calling compile twice returns different results! tokens, rules, _grammar_extra = parser.grammar.compile() self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens}) self.rules = list(self._build_recons_rules(rules)) def _build_recons_rules(self, rules): expand1s = {r.origin for r in rules if r.options and r.options.expand1} aliases = defaultdict(list) for r in rules: if r.alias: aliases[r.origin].append( r.alias ) rule_names = {r.origin for r in rules} nonterminals = {sym for sym in rule_names if sym.name.startswith('_') or sym in expand1s or sym in aliases } for r in rules: recons_exp = [sym if sym in nonterminals else Terminal(sym.name) for sym in r.expansion if not is_discarded_terminal(sym)] # Skip self-recursive constructs if recons_exp == [r.origin]: continue sym = NonTerminal(r.alias) if r.alias else r.origin yield Rule(sym, recons_exp, MakeMatchTree(sym.name, r.expansion)) for origin, rule_aliases in aliases.items(): for alias in rule_aliases: yield Rule(origin, [Terminal(alias)], MakeMatchTree(origin.name, [NonTerminal(alias)])) yield Rule(origin, [Terminal(origin.name)], MakeMatchTree(origin.name, [origin])) def _match(self, term, token): if isinstance(token, Tree): return Terminal(token.data) == term elif isinstance(token, Token): return term == Terminal(token.type) assert False def _reconstruct(self, tree): # TODO: ambiguity? parser = earley.Parser(ParserConf(self.rules, None, tree.data), self._match, resolve_ambiguity=resolve_ambig.standard_resolve_ambig) unreduced_tree = parser.parse(tree.children) # find a full derivation assert unreduced_tree.data == tree.data res = self.write_tokens.transform(unreduced_tree) for item in res: if isinstance(item, Tree): for x in self._reconstruct(item): yield x else: yield item def reconstruct(self, tree): return ''.join(self._reconstruct(tree))