"""Rebuild a string from a parse tree.

Each tree node's children are re-parsed against a reduced form of the
grammar rules to find which original expansion produced them; the literal
text of terminals that were discarded from the tree is then re-inserted.
"""

from collections import defaultdict

from .tree import Tree, Transformer_NoRecurse
from .common import is_terminal, ParserConf, PatternStr
from .lexer import Token
from .parsers import earley, resolve_ambig
from .grammar import Rule


def is_discarded_terminal(t):
    """Return a truthy value if `t` is a terminal whose name starts with '_'."""
    return is_terminal(t) and t.startswith('_')


def is_iter_empty(i):
    """Return True if iterator `i` has no more items.

    NOTE: when the iterator is not empty, one item is consumed and lost.
    """
    try:
        next(i)
    except StopIteration:
        return True
    return False


class WriteTokensTransformer(Transformer_NoRecurse):
    """Flatten MatchTree nodes into lists of items to write out.

    `tokens` maps a terminal name to its definition; each definition is
    expected to expose a `pattern` attribute (see `__default__`).
    """

    def __init__(self, tokens):
        self.tokens = tokens

    def __default__(self, t):
        """Return the list of items to write for `t`.

        Nodes that are not MatchTree instances are returned unchanged.
        For a MatchTree, the original expansion is walked symbol by symbol:
        discarded terminals contribute their literal pattern value, every
        other symbol consumes the next child of `t`.

        NOTE(review): presumably invoked by Transformer_NoRecurse for every
        node without a dedicated callback -- confirm against the base class.
        """
        if not isinstance(t, MatchTree):
            return t

        iter_args = iter(t.children)
        to_write = []
        for sym in t.orig_expansion:
            if is_discarded_terminal(sym):
                # The terminal is absent from the tree, so its text has to
                # come from the definition; only literal strings can be
                # written back unambiguously.
                token_def = self.tokens[sym]
                assert isinstance(token_def.pattern, PatternStr)
                to_write.append(token_def.pattern.value)
            else:
                x = next(iter_args)
                if isinstance(x, list):
                    # A child that is already a list of items is spliced in.
                    to_write += x
                else:
                    if isinstance(x, Token):
                        assert x.type == sym, x
                    else:
                        assert x.data == sym, (sym, x)
                    to_write.append(x)

        # Every child must have been matched by a symbol of the expansion.
        assert is_iter_empty(iter_args)
        return to_write


class MatchTree(Tree):
    """Tree built by the reconstruction rules; carries `orig_expansion`."""
    pass


class MakeMatchTree:
    """Callable that builds a MatchTree tagged with its original expansion."""

    def __init__(self, name, expansion):
        self.name = name
        self.expansion = expansion

    def __call__(self, args):
        """Return MatchTree(self.name, args) with `orig_expansion` attached."""
        t = MatchTree(self.name, args)
        t.orig_expansion = self.expansion
        return t


class Reconstructor:
    """Rebuild text from trees produced by `parser`'s grammar.

    `parser` must expose `grammar` (with a `compile` method) and `rules`.
    """

    def __init__(self, parser):
        # Recreate the rules to assume a standard lexer
        _tokens, rules, _grammar_extra = parser.grammar.compile(lexer='standard', start='whatever')

        # Origins of rules flagged with the `expand1` option.
        expand1s = {r.origin for r in parser.rules if r.options and r.options.expand1}

        d = defaultdict(list)
        for r in rules:
            # Rules can match their alias
            if r.alias:
                d[r.alias].append(r.expansion)
                d[r.origin].append([r.alias])
            else:
                d[r.origin].append(r.expansion)

            # Expanded rules can match their own terminal
            for sym in r.expansion:
                if sym in expand1s:
                    d[sym].append([sym.upper()])

        # Group the original expansions by (rule name, reduced expansion).
        # The reduced form drops discarded terminals and upper-cases every
        # symbol that neither starts with '_' nor is an expand1 rule; the
        # upper-cased names are what `_match` compares child trees against.
        reduced_rules = defaultdict(list)
        for name, expansions in d.items():
            for expansion in expansions:
                reduced = [sym if sym.startswith('_') or sym in expand1s else sym.upper()
                           for sym in expansion if not is_discarded_terminal(sym)]

                reduced_rules[name, tuple(reduced)].append(expansion)

        # When several original expansions share one reduced form, only the
        # first one is kept for writing the text back.
        self.rules = [Rule(name, list(reduced), MakeMatchTree(name, expansions[0]), None)
                      for (name, reduced), expansions in reduced_rules.items()]

        self.write_tokens = WriteTokensTransformer({t.name: t for t in _tokens})

    def _match(self, term, token):
        """Tell whether the tree item `token` matches the terminal name `term`.

        A Tree matches when its upper-cased `data` equals `term`; a Token
        matches when its `type` equals `term`.

        Raises AssertionError for any other kind of item. The exception is
        raised explicitly so the check is not stripped under `python -O`.
        """
        if isinstance(token, Tree):
            return token.data.upper() == term
        elif isinstance(token, Token):
            return term == token.type
        raise AssertionError('Unexpected item in tree: %r' % (token,))

    def _reconstruct(self, tree):
        """Yield, in order, the non-Tree items that make up `tree`.

        The children of `tree` are parsed with the reduced rules, using
        `tree.data` as the start symbol, to recover a full derivation;
        nested Tree items are then reconstructed recursively.
        """
        # TODO: ambiguity?
        parser = earley.Parser(ParserConf(self.rules, None, tree.data),
                               self._match,
                               resolve_ambiguity=resolve_ambig.standard_resolve_ambig)
        unreduced_tree = parser.parse(tree.children)   # find a full derivation
        assert unreduced_tree.data == tree.data
        res = self.write_tokens.transform(unreduced_tree)
        for item in res:
            if isinstance(item, Tree):
                for x in self._reconstruct(item):
                    yield x
            else:
                yield item

    def reconstruct(self, tree):
        """Return the items yielded for `tree` joined into a single string.

        NOTE(review): relies on every yielded item being a string; Token is
        presumably a str subclass -- confirm.
        """
        return ''.join(self._reconstruct(tree))