|
- from collections import defaultdict
-
- from .tree import Tree
- from .visitors import Transformer_InPlace
- from .common import ParserConf
- from .lexer import Token, PatternStr
- from .parsers import earley
- from .grammar import Rule, Terminal, NonTerminal
-
-
-
def is_discarded_terminal(t):
    """Return True if symbol *t* is a terminal that the grammar filters out.

    Filtered-out terminals never appear in the parse tree, so the
    reconstructor must re-insert their text when generating source code.
    (``bool()`` pins the predicate's return type instead of leaking the raw
    attribute values through ``and``.)
    """
    return bool(t.is_term and t.filter_out)
-
def is_iter_empty(i):
    """Return True if iterator *i* is exhausted.

    Note: when the iterator is NOT empty, this consumes exactly one item
    from it (same as the original try/next probe).
    """
    for _ in i:
        return False
    return True
-
-
class WriteTokensTransformer(Transformer_InPlace):
    """Inserts discarded tokens into their correct place, according to the rules of grammar.

    Walks a MatchTree produced by the reconstruction parser and, for every
    terminal the grammar filtered out, re-inserts its text: either the result
    of a user-supplied substitution callback from ``term_subs``, or the
    terminal's literal pattern.
    """

    def __init__(self, tokens, term_subs):
        # tokens: dict mapping terminal name -> terminal definition
        #         (used for the literal-pattern fallback)
        # term_subs: dict mapping terminal name -> callable(symbol) -> str
        self.tokens = tokens
        self.term_subs = term_subs

    def __default__(self, data, children, meta):
        if not getattr(meta, 'match_tree', False):
            # Not a tree built by the match-parser; pass it through untouched.
            return Tree(data, children)

        iter_args = iter(children)
        to_write = []
        for sym in meta.orig_expansion:
            if is_discarded_terminal(sym):
                # Fix: only guard the *lookup* with try/except. The original
                # wrapped the callback invocation too, so a KeyError raised
                # inside a user substitution was silently swallowed and
                # replaced by the literal-pattern fallback.
                try:
                    sub = self.term_subs[sym.name]
                except KeyError:
                    # No user substitution registered: emit the terminal's
                    # literal text. Only plain-string patterns can be
                    # reconstructed; regexps have no single canonical text.
                    t = self.tokens[sym.name]
                    if not isinstance(t.pattern, PatternStr):
                        raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t)

                    v = t.pattern.value
                else:
                    v = sub(sym)
                to_write.append(v)
            else:
                # Non-discarded symbol: consume the next child and sanity-check
                # that it matches the expected symbol from the expansion.
                x = next(iter_args)
                if isinstance(x, list):
                    to_write += x
                else:
                    if isinstance(x, Token):
                        assert Terminal(x.type) == sym, x
                    else:
                        assert NonTerminal(x.data) == sym, (sym, x)
                    to_write.append(x)

        # Every child must have been matched against the expansion.
        assert is_iter_empty(iter_args)
        return to_write
-
-
class MatchTree(Tree):
    """Marker subclass for trees built by the internal reconstruction parser
    (distinguishes them from user parse trees)."""
    pass
-
class MakeMatchTree:
    """Callable used as a rule alias: builds a MatchTree for a reduction and
    tags its meta with the rule's original expansion, so that
    WriteTokensTransformer can later re-insert discarded terminals."""

    def __init__(self, name, expansion):
        self.name = name
        self.expansion = expansion

    def __call__(self, args):
        tree = MatchTree(self.name, args)
        tree.meta.match_tree = True
        tree.meta.orig_expansion = self.expansion
        return tree
-
def best_from_group(seq, group_key, cmp_key):
    """Group *seq* by ``group_key`` and keep, from each group, the element
    with the smallest ``cmp_key`` (the earliest such element wins ties).

    Groups come back in first-seen order (guaranteed by dict insertion order).
    """
    chosen = {}
    for elem in seq:
        key = group_key(elem)
        if key not in chosen or cmp_key(chosen[key]) > cmp_key(elem):
            chosen[key] = elem
    return list(chosen.values())
-
-
def make_recons_rule(origin, expansion, old_expansion):
    """Build a reconstruction Rule for *origin* over *expansion*, whose alias
    callback rebuilds a MatchTree tagged with *old_expansion* (the rule's
    original symbols, needed to re-insert discarded terminals)."""
    callback = MakeMatchTree(origin.name, old_expansion)
    return Rule(origin, expansion, alias=callback)
-
def make_recons_rule_to_term(origin, term):
    """Build a reconstruction rule deriving *origin* from the single terminal
    named after *term* (lets a whole subtree match as one token)."""
    new_expansion = [Terminal(term.name)]
    return make_recons_rule(origin, new_expansion, [term])
-
-
class Reconstructor:
    """
    A Reconstructor that will, given a full parse Tree, generate source code.

    Pass `term_subs`, a dictionary of [Terminal name as str] to [output text as str]
    to say what discarded Terminals should be written as.
    """
    def __init__(self, parser, term_subs=None):
        # XXX TODO calling compile twice returns different results!
        assert parser.options.maybe_placeholders == False
        if term_subs is None:
            term_subs = {}
        # Recompile the grammar to get the terminal definitions and rules.
        tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start)

        # Transformer that re-inserts discarded terminals into match trees.
        self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens}, term_subs)
        # Rules only valid when the given symbol is the parse root,
        # keyed by root name; merged into the rule set per-root in _reconstruct.
        self.rules_for_root = defaultdict(list)

        self.rules = list(self._build_recons_rules(rules))
        self.rules.reverse()

        # Choose the best rule from each group of {rule => [rule.alias]}, since we only really need one derivation.
        self.rules = best_from_group(self.rules, lambda r: r, lambda r: -len(r.expansion))

        self.rules.sort(key=lambda r: len(r.expansion))
        self.parser = parser
        # One earley parser per root symbol, built lazily in _reconstruct.
        self._parser_cache = {}

    def _build_recons_rules(self, rules):
        """Generate the inverted ("reconstruction") rules used to re-parse a
        Tree back into a token sequence. Also populates self.rules_for_root
        as a side effect."""
        # Origins of expand1 rules (their single-child trees were inlined).
        expand1s = {r.origin for r in rules if r.options.expand1}

        aliases = defaultdict(list)
        for r in rules:
            if r.alias:
                aliases[r.origin].append( r.alias )

        rule_names = {r.origin for r in rules}
        # Symbols that may appear inline inside a parent tree (anonymous '_'
        # rules, expand1 rules, aliased rules); everything else shows up as a
        # subtree, i.e. matches as a Terminal of the same name (see _match).
        nonterminals = {sym for sym in rule_names
                        if sym.name.startswith('_') or sym in expand1s or sym in aliases }

        seen = set()
        for r in rules:
            # Mirror the expansion, dropping filtered-out terminals and
            # terminal-izing symbols that appear as subtrees.
            recons_exp = [sym if sym in nonterminals else Terminal(sym.name)
                          for sym in r.expansion if not is_discarded_terminal(sym)]

            # Skip self-recursive constructs
            if recons_exp == [r.origin] and r.alias is None:
                continue

            sym = NonTerminal(r.alias) if r.alias else r.origin
            rule = make_recons_rule(sym, recons_exp, r.expansion)

            # NOTE(review): nesting below restored from a whitespace-mangled
            # source; it follows the only reading where the trailing `else`
            # pairs with this condition — confirm against upstream history.
            if sym in expand1s and len(recons_exp) != 1:
                # Multi-symbol expand1 expansions are only usable at the root.
                self.rules_for_root[sym.name].append(rule)

                if sym.name not in seen:
                    # Emit the subtree-as-terminal rule exactly once per name.
                    yield make_recons_rule_to_term(sym, sym)
                    seen.add(sym.name)
            else:
                if sym.name.startswith('_') or sym in expand1s:
                    # Inlined symbols are usable anywhere.
                    yield rule
                else:
                    # Regular rules are only needed when sym is the root.
                    self.rules_for_root[sym.name].append(rule)

        for origin, rule_aliases in aliases.items():
            for alias in rule_aliases:
                yield make_recons_rule_to_term(origin, NonTerminal(alias))
            yield make_recons_rule_to_term(origin, origin)

    def _match(self, term, token):
        """Terminal-matcher callback for the earley parser: a subtree matches
        the terminal named after its rule; a Token matches the terminal of
        its type."""
        if isinstance(token, Tree):
            return Terminal(token.data) == term
        elif isinstance(token, Token):
            return term == Terminal(token.type)
        assert False

    def _reconstruct(self, tree):
        """Yield the tokens/strings that derive *tree*, recursing into
        unexpanded subtrees."""
        # TODO: ambiguity?
        try:
            parser = self._parser_cache[tree.data]
        except KeyError:
            # Build a parser rooted at this tree's symbol: the shared rules
            # plus the best root-only rules for this symbol.
            rules = self.rules + best_from_group(
                self.rules_for_root[tree.data], lambda r: r, lambda r: -len(r.expansion)
            )

            rules.sort(key=lambda r: len(r.expansion))

            callbacks = {rule: rule.alias for rule in rules} # TODO pass callbacks through dict, instead of alias?
            parser = earley.Parser(ParserConf(rules, callbacks, [tree.data]), self._match, resolve_ambiguity=True)
            self._parser_cache[tree.data] = parser

        # Parse the tree's children as a token stream against the recons rules.
        unreduced_tree = parser.parse(tree.children, tree.data) # find a full derivation
        assert unreduced_tree.data == tree.data
        # Re-insert discarded terminals, then flatten recursively.
        res = self.write_tokens.transform(unreduced_tree)
        for item in res:
            if isinstance(item, Tree):
                for x in self._reconstruct(item):
                    yield x
            else:
                yield item

    def reconstruct(self, tree):
        """Return the source text for *tree*, inserting a space between
        adjacent alphanumeric pieces so identifiers/keywords don't fuse."""
        x = self._reconstruct(tree)
        y = []
        prev_item = ''
        for item in x:
            if prev_item and item and prev_item[-1].isalnum() and item[0].isalnum():
                y.append(' ')
            y.append(item)
            prev_item = item
        return ''.join(y)
|