"""Reconstruct text from a tree, based on Lark grammar""" import unicodedata from .tree import Tree from .visitors import Transformer_InPlace from .lexer import Token, PatternStr from .grammar import Terminal, NonTerminal from .tree_matcher import TreeMatcher, is_discarded_terminal from .utils import is_id_continue def is_iter_empty(i): try: _ = next(i) return False except StopIteration: return True class WriteTokensTransformer(Transformer_InPlace): "Inserts discarded tokens into their correct place, according to the rules of grammar" def __init__(self, tokens, term_subs): self.tokens = tokens self.term_subs = term_subs def __default__(self, data, children, meta): if not getattr(meta, 'match_tree', False): return Tree(data, children) iter_args = iter(children) to_write = [] for sym in meta.orig_expansion: if is_discarded_terminal(sym): try: v = self.term_subs[sym.name](sym) except KeyError: t = self.tokens[sym.name] if not isinstance(t.pattern, PatternStr): raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t) v = t.pattern.value to_write.append(v) else: x = next(iter_args) if isinstance(x, list): to_write += x else: if isinstance(x, Token): assert Terminal(x.type) == sym, x else: assert NonTerminal(x.data) == sym, (sym, x) to_write.append(x) assert is_iter_empty(iter_args) return to_write class Reconstructor(TreeMatcher): """ A Reconstructor that will, given a full parse Tree, generate source code. Note: The reconstructor cannot generate values from regexps. If you need to produce discarded regexes, such as newlines, use `term_subs` and provide default values for them. Paramters: parser: a Lark instance term_subs: a dictionary of [Terminal name as str] to [output text as str] """ def __init__(self, parser, term_subs=None): TreeMatcher.__init__(self, parser) self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {}) def _reconstruct(self, tree): unreduced_tree = self.match_tree(tree, tree.data) res = self.write_tokens.transform(unreduced_tree) for item in res: if isinstance(item, Tree): # TODO use orig_expansion.rulename to support templates for x in self._reconstruct(item): yield x else: yield item def reconstruct(self, tree, postproc=None, insert_spaces=True): x = self._reconstruct(tree) if postproc: x = postproc(x) y = [] prev_item = '' for item in x: if insert_spaces and prev_item and item and is_id_continue(prev_item[-1]) and is_id_continue(item[0]): y.append(' ') y.append(item) prev_item = item return ''.join(y)