"""Reconstruct text from a tree, based on Lark grammar""" from typing import List, Dict, Union, Callable, Iterable, Optional import unicodedata from .lark import Lark from .tree import Tree from .visitors import Transformer_InPlace from .lexer import Token, PatternStr, TerminalDef from .grammar import Terminal, NonTerminal, Symbol from .tree_matcher import TreeMatcher, is_discarded_terminal from .utils import is_id_continue def is_iter_empty(i): try: _ = next(i) return False except StopIteration: return True class WriteTokensTransformer(Transformer_InPlace): "Inserts discarded tokens into their correct place, according to the rules of grammar" tokens: Dict[str, TerminalDef] term_subs: Dict[str, Callable[[Symbol], str]] def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]]) -> None: self.tokens = tokens self.term_subs = term_subs def __default__(self, data, children, meta): if not getattr(meta, 'match_tree', False): return Tree(data, children) iter_args = iter(children) to_write = [] for sym in meta.orig_expansion: if is_discarded_terminal(sym): try: v = self.term_subs[sym.name](sym) except KeyError: t = self.tokens[sym.name] if not isinstance(t.pattern, PatternStr): raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t) v = t.pattern.value to_write.append(v) else: x = next(iter_args) if isinstance(x, list): to_write += x else: if isinstance(x, Token): assert Terminal(x.type) == sym, x else: assert NonTerminal(x.data) == sym, (sym, x) to_write.append(x) assert is_iter_empty(iter_args) return to_write class Reconstructor(TreeMatcher): """ A Reconstructor that will, given a full parse Tree, generate source code. Note: The reconstructor cannot generate values from regexps. If you need to produce discarded regexes, such as newlines, use `term_subs` and provide default values for them. Paramters: parser: a Lark instance term_subs: a dictionary of [Terminal name as str] to [output text as str] """ write_tokens: WriteTokensTransformer def __init__(self, parser: Lark, term_subs: Optional[Dict[str, Callable[[Symbol], str]]]=None) -> None: TreeMatcher.__init__(self, parser) self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {}) def _reconstruct(self, tree): unreduced_tree = self.match_tree(tree, tree.data) res = self.write_tokens.transform(unreduced_tree) for item in res: if isinstance(item, Tree): # TODO use orig_expansion.rulename to support templates for x in self._reconstruct(item): yield x else: yield item def reconstruct(self, tree: Tree, postproc: Optional[Callable[[Iterable[str]], Iterable[str]]]=None, insert_spaces: bool=True) -> str: x = self._reconstruct(tree) if postproc: x = postproc(x) y = [] prev_item = '' for item in x: if insert_spaces and prev_item and item and is_id_continue(prev_item[-1]) and is_id_continue(item[0]): y.append(' ') y.append(item) prev_item = item return ''.join(y)