This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

102 lines
3.2 KiB

  1. """Reconstruct text from a tree, based on Lark grammar"""
  2. import unicodedata
  3. from .tree import Tree
  4. from .visitors import Transformer_InPlace
  5. from .lexer import Token, PatternStr
  6. from .grammar import Terminal, NonTerminal
  7. from .tree_matcher import TreeMatcher, is_discarded_terminal
  8. from .utils import is_id_continue
  9. def is_iter_empty(i):
  10. try:
  11. _ = next(i)
  12. return False
  13. except StopIteration:
  14. return True
  15. class WriteTokensTransformer(Transformer_InPlace):
  16. "Inserts discarded tokens into their correct place, according to the rules of grammar"
  17. def __init__(self, tokens, term_subs):
  18. self.tokens = tokens
  19. self.term_subs = term_subs
  20. def __default__(self, data, children, meta):
  21. if not getattr(meta, 'match_tree', False):
  22. return Tree(data, children)
  23. iter_args = iter(children)
  24. to_write = []
  25. for sym in meta.orig_expansion:
  26. if is_discarded_terminal(sym):
  27. try:
  28. v = self.term_subs[sym.name](sym)
  29. except KeyError:
  30. t = self.tokens[sym.name]
  31. if not isinstance(t.pattern, PatternStr):
  32. raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t)
  33. v = t.pattern.value
  34. to_write.append(v)
  35. else:
  36. x = next(iter_args)
  37. if isinstance(x, list):
  38. to_write += x
  39. else:
  40. if isinstance(x, Token):
  41. assert Terminal(x.type) == sym, x
  42. else:
  43. assert NonTerminal(x.data) == sym, (sym, x)
  44. to_write.append(x)
  45. assert is_iter_empty(iter_args)
  46. return to_write
  47. class Reconstructor(TreeMatcher):
  48. """
  49. A Reconstructor that will, given a full parse Tree, generate source code.
  50. Note:
  51. The reconstructor cannot generate values from regexps. If you need to produce discarded
  52. regexes, such as newlines, use `term_subs` and provide default values for them.
  53. Paramters:
  54. parser: a Lark instance
  55. term_subs: a dictionary of [Terminal name as str] to [output text as str]
  56. """
  57. def __init__(self, parser, term_subs=None):
  58. TreeMatcher.__init__(self, parser)
  59. self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {})
  60. def _reconstruct(self, tree):
  61. unreduced_tree = self.match_tree(tree, tree.data)
  62. res = self.write_tokens.transform(unreduced_tree)
  63. for item in res:
  64. if isinstance(item, Tree):
  65. # TODO use orig_expansion.rulename to support templates
  66. for x in self._reconstruct(item):
  67. yield x
  68. else:
  69. yield item
  70. def reconstruct(self, tree, postproc=None, insert_spaces=True):
  71. x = self._reconstruct(tree)
  72. if postproc:
  73. x = postproc(x)
  74. y = []
  75. prev_item = ''
  76. for item in x:
  77. if insert_spaces and prev_item and item and is_id_continue(prev_item[-1]) and is_id_continue(item[0]):
  78. y.append(' ')
  79. y.append(item)
  80. prev_item = item
  81. return ''.join(y)