This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
Non puoi selezionare più di 25 argomenti Gli argomenti devono iniziare con una lettera o un numero, possono includere trattini ('-') e possono essere lunghi fino a 35 caratteri.

105 righe
3.3 KiB

  1. """Reconstruct text from a tree, based on Lark grammar"""
  2. import unicodedata
  3. from .tree import Tree
  4. from .visitors import Transformer_InPlace
  5. from .lexer import Token, PatternStr
  6. from .grammar import Terminal, NonTerminal
  7. from .tree_matcher import TreeMatcher, is_discarded_terminal
  8. def is_iter_empty(i):
  9. try:
  10. _ = next(i)
  11. return False
  12. except StopIteration:
  13. return True
  14. class WriteTokensTransformer(Transformer_InPlace):
  15. "Inserts discarded tokens into their correct place, according to the rules of grammar"
  16. def __init__(self, tokens, term_subs):
  17. self.tokens = tokens
  18. self.term_subs = term_subs
  19. def __default__(self, data, children, meta):
  20. if not getattr(meta, 'match_tree', False):
  21. return Tree(data, children)
  22. iter_args = iter(children)
  23. to_write = []
  24. for sym in meta.orig_expansion:
  25. if is_discarded_terminal(sym):
  26. try:
  27. v = self.term_subs[sym.name](sym)
  28. except KeyError:
  29. t = self.tokens[sym.name]
  30. if not isinstance(t.pattern, PatternStr):
  31. raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t)
  32. v = t.pattern.value
  33. to_write.append(v)
  34. else:
  35. x = next(iter_args)
  36. if isinstance(x, list):
  37. to_write += x
  38. else:
  39. if isinstance(x, Token):
  40. assert Terminal(x.type) == sym, x
  41. else:
  42. assert NonTerminal(x.data) == sym, (sym, x)
  43. to_write.append(x)
  44. assert is_iter_empty(iter_args)
  45. return to_write
  46. def _isalnum(x):
  47. # Categories defined here: https://www.python.org/dev/peps/pep-3131/
  48. return unicodedata.category(x) in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc']
  49. class Reconstructor(TreeMatcher):
  50. """
  51. A Reconstructor that will, given a full parse Tree, generate source code.
  52. Note:
  53. The reconstructor cannot generate values from regexps. If you need to produce discarded
  54. regexes, such as newlines, use `term_subs` and provide default values for them.
  55. Paramters:
  56. parser: a Lark instance
  57. term_subs: a dictionary of [Terminal name as str] to [output text as str]
  58. """
  59. def __init__(self, parser, term_subs=None):
  60. TreeMatcher.__init__(self, parser)
  61. self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {})
  62. def _reconstruct(self, tree):
  63. unreduced_tree = self.match_tree(tree, tree.data)
  64. res = self.write_tokens.transform(unreduced_tree)
  65. for item in res:
  66. if isinstance(item, Tree):
  67. # TODO use orig_expansion.rulename to support templates
  68. for x in self._reconstruct(item):
  69. yield x
  70. else:
  71. yield item
  72. def reconstruct(self, tree, postproc=None):
  73. x = self._reconstruct(tree)
  74. if postproc:
  75. x = postproc(x)
  76. y = []
  77. prev_item = ''
  78. for item in x:
  79. if prev_item and item and _isalnum(prev_item[-1]) and _isalnum(item[0]):
  80. y.append(' ')
  81. y.append(item)
  82. prev_item = item
  83. return ''.join(y)