This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
25'ten fazla konu seçemezsiniz. Konular bir harf veya rakamla başlamalı, kısa çizgiler ('-') içerebilir ve en fazla 35 karakter uzunluğunda olabilir.

105 satır
3.3 KiB

  1. """Reconstruct text from a tree, based on Lark grammar"""
  2. import unicodedata
  3. from .tree import Tree
  4. from .visitors import Transformer_InPlace
  5. from .lexer import Token, PatternStr
  6. from .grammar import Terminal, NonTerminal
  7. from .tree_matcher import TreeMatcher, is_discarded_terminal
  def is_iter_empty(i):
      """Return True if the iterator ``i`` has no items left, False otherwise.

      The check is done by calling ``next(i)``, so a non-empty iterator is
      advanced by one item as a side effect.
      """
      try:
          next(i)
      except StopIteration:
          return True
      return False
  class WriteTokensTransformer(Transformer_InPlace):
      """Inserts discarded tokens into their correct place, according to the rules of grammar.

      Attributes:
          tokens: mapping of terminal name to its terminal definition; the
              definition's ``pattern`` is read when no substitution exists.
          term_subs: mapping of terminal name to a callable that receives the
              terminal symbol and returns the text to write for it.
      """

      def __init__(self, tokens, term_subs):
          self.tokens = tokens
          self.term_subs = term_subs

      def __default__(self, data, children, meta):
          """Interleave the children of a node with the text of its discarded terminals.

          Returns ``Tree(data, children)`` unchanged when ``meta`` has no truthy
          ``match_tree`` attribute. Otherwise returns a flat list built by walking
          ``meta.orig_expansion`` in order.

          Raises:
              NotImplementedError: a discarded terminal has no entry in
                  ``term_subs`` and its pattern is not a ``PatternStr``.
          """
          if not getattr(meta, 'match_tree', False):
              return Tree(data, children)

          iter_args = iter(children)
          to_write = []
          for sym in meta.orig_expansion:
              if is_discarded_terminal(sym):
                  # Only the lookup is guarded: a KeyError raised *inside* the
                  # user-supplied substitution must propagate instead of being
                  # mistaken for "no substitution registered".
                  try:
                      substitute = self.term_subs[sym.name]
                  except KeyError:
                      t = self.tokens[sym.name]
                      if not isinstance(t.pattern, PatternStr):
                          raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t)
                      v = t.pattern.value
                  else:
                      v = substitute(sym)
                  to_write.append(v)
              else:
                  x = next(iter_args)
                  if isinstance(x, list):
                      # A list child is spliced in flat (presumably the output of
                      # this method for a nested node -- confirm against TreeMatcher).
                      to_write += x
                  else:
                      # Sanity-check that the child matches the symbol it stands for.
                      if isinstance(x, Token):
                          assert Terminal(x.type) == sym, x
                      else:
                          assert NonTerminal(x.data) == sym, (sym, x)
                      to_write.append(x)

          # Every child must have been consumed by the expansion.
          assert is_iter_empty(iter_args)
          return to_write
  def _isalnum(x):
      """Return True if the single character ``x`` is in an identifier-like Unicode category.

      ``x`` must be exactly one character; ``unicodedata.category`` raises
      ``TypeError`` otherwise.
      """
      # Categories defined here: https://www.python.org/dev/peps/pep-3131/
      return unicodedata.category(x) in {'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc'}
  class Reconstructor(TreeMatcher):
      """
      A Reconstructor that will, given a full parse Tree, generate source code.

      Note:
          The reconstructor cannot generate values from regexps. If you need to produce discarded
          regexes, such as newlines, use `term_subs` and provide default values for them.

      Parameters:
          parser: a Lark instance
          term_subs: a dictionary of [Terminal name as str] to [callable that is given the
              terminal symbol and returns the output text as str]
      """

      def __init__(self, parser, term_subs=None):
          TreeMatcher.__init__(self, parser)

          # self.tokens is not assigned in this class; presumably it is set by
          # TreeMatcher.__init__ -- confirm there.
          self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {})

      def _reconstruct(self, tree):
          """Yield, in order, the output items that make up ``tree``."""
          unreduced_tree = self.match_tree(tree, tree.data)

          res = self.write_tokens.transform(unreduced_tree)
          for item in res:
              if isinstance(item, Tree):
                  # TODO use orig_expansion.rulename to support templates
                  for x in self._reconstruct(item):
                      yield x
              else:
                  yield item

      def reconstruct(self, tree, postproc=None, insert_spaces=True):
          """Return the text reconstructed from ``tree`` as a single string.

          Parameters:
              tree: the parse Tree to turn back into text
              postproc: optional callable applied to the iterable of output items
                  before joining; its result is iterated in place of the original
              insert_spaces: when true (the default), a single space is inserted
                  between two consecutive items if the last character of the first
                  and the first character of the second both satisfy ``_isalnum``
          """
          x = self._reconstruct(tree)
          if postproc:
              x = postproc(x)

          y = []
          prev_item = ''
          for item in x:
              # An empty item is never separated, and the item after it is not
              # separated either, because prev_item is then empty.
              if insert_spaces and prev_item and item and _isalnum(prev_item[-1]) and _isalnum(item[0]):
                  y.append(' ')
              y.append(item)
              prev_item = item
          return ''.join(y)