This repo contains code for mirroring other repos, as well as the code that is being mirrored.

  1. """Reconstruct text from a tree, based on Lark grammar"""
  2. import unicodedata
  3. from .tree import Tree
  4. from .visitors import Transformer_InPlace
  5. from .lexer import Token, PatternStr
  6. from .grammar import Terminal, NonTerminal
  7. from .tree_matcher import TreeMatcher, is_discarded_terminal
  8. from .utils import is_id_continue
  9. def is_iter_empty(i):
  10. try:
  11. _ = next(i)
  12. return False
  13. except StopIteration:
  14. return True
  15. class WriteTokensTransformer(Transformer_InPlace):
  16. "Inserts discarded tokens into their correct place, according to the rules of grammar"
  17. def __init__(self, tokens, term_subs):
  18. self.tokens = tokens
  19. self.term_subs = term_subs
  20. def __default__(self, data, children, meta):
  21. if not getattr(meta, 'match_tree', False):
  22. return Tree(data, children)
  23. iter_args = iter(children)
  24. to_write = []
  25. for sym in meta.orig_expansion:
  26. if is_discarded_terminal(sym):
  27. try:
  28. v = self.term_subs[sym.name](sym)
  29. except KeyError:
  30. t = self.tokens[sym.name]
  31. if not isinstance(t.pattern, PatternStr):
  32. raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t)
  33. v = t.pattern.value
  34. to_write.append(v)
  35. else:
  36. x = next(iter_args)
  37. if isinstance(x, list):
  38. to_write += x
  39. else:
  40. if isinstance(x, Token):
  41. assert Terminal(x.type) == sym, x
  42. else:
  43. assert NonTerminal(x.data) == sym, (sym, x)
  44. to_write.append(x)
  45. assert is_iter_empty(iter_args)
  46. return to_write
class Reconstructor(TreeMatcher):
    """
    A Reconstructor that will, given a full parse Tree, generate source code.

    Note:
        The reconstructor cannot generate values from regexps. If you need to produce discarded
        regexes, such as newlines, use `term_subs` and provide default values for them.

    Parameters:
        parser: a Lark instance
        term_subs: a dictionary of [Terminal name as str] to [output text as str]
    """

    def __init__(self, parser, term_subs=None):
        TreeMatcher.__init__(self, parser)

        self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {})
    def _reconstruct(self, tree):
        unreduced_tree = self.match_tree(tree, tree.data)

        res = self.write_tokens.transform(unreduced_tree)
        for item in res:
            if isinstance(item, Tree):
                # TODO use orig_expansion.rulename to support templates
                for x in self._reconstruct(item):
                    yield x
            else:
                yield item

    def reconstruct(self, tree, postproc=None, insert_spaces=True):
        x = self._reconstruct(tree)
        if postproc:
            x = postproc(x)
        y = []
        prev_item = ''
        for item in x:
            if insert_spaces and prev_item and item and is_id_continue(prev_item[-1]) and is_id_continue(item[0]):
                y.append(' ')
            y.append(item)
            prev_item = item
        return ''.join(y)
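
For reference, a minimal usage sketch of the Reconstructor above. The grammar, input text, and variable names here are illustrative (not part of the mirrored file), and it assumes the lark package is installed:

from lark import Lark
from lark.reconstruct import Reconstructor

# A toy grammar: comma-separated words, with literal spaces ignored by the lexer.
grammar = r"""
    start: WORD ("," WORD)*
    %import common.WORD
    %ignore " "
"""

parser = Lark(grammar, parser='lalr', maybe_placeholders=False)
tree = parser.parse("hello, world")

# Rebuild source text from the parse tree. The discarded "," is restored from its
# string pattern; ignored whitespace is not in the tree, so this prints "hello,world".
print(Reconstructor(parser).reconstruct(tree))

# If a discarded terminal is a regexp (e.g. a hypothetical ignored _NL newline),
# its text cannot be derived from the pattern, so term_subs must supply it; per
# the code above, each value is a callable receiving the terminal symbol:
#   Reconstructor(parser, term_subs={'_NL': lambda sym: '\n'})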