This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
Ви не можете вибрати більше 25 тем. Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

109 рядків
3.7 KiB

  1. """Reconstruct text from a tree, based on Lark grammar"""
  2. from typing import List, Dict, Union, Callable, Iterable, Optional
  3. import unicodedata
  4. from .lark import Lark
  5. from .tree import Tree
  6. from .visitors import Transformer_InPlace
  7. from .lexer import Token, PatternStr, TerminalDef
  8. from .grammar import Terminal, NonTerminal, Symbol
  9. from .tree_matcher import TreeMatcher, is_discarded_terminal
  10. from .utils import is_id_continue
  11. def is_iter_empty(i):
  12. try:
  13. _ = next(i)
  14. return False
  15. except StopIteration:
  16. return True
  17. class WriteTokensTransformer(Transformer_InPlace):
  18. "Inserts discarded tokens into their correct place, according to the rules of grammar"
  19. tokens: Dict[str, TerminalDef]
  20. term_subs: Dict[str, Callable[[Symbol], str]]
  21. def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]]) -> None:
  22. self.tokens = tokens
  23. self.term_subs = term_subs
  24. def __default__(self, data, children, meta):
  25. if not getattr(meta, 'match_tree', False):
  26. return Tree(data, children)
  27. iter_args = iter(children)
  28. to_write = []
  29. for sym in meta.orig_expansion:
  30. if is_discarded_terminal(sym):
  31. try:
  32. v = self.term_subs[sym.name](sym)
  33. except KeyError:
  34. t = self.tokens[sym.name]
  35. if not isinstance(t.pattern, PatternStr):
  36. raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t)
  37. v = t.pattern.value
  38. to_write.append(v)
  39. else:
  40. x = next(iter_args)
  41. if isinstance(x, list):
  42. to_write += x
  43. else:
  44. if isinstance(x, Token):
  45. assert Terminal(x.type) == sym, x
  46. else:
  47. assert NonTerminal(x.data) == sym, (sym, x)
  48. to_write.append(x)
  49. assert is_iter_empty(iter_args)
  50. return to_write
  51. class Reconstructor(TreeMatcher):
  52. """
  53. A Reconstructor that will, given a full parse Tree, generate source code.
  54. Note:
  55. The reconstructor cannot generate values from regexps. If you need to produce discarded
  56. regexes, such as newlines, use `term_subs` and provide default values for them.
  57. Paramters:
  58. parser: a Lark instance
  59. term_subs: a dictionary of [Terminal name as str] to [output text as str]
  60. """
  61. write_tokens: WriteTokensTransformer
  62. def __init__(self, parser: Lark, term_subs: Optional[Dict[str, Callable[[Symbol], str]]]=None) -> None:
  63. TreeMatcher.__init__(self, parser)
  64. self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {})
  65. def _reconstruct(self, tree):
  66. unreduced_tree = self.match_tree(tree, tree.data)
  67. res = self.write_tokens.transform(unreduced_tree)
  68. for item in res:
  69. if isinstance(item, Tree):
  70. # TODO use orig_expansion.rulename to support templates
  71. for x in self._reconstruct(item):
  72. yield x
  73. else:
  74. yield item
  75. def reconstruct(self, tree: Tree, postproc: Optional[Callable[[Iterable[str]], Iterable[str]]]=None, insert_spaces: bool=True) -> str:
  76. x = self._reconstruct(tree)
  77. if postproc:
  78. x = postproc(x)
  79. y = []
  80. prev_item = ''
  81. for item in x:
  82. if insert_spaces and prev_item and item and is_id_continue(prev_item[-1]) and is_id_continue(item[0]):
  83. y.append(' ')
  84. y.append(item)
  85. prev_item = item
  86. return ''.join(y)