This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

129 lines
4.3 KiB

  1. from collections import defaultdict
  2. from .tree import Tree
  3. from .visitors import Transformer_InPlace
  4. from .common import ParserConf
  5. from .lexer import Token, PatternStr
  6. from .parsers import earley
  7. from .grammar import Rule, Terminal, NonTerminal
  8. def is_discarded_terminal(t):
  9. return t.is_term and t.filter_out
  10. def is_iter_empty(i):
  11. try:
  12. _ = next(i)
  13. return False
  14. except StopIteration:
  15. return True
  16. class WriteTokensTransformer(Transformer_InPlace):
  17. def __init__(self, tokens):
  18. self.tokens = tokens
  19. def __default__(self, data, children, meta):
  20. # if not isinstance(t, MatchTree):
  21. # return t
  22. if not getattr(meta, 'match_tree', False):
  23. return Tree(data, children)
  24. iter_args = iter(children)
  25. to_write = []
  26. for sym in meta.orig_expansion:
  27. if is_discarded_terminal(sym):
  28. t = self.tokens[sym.name]
  29. if not isinstance(t.pattern, PatternStr):
  30. raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t)
  31. to_write.append(t.pattern.value)
  32. else:
  33. x = next(iter_args)
  34. if isinstance(x, list):
  35. to_write += x
  36. else:
  37. if isinstance(x, Token):
  38. assert Terminal(x.type) == sym, x
  39. else:
  40. assert NonTerminal(x.data) == sym, (sym, x)
  41. to_write.append(x)
  42. assert is_iter_empty(iter_args)
  43. return to_write
  44. class MatchTree(Tree):
  45. pass
  46. class MakeMatchTree:
  47. def __init__(self, name, expansion):
  48. self.name = name
  49. self.expansion = expansion
  50. def __call__(self, args):
  51. t = MatchTree(self.name, args)
  52. t.meta.match_tree = True
  53. t.meta.orig_expansion = self.expansion
  54. return t
  55. class Reconstructor:
  56. def __init__(self, parser):
  57. # XXX TODO calling compile twice returns different results!
  58. tokens, rules, _grammar_extra = parser.grammar.compile()
  59. self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens})
  60. self.rules = list(self._build_recons_rules(rules))
  61. def _build_recons_rules(self, rules):
  62. expand1s = {r.origin for r in rules if r.options and r.options.expand1}
  63. aliases = defaultdict(list)
  64. for r in rules:
  65. if r.alias:
  66. aliases[r.origin].append( r.alias )
  67. rule_names = {r.origin for r in rules}
  68. nonterminals = {sym for sym in rule_names
  69. if sym.name.startswith('_') or sym in expand1s or sym in aliases }
  70. for r in rules:
  71. recons_exp = [sym if sym in nonterminals else Terminal(sym.name)
  72. for sym in r.expansion if not is_discarded_terminal(sym)]
  73. # Skip self-recursive constructs
  74. if recons_exp == [r.origin]:
  75. continue
  76. sym = NonTerminal(r.alias) if r.alias else r.origin
  77. yield Rule(sym, recons_exp, alias=MakeMatchTree(sym.name, r.expansion))
  78. for origin, rule_aliases in aliases.items():
  79. for alias in rule_aliases:
  80. yield Rule(origin, [Terminal(alias)], alias=MakeMatchTree(origin.name, [NonTerminal(alias)]))
  81. yield Rule(origin, [Terminal(origin.name)], alias=MakeMatchTree(origin.name, [origin]))
  82. def _match(self, term, token):
  83. if isinstance(token, Tree):
  84. return Terminal(token.data) == term
  85. elif isinstance(token, Token):
  86. return term == Terminal(token.type)
  87. assert False
  88. def _reconstruct(self, tree):
  89. # TODO: ambiguity?
  90. callbacks = {rule: rule.alias for rule in self.rules} # TODO pass callbacks through dict, instead of alias?
  91. parser = earley.Parser(ParserConf(self.rules, callbacks, tree.data), self._match, resolve_ambiguity=True)
  92. unreduced_tree = parser.parse(tree.children) # find a full derivation
  93. assert unreduced_tree.data == tree.data
  94. res = self.write_tokens.transform(unreduced_tree)
  95. for item in res:
  96. if isinstance(item, Tree):
  97. for x in self._reconstruct(item):
  98. yield x
  99. else:
  100. yield item
  101. def reconstruct(self, tree):
  102. return ''.join(self._reconstruct(tree))