This repo contains the code used to mirror other repos, as well as the code being mirrored.
from collections import defaultdict

from .tree import Tree, Transformer_NoRecurse
from .common import is_terminal, ParserConf, PatternStr
from .lexer import Token
from .parsers import earley, resolve_ambig
from .grammar import Rule

def is_discarded_terminal(t):
    # Terminals whose names start with '_' are filtered out of the parse tree
    return is_terminal(t) and t.startswith('_')


def is_iter_empty(i):
    try:
        _ = next(i)
        return False
    except StopIteration:
        return True

class WriteTokensTransformer(Transformer_NoRecurse):
    """Turns a MatchTree back into a flat list of tokens and strings,
    re-inserting the text of discarded (filtered-out) terminals."""

    def __init__(self, tokens):
        self.tokens = tokens

    def __default__(self, t):
        if not isinstance(t, MatchTree):
            return t

        iter_args = iter(t.children)
        to_write = []
        for sym in t.orig_expansion:
            if is_discarded_terminal(sym):
                t = self.tokens[sym]
                assert isinstance(t.pattern, PatternStr)
                to_write.append(t.pattern.value)
            else:
                x = next(iter_args)
                if isinstance(x, list):
                    to_write += x
                else:
                    if isinstance(x, Token):
                        assert x.type == sym, x
                    else:
                        assert x.data == sym, (sym, x)
                    to_write.append(x)

        assert is_iter_empty(iter_args)
        return to_write

class MatchTree(Tree):
    pass


class MakeMatchTree:
    """Rule callback that builds a MatchTree and remembers the original
    (unreduced) expansion it was matched against."""

    def __init__(self, name, expansion):
        self.name = name
        self.expansion = expansion

    def __call__(self, args):
        t = MatchTree(self.name, args)
        t.orig_expansion = self.expansion
        return t

class Reconstructor:
    """Reconstructs source text from a parse tree, given the parser that produced it."""

    def __init__(self, parser):
        # Recreate the rules to assume a standard lexer
        _tokens, rules, _grammar_extra = parser.grammar.compile(lexer='standard', start='whatever')

        expand1s = {r.origin for r in parser.rules if r.options and r.options.expand1}

        d = defaultdict(list)
        for r in rules:
            # Rules can match their alias
            if r.alias:
                d[r.alias].append(r.expansion)
                d[r.origin].append([r.alias])
            else:
                d[r.origin].append(r.expansion)

            # Expanded rules can match their own terminal
            for sym in r.expansion:
                if sym in expand1s:
                    d[sym].append([sym.upper()])

        reduced_rules = defaultdict(list)
        for name, expansions in d.items():
            for expansion in expansions:
                reduced = [sym if sym.startswith('_') or sym in expand1s else sym.upper()
                           for sym in expansion if not is_discarded_terminal(sym)]

                reduced_rules[name, tuple(reduced)].append(expansion)

        self.rules = [Rule(name, list(reduced), MakeMatchTree(name, expansions[0]), None)
                      for (name, reduced), expansions in reduced_rules.items()]

        self.write_tokens = WriteTokensTransformer({t.name: t for t in _tokens})

    def _match(self, term, token):
        if isinstance(token, Tree):
            return token.data.upper() == term
        elif isinstance(token, Token):
            return term == token.type
        assert False

    def _reconstruct(self, tree):
        # TODO: ambiguity?
        parser = earley.Parser(ParserConf(self.rules, None, tree.data), self._match,
                               resolve_ambiguity=resolve_ambig.standard_resolve_ambig)
        unreduced_tree = parser.parse(tree.children)    # find a full derivation
        assert unreduced_tree.data == tree.data
        res = self.write_tokens.transform(unreduced_tree)
        for item in res:
            if isinstance(item, Tree):
                for x in self._reconstruct(item):
                    yield x
            else:
                yield item

    def reconstruct(self, tree):
        return ''.join(self._reconstruct(tree))
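
This module appears to be the Reconstructor from the lark parsing library (the relative imports match lark's internal layout). Below is a minimal usage sketch; the grammar, input text, and the absolute import path lark.reconstruct are assumptions for illustration and depend on how this mirrored copy is packaged and which lark version it was taken from.

# Minimal usage sketch (hypothetical grammar and import path; not part of this repo).
from lark import Lark
from lark.reconstruct import Reconstructor

grammar = r"""
    start: item+
    item: NAME ";"
    NAME: /\w+/
    %import common.WS
    %ignore WS
"""

parser = Lark(grammar)
tree = parser.parse("foo ; bar ;")

# The Reconstructor re-parses the tree against reduced rules and emits text.
# Ignored whitespace is not restored, so the result is an equivalent rendering
# of the input (e.g. "foo;bar;"), not a byte-identical copy.
print(Reconstructor(parser).reconstruct(tree))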