This repo contains code to mirror other repos, as well as the code being mirrored.

import re
from collections import defaultdict

from .tree import Tree
from .common import is_terminal, ParserConf, PatternStr
from .lexer import Token
from .parsers import earley
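
# Terminals whose names start with '_' are filtered out of the parse tree,
# so their text must be re-inserted from the grammar during reconstruction.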
def is_discarded_terminal(t):
    return is_terminal(t) and t.startswith('_')

def is_iter_empty(i):
    try:
        _ = next(i)
        return False
    except StopIteration:
        return True

class Reconstructor:
    def __init__(self, parser):
        # Recreate the rules to assume a standard lexer
        _tokens, rules, _grammar_extra = parser.grammar.compile(lexer='standard', start='whatever')

        tokens = {t.name:t for t in _tokens}
        token_res = {t.name:re.compile(t.pattern.to_regexp()) for t in _tokens}
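
        # The Match callables act as rule symbols for the internal Earley parse:
        # MatchTerminal tests a token's text against the terminal's regexp,
        # MatchTree tests whether a subtree was produced by the named rule.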
        class MatchData(object):
            def __init__(self, data):
                self.data = data

            def __repr__(self):
                return '%s(%r)' % (type(self).__name__, self.data)

        class MatchTerminal(MatchData):
            def __call__(self, other):
                if isinstance(other, Tree):
                    return False
                return token_res[self.data].match(other) is not None

        class MatchTree(MatchData):
            def __call__(self, other):
                try:
                    return self.data == other.data
                except AttributeError:
                    return False
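
        # WriteTokens.f turns a matched expansion back into text fragments,
        # re-inserting the literal value of every discarded (filtered) terminal.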
        class WriteTokens:
            def __init__(self, name, expansion):
                self.name = name
                self.expansion = expansion

            def f(self, args):
                args2 = iter(args)
                to_write = []
                for sym in self.expansion:
                    if is_discarded_terminal(sym):
                        t = tokens[sym]
                        assert isinstance(t.pattern, PatternStr)
                        to_write.append(t.pattern.value)
                    else:
                        x = next(args2)
                        if isinstance(x, list):
                            to_write += x
                        else:
                            if isinstance(x, Token):
                                assert x.type == sym, x
                            else:
                                assert x.data == sym, x
                            to_write.append(x)

                assert is_iter_empty(args2)
                return to_write
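
        # Collect every expansion per rule, turning aliases into rules of their
        # own so aliased subtrees can be matched directly by name.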
        d = defaultdict(list)
        for name, (expansions, _o) in rules.items():
            for expansion, alias in expansions:
                if alias:
                    d[alias].append(expansion)
                    d[name].append([alias])
                else:
                    d[name].append(expansion)

        rules = []
        expand1s = {name for name, (_x, options) in parser.rules.items()
                    if options and options.expand1}

        for name, expansions in d.items():
            for expansion in expansions:
                # Keep inlined ('_') and expand1 rules by name, since their own
                # nodes never appear in the tree; match everything else through
                # a MatchTerminal/MatchTree callable.
                reduced = [sym if sym.startswith('_') or sym in expand1s else
                           MatchTerminal(sym) if is_terminal(sym) else MatchTree(sym)
                           for sym in expansion if not is_discarded_terminal(sym)]

                rules.append((name, reduced, WriteTokens(name, expansion).f))

        self.rules = rules
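
    # _reconstruct runs an Earley parse over the children of each tree node,
    # using the rules built above, and yields tokens in source order.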
    def _reconstruct(self, tree):
        parser = earley.Parser(ParserConf(self.rules, {}, tree.data))
        res ,= parser.parse(tree.children)    # XXX ambiguity?
        for item in res:
            if isinstance(item, Tree):
                for x in self._reconstruct(item):
                    yield x
            else:
                yield item

    def reconstruct(self, tree):
        return ''.join(self._reconstruct(tree))
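
For orientation, a minimal usage sketch of the class above. The Lark front-end, the import path, and the toy grammar are assumptions for illustration; only the Reconstructor(parser).reconstruct(tree) call reflects the code in this file:

    from lark import Lark                         # assumed front-end class
    from lark.reconstruct import Reconstructor    # assumed import path for this module

    grammar = '''
        start: "{" pair ("," pair)* "}"
        pair: NAME ":" NAME
        NAME: /[a-z]+/
    '''

    parser = Lark(grammar)
    tree = parser.parse('{key:value,other:thing}')

    # Rebuild source text from the tree alone; punctuation that was filtered
    # out of the tree is re-inserted from the grammar's literal terminals.
    print(Reconstructor(parser).reconstruct(tree))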