
import re
from collections import defaultdict

from .tree import Tree
from .common import is_terminal, ParserConf, PatternStr
from .lexer import Token
from .parsers import earley


def is_discarded_terminal(t):
    # Underscore-prefixed terminals are filtered out of the parse tree,
    # so they have to be re-emitted from their pattern when reconstructing.
    return is_terminal(t) and t.startswith('_')

def is_iter_empty(i):
    # True if the iterator is already exhausted (consumes at most one item).
    try:
        _ = next(i)
        return False
    except StopIteration:
        return True


class Reconstructor:
    def __init__(self, parser):
        # Recreate the rules to assume a standard lexer
        _tokens, rules, _grammar_extra = parser.grammar.compile(lexer='standard', start='whatever')

        tokens = {t.name:t for t in _tokens}
        token_res = {t.name:re.compile(t.pattern.to_regexp()) for t in _tokens}

        class MatchData(object):
            def __init__(self, data):
                self.data = data

            def __repr__(self):
                return '%s(%r)' % (type(self).__name__, self.data)

        class MatchTerminal(MatchData):
            # Matches a token string against the terminal's compiled regexp.
            def __call__(self, other):
                if isinstance(other, Tree):
                    return False
                return token_res[self.data].match(other) is not None

        class MatchTree(MatchData):
            # Matches a subtree by its rule name.
            def __call__(self, other):
                try:
                    return self.data == other.data
                except AttributeError:
                    return False

        class WriteTokens:
            # Callback that writes a matched expansion back out as a list of
            # tokens, re-inserting the literal terminals that were discarded.
            def __init__(self, name, expansion):
                self.name = name
                self.expansion = expansion

            def f(self, args):
                args2 = iter(args)
                to_write = []
                for sym in self.expansion:
                    if is_discarded_terminal(sym):
                        t = tokens[sym]
                        assert isinstance(t.pattern, PatternStr)
                        to_write.append(t.pattern.value)
                    else:
                        x = next(args2)
                        if isinstance(x, list):
                            to_write += x
                        else:
                            if isinstance(x, Token):
                                assert x.type == sym, x
                            else:
                                assert x.data == sym, x
                            to_write.append(x)

                assert is_iter_empty(args2)
                return to_write

        # Group expansions by rule name; aliased expansions get a rule of their own.
        d = defaultdict(list)
        for name, (expansions, _o) in rules.items():
            for expansion, alias in expansions:
                if alias:
                    d[alias].append(expansion)
                    d[name].append([alias])
                else:
                    d[name].append(expansion)

        rules = []
        expand1s = {name for name, (_x, options) in parser.rules.items()
                    if options and options.expand1}

        for name, expansions in d.items():
            for expansion in expansions:
                # Symbols for rules that get inlined in the tree (underscore-prefixed
                # or expand1) are kept as nonterminals; everything else is matched by a
                # callable matcher. Discarded terminals are dropped here and re-inserted
                # by WriteTokens when writing the rule back out.
                reduced = [sym if sym.startswith('_') or sym in expand1s else
                           MatchTerminal(sym) if is_terminal(sym) else MatchTree(sym)
                           for sym in expansion if not is_discarded_terminal(sym)]

                rules.append((name, reduced, WriteTokens(name, expansion).f))

        self.rules = rules

    def _reconstruct(self, tree):
        # Match this node's children against the rules built in __init__;
        # the WriteTokens callbacks yield the tokens, including discarded ones.
        parser = earley.Parser(ParserConf(self.rules, {}, tree.data))

        res, = parser.parse(tree.children)    # XXX ambiguity?
        for item in res:
            if isinstance(item, Tree):
                for x in self._reconstruct(item):
                    yield x
            else:
                yield item

    def reconstruct(self, tree):
        return ''.join(self._reconstruct(tree))
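
For context, a minimal usage sketch follows. It assumes this module is the reconstruct module of the lark package (importable as lark.reconstruct) and that this version's Lark class accepts a plain grammar string as shown; the grammar and input are invented for illustration, and because the spaces between tokens are %ignore'd they are not restored in the output.

# Usage sketch (assumptions: this file ships inside the lark package as
# lark/reconstruct.py, and Lark's default earley parser is used).
from lark import Lark
from lark.reconstruct import Reconstructor

parser = Lark('''
    start: assign+
    assign: NAME "=" NUMBER ";"

    NAME: /[a-z]+/
    NUMBER: /[0-9]+/
    %ignore " "
''')

tree = parser.parse("x = 1; y = 2;")

# The ignored spaces are lost, so the round-trip is equivalent but not identical:
print(Reconstructor(parser).reconstruct(tree))   # roughly: x=1;y=2;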