This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

106 lines
3.3 KiB

  1. import re
  2. from collections import defaultdict
  3. from .tree import Tree
  4. from .common import is_terminal, ParserConf, PatternStr
  5. from .lexer import Token
  6. from .parsers import earley
  7. def is_discarded_terminal(t):
  8. return is_terminal(t) and t.startswith('_')
  9. def is_iter_empty(i):
  10. try:
  11. _ = next(i)
  12. return False
  13. except StopIteration:
  14. return True
  15. class Reconstructor:
  16. def __init__(self, parser):
  17. tokens = {t.name:t for t in parser.lexer_conf.tokens}
  18. token_res = {t.name:re.compile(t.pattern.to_regexp()) for t in parser.lexer_conf.tokens}
  19. class MatchData:
  20. def __init__(self, data):
  21. self.data = data
  22. class MatchTerminal(MatchData):
  23. def __call__(self, other):
  24. return token_res[self.data].match(other) is not None
  25. class MatchTree(MatchData):
  26. def __call__(self, other):
  27. try:
  28. return self.data == other.data
  29. except AttributeError:
  30. return False
  31. class WriteTokens:
  32. def __init__(self, name, expansion):
  33. self.name = name
  34. self.expansion = expansion
  35. def f(self, args):
  36. args2 = iter(args)
  37. to_write = []
  38. for sym in self.expansion:
  39. if is_discarded_terminal(sym):
  40. t = tokens[sym]
  41. assert isinstance(t.pattern, PatternStr)
  42. to_write.append(t.pattern.value)
  43. else:
  44. x = next(args2)
  45. if isinstance(x, list):
  46. to_write += x
  47. else:
  48. if isinstance(x, Token):
  49. assert x.type == sym, x
  50. else:
  51. assert x.data == sym, x
  52. to_write.append(x)
  53. assert is_iter_empty(args2)
  54. return to_write
  55. d = defaultdict(list)
  56. for name, (expansions, _o) in parser.rules.items():
  57. for expansion, alias in expansions:
  58. if alias:
  59. d[alias].append(expansion)
  60. d[name].append([alias])
  61. else:
  62. d[name].append(expansion)
  63. rules = []
  64. expand1s = {name for name, (_x, options) in parser.rules.items()
  65. if options and options.expand1}
  66. for name, expansions in d.items():
  67. for expansion in expansions:
  68. reduced = [sym if sym.startswith('_') or sym in expand1s else
  69. (MatchTerminal(sym) if is_terminal(sym) else MatchTree(sym),)
  70. for sym in expansion if not is_discarded_terminal(sym)]
  71. rules.append((name, reduced, WriteTokens(name, expansion).f))
  72. self.rules = rules
  73. def _reconstruct(self, tree):
  74. parser = earley.Parser(ParserConf(self.rules, {}, tree.data))
  75. res ,= parser.parse(tree.children) # XXX ambiguity?
  76. for item in res:
  77. if isinstance(item, Tree):
  78. for x in self._reconstruct(item):
  79. yield x
  80. else:
  81. yield item
  82. def reconstruct(self, tree):
  83. return ''.join(self._reconstruct(tree))