This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

122 lines
3.8 KiB

  1. from collections import defaultdict
  2. from .tree import Tree, Transformer_NoRecurse
  3. from .common import is_terminal, ParserConf, PatternStr
  4. from .lexer import Token
  5. from .parsers import earley, resolve_ambig
  6. from .grammar import Rule
  7. def is_discarded_terminal(t):
  8. return is_terminal(t) and t.startswith('_')
  9. def is_iter_empty(i):
  10. try:
  11. _ = next(i)
  12. return False
  13. except StopIteration:
  14. return True
  15. class WriteTokensTransformer(Transformer_NoRecurse):
  16. def __init__(self, tokens):
  17. self.tokens = tokens
  18. def __default__(self, t):
  19. if not isinstance(t, MatchTree):
  20. return t
  21. iter_args = iter(t.children)
  22. to_write = []
  23. for sym in t.orig_expansion:
  24. if is_discarded_terminal(sym):
  25. t = self.tokens[sym]
  26. assert isinstance(t.pattern, PatternStr)
  27. to_write.append(t.pattern.value)
  28. else:
  29. x = next(iter_args)
  30. if isinstance(x, list):
  31. to_write += x
  32. else:
  33. if isinstance(x, Token):
  34. assert x.type == sym, x
  35. else:
  36. assert x.data == sym, (sym, x)
  37. to_write.append(x)
  38. assert is_iter_empty(iter_args)
  39. return to_write
  40. class MatchTree(Tree):
  41. pass
  42. class MakeMatchTree:
  43. def __init__(self, name, expansion):
  44. self.name = name
  45. self.expansion = expansion
  46. def __call__(self, args):
  47. t = MatchTree(self.name, args)
  48. t.orig_expansion = self.expansion
  49. return t
class Reconstructor:
    """Reconstructs text from a parse tree produced by a lark parser.

    Builds a "reduced" grammar whose rules match shaped parse trees
    instead of raw text, parses the tree against it with an Earley
    parser to recover a full derivation, and then writes back the
    terminals (including discarded ones) to produce a string that
    would parse to the given tree.
    """

    def __init__(self, parser):
        """Prepare reduced rules from *parser*'s grammar.

        parser: a lark parser instance exposing ``grammar`` and ``rules``.
        """
        # Recreate the rules to assume a standard lexer
        _tokens, rules, _grammar_extra = parser.grammar.compile(lexer='standard', start='whatever')

        # Origins of rules marked expand1 (their single child replaces them
        # in the tree), so they need special matching below.
        expand1s = {r.origin for r in parser.rules if r.options and r.options.expand1}

        # Map each matchable name to the list of expansions it can match.
        d = defaultdict(list)
        for r in rules:
            # Rules can match their alias
            if r.alias:
                d[r.alias].append(r.expansion)
                d[r.origin].append([r.alias])
            else:
                d[r.origin].append(r.expansion)

            # Expanded rules can match their own terminal
            # NOTE(review): sym.upper() appears to be the convention for
            # "match this symbol as a tree node" — see _match below.
            for sym in r.expansion:
                if sym in expand1s:
                    d[sym].append([sym.upper()])

        # Reduce each expansion: drop discarded terminals (they are
        # re-inserted later by WriteTokensTransformer) and upper-case
        # the symbols that will appear as tree nodes. Group original
        # expansions by their reduced form to deduplicate rules.
        reduced_rules = defaultdict(list)
        for name, expansions in d.items():
            for expansion in expansions:
                reduced = [sym if sym.startswith('_') or sym in expand1s else sym.upper()
                           for sym in expansion if not is_discarded_terminal(sym)]

                reduced_rules[name, tuple(reduced)].append(expansion)

        # One rule per unique (name, reduced expansion); the callback
        # tags matches with the first original expansion of the group.
        self.rules = [Rule(name, list(reduced), MakeMatchTree(name, expansions[0]), None)
                      for (name, reduced), expansions in reduced_rules.items()]

        self.write_tokens = WriteTokensTransformer({t.name:t for t in _tokens})

    def _match(self, term, token):
        """Terminal-matching callback for the Earley parser.

        *token* is actually an item of the input tree: a sub-Tree matches
        a term equal to its upper-cased rule name; a Token matches its
        own type. Anything else is a bug.
        """
        if isinstance(token, Tree):
            return token.data.upper() == term
        elif isinstance(token, Token):
            return term == token.type
        assert False

    def _reconstruct(self, tree):
        """Yield output fragments (strings/tokens) for *tree*, recursively."""
        # TODO: ambiguity?
        # Parse the tree's children as a "sentence" of the reduced grammar,
        # starting from the tree's own rule.
        parser = earley.Parser(ParserConf(self.rules, None, tree.data), self._match, resolve_ambiguity=resolve_ambig.standard_resolve_ambig)
        unreduced_tree = parser.parse(tree.children)    # find a full derivation
        assert unreduced_tree.data == tree.data

        # Write the terminals back in; sub-Trees are recursed into.
        res = self.write_tokens.transform(unreduced_tree)
        for item in res:
            if isinstance(item, Tree):
                for x in self._reconstruct(item):
                    yield x
            else:
                yield item

    def reconstruct(self, tree):
        """Return the reconstructed text for *tree* as a single string."""
        return ''.join(self._reconstruct(tree))