This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
Não pode escolher mais do que 25 tópicos. Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

122 linhas
3.8 KiB

  1. from collections import defaultdict
  2. from .tree import Tree, Transformer_NoRecurse
  3. from .common import is_terminal, ParserConf, PatternStr
  4. from .lexer import Token
  5. from .parsers import earley, resolve_ambig
  6. from .grammar import Rule
  7. def is_discarded_terminal(t):
  8. return is_terminal(t) and t.startswith('_')
  9. def is_iter_empty(i):
  10. try:
  11. _ = next(i)
  12. return False
  13. except StopIteration:
  14. return True
  15. class WriteTokensTransformer(Transformer_NoRecurse):
  16. def __init__(self, tokens):
  17. self.tokens = tokens
  18. def __default__(self, t):
  19. if not isinstance(t, MatchTree):
  20. return t
  21. iter_args = iter(t.children)
  22. to_write = []
  23. for sym in t.orig_expansion:
  24. if is_discarded_terminal(sym):
  25. t = self.tokens[sym]
  26. assert isinstance(t.pattern, PatternStr)
  27. to_write.append(t.pattern.value)
  28. else:
  29. x = next(iter_args)
  30. if isinstance(x, list):
  31. to_write += x
  32. else:
  33. if isinstance(x, Token):
  34. assert x.type == sym, x
  35. else:
  36. assert x.data == sym, (sym, x)
  37. to_write.append(x)
  38. assert is_iter_empty(iter_args)
  39. return to_write
  40. class MatchTree(Tree):
  41. pass
  42. class MakeMatchTree:
  43. def __init__(self, name, expansion):
  44. self.name = name
  45. self.expansion = expansion
  46. def __call__(self, args):
  47. t = MatchTree(self.name, args)
  48. t.orig_expansion = self.expansion
  49. return t
  50. class Reconstructor:
  51. def __init__(self, parser):
  52. # Recreate the rules to assume a standard lexer
  53. _tokens, rules, _grammar_extra = parser.grammar.compile(lexer='standard', start='whatever')
  54. expand1s = {r.origin for r in parser.rules if r.options and r.options.expand1}
  55. d = defaultdict(list)
  56. for r in rules:
  57. # Rules can match their alias
  58. if r.alias:
  59. d[r.alias].append(r.expansion)
  60. d[r.origin].append([r.alias])
  61. else:
  62. d[r.origin].append(r.expansion)
  63. # Expanded rules can match their own terminal
  64. for sym in r.expansion:
  65. if sym in expand1s:
  66. d[sym].append([sym.upper()])
  67. reduced_rules = defaultdict(list)
  68. for name, expansions in d.items():
  69. for expansion in expansions:
  70. reduced = [sym if sym.startswith('_') or sym in expand1s else sym.upper()
  71. for sym in expansion if not is_discarded_terminal(sym)]
  72. reduced_rules[name, tuple(reduced)].append(expansion)
  73. self.rules = [Rule(name, list(reduced), MakeMatchTree(name, expansions[0]), None)
  74. for (name, reduced), expansions in reduced_rules.items()]
  75. self.write_tokens = WriteTokensTransformer({t.name:t for t in _tokens})
  76. def _match(self, term, token):
  77. if isinstance(token, Tree):
  78. return token.data.upper() == term
  79. elif isinstance(token, Token):
  80. return term == token.type
  81. assert False
  82. def _reconstruct(self, tree):
  83. # TODO: ambiguity?
  84. parser = earley.Parser(ParserConf(self.rules, None, tree.data), self._match, resolve_ambiguity=resolve_ambig.standard_resolve_ambig)
  85. unreduced_tree = parser.parse(tree.children) # find a full derivation
  86. assert unreduced_tree.data == tree.data
  87. res = self.write_tokens.transform(unreduced_tree)
  88. for item in res:
  89. if isinstance(item, Tree):
  90. for x in self._reconstruct(item):
  91. yield x
  92. else:
  93. yield item
  94. def reconstruct(self, tree):
  95. return ''.join(self._reconstruct(tree))