from collections import defaultdict

from .tree import Tree
from .visitors import Transformer_InPlace
from .common import ParserConf, PatternStr
from .lexer import Token
from .parsers import earley, resolve_ambig
from .grammar import Rule, Terminal, NonTerminal

def is_discarded_terminal(t):
    # Terminals marked filter_out are dropped from the parse tree, so their
    # text has to be re-inserted during reconstruction.
    return t.is_term and t.filter_out


def is_iter_empty(i):
    try:
        _ = next(i)
        return False
    except StopIteration:
        return True

class WriteTokensTransformer(Transformer_InPlace):
    # Turns each match tree back into a flat list of strings/tokens,
    # re-inserting the text of discarded terminals in their original places.
    def __init__(self, tokens):
        self.tokens = tokens

    def __default__(self, data, children, meta):
        if not getattr(meta, 'match_tree', False):
            return Tree(data, children)

        iter_args = iter(children)
        to_write = []
        for sym in meta.orig_expansion:
            if is_discarded_terminal(sym):
                # The terminal was filtered out of the tree; write its literal
                # string pattern back directly.
                t = self.tokens[sym.name]
                assert isinstance(t.pattern, PatternStr)
                to_write.append(t.pattern.value)
            else:
                x = next(iter_args)
                if isinstance(x, list):
                    to_write += x
                else:
                    if isinstance(x, Token):
                        assert Terminal(x.type) == sym, x
                    else:
                        assert NonTerminal(x.data) == sym, (sym, x)
                    to_write.append(x)

        assert is_iter_empty(iter_args)
        return to_write

class MatchTree(Tree):
    pass


class MakeMatchTree:
    # Rule callback: builds a MatchTree and records the rule's original
    # expansion, so WriteTokensTransformer can restore discarded terminals.
    def __init__(self, name, expansion):
        self.name = name
        self.expansion = expansion

    def __call__(self, args):
        t = MatchTree(self.name, args)
        t.meta.match_tree = True
        t.meta.orig_expansion = self.expansion
        return t

class Reconstructor:
    def __init__(self, parser):
        # Recreate the rules to assume a standard lexer
        _tokens, rules, _grammar_extra = parser.grammar.compile()

        expand1s = {r.origin for r in parser.rules if r.options and r.options.expand1}

        d = defaultdict(list)
        for r in rules:
            # Rules can match their alias
            if r.alias:
                alias = NonTerminal(r.alias)
                d[alias].append(r.expansion)
                d[r.origin].append([alias])
            else:
                d[r.origin].append(r.expansion)

            # Expanded rules can match their own terminal
            for sym in r.expansion:
                if sym in expand1s:
                    d[sym].append([Terminal(sym.name)])

        # Reduce each expansion to the symbols that actually appear in a parse
        # tree: discarded terminals are dropped, and visible rules are replaced
        # by terminals named after them, so tree nodes can be matched as tokens.
        reduced_rules = defaultdict(list)
        for name, expansions in d.items():
            for expansion in expansions:
                reduced = [sym if sym.name.startswith('_') or sym in expand1s else Terminal(sym.name)
                           for sym in expansion if not is_discarded_terminal(sym)]

                reduced_rules[name, tuple(reduced)].append(expansion)

        self.rules = [Rule(name, list(reduced), MakeMatchTree(name.name, expansions[0]), None)
                      for (name, reduced), expansions in reduced_rules.items()]

        self.write_tokens = WriteTokensTransformer({t.name: t for t in _tokens})

    def _match(self, term, token):
        # A tree node matches the terminal named after its rule; a real token
        # matches the terminal of its type.
        if isinstance(token, Tree):
            return Terminal(token.data) == term
        elif isinstance(token, Token):
            return term == Terminal(token.type)
        assert False

    def _reconstruct(self, tree):
        # TODO: ambiguity?
        parser = earley.Parser(ParserConf(self.rules, None, tree.data), self._match,
                               resolve_ambiguity=resolve_ambig.standard_resolve_ambig)
        unreduced_tree = parser.parse(tree.children)    # find a full derivation
        assert unreduced_tree.data == tree.data

        res = self.write_tokens.transform(unreduced_tree)
        for item in res:
            if isinstance(item, Tree):
                for x in self._reconstruct(item):
                    yield x
            else:
                yield item

    def reconstruct(self, tree):
        return ''.join(self._reconstruct(tree))
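
For reference, here is a minimal usage sketch of this module. It assumes Lark is installed and that this file is importable as lark.reconstruct; the grammar and input text are invented for illustration:

from lark import Lark
from lark.reconstruct import Reconstructor

# Toy grammar: the anonymous ";" terminal is filtered out of the parse tree,
# so the Reconstructor must write it back from its string pattern.
parser = Lark(r"""
    start: item+
    item: WORD ";"
    %import common.WORD
    %ignore " "
""")

tree = parser.parse("hello; world;")
text = Reconstructor(parser).reconstruct(tree)
# Roughly "hello;world;" -- ignored whitespace never enters the tree,
# so it is not restored.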