This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You can't select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

107 rader
3.4 KiB

  1. import re
  2. from collections import defaultdict
  3. from .tree import Tree
  4. from .common import is_terminal, ParserConf, PatternStr, Terminal
  5. from .lexer import Token
  6. from .parsers import earley
  7. def is_discarded_terminal(t):
  8. return is_terminal(t) and t.startswith('_')
  9. def is_iter_empty(i):
  10. try:
  11. _ = next(i)
  12. return False
  13. except StopIteration:
  14. return True
  15. class Reconstructor:
  16. def __init__(self, parser):
  17. # Recreate the rules to assume a standard lexer
  18. _tokens, rules, _grammar_extra = parser.grammar.compile(lexer='standard', start='whatever')
  19. tokens = {t.name:t for t in _tokens}
  20. token_res = {t.name:re.compile(t.pattern.to_regexp()) for t in _tokens}
  21. class MatchTerminal(Terminal):
  22. def match(self, other):
  23. if isinstance(other, Tree):
  24. return False
  25. return token_res[self.data].match(other) is not None
  26. class MatchTree(Terminal):
  27. def match(self, other):
  28. try:
  29. return self.data == other.data
  30. except AttributeError:
  31. return False
  32. class WriteTokens:
  33. def __init__(self, name, expansion):
  34. self.name = name
  35. self.expansion = expansion
  36. def f(self, args):
  37. args2 = iter(args)
  38. to_write = []
  39. for sym in self.expansion:
  40. if is_discarded_terminal(sym):
  41. t = tokens[sym]
  42. assert isinstance(t.pattern, PatternStr)
  43. to_write.append(t.pattern.value)
  44. else:
  45. x = next(args2)
  46. if isinstance(x, list):
  47. to_write += x
  48. else:
  49. if isinstance(x, Token):
  50. assert x.type == sym, x
  51. else:
  52. assert x.data == sym, x
  53. to_write.append(x)
  54. assert is_iter_empty(args2)
  55. return to_write
  56. d = defaultdict(list)
  57. for name, (expansions, _o) in rules.items():
  58. for expansion, alias in expansions:
  59. if alias:
  60. d[alias].append(expansion)
  61. d[name].append([alias])
  62. else:
  63. d[name].append(expansion)
  64. rules = []
  65. expand1s = {name for name, (_x, options) in parser.rules.items()
  66. if options and options.expand1}
  67. for name, expansions in d.items():
  68. for expansion in expansions:
  69. reduced = [sym if sym.startswith('_') or sym in expand1s else
  70. MatchTerminal(sym) if is_terminal(sym) else MatchTree(sym)
  71. for sym in expansion if not is_discarded_terminal(sym)]
  72. rules.append((name, reduced, WriteTokens(name, expansion).f, None))
  73. self.rules = rules
  74. def _reconstruct(self, tree):
  75. # TODO: ambiguity?
  76. parser = earley.Parser(self.rules, tree.data, {})
  77. res = parser.parse(tree.children)
  78. for item in res:
  79. if isinstance(item, Tree):
  80. for x in self._reconstruct(item):
  81. yield x
  82. else:
  83. yield item
  84. def reconstruct(self, tree):
  85. return ''.join(self._reconstruct(tree))