This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

110 lignes
3.4 KiB

  1. import re
  2. from collections import defaultdict
  3. from .tree import Tree
  4. from .common import is_terminal
  5. from .lexer import Token, TokenDef__Str
  6. from .parsers import earley
  7. from .lark import Lark
  8. def is_discarded_terminal(t):
  9. return is_terminal(t) and t.startswith('_')
  10. def is_iter_empty(i):
  11. try:
  12. _ = next(i)
  13. return False
  14. except StopIteration:
  15. return True
  16. class Reconstructor:
  17. def __init__(self, parser):
  18. tokens = {t.name:t for t in parser.lexer_conf.tokens}
  19. token_res = {t.name:re.compile(t.to_regexp()) for t in parser.lexer_conf.tokens}
  20. class MatchData:
  21. def __init__(self, data):
  22. self.data = data
  23. class MatchTerminal(MatchData):
  24. def match(self, other):
  25. return token_res[self.data].match(other) is not None
  26. class MatchTree(MatchData):
  27. def match(self, other):
  28. return self.data == other.data
  29. class WriteTokens:
  30. def __init__(self, name, expansion):
  31. self.name = name
  32. self.expansion = expansion
  33. def f(self, args):
  34. args2 = iter(args)
  35. to_write = []
  36. for sym in self.expansion:
  37. if is_discarded_terminal(sym):
  38. t = tokens[sym]
  39. assert isinstance(t, TokenDef__Str)
  40. to_write.append(t.value)
  41. else:
  42. x = next(args2)
  43. if isinstance(x, list):
  44. to_write += x
  45. else:
  46. if isinstance(x, Token):
  47. assert x.type == sym, x
  48. else:
  49. assert x.data == sym, x
  50. to_write.append(x)
  51. assert is_iter_empty(args2)
  52. return to_write
  53. d = defaultdict(list)
  54. for name, expansions in parser.rules.items():
  55. for expansion, alias in expansions:
  56. if alias:
  57. d[alias].append(expansion)
  58. d[name].append([alias])
  59. else:
  60. d[name].append(expansion)
  61. rules = []
  62. expand1s = {name.lstrip('!').lstrip('?') for name in d
  63. if name.startswith(('?', '!?'))} # XXX Ugly code
  64. for name, expansions in d.items():
  65. for expansion in expansions:
  66. reduced = [sym if sym.startswith('_') or sym in expand1s else
  67. (sym, MatchTerminal(sym) if is_terminal(sym) else MatchTree(sym))
  68. for sym in expansion if not is_discarded_terminal(sym)]
  69. name = name.lstrip('!').lstrip('?')
  70. rules.append({'name': name,
  71. 'symbols': reduced,
  72. 'postprocess': WriteTokens(name, expansion).f
  73. })
  74. self.rules = rules
  75. def _reconstruct(self, tree):
  76. parser = earley.Parser(self.rules, tree.data)
  77. res ,= parser.parse(tree.children) # XXX ambiguity?
  78. for item in res:
  79. if isinstance(item, Tree):
  80. for x in self._reconstruct(item):
  81. yield x
  82. else:
  83. yield item
  84. def reconstruct(self, tree):
  85. return ''.join(self._reconstruct(tree))