This repo contains code to mirror other repos, as well as the code being mirrored.


import re
from collections import defaultdict

from .tree import Tree
from .common import is_terminal
from .lexer import Token, TokenDef__Str
from .parsers import earley
from .lark import Lark


def is_discarded_terminal(t):
    # Terminals whose names start with '_' are filtered out of the parse tree,
    # so the reconstructor has to re-insert their text itself.
    return is_terminal(t) and t.startswith('_')

def is_iter_empty(i):
    try:
        _ = next(i)
        return False
    except StopIteration:
        return True


class Reconstructor:
    def __init__(self, parser):
        tokens = {t.name: t for t in parser.lexer_conf.tokens}
        token_res = {t.name: re.compile(t.to_regexp()) for t in parser.lexer_conf.tokens}

        class MatchData:
            def __init__(self, data):
                self.data = data

        class MatchTerminal(MatchData):
            def match(self, other):
                return token_res[self.data].match(other) is not None

        class MatchTree(MatchData):
            def match(self, other):
                return self.data == other.data

        class WriteTokens:
            def __init__(self, name, expansion):
                self.name = name
                self.expansion = expansion

            def f(self, args):
                # Interleave the matched children with the text of any
                # discarded (filtered) terminals in this expansion.
                args2 = iter(args)
                to_write = []
                for sym in self.expansion:
                    if is_discarded_terminal(sym):
                        t = tokens[sym]
                        assert isinstance(t, TokenDef__Str)
                        to_write.append(t.value)
                    else:
                        x = next(args2)
                        if isinstance(x, list):
                            to_write += x
                        else:
                            if isinstance(x, Token):
                                assert x.type == sym, x
                            else:
                                assert x.data == sym, x
                            to_write.append(x)

                assert is_iter_empty(args2)
                return to_write

        # Collect every expansion per rule name; aliased expansions become
        # rules of their own, referenced from the original rule.
        d = defaultdict(list)
        for name, expansions in parser.rules.items():
            for expansion, alias in expansions:
                if alias:
                    d[alias].append(expansion)
                    d[name].append([alias])
                else:
                    d[name].append(expansion)

        rules = []
        expand1s = {name.lstrip('!').lstrip('?') for name in d
                    if name.startswith(('?', '!?'))}    # XXX Ugly code

        for name, expansions in d.items():
            for expansion in expansions:
                reduced = [sym if sym.startswith('_') or sym in expand1s else
                           (sym, MatchTerminal(sym) if is_terminal(sym) else MatchTree(sym))
                           for sym in expansion if not is_discarded_terminal(sym)]

                name = name.lstrip('!').lstrip('?')
                rules.append({'name': name,
                              'symbols': reduced,
                              'postprocess': WriteTokens(name, expansion).f})

        self.rules = rules

    def _reconstruct(self, tree):
        # Match the tree's children against the collected rules with an Earley
        # parser, then recursively flatten the result into a stream of strings.
        parser = earley.Parser(self.rules, tree.data)
        res ,= parser.parse(tree.children)    # XXX ambiguity?
        for item in res:
            if isinstance(item, Tree):
                for x in self._reconstruct(item):
                    yield x
            else:
                yield item

    def reconstruct(self, tree):
        return ''.join(self._reconstruct(tree))
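For orientation, a minimal usage sketch follows. It is not part of this file: it assumes the package is importable as lark, that Lark accepts a grammar string as in released versions, and that Reconstructor(parser).reconstruct(tree) is the intended entry point, as the class above suggests. The grammar and input are made up, and the exact grammar syntax accepted by this snapshot may differ slightly.

# Illustrative sketch only; names and grammar below are assumptions, not
# taken from this repository.
from lark import Lark
from lark.reconstruct import Reconstructor

grammar = r"""
    start: pair+
    pair: NAME "=" NUMBER ";"
    NAME: /[a-z]+/
    NUMBER: /[0-9]+/
    %ignore " "
"""

parser = Lark(grammar)
tree = parser.parse("a = 1; b = 2;")

# Reconstructor inverts the parser's rules and re-emits the tree as text;
# ignored whitespace is not stored in the tree, so the output should re-parse
# to an equivalent tree rather than match the input character for character.
text = Reconstructor(parser).reconstruct(tree)
print(text)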