This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
Ви не можете вибрати більше 25 тем. Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

150 рядків
4.6 KiB

  1. from ..utils import bfs, fzset, classify
  2. from ..common import GrammarError
  3. from ..grammar import Rule, Terminal, NonTerminal
  4. class RulePtr(object):
  5. __slots__ = ('rule', 'index')
  6. def __init__(self, rule, index):
  7. assert isinstance(rule, Rule)
  8. assert index <= len(rule.expansion)
  9. self.rule = rule
  10. self.index = index
  11. def __repr__(self):
  12. before = self.rule.expansion[:self.index]
  13. after = self.rule.expansion[self.index:]
  14. return '<%s : %s * %s>' % (self.rule.origin, ' '.join(before), ' '.join(after))
  15. @property
  16. def next(self):
  17. return self.rule.expansion[self.index]
  18. def advance(self, sym):
  19. assert self.next == sym
  20. return RulePtr(self.rule, self.index+1)
  21. @property
  22. def is_satisfied(self):
  23. return self.index == len(self.rule.expansion)
  24. def __eq__(self, other):
  25. return self.rule == other.rule and self.index == other.index
  26. def __hash__(self):
  27. return hash((self.rule, self.index))
  28. def update_set(set1, set2):
  29. if not set2:
  30. return False
  31. copy = set(set1)
  32. set1 |= set2
  33. return set1 != copy
  34. def calculate_sets(rules):
  35. """Calculate FOLLOW sets.
  36. Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
  37. symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}
  38. # foreach grammar rule X ::= Y(1) ... Y(k)
  39. # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
  40. # NULLABLE = NULLABLE union {X}
  41. # for i = 1 to k
  42. # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
  43. # FIRST(X) = FIRST(X) union FIRST(Y(i))
  44. # for j = i+1 to k
  45. # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
  46. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
  47. # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
  48. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
  49. # until none of NULLABLE,FIRST,FOLLOW changed in last iteration
  50. NULLABLE = set()
  51. FIRST = {}
  52. FOLLOW = {}
  53. for sym in symbols:
  54. FIRST[sym]={sym} if sym.is_term else set()
  55. FOLLOW[sym]=set()
  56. # Calculate NULLABLE and FIRST
  57. changed = True
  58. while changed:
  59. changed = False
  60. for rule in rules:
  61. if set(rule.expansion) <= NULLABLE:
  62. if update_set(NULLABLE, {rule.origin}):
  63. changed = True
  64. for i, sym in enumerate(rule.expansion):
  65. if set(rule.expansion[:i]) <= NULLABLE:
  66. if update_set(FIRST[rule.origin], FIRST[sym]):
  67. changed = True
  68. # Calculate FOLLOW
  69. changed = True
  70. while changed:
  71. changed = False
  72. for rule in rules:
  73. for i, sym in enumerate(rule.expansion):
  74. if i==len(rule.expansion)-1 or set(rule.expansion[i:]) <= NULLABLE:
  75. if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
  76. changed = True
  77. for j in range(i+1, len(rule.expansion)):
  78. if set(rule.expansion[i+1:j]) <= NULLABLE:
  79. if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
  80. changed = True
  81. return FIRST, FOLLOW, NULLABLE
  82. class GrammarAnalyzer(object):
  83. def __init__(self, parser_conf, debug=False):
  84. self.debug = debug
  85. rules = parser_conf.rules + [Rule(NonTerminal('$root'), [NonTerminal(parser_conf.start), Terminal('$END')])]
  86. self.rules_by_origin = classify(rules, lambda r: r.origin)
  87. assert len(rules) == len(set(rules))
  88. for r in rules:
  89. for sym in r.expansion:
  90. if not (sym.is_term or sym in self.rules_by_origin):
  91. raise GrammarError("Using an undefined rule: %s" % sym) # TODO test validation
  92. self.start_state = self.expand_rule(NonTerminal('$root'))
  93. self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)
  94. def expand_rule(self, rule):
  95. "Returns all init_ptrs accessible by rule (recursive)"
  96. init_ptrs = set()
  97. def _expand_rule(rule):
  98. assert not rule.is_term, rule
  99. for r in self.rules_by_origin[rule]:
  100. init_ptr = RulePtr(r, 0)
  101. init_ptrs.add(init_ptr)
  102. if r.expansion: # if not empty rule
  103. new_r = init_ptr.next
  104. if not new_r.is_term:
  105. yield new_r
  106. for _ in bfs([rule], _expand_rule):
  107. pass
  108. return fzset(init_ptrs)
  109. def _first(self, r):
  110. if r.is_term:
  111. return {r}
  112. else:
  113. return {rp.next for rp in self.expand_rule(r) if rp.next.is_term}