This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
Вы не можете выбрать более 25 тем. Темы должны начинаться с буквы или цифры, могут содержать дефисы (-) и должны содержать не более 35 символов.

157 строк
5.0 KiB

  1. from ..utils import bfs, fzset
  2. from ..common import GrammarError, is_terminal
  3. class Rule(object):
  4. """
  5. origin : a symbol
  6. expansion : a list of symbols
  7. """
  8. def __init__(self, origin, expansion, alias=None):
  9. self.origin = origin
  10. self.expansion = expansion
  11. self.alias = alias
  12. def __repr__(self):
  13. return '<%s : %s>' % (self.origin, ' '.join(map(unicode,self.expansion)))
  14. class RulePtr(object):
  15. def __init__(self, rule, index):
  16. assert isinstance(rule, Rule)
  17. assert index <= len(rule.expansion)
  18. self.rule = rule
  19. self.index = index
  20. def __repr__(self):
  21. before = self.rule.expansion[:self.index]
  22. after = self.rule.expansion[self.index:]
  23. return '<%s : %s * %s>' % (self.rule.origin, ' '.join(before), ' '.join(after))
  24. @property
  25. def next(self):
  26. return self.rule.expansion[self.index]
  27. def advance(self, sym):
  28. assert self.next == sym
  29. return RulePtr(self.rule, self.index+1)
  30. @property
  31. def is_satisfied(self):
  32. return self.index == len(self.rule.expansion)
  33. def __eq__(self, other):
  34. return self.rule == other.rule and self.index == other.index
  35. def __hash__(self):
  36. return hash((self.rule, self.index))
  37. def pairs(lst):
  38. return zip(lst[:-1], lst[1:])
  39. def update_set(set1, set2):
  40. copy = set(set1)
  41. set1 |= set2
  42. return set1 != copy
  43. def calculate_sets(rules):
  44. """Calculate FOLLOW sets.
  45. Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
  46. symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}
  47. symbols.add('$root') # what about other unused rules?
  48. # foreach grammar rule X ::= Y(1) ... Y(k)
  49. # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
  50. # NULLABLE = NULLABLE union {X}
  51. # for i = 1 to k
  52. # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
  53. # FIRST(X) = FIRST(X) union FIRST(Y(i))
  54. # for j = i+1 to k
  55. # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
  56. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
  57. # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
  58. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
  59. # until none of NULLABLE,FIRST,FOLLOW changed in last iteration
  60. NULLABLE = set()
  61. FIRST = {}
  62. FOLLOW = {}
  63. for sym in symbols:
  64. FIRST[sym]={sym} if is_terminal(sym) else set()
  65. FOLLOW[sym]=set()
  66. changed = True
  67. while changed:
  68. changed = False
  69. for rule in rules:
  70. if set(rule.expansion) <= NULLABLE:
  71. if update_set(NULLABLE, {rule.origin}):
  72. changed = True
  73. for i, sym in enumerate(rule.expansion):
  74. if set(rule.expansion[:i]) <= NULLABLE:
  75. if update_set(FIRST[rule.origin], FIRST[sym]):
  76. changed = True
  77. if i==len(rule.expansion)-1 or set(rule.expansion[i:]) <= NULLABLE:
  78. if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
  79. changed = True
  80. for j in range(i+1, len(rule.expansion)):
  81. if set(rule.expansion[i+1:j]) <= NULLABLE:
  82. if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
  83. changed = True
  84. return FIRST, FOLLOW, NULLABLE
  85. class GrammarAnalyzer(object):
  86. def __init__(self, rule_tuples, start_symbol, debug=False):
  87. self.start_symbol = start_symbol
  88. self.debug = debug
  89. rule_tuples = list(rule_tuples)
  90. rule_tuples.append(('$root', [start_symbol, '$end']))
  91. rule_tuples = [(t[0], t[1], None) if len(t)==2 else t for t in rule_tuples]
  92. self.rules = set()
  93. self.rules_by_origin = {o: [] for o, _x, _a in rule_tuples}
  94. for origin, exp, alias in rule_tuples:
  95. r = Rule( origin, exp, alias )
  96. self.rules.add(r)
  97. self.rules_by_origin[origin].append(r)
  98. for r in self.rules:
  99. for sym in r.expansion:
  100. if not (is_terminal(sym) or sym in self.rules_by_origin):
  101. raise GrammarError("Using an undefined rule: %s" % sym)
  102. self.init_state = self.expand_rule(start_symbol)
  103. self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(self.rules)
  104. def expand_rule(self, rule):
  105. "Returns all init_ptrs accessible by rule (recursive)"
  106. init_ptrs = set()
  107. def _expand_rule(rule):
  108. assert not is_terminal(rule), rule
  109. for r in self.rules_by_origin[rule]:
  110. init_ptr = RulePtr(r, 0)
  111. init_ptrs.add(init_ptr)
  112. if r.expansion: # if not empty rule
  113. new_r = init_ptr.next
  114. if not is_terminal(new_r):
  115. yield new_r
  116. _ = list(bfs([rule], _expand_rule))
  117. return fzset(init_ptrs)
  118. def _first(self, r):
  119. if is_terminal(r):
  120. return {r}
  121. else:
  122. return {rp.next for rp in self.expand_rule(r) if is_terminal(rp.next)}