This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
Nevar pievienot vairāk kā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

208 rindas
6.8 KiB

  1. from collections import defaultdict, deque
  2. from utils import classify, classify_bool, bfs, fzset
# Action tag placed in the indexed state table for 'shift' transitions.
ACTION_SHIFT = 0
  4. class GrammarError(Exception):
  5. pass
  6. def is_terminal(sym):
  7. return sym.isupper() or sym[0] == '$'
  8. class Rule(object):
  9. """
  10. origin : a symbol
  11. expansion : a list of symbols
  12. """
  13. def __init__(self, origin, expansion, alias=None):
  14. assert expansion, "No support for empty rules"
  15. self.origin = origin
  16. self.expansion = expansion
  17. self.alias = alias
  18. def __repr__(self):
  19. return '<%s : %s>' % (self.origin, ' '.join(self.expansion))
  20. class RulePtr(object):
  21. def __init__(self, rule, index):
  22. assert isinstance(rule, Rule)
  23. assert index <= len(rule.expansion)
  24. self.rule = rule
  25. self.index = index
  26. def __repr__(self):
  27. before = self.rule.expansion[:self.index]
  28. after = self.rule.expansion[self.index:]
  29. return '<%s : %s * %s>' % (self.rule.origin, ' '.join(before), ' '.join(after))
  30. @property
  31. def next(self):
  32. return self.rule.expansion[self.index]
  33. def advance(self, sym):
  34. assert self.next == sym
  35. return RulePtr(self.rule, self.index+1)
  36. @property
  37. def is_satisfied(self):
  38. return self.index == len(self.rule.expansion)
  39. def __eq__(self, other):
  40. return self.rule == other.rule and self.index == other.index
  41. def __hash__(self):
  42. return hash((self.rule, self.index))
  43. def pairs(lst):
  44. return zip(lst[:-1], lst[1:])
  45. def update_set(set1, set2):
  46. copy = set(set1)
  47. set1 |= set2
  48. return set1 != copy
  49. class GrammarAnalyzer(object):
  50. def __init__(self, rule_tuples):
  51. rule_tuples = list(rule_tuples)
  52. rule_tuples.append(('$root', ['start', '$end']))
  53. rule_tuples = [(t[0], t[1], None) if len(t)==2 else t for t in rule_tuples]
  54. self.rules = set()
  55. self.rules_by_origin = {o: [] for o, _x, _a in rule_tuples}
  56. for origin, exp, alias in rule_tuples:
  57. r = Rule( origin, exp, alias )
  58. self.rules.add(r)
  59. self.rules_by_origin[origin].append(r)
  60. for r in self.rules:
  61. for sym in r.expansion:
  62. if not (is_terminal(sym) or sym in self.rules_by_origin):
  63. raise GrammarError("Using an undefined rule: %s" % sym)
  64. self.init_state = self.expand_rule('start')
  65. def expand_rule(self, rule):
  66. "Returns all init_ptrs accessible by rule (recursive)"
  67. init_ptrs = set()
  68. def _expand_rule(rule):
  69. assert not is_terminal(rule)
  70. for r in self.rules_by_origin[rule]:
  71. init_ptr = RulePtr(r, 0)
  72. init_ptrs.add(init_ptr)
  73. new_r = init_ptr.next
  74. if not is_terminal(new_r):
  75. yield new_r
  76. _ = list(bfs([rule], _expand_rule))
  77. return fzset(init_ptrs)
  78. def _first(self, r):
  79. if is_terminal(r):
  80. return {r}
  81. else:
  82. return {rp.next for rp in self.expand_rule(r) if is_terminal(rp.next)}
  83. def _calc(self):
  84. """Calculate FOLLOW sets.
  85. Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
  86. symbols = {sym for rule in self.rules for sym in rule.expansion}
  87. symbols.add('$root') # what about other unused rules?
  88. # foreach grammar rule X ::= Y(1) ... Y(k)
  89. # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
  90. # NULLABLE = NULLABLE union {X}
  91. # for i = 1 to k
  92. # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
  93. # FIRST(X) = FIRST(X) union FIRST(Y(i))
  94. # for j = i+1 to k
  95. # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
  96. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
  97. # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
  98. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
  99. # until none of NULLABLE,FIRST,FOLLOW changed in last iteration
  100. NULLABLE = set()
  101. FIRST = {}
  102. FOLLOW = {}
  103. for sym in symbols:
  104. FIRST[sym]={sym} if is_terminal(sym) else set()
  105. FOLLOW[sym]=set()
  106. changed = True
  107. while changed:
  108. changed = False
  109. for rule in self.rules:
  110. if set(rule.expansion) <= NULLABLE:
  111. if update_set(NULLABLE, {rule.origin}):
  112. changed = True
  113. for i, sym in enumerate(rule.expansion):
  114. if set(rule.expansion[:i]) <= NULLABLE:
  115. if update_set(FIRST[rule.origin], FIRST[sym]):
  116. changed = True
  117. if i==len(rule.expansion)-1 or set(rule.expansion[i:]) <= NULLABLE:
  118. if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
  119. changed = True
  120. for j in range(i+1, len(rule.expansion)):
  121. if set(rule.expansion[i+1:j]) <= NULLABLE:
  122. if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
  123. changed = True
  124. self.FOLLOW = FOLLOW
  125. def analyze(self):
  126. self._calc()
  127. self.states = {}
  128. def step(state):
  129. lookahead = defaultdict(list)
  130. sat, unsat = classify_bool(state, lambda rp: rp.is_satisfied)
  131. for rp in sat:
  132. for term in self.FOLLOW.get(rp.rule.origin, ()):
  133. lookahead[term].append(('reduce', rp.rule))
  134. d = classify(unsat, lambda rp: rp.next)
  135. for sym, rps in d.items():
  136. rps = {rp.advance(sym) for rp in rps}
  137. for rp in set(rps):
  138. if not rp.is_satisfied and not is_terminal(rp.next):
  139. rps |= self.expand_rule(rp.next)
  140. lookahead[sym].append(('shift', fzset(rps)))
  141. yield fzset(rps)
  142. for k, v in lookahead.items():
  143. if len(v) > 1:
  144. for x in v:
  145. # XXX resolving shift/reduce into shift, like PLY
  146. # Give a proper warning
  147. if x[0] == 'shift':
  148. lookahead[k] = [x]
  149. for k, v in lookahead.items():
  150. assert len(v) == 1, ("Collision", k, v)
  151. self.states[state] = {k:v[0] for k, v in lookahead.items()}
  152. x = list(bfs([self.init_state], step))
  153. # --
  154. self.enum = list(self.states)
  155. self.enum_rev = {s:i for i,s in enumerate(self.enum)}
  156. self.states_idx = {}
  157. for s, la in self.states.items():
  158. la = {k:(ACTION_SHIFT, self.enum_rev[v[1]]) if v[0]=='shift' else v for k,v in la.items()}
  159. self.states_idx[ self.enum_rev[s] ] = la
  160. self.init_state_idx = self.enum_rev[self.init_state]