This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

205 rader
6.8 KiB

  1. from collections import defaultdict, deque
  2. from ..utils import classify, classify_bool, bfs, fzset
  3. from ..common import GrammarError, is_terminal
# Tag placed first in the (action, target_state_index) tuples that
# GrammarAnalyzer.analyze() stores in `states_idx` for shift entries.
ACTION_SHIFT = 0
  5. class Rule(object):
  6. """
  7. origin : a symbol
  8. expansion : a list of symbols
  9. """
  10. def __init__(self, origin, expansion, alias=None):
  11. self.origin = origin
  12. self.expansion = expansion
  13. self.alias = alias
  14. def __repr__(self):
  15. return '<%s : %s>' % (self.origin, ' '.join(self.expansion))
  16. class RulePtr(object):
  17. def __init__(self, rule, index):
  18. assert isinstance(rule, Rule)
  19. assert index <= len(rule.expansion)
  20. self.rule = rule
  21. self.index = index
  22. def __repr__(self):
  23. before = self.rule.expansion[:self.index]
  24. after = self.rule.expansion[self.index:]
  25. return '<%s : %s * %s>' % (self.rule.origin, ' '.join(before), ' '.join(after))
  26. @property
  27. def next(self):
  28. return self.rule.expansion[self.index]
  29. def advance(self, sym):
  30. assert self.next == sym
  31. return RulePtr(self.rule, self.index+1)
  32. @property
  33. def is_satisfied(self):
  34. return self.index == len(self.rule.expansion)
  35. def __eq__(self, other):
  36. return self.rule == other.rule and self.index == other.index
  37. def __hash__(self):
  38. return hash((self.rule, self.index))
  39. def pairs(lst):
  40. return zip(lst[:-1], lst[1:])
  41. def update_set(set1, set2):
  42. copy = set(set1)
  43. set1 |= set2
  44. return set1 != copy
  45. class GrammarAnalyzer(object):
  46. def __init__(self, rule_tuples, start_symbol):
  47. self.start_symbol = start_symbol
  48. rule_tuples = list(rule_tuples)
  49. rule_tuples.append(('$root', [start_symbol, '$end']))
  50. rule_tuples = [(t[0], t[1], None) if len(t)==2 else t for t in rule_tuples]
  51. self.rules = set()
  52. self.rules_by_origin = {o: [] for o, _x, _a in rule_tuples}
  53. for origin, exp, alias in rule_tuples:
  54. r = Rule( origin, exp, alias )
  55. self.rules.add(r)
  56. self.rules_by_origin[origin].append(r)
  57. for r in self.rules:
  58. for sym in r.expansion:
  59. if not (is_terminal(sym) or sym in self.rules_by_origin):
  60. raise GrammarError("Using an undefined rule: %s" % sym)
  61. self.init_state = self.expand_rule(start_symbol)
  62. def expand_rule(self, rule):
  63. "Returns all init_ptrs accessible by rule (recursive)"
  64. init_ptrs = set()
  65. def _expand_rule(rule):
  66. assert not is_terminal(rule)
  67. for r in self.rules_by_origin[rule]:
  68. init_ptr = RulePtr(r, 0)
  69. init_ptrs.add(init_ptr)
  70. if r.expansion: # if not empty rule
  71. new_r = init_ptr.next
  72. if not is_terminal(new_r):
  73. yield new_r
  74. _ = list(bfs([rule], _expand_rule))
  75. return fzset(init_ptrs)
  76. def _first(self, r):
  77. if is_terminal(r):
  78. return {r}
  79. else:
  80. return {rp.next for rp in self.expand_rule(r) if is_terminal(rp.next)}
  81. def _calc(self):
  82. """Calculate FOLLOW sets.
  83. Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
  84. symbols = {sym for rule in self.rules for sym in rule.expansion} | {rule.origin for rule in self.rules}
  85. symbols.add('$root') # what about other unused rules?
  86. # foreach grammar rule X ::= Y(1) ... Y(k)
  87. # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
  88. # NULLABLE = NULLABLE union {X}
  89. # for i = 1 to k
  90. # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
  91. # FIRST(X) = FIRST(X) union FIRST(Y(i))
  92. # for j = i+1 to k
  93. # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
  94. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
  95. # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
  96. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
  97. # until none of NULLABLE,FIRST,FOLLOW changed in last iteration
  98. NULLABLE = set()
  99. FIRST = {}
  100. FOLLOW = {}
  101. for sym in symbols:
  102. FIRST[sym]={sym} if is_terminal(sym) else set()
  103. FOLLOW[sym]=set()
  104. changed = True
  105. while changed:
  106. changed = False
  107. for rule in self.rules:
  108. if set(rule.expansion) <= NULLABLE:
  109. if update_set(NULLABLE, {rule.origin}):
  110. changed = True
  111. for i, sym in enumerate(rule.expansion):
  112. if set(rule.expansion[:i]) <= NULLABLE:
  113. if update_set(FIRST[rule.origin], FIRST[sym]):
  114. changed = True
  115. if i==len(rule.expansion)-1 or set(rule.expansion[i:]) <= NULLABLE:
  116. if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
  117. changed = True
  118. for j in range(i+1, len(rule.expansion)):
  119. if set(rule.expansion[i+1:j]) <= NULLABLE:
  120. if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
  121. changed = True
  122. self.FOLLOW = FOLLOW
  123. def analyze(self):
  124. self._calc()
  125. self.states = {}
  126. def step(state):
  127. lookahead = defaultdict(list)
  128. sat, unsat = classify_bool(state, lambda rp: rp.is_satisfied)
  129. for rp in sat:
  130. for term in self.FOLLOW.get(rp.rule.origin, ()):
  131. lookahead[term].append(('reduce', rp.rule))
  132. d = classify(unsat, lambda rp: rp.next)
  133. for sym, rps in d.items():
  134. rps = {rp.advance(sym) for rp in rps}
  135. for rp in set(rps):
  136. if not rp.is_satisfied and not is_terminal(rp.next):
  137. rps |= self.expand_rule(rp.next)
  138. lookahead[sym].append(('shift', fzset(rps)))
  139. yield fzset(rps)
  140. for k, v in lookahead.items():
  141. if len(v) > 1:
  142. for x in v:
  143. # XXX resolving shift/reduce into shift, like PLY
  144. # Give a proper warning
  145. if x[0] == 'shift':
  146. lookahead[k] = [x]
  147. for k, v in lookahead.items():
  148. assert len(v) == 1, ("Collision", k, v)
  149. self.states[state] = {k:v[0] for k, v in lookahead.items()}
  150. x = list(bfs([self.init_state], step))
  151. # --
  152. self.enum = list(self.states)
  153. self.enum_rev = {s:i for i,s in enumerate(self.enum)}
  154. self.states_idx = {}
  155. for s, la in self.states.items():
  156. la = {k:(ACTION_SHIFT, self.enum_rev[v[1]]) if v[0]=='shift' else v for k,v in la.items()}
  157. self.states_idx[ self.enum_rev[s] ] = la
  158. self.init_state_idx = self.enum_rev[self.init_state]