This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

213 lines
7.0 KiB

  1. import logging
  2. from collections import defaultdict, deque
  3. from ..utils import classify, classify_bool, bfs, fzset
  4. from ..common import GrammarError, is_terminal
# Action tag stored in the indexed state table (GrammarAnalyzer.states_idx)
# for shift transitions; reduce entries keep the string tag 'reduce'.
ACTION_SHIFT = 0
  6. class Rule(object):
  7. """
  8. origin : a symbol
  9. expansion : a list of symbols
  10. """
  11. def __init__(self, origin, expansion, alias=None):
  12. self.origin = origin
  13. self.expansion = expansion
  14. self.alias = alias
  15. def __repr__(self):
  16. return '<%s : %s>' % (self.origin, ' '.join(self.expansion))
  17. class RulePtr(object):
  18. def __init__(self, rule, index):
  19. assert isinstance(rule, Rule)
  20. assert index <= len(rule.expansion)
  21. self.rule = rule
  22. self.index = index
  23. def __repr__(self):
  24. before = self.rule.expansion[:self.index]
  25. after = self.rule.expansion[self.index:]
  26. return '<%s : %s * %s>' % (self.rule.origin, ' '.join(before), ' '.join(after))
  27. @property
  28. def next(self):
  29. return self.rule.expansion[self.index]
  30. def advance(self, sym):
  31. assert self.next == sym
  32. return RulePtr(self.rule, self.index+1)
  33. @property
  34. def is_satisfied(self):
  35. return self.index == len(self.rule.expansion)
  36. def __eq__(self, other):
  37. return self.rule == other.rule and self.index == other.index
  38. def __hash__(self):
  39. return hash((self.rule, self.index))
  40. def pairs(lst):
  41. return zip(lst[:-1], lst[1:])
  42. def update_set(set1, set2):
  43. copy = set(set1)
  44. set1 |= set2
  45. return set1 != copy
  46. def calculate_sets(rules):
  47. """Calculate FOLLOW sets.
  48. Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
  49. symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}
  50. symbols.add('$root') # what about other unused rules?
  51. # foreach grammar rule X ::= Y(1) ... Y(k)
  52. # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
  53. # NULLABLE = NULLABLE union {X}
  54. # for i = 1 to k
  55. # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
  56. # FIRST(X) = FIRST(X) union FIRST(Y(i))
  57. # for j = i+1 to k
  58. # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
  59. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
  60. # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
  61. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
  62. # until none of NULLABLE,FIRST,FOLLOW changed in last iteration
  63. NULLABLE = set()
  64. FIRST = {}
  65. FOLLOW = {}
  66. for sym in symbols:
  67. FIRST[sym]={sym} if is_terminal(sym) else set()
  68. FOLLOW[sym]=set()
  69. changed = True
  70. while changed:
  71. changed = False
  72. for rule in rules:
  73. if set(rule.expansion) <= NULLABLE:
  74. if update_set(NULLABLE, {rule.origin}):
  75. changed = True
  76. for i, sym in enumerate(rule.expansion):
  77. if set(rule.expansion[:i]) <= NULLABLE:
  78. if update_set(FIRST[rule.origin], FIRST[sym]):
  79. changed = True
  80. if i==len(rule.expansion)-1 or set(rule.expansion[i:]) <= NULLABLE:
  81. if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
  82. changed = True
  83. for j in range(i+1, len(rule.expansion)):
  84. if set(rule.expansion[i+1:j]) <= NULLABLE:
  85. if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
  86. changed = True
  87. return FIRST, FOLLOW, NULLABLE
  88. class GrammarAnalyzer(object):
  89. def __init__(self, rule_tuples, start_symbol, debug=False):
  90. self.start_symbol = start_symbol
  91. self.debug = debug
  92. rule_tuples = list(rule_tuples)
  93. rule_tuples.append(('$root', [start_symbol, '$end']))
  94. rule_tuples = [(t[0], t[1], None) if len(t)==2 else t for t in rule_tuples]
  95. self.rules = set()
  96. self.rules_by_origin = {o: [] for o, _x, _a in rule_tuples}
  97. for origin, exp, alias in rule_tuples:
  98. r = Rule( origin, exp, alias )
  99. self.rules.add(r)
  100. self.rules_by_origin[origin].append(r)
  101. for r in self.rules:
  102. for sym in r.expansion:
  103. if not (is_terminal(sym) or sym in self.rules_by_origin):
  104. raise GrammarError("Using an undefined rule: %s" % sym)
  105. self.init_state = self.expand_rule(start_symbol)
  106. self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(self.rules)
  107. def expand_rule(self, rule):
  108. "Returns all init_ptrs accessible by rule (recursive)"
  109. init_ptrs = set()
  110. def _expand_rule(rule):
  111. assert not is_terminal(rule)
  112. for r in self.rules_by_origin[rule]:
  113. init_ptr = RulePtr(r, 0)
  114. init_ptrs.add(init_ptr)
  115. if r.expansion: # if not empty rule
  116. new_r = init_ptr.next
  117. if not is_terminal(new_r):
  118. yield new_r
  119. _ = list(bfs([rule], _expand_rule))
  120. return fzset(init_ptrs)
  121. def _first(self, r):
  122. if is_terminal(r):
  123. return {r}
  124. else:
  125. return {rp.next for rp in self.expand_rule(r) if is_terminal(rp.next)}
  126. def analyze(self):
  127. self.states = {}
  128. def step(state):
  129. lookahead = defaultdict(list)
  130. sat, unsat = classify_bool(state, lambda rp: rp.is_satisfied)
  131. for rp in sat:
  132. for term in self.FOLLOW.get(rp.rule.origin, ()):
  133. lookahead[term].append(('reduce', rp.rule))
  134. d = classify(unsat, lambda rp: rp.next)
  135. for sym, rps in d.items():
  136. rps = {rp.advance(sym) for rp in rps}
  137. for rp in set(rps):
  138. if not rp.is_satisfied and not is_terminal(rp.next):
  139. rps |= self.expand_rule(rp.next)
  140. lookahead[sym].append(('shift', fzset(rps)))
  141. yield fzset(rps)
  142. for k, v in lookahead.items():
  143. if len(v) > 1:
  144. if self.debug:
  145. logging.warn("Shift/reduce conflict for %s: %s. Resolving as shift.", k, v)
  146. for x in v:
  147. # XXX resolving shift/reduce into shift, like PLY
  148. # Give a proper warning
  149. if x[0] == 'shift':
  150. lookahead[k] = [x]
  151. for k, v in lookahead.items():
  152. assert len(v) == 1, ("Collision", k, v)
  153. self.states[state] = {k:v[0] for k, v in lookahead.items()}
  154. for _ in bfs([self.init_state], step):
  155. pass
  156. # --
  157. self.enum = list(self.states)
  158. self.enum_rev = {s:i for i,s in enumerate(self.enum)}
  159. self.states_idx = {}
  160. for s, la in self.states.items():
  161. la = {k:(ACTION_SHIFT, self.enum_rev[v[1]]) if v[0]=='shift'
  162. else (v[0], (v[1], len(v[1].expansion))) # Reduce
  163. for k,v in la.items()}
  164. self.states_idx[ self.enum_rev[s] ] = la
  165. self.init_state_idx = self.enum_rev[self.init_state]