This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

242 lines
8.3 KiB

  1. """This module builds a LALR(1) transition-table for lalr_parser.py
  2. For now, shift/reduce conflicts are automatically resolved as shifts.
  3. """
  4. # Author: Erez Shinan (2017)
  5. # Email : erezshin@gmail.com
  6. import logging
  7. from collections import defaultdict
  8. from ..utils import classify, classify_bool, bfs, fzset, Serialize, Enumerator
  9. from ..exceptions import GrammarError
  10. from .grammar_analysis import GrammarAnalyzer, Terminal, RulePtr, LR0ItemSet
  11. from ..grammar import Rule
  12. ###{standalone
  13. class Action:
  14. def __init__(self, name):
  15. self.name = name
  16. def __str__(self):
  17. return self.name
  18. def __repr__(self):
  19. return str(self)
# The two possible table actions. The rest of the module compares these by
# identity (``action is Reduce``), so exactly one instance of each must exist.
Shift = Action('Shift')
Reduce = Action('Reduce')
  22. class ParseTable:
  23. def __init__(self, states, start_states, end_states):
  24. self.states = states
  25. self.start_states = start_states
  26. self.end_states = end_states
  27. def serialize(self, memo):
  28. tokens = Enumerator()
  29. rules = Enumerator()
  30. states = {
  31. state: {tokens.get(token): ((1, arg.serialize(memo)) if action is Reduce else (0, arg))
  32. for token, (action, arg) in actions.items()}
  33. for state, actions in self.states.items()
  34. }
  35. return {
  36. 'tokens': tokens.reversed(),
  37. 'states': states,
  38. 'start_states': self.start_states,
  39. 'end_states': self.end_states,
  40. }
  41. @classmethod
  42. def deserialize(cls, data, memo):
  43. tokens = data['tokens']
  44. states = {
  45. state: {tokens[token]: ((Reduce, Rule.deserialize(arg, memo)) if action==1 else (Shift, arg))
  46. for token, (action, arg) in actions.items()}
  47. for state, actions in data['states'].items()
  48. }
  49. return cls(states, data['start_states'], data['end_states'])
  50. class IntParseTable(ParseTable):
  51. @classmethod
  52. def from_ParseTable(cls, parse_table):
  53. enum = list(parse_table.states)
  54. state_to_idx = {s:i for i,s in enumerate(enum)}
  55. int_states = {}
  56. for s, la in parse_table.states.items():
  57. la = {k:(v[0], state_to_idx[v[1]]) if v[0] is Shift else v
  58. for k,v in la.items()}
  59. int_states[ state_to_idx[s] ] = la
  60. start_states = {start:state_to_idx[s] for start, s in parse_table.start_states.items()}
  61. end_states = {start:state_to_idx[s] for start, s in parse_table.end_states.items()}
  62. return cls(int_states, start_states, end_states)
  63. ###}
class LALR_Analyzer(GrammarAnalyzer):
    """Builds the LALR(1) automaton in stages: LR(0) states, then lookahead
    discovery and propagation, then the final LALR(1) action table."""

    def generate_lr0_states(self):
        """Build the set of LR(0) item-sets reachable from the start states.

        Populates ``self.states`` and each state's ``transitions`` map.
        """
        self.states = set()
        def step(state):
            # Only unsatisfied items (dot not at the end) can be advanced.
            _, unsat = classify_bool(state.closure, lambda rp: rp.is_satisfied)
            # Group the advanceable items by the symbol after the dot.
            d = classify(unsat, lambda rp: rp.next)
            for sym, rps in d.items():
                kernel = {rp.advance(sym) for rp in rps}
                closure = set(kernel)
                # Expand non-terminals after the dot into the closure.
                for rp in kernel:
                    if not rp.is_satisfied and not rp.next.is_term:
                        closure |= self.expand_rule(rp.next, self.lr0_rules_by_origin)
                new_state = LR0ItemSet(kernel, closure)
                state.transitions[sym] = new_state
                yield new_state
            self.states.add(state)
        # bfs deduplicates states, so each item-set is expanded only once.
        for _ in bfs(self.lr0_start_states.values(), step):
            pass

    def discover_lookaheads(self):
        """Seed spontaneous lookaheads and record propagation links
        (the discovery phase of the DeRemer/Pennello-style algorithm)."""
        # state -> rule -> set of lookaheads
        self.lookaheads = defaultdict(lambda: defaultdict(set))
        # state -> rule -> list of (set of lookaheads) to propagate to
        self.propagates = defaultdict(lambda: defaultdict(list))
        # Every start kernel item can be followed by end-of-input.
        for s in self.lr0_start_states.values():
            for rp in s.kernel:
                self.lookaheads[s][rp].add(Terminal('$END'))
        # There is a 1 to 1 correspondence between LR0 and LALR1 states.
        # We calculate the lookaheads for LALR1 kernel items from the LR0 kernel items.
        # use a terminal that does not exist in the grammar
        t = Terminal('$#')
        for s in self.states:
            for rp in s.kernel:
                # Probe the LR(1) closure with the dummy lookahead ``t``:
                # items that come out with ``t`` depend on the kernel item's
                # own lookaheads (propagation); any other lookahead was
                # generated spontaneously.
                for rp2, la in self.generate_lr1_closure([(rp, t)]):
                    if rp2.is_satisfied:
                        continue
                    next_symbol = rp2.next
                    next_state = s.transitions[next_symbol]
                    rp3 = rp2.advance(next_symbol)
                    assert(rp3 in next_state.kernel)
                    x = self.lookaheads[next_state][rp3]
                    if la == t:
                        # we must propagate rp's lookaheads to rp3's lookahead set
                        self.propagates[s][rp].append(x)
                    else:
                        # this lookahead is "generated spontaneously" for rp3
                        x.add(la)

    def propagate_lookaheads(self):
        """Iterate lookahead propagation to a fixed point: repeatedly union
        each kernel item's lookaheads into its recorded targets until no
        set grows anymore."""
        changed = True
        while changed:
            changed = False
            for s in self.states:
                for rp in s.kernel:
                    # from (from is a keyword)
                    f = self.lookaheads[s][rp]
                    # to
                    t = self.propagates[s][rp]
                    for x in t:
                        old = len(x)
                        x |= f
                        changed = changed or (len(x) != old)

    def generate_lalr1_states(self):
        """Produce the final parse table from the LR(0) states plus the
        computed lookaheads; resolves shift/reduce conflicts as shifts and
        raises GrammarError on reduce/reduce collisions."""
        # 1 to 1 correspondence between LR0 and LALR1 states
        # We must fetch the lookaheads we calculated,
        # to create the LALR1 kernels from the LR0 kernels.
        # Then, we generate the LALR1 states by taking the LR1 closure of the new kernel items.
        # map of LR0 states to LALR1 states
        m = {}
        for s in self.states:
            kernel = []
            for rp in s.kernel:
                las = self.lookaheads[s][rp]
                assert(len(las) > 0)
                for la in las:
                    kernel.append((rp, la))
            m[s] = self.generate_lr1_closure(kernel)
        self.states = {}
        for s, v in m.items():
            actions = {}
            # Every LR(0) transition becomes a Shift entry.
            for la, next_state in s.transitions.items():
                actions[la] = (Shift, next_state.closure)
            # Satisfied (dot-at-end) items grouped by lookahead are the
            # reduction candidates.
            sat, _ = classify_bool(v, lambda x: x[0].is_satisfied)
            reductions = classify(sat, lambda x: x[1], lambda x: x[0])
            for la, rps in reductions.items():
                if len(rps) > 1:
                    # reduce/reduce conflict: unrecoverable, report it.
                    raise GrammarError("Collision in %s: %s" % (la, ', '.join([ str(r.rule) for r in rps ])))
                if la in actions:
                    # shift/reduce conflict: keep the shift (see module docstring).
                    if self.debug:
                        logging.warning("Shift/reduce conflict for terminal %s: (resolving as shift)", la.name)
                        logging.warning(' * %s', str(rps[0]))
                else:
                    actions[la] = (Reduce, rps[0].rule)
            self.states[s.closure] = {k.name: v for k, v in actions.items()}
        # A state that contains a satisfied root rule is an accepting state
        # for that start symbol.
        end_states = {}
        for s in self.states:
            for rp in s:
                for start in self.lr0_start_states:
                    if rp.rule.origin.name == ('$root_' + start) and rp.is_satisfied:
                        assert(not start in end_states)
                        end_states[start] = s
        self._parse_table = ParseTable(self.states, {start: state.closure for start, state in self.lr0_start_states.items()}, end_states)
        if self.debug:
            self.parse_table = self._parse_table
        else:
            # Renumber states as integers for the production table.
            self.parse_table = IntParseTable.from_ParseTable(self._parse_table)

    def generate_lr1_closure(self, kernel):
        """Return the LR(1) closure of ``kernel`` (an iterable of
        ``(RulePtr, lookahead)`` pairs) as a set of such pairs."""
        closure = set()
        q = list(kernel)
        while len(q) > 0:
            rp, la = q.pop()
            if (rp, la) in closure:
                continue
            closure.add((rp, la))
            if rp.is_satisfied:
                continue
            if rp.next.is_term:
                continue
            # Compute FIRST of the remainder after the next symbol;
            # stop at the first non-nullable symbol.
            l = []
            i = rp.index + 1
            n = len(rp.rule.expansion)
            while i < n:
                s = rp.rule.expansion[i]
                l.extend(self.FIRST.get(s, []))
                if not s in self.NULLABLE:
                    break
                i += 1
            # if all of rp.rule.expansion[rp.index + 1:] were nullable:
            if i == n:
                l.append(la)
            # Expand the non-terminal after the dot with each lookahead.
            for r in self.lr0_rules_by_origin[rp.next]:
                for s in l:
                    q.append((RulePtr(r, 0), s))
        return closure