This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

288 lines
9.7 KiB

  1. """This module builds a LALR(1) transition-table for lalr_parser.py
  2. For now, shift/reduce conflicts are automatically resolved as shifts.
  3. """
  4. # Author: Erez Shinan (2017)
  5. # Email : erezshin@gmail.com
  6. import logging
  7. from collections import defaultdict, deque
  8. from ..utils import classify, classify_bool, bfs, fzset, Serialize, Enumerator
  9. from ..exceptions import GrammarError
  10. from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet
  11. from ..grammar import Rule
  12. ###{standalone
  13. class Action:
  14. def __init__(self, name):
  15. self.name = name
  16. def __str__(self):
  17. return self.name
  18. def __repr__(self):
  19. return str(self)
# Singleton action markers: every parse-table entry is tagged with one of these.
Shift = Action('Shift')
Reduce = Action('Reduce')
  22. class ParseTable:
  23. def __init__(self, states, start_states, end_states):
  24. self.states = states
  25. self.start_states = start_states
  26. self.end_states = end_states
  27. def serialize(self, memo):
  28. tokens = Enumerator()
  29. rules = Enumerator()
  30. states = {
  31. state: {tokens.get(token): ((1, arg.serialize(memo)) if action is Reduce else (0, arg))
  32. for token, (action, arg) in actions.items()}
  33. for state, actions in self.states.items()
  34. }
  35. return {
  36. 'tokens': tokens.reversed(),
  37. 'states': states,
  38. 'start_states': self.start_states,
  39. 'end_states': self.end_states,
  40. }
  41. @classmethod
  42. def deserialize(cls, data, memo):
  43. tokens = data['tokens']
  44. states = {
  45. state: {tokens[token]: ((Reduce, Rule.deserialize(arg, memo)) if action==1 else (Shift, arg))
  46. for token, (action, arg) in actions.items()}
  47. for state, actions in data['states'].items()
  48. }
  49. return cls(states, data['start_states'], data['end_states'])
  50. class IntParseTable(ParseTable):
  51. @classmethod
  52. def from_ParseTable(cls, parse_table):
  53. enum = list(parse_table.states)
  54. state_to_idx = {s:i for i,s in enumerate(enum)}
  55. int_states = {}
  56. for s, la in parse_table.states.items():
  57. la = {k:(v[0], state_to_idx[v[1]]) if v[0] is Shift else v
  58. for k,v in la.items()}
  59. int_states[ state_to_idx[s] ] = la
  60. start_states = {start:state_to_idx[s] for start, s in parse_table.start_states.items()}
  61. end_states = {start:state_to_idx[s] for start, s in parse_table.end_states.items()}
  62. return cls(int_states, start_states, end_states)
  63. ###}
  64. # digraph and traverse, see The Theory and Practice of Compiler Writing
  65. # computes F(x) = G(x) union (union { G(y) | x R y })
  66. # X: nodes
  67. # R: relation (function mapping node -> list of nodes that satisfy the relation)
  68. # G: set valued function
  69. def digraph(X, R, G):
  70. F = {}
  71. S = []
  72. N = {}
  73. for x in X:
  74. N[x] = 0
  75. for x in X:
  76. # this is always true for the first iteration, but N[x] may be updated in traverse below
  77. if N[x] == 0:
  78. traverse(x, S, N, X, R, G, F)
  79. return F
  80. # x: single node
  81. # S: stack
  82. # N: weights
  83. # X: nodes
  84. # R: relation (see above)
  85. # G: set valued function
  86. # F: set valued function we are computing (map of input -> output)
  87. def traverse(x, S, N, X, R, G, F):
  88. S.append(x)
  89. d = len(S)
  90. N[x] = d
  91. F[x] = G[x]
  92. for y in R[x]:
  93. if N[y] == 0:
  94. traverse(y, S, N, X, R, G, F)
  95. n_x = N[x]
  96. assert(n_x > 0)
  97. n_y = N[y]
  98. assert(n_y != 0)
  99. if (n_y > 0) and (n_y < n_x):
  100. N[x] = n_y
  101. F[x].update(F[y])
  102. if N[x] == d:
  103. f_x = F[x]
  104. while True:
  105. z = S.pop()
  106. N[z] = -1
  107. F[z] = f_x
  108. if z == x:
  109. break
class LALR_Analyzer(GrammarAnalyzer):
    """Builds an LALR(1) parse table from an LR(0) automaton.

    Lookahead sets are computed via the reads / includes / lookback
    relations (DeRemer & Pennello style), solved with digraph() above.
    Entry point: compute_lalr().
    """

    def __init__(self, parser_conf, debug=False):
        GrammarAnalyzer.__init__(self, parser_conf, debug)
        # (state, nonterminal) pairs, and the relations over them that
        # feed the digraph computation of lookahead sets.
        self.nonterminal_transitions = []
        self.directly_reads = defaultdict(set)
        self.reads = defaultdict(set)
        self.includes = defaultdict(set)
        self.lookback = defaultdict(set)

    def compute_lr0_states(self):
        """Build the LR(0) item-set automaton by BFS from the start states."""
        self.lr0_states = set()
        # map of kernels to LR0ItemSets
        cache = {}

        def step(state):
            # Group the unsatisfied items by the symbol after the dot.
            _, unsat = classify_bool(state.closure, lambda rp: rp.is_satisfied)
            d = classify(unsat, lambda rp: rp.next)
            for sym, rps in d.items():
                # The goto kernel: every grouped item advanced past `sym`.
                kernel = fzset({rp.advance(sym) for rp in rps})
                new_state = cache.get(kernel, None)
                if new_state is None:
                    # New kernel: compute its closure by expanding every
                    # nonterminal that follows a dot.
                    closure = set(kernel)
                    for rp in kernel:
                        if not rp.is_satisfied and not rp.next.is_term:
                            closure |= self.expand_rule(rp.next, self.lr0_rules_by_origin)
                    new_state = LR0ItemSet(kernel, closure)
                    cache[kernel] = new_state
                state.transitions[sym] = new_state
                yield new_state
            self.lr0_states.add(state)

        for _ in bfs(self.lr0_start_states.values(), step):
            pass

    def compute_reads_relations(self):
        """Populate nonterminal_transitions, directly_reads and reads."""
        # handle start state: the root nonterminal directly reads $END
        for root in self.lr0_start_states.values():
            assert(len(root.kernel) == 1)
            for rp in root.kernel:
                assert(rp.index == 0)
                self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ])
        for state in self.lr0_states:
            seen = set()
            for rp in state.closure:
                if rp.is_satisfied:
                    continue
                s = rp.next
                # if s is not a nonterminal (i.e. it's a terminal), skip it
                if s not in self.lr0_rules_by_origin:
                    continue
                if s in seen:
                    continue
                seen.add(s)
                # nt is the nonterminal transition (state, s)
                nt = (state, s)
                self.nonterminal_transitions.append(nt)
                dr = self.directly_reads[nt]
                r = self.reads[nt]
                next_state = state.transitions[s]
                for rp2 in next_state.closure:
                    if rp2.is_satisfied:
                        continue
                    s2 = rp2.next
                    # if s2 is a terminal, nt directly reads it
                    if s2 not in self.lr0_rules_by_origin:
                        dr.add(s2)
                    # nullable nonterminals extend the reads relation
                    if s2 in self.NULLABLE:
                        r.add((next_state, s2))

    def compute_includes_lookback(self):
        """Populate the includes and lookback relations."""
        for nt in self.nonterminal_transitions:
            state, nonterminal = nt
            includes = []
            lookback = self.lookback[nt]
            for rp in state.closure:
                if rp.rule.origin != nonterminal:
                    continue
                # traverse the states for rp(.rule)
                state2 = state
                for i in range(rp.index, len(rp.rule.expansion)):
                    s = rp.rule.expansion[i]
                    nt2 = (state2, s)
                    state2 = state2.transitions[s]
                    if nt2 not in self.reads:
                        continue
                    # nt2 includes nt iff everything after position i in
                    # the expansion is nullable (the for/else fires only
                    # when the loop never breaks).
                    for j in range(i + 1, len(rp.rule.expansion)):
                        if not rp.rule.expansion[j] in self.NULLABLE:
                            break
                    else:
                        includes.append(nt2)
                # state2 is at the final state for rp.rule, but only when the
                # walk started at index 0 did it consume the whole expansion.
                if rp.index == 0:
                    for rp2 in state2.closure:
                        if (rp2.rule == rp.rule) and rp2.is_satisfied:
                            lookback.add((state2, rp2.rule))
            for nt2 in includes:
                self.includes[nt2].add(nt)

    def compute_lookaheads(self):
        """Solve the relations and distribute follow sets as lookaheads."""
        # Two digraph passes: reads over directly_reads, then includes
        # over the resulting read sets.
        read_sets = digraph(self.nonterminal_transitions, self.reads, self.directly_reads)
        follow_sets = digraph(self.nonterminal_transitions, self.includes, read_sets)
        for nt, lookbacks in self.lookback.items():
            for state, rule in lookbacks:
                for s in follow_sets[nt]:
                    state.lookaheads[s].add(rule)

    def compute_lalr1_states(self):
        """Turn the annotated LR(0) automaton into the final ParseTable."""
        m = {}
        for state in self.lr0_states:
            actions = {}
            # Every automaton transition becomes a Shift action.
            for la, next_state in state.transitions.items():
                actions[la] = (Shift, next_state.closure)
            for la, rules in state.lookaheads.items():
                # More than one candidate rule is a hard error.
                if len(rules) > 1:
                    raise GrammarError('Reduce/Reduce collision in %s between the following rules: %s' % (la, ''.join([ '\n\t\t- ' + str(r) for r in rules ])))
                # Shift/Reduce conflicts are resolved as Shift (see module
                # docstring); the reduce is dropped, optionally with a warning.
                if la in actions:
                    if self.debug:
                        logging.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
                        logging.warning(' * %s', list(rules)[0])
                else:
                    actions[la] = (Reduce, list(rules)[0])
            m[state] = { k.name: v for k, v in actions.items() }
        # Key states by their closure (hashable) rather than the item-set object.
        states = { k.closure: v for k, v in m.items() }
        # compute end states
        end_states = {}
        for state in states:
            for rp in state:
                for start in self.lr0_start_states:
                    if rp.rule.origin.name == ('$root_' + start) and rp.is_satisfied:
                        assert(not start in end_states)
                        end_states[start] = state
        _parse_table = ParseTable(states, { start: state.closure for start, state in self.lr0_start_states.items() }, end_states)
        # In debug mode keep the readable table; otherwise compact it to ints.
        if self.debug:
            self.parse_table = _parse_table
        else:
            self.parse_table = IntParseTable.from_ParseTable(_parse_table)

    def compute_lalr(self):
        """Run the full LALR(1) construction pipeline, in order."""
        self.compute_lr0_states()
        self.compute_reads_relations()
        self.compute_includes_lookback()
        self.compute_lookaheads()
        self.compute_lalr1_states()