This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

143 lines
4.5 KiB

  1. """This module builds a LALR(1) transition-table for lalr_parser.py
  2. For now, shift/reduce conflicts are automatically resolved as shifts.
  3. """
  4. # Author: Erez Shinan (2017)
  5. # Email : erezshin@gmail.com
  6. import logging
  7. from collections import defaultdict
  8. from ..utils import classify, classify_bool, bfs, fzset, Serialize, Enumerator
  9. from ..exceptions import GrammarError
  10. from .grammar_analysis import GrammarAnalyzer, Terminal
  11. ###{standalone
  class Action:
      """A named parser action; its string form is simply its name."""

      def __init__(self, name):
          self.name = name

      def __repr__(self):
          # The repr is deliberately the same text as str().
          return str(self)

      def __str__(self):
          return self.name


  # The two action singletons; the rest of the module tells them apart by
  # identity (``is Shift`` / ``is Reduce``).
  Shift, Reduce = Action('Shift'), Action('Reduce')
  class ParseTable:
      """Transition table: maps each state to a dict of token -> (action, arg)."""

      def __init__(self, states, start_state, end_state):
          self.states = states
          self.start_state = start_state
          self.end_state = end_state

      def serialize(self):
          """Return a dict form of the table.

          Tokens and the rules carried by Reduce actions are replaced by ids
          handed out by two ``Enumerator`` instances.  Each action becomes
          ``(1, rule_id)`` for Reduce and ``(0, arg)`` for anything else.
          """
          token_ids = Enumerator()
          rule_ids = Enumerator()

          encoded_states = {}
          for state, actions in self.states.items():
              row = {}
              for token, (action, arg) in actions.items():
                  token_id = token_ids.get(token)
                  if action is Reduce:
                      row[token_id] = (1, rule_ids.get(arg))
                  else:
                      row[token_id] = (0, arg)
              encoded_states[state] = row

          token_table = token_ids.reversed()
          rule_table = {idx: rule.serialize() for idx, rule in rule_ids.reversed().items()}
          return {
              'tokens': token_table,
              'rules': rule_table,
              'states': encoded_states,
              'start_state': self.start_state,
              'end_state': self.end_state,
          }

      @classmethod
      def deserialize(cls, data, memo):
          """Rebuild a table from the dict layout produced by ``serialize``.

          An action code equal to 1 is decoded as Reduce with the rule looked
          up in ``data['rules']``; any other code is decoded as Shift with the
          argument kept as is.  ``memo`` is accepted but not used here.
          """
          token_table = data['tokens']
          rule_table = data['rules']

          def decode(action, arg):
              if action == 1:
                  return (Reduce, rule_table[arg])
              return (Shift, arg)

          decoded_states = {}
          for state, actions in data['states'].items():
              row = {}
              for token, (action, arg) in actions.items():
                  row[token_table[token]] = decode(action, arg)
              decoded_states[state] = row

          return cls(decoded_states, data['start_state'], data['end_state'])
  class IntParseTable(ParseTable):
      """A ParseTable whose states are identified by integer indices."""

      @classmethod
      def from_ParseTable(cls, parse_table):
          """Build an integer-indexed copy of ``parse_table``.

          Each state is numbered by its position in the iteration order of
          ``parse_table.states``.  Shift targets are rewritten to those
          numbers; every other action is carried over untouched.
          """
          index_of = {state: idx for idx, state in enumerate(parse_table.states)}

          int_states = {}
          for state, actions in parse_table.states.items():
              remapped = {}
              for token, entry in actions.items():
                  if entry[0] is Shift:
                      remapped[token] = (entry[0], index_of[entry[1]])
                  else:
                      remapped[token] = entry
              int_states[index_of[state]] = remapped

          return cls(
              int_states,
              index_of[parse_table.start_state],
              index_of[parse_table.end_state],
          )
  64. ###}
  class LALR_Analyzer(GrammarAnalyzer):
      """Grammar analyzer that builds the LALR(1) transition table.

      Relies on ``self.FOLLOW``, ``self.expand_rule``, ``self.start_state`` and
      ``self.debug``, none of which are defined in this class (presumably they
      come from ``GrammarAnalyzer`` -- confirm there).
      """

      def compute_lookahead(self):
          """Explore all states from ``self.start_state`` and build the table.

          Sets ``self.states``, ``self.end_states``, ``self.end_state``,
          ``self._parse_table`` and ``self.parse_table``.

          Raises:
              GrammarError: if a lookahead symbol in some state still has more
                  than one action after shift/reduce conflicts were resolved
                  as shifts.
              ValueError: from the tuple unpacking of ``self.end_states`` when
                  it does not hold exactly one state.
          """
          self.end_states = []
          self.states = {}

          def step(state):
              # Generator driven by bfs(): yields every state reachable by a
              # shift from `state`, then records the action row of `state`.
              lookahead = defaultdict(list)
              sat, unsat = classify_bool(state, lambda rp: rp.is_satisfied)

              # Satisfied rule pointers: reduce on each FOLLOW term of the origin.
              for rp in sat:
                  for term in self.FOLLOW.get(rp.rule.origin, ()):
                      lookahead[term].append((Reduce, rp.rule))

              # Unsatisfied rule pointers, grouped by the symbol they expect next.
              d = classify(unsat, lambda rp: rp.next)
              for sym, rps in d.items():
                  rps = {rp.advance(sym) for rp in rps}

                  # Iterate over a copy because `rps` grows inside the loop.
                  for rp in set(rps):
                      if not rp.is_satisfied and not rp.next.is_term:
                          rps |= self.expand_rule(rp.next)

                  new_state = fzset(rps)
                  lookahead[sym].append((Shift, new_state))
                  if sym == Terminal('$END'):
                      self.end_states.append(new_state)
                  yield new_state

              for k, v in lookahead.items():
                  if len(v) > 1:
                      if self.debug:
                          # logging.warning replaces the deprecated logging.warn alias.
                          logging.warning("Shift/reduce conflict for terminal %s: (resolving as shift)", k.name)
                          for act, arg in v:
                              logging.warning(' * %s: %s', act, arg)
                      for x in v:
                          # XXX resolving shift/reduce into shift, like PLY
                          # Give a proper warning
                          if x[0] is Shift:
                              # Rebinding an existing key keeps the dict size
                              # unchanged, so this is safe while iterating.
                              lookahead[k] = [x]

              # Anything still ambiguous had no Shift to fall back on.
              for k, v in lookahead.items():
                  if len(v) != 1:
                      raise GrammarError("Collision in %s: %s" % (k, ', '.join(['\n * %s: %s' % x for x in v])))

              self.states[state] = {k.name: v[0] for k, v in lookahead.items()}

          # Exhaust the traversal; the useful work happens as side effects of step().
          for _ in bfs([self.start_state], step):
              pass

          # Exactly one end state is expected; the unpacking fails otherwise.
          self.end_state ,= self.end_states

          self._parse_table = ParseTable(self.states, self.start_state, self.end_state)

          # Debug mode keeps the original states as keys; otherwise they are
          # replaced by integer indices.
          if self.debug:
              self.parse_table = self._parse_table
          else:
              self.parse_table = IntParseTable.from_ParseTable(self._parse_table)