This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

146 lines
4.5 KiB

  1. """This module implements a LALR(1) Parser
  2. """
  3. # Author: Erez Shinan (2017)
  4. # Email : erezshin@gmail.com
  5. from ..exceptions import UnexpectedToken
  6. from ..lexer import Token
  7. from ..grammar import Rule
  8. from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
  9. class Enumerator:
  10. def __init__(self):
  11. self.enums = {}
  12. def get(self, item):
  13. if item not in self.enums:
  14. self.enums[item] = len(self.enums)
  15. return self.enums[item]
  16. def __len__(self):
  17. return len(self.enums)
  18. def reversed(self):
  19. r = {v: k for k, v in self.enums.items()}
  20. assert len(r) == len(self.enums)
  21. return r
  22. class Parser(object):
  23. def __init__(self, parser_conf, debug=False):
  24. assert all(r.options is None or r.options.priority is None
  25. for r in parser_conf.rules), "LALR doesn't yet support prioritization"
  26. analysis = LALR_Analyzer(parser_conf, debug=debug)
  27. analysis.compute_lookahead()
  28. callbacks = parser_conf.callbacks
  29. self._parse_table = analysis.parse_table
  30. self.parser_conf = parser_conf
  31. self.parser = _Parser(analysis.parse_table, callbacks)
  32. self.parse = self.parser.parse
  33. def serialize(self):
  34. tokens = Enumerator()
  35. rules = Enumerator()
  36. states = {
  37. state: {tokens.get(token): ((1, rules.get(arg)) if action is Reduce else (0, arg))
  38. for token, (action, arg) in actions.items()}
  39. for state, actions in self._parse_table.states.items()
  40. }
  41. return {
  42. 'tokens': tokens.reversed(),
  43. 'rules': {idx: r.serialize() for idx, r in rules.reversed().items()},
  44. 'states': states,
  45. 'start_state': self._parse_table.start_state,
  46. 'end_state': self._parse_table.end_state,
  47. }
  48. @classmethod
  49. def deserialize(cls, data, callbacks):
  50. tokens = data['tokens']
  51. rules = {idx: Rule.deserialize(r) for idx, r in data['rules'].items()}
  52. states = {
  53. state: {tokens[token]: ((Reduce, rules[arg]) if action==1 else (Shift, arg))
  54. for token, (action, arg) in actions.items()}
  55. for state, actions in data['states'].items()
  56. }
  57. parse_table = IntParseTable(states, data['start_state'], data['end_state'])
  58. inst = cls.__new__(cls)
  59. inst.parser = _Parser(parse_table, callbacks)
  60. inst.parse = inst.parser.parse
  61. return inst
  62. ###{standalone
  63. class _Parser:
  64. def __init__(self, parse_table, callbacks):
  65. self.states = parse_table.states
  66. self.start_state = parse_table.start_state
  67. self.end_state = parse_table.end_state
  68. self.callbacks = callbacks
  69. def parse(self, seq, set_state=None):
  70. token = None
  71. stream = iter(seq)
  72. states = self.states
  73. state_stack = [self.start_state]
  74. value_stack = []
  75. if set_state: set_state(self.start_state)
  76. def get_action(token):
  77. state = state_stack[-1]
  78. try:
  79. return states[state][token.type]
  80. except KeyError:
  81. expected = [s for s in states[state].keys() if s.isupper()]
  82. raise UnexpectedToken(token, expected, state=state)
  83. def reduce(rule):
  84. size = len(rule.expansion)
  85. if size:
  86. s = value_stack[-size:]
  87. del state_stack[-size:]
  88. del value_stack[-size:]
  89. else:
  90. s = []
  91. value = self.callbacks[rule](s)
  92. _action, new_state = states[state_stack[-1]][rule.origin.name]
  93. assert _action is Shift
  94. state_stack.append(new_state)
  95. value_stack.append(value)
  96. # Main LALR-parser loop
  97. for token in stream:
  98. while True:
  99. action, arg = get_action(token)
  100. assert arg != self.end_state
  101. if action is Shift:
  102. state_stack.append(arg)
  103. value_stack.append(token)
  104. if set_state: set_state(arg)
  105. break # next token
  106. else:
  107. reduce(arg)
  108. token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
  109. while True:
  110. _action, arg = get_action(token)
  111. if _action is Shift:
  112. assert arg == self.end_state
  113. val ,= value_stack
  114. return val
  115. else:
  116. reduce(arg)
  117. ###}