  1. """This module implements a LALR(1) Parser
  2. """
  3. # Author: Erez Shinan (2017)
  4. # Email : erezshin@gmail.com
  5. from ..exceptions import UnexpectedToken
  6. from ..lexer import Token
  7. from ..utils import Enumerator, Serialize
  8. from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
  9. ###{standalone
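
# LALR_Parser runs the grammar analysis at construction time and delegates
# the actual parsing to the table-driven _Parser engine defined below.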
class LALR_Parser(object):
    def __init__(self, parser_conf, debug=False):
        assert all(r.options is None or r.options.priority is None
                   for r in parser_conf.rules), "LALR doesn't yet support prioritization"
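
        # Standard LALR construction: build the LR(0) automaton, then compute
        # and propagate lookaheads, and finally derive the LALR(1) parse table.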
        analysis = LALR_Analyzer(parser_conf, debug=debug)
        analysis.generate_lr0_states()
        analysis.discover_lookaheads()
        analysis.propagate_lookaheads()
        analysis.generate_lalr1_states()
        callbacks = parser_conf.callbacks

        self._parse_table = analysis.parse_table
        self.parser_conf = parser_conf
        self.parser = _Parser(analysis.parse_table, callbacks)
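
    # Serialization keeps only the parse table; callbacks must be supplied
    # again when deserializing (this is what the generated standalone parser
    # relies on).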
    @classmethod
    def deserialize(cls, data, memo, callbacks):
        inst = cls.__new__(cls)
        inst._parse_table = IntParseTable.deserialize(data, memo)
        inst.parser = _Parser(inst._parse_table, callbacks)
        return inst

    def serialize(self, memo):
        return self._parse_table.serialize(memo)

    def parse(self, *args):
        return self.parser.parse(*args)
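

# The engine itself: two parallel stacks (parser states and semantic values)
# driven by the Shift/Reduce actions stored in the parse table.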
class _Parser:
    def __init__(self, parse_table, callbacks):
        self.states = parse_table.states
        self.start_states = parse_table.start_states
        self.end_states = parse_table.end_states
        self.callbacks = callbacks
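
    # start_states/end_states are keyed by start-symbol name, so one table
    # can serve a grammar compiled with multiple start rules.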
    def parse(self, seq, start, set_state=None):
        token = None
        stream = iter(seq)
        states = self.states

        start_state = self.start_states[start]
        end_state = self.end_states[start]

        state_stack = [start_state]
        value_stack = []

        if set_state: set_state(start_state)
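
        # Look up the action for the current state and token type. On a miss,
        # report the terminals this state accepts (terminal names are
        # uppercase by convention, hence the isupper() filter).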
        def get_action(token):
            state = state_stack[-1]
            try:
                return states[state][token.type]
            except KeyError:
                expected = [s for s in states[state].keys() if s.isupper()]
                raise UnexpectedToken(token, expected, state=state)
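
        # Pop one state/value per symbol on the rule's right-hand side, build
        # the semantic value via the rule callback, then follow the goto
        # transition for the rule's origin. Returns True once the end state
        # is on top of the stack, signalling a complete parse.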
        def reduce(rule):
            if state_stack[-1] == end_state:
                return True

            size = len(rule.expansion)
            if size:
                s = value_stack[-size:]
                del state_stack[-size:]
                del value_stack[-size:]
            else:
                s = []

            value = self.callbacks[rule](s)

            _action, new_state = states[state_stack[-1]][rule.origin.name]
            assert _action is Shift
            state_stack.append(new_state)
            value_stack.append(value)
            return False

        # Main LALR-parser loop
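        # (reduce until the current token can be shifted, then shift it)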
        for token in stream:
            while True:
                action, arg = get_action(token)
                assert arg != end_state

                if action is Shift:
                    state_stack.append(arg)
                    value_stack.append(token)
                    if set_state: set_state(arg)
                    break # next token
                else:
                    reduce(arg)
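
        # End of input: synthesize a $END token (borrowing the position of
        # the last real token, if any) and keep reducing until reduce()
        # reports that the end state was reached.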
        token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
        while True:
            _action, arg = get_action(token)
            assert(_action is Reduce)
            if reduce(arg):
                return value_stack[-1]

###}
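
# Illustrative usage (not part of this module): this class is normally
# reached through lark's public API rather than instantiated directly, e.g.
#
#     from lark import Lark
#     parser = Lark('start: "a"+', parser='lalr')
#     tree = parser.parse('aaa')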