This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

125 lines
3.9 KiB

  1. """This module implements a LALR(1) Parser
  2. """
  3. # Author: Erez Shinan (2017)
  4. # Email : erezshin@gmail.com
  5. from ..exceptions import UnexpectedToken
  6. from ..lexer import Token
  7. from ..utils import Enumerator, Serialize
  8. from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
  9. import time
  10. ###{standalone
  11. class LALR_Parser(object):
  12. def __init__(self, parser_conf, debug=False):
  13. assert all(r.options is None or r.options.priority is None
  14. for r in parser_conf.rules), "LALR doesn't yet support prioritization"
  15. analysis = LALR_Analyzer(parser_conf, debug=debug)
  16. t0 = time.time()
  17. analysis.generate_lr0_states()
  18. t1 = time.time()
  19. analysis.discover_lookaheads()
  20. t2 = time.time()
  21. analysis.propagate_lookaheads()
  22. t3 = time.time()
  23. analysis.generate_lalr1_states()
  24. t4 = time.time()
  25. print('Generating lr0 states took {:.3f}'.format(t1 - t0))
  26. print('Discovering lookaheads took {:.3f}'.format(t2 - t1))
  27. print('Propagating lookaheads took took {:.3f}'.format(t3 - t2))
  28. print('Generating lalr states (closure) took {:.3f}'.format(t4 - t3))
  29. print('-' * 32)
  30. callbacks = parser_conf.callbacks
  31. self._parse_table = analysis.parse_table
  32. self.parser_conf = parser_conf
  33. self.parser = _Parser(analysis.parse_table, callbacks)
  34. @classmethod
  35. def deserialize(cls, data, memo, callbacks):
  36. inst = cls.__new__(cls)
  37. inst._parse_table = IntParseTable.deserialize(data, memo)
  38. inst.parser = _Parser(inst._parse_table, callbacks)
  39. return inst
  40. def serialize(self, memo):
  41. return self._parse_table.serialize(memo)
  42. def parse(self, *args):
  43. return self.parser.parse(*args)
class _Parser:
    """Table-driven LALR(1) shift/reduce engine.

    Operates on a prebuilt parse table (states plus per-start-symbol
    start/end states) and a callbacks mapping (rule -> callable that builds
    the value for a completed rule).
    """

    def __init__(self, parse_table, callbacks):
        # states maps state-id -> {symbol_name: (action, arg)} where action is
        # Shift or Reduce (see get_action/reduce below for how arg is used).
        self.states = parse_table.states
        self.start_states = parse_table.start_states
        self.end_states = parse_table.end_states
        self.callbacks = callbacks

    def parse(self, seq, start, set_state=None):
        """Parse the token iterable *seq* beginning at start symbol *start*.

        set_state, if provided, is invoked with each newly entered state after
        a shift — presumably so a caller (e.g. a state-aware lexer) can track
        the parser state; confirm against callers.
        Returns the value left on top of the value stack when the start rule
        completes. Raises UnexpectedToken when no action exists for the
        current lookahead.
        """
        token = None
        stream = iter(seq)
        states = self.states

        start_state = self.start_states[start]
        end_state = self.end_states[start]

        state_stack = [start_state]
        value_stack = []

        if set_state: set_state(start_state)

        def get_action(token):
            # Look up (action, arg) for the current state and lookahead type.
            state = state_stack[-1]
            try:
                return states[state][token.type]
            except KeyError:
                # Build the "expected terminals" list from the state's keys;
                # assumes terminal names are uppercase (lark's naming
                # convention) while rule origins are not — TODO confirm.
                expected = [s for s in states[state].keys() if s.isupper()]
                raise UnexpectedToken(token, expected, state=state)

        def reduce(rule):
            # Apply one reduction. Returns True iff the parser is already in
            # the end state, signalling that parsing is complete.
            if state_stack[-1] == end_state:
                return True

            size = len(rule.expansion)
            if size:
                # Pop the matched children; slice BEFORE deleting so the
                # callback sees them in order.
                s = value_stack[-size:]
                del state_stack[-size:]
                del value_stack[-size:]
            else:
                s = []  # empty rule: nothing to pop

            value = self.callbacks[rule](s)

            # After popping, the uncovered state must have a goto (stored as a
            # Shift action) on the reduced rule's origin.
            _action, new_state = states[state_stack[-1]][rule.origin.name]
            assert _action is Shift
            state_stack.append(new_state)
            value_stack.append(value)

            return False

        # Main LALR-parser loop
        for token in stream:
            while True:
                action, arg = get_action(token)
                assert arg != end_state
                if action is Shift:
                    state_stack.append(arg)
                    value_stack.append(token)
                    if set_state: set_state(arg)
                    break # next token
                else:
                    reduce(arg)

        # Stream exhausted: synthesize an $END token (borrowing position info
        # from the last real token when there was one) and reduce to completion.
        token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
        while True:
            _action, arg = get_action(token)
            assert(_action is Reduce)
            if reduce(arg):
                return value_stack[-1]
  100. ###}