This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

165 lines
5.2 KiB

  1. """This module implements a LALR(1) Parser
  2. """
  3. # Author: Erez Shinan (2017)
  4. # Email : erezshin@gmail.com
  5. from copy import deepcopy, copy
  6. from ..exceptions import UnexpectedInput, UnexpectedToken
  7. from ..lexer import Token
  8. from ..utils import Serialize
  9. from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
  10. from .lalr_puppet import ParserPuppet
  11. ###{standalone
  12. class LALR_Parser(Serialize):
  13. def __init__(self, parser_conf, debug=False):
  14. analysis = LALR_Analyzer(parser_conf, debug=debug)
  15. analysis.compute_lalr()
  16. callbacks = parser_conf.callbacks
  17. self._parse_table = analysis.parse_table
  18. self.parser_conf = parser_conf
  19. self.parser = _Parser(analysis.parse_table, callbacks, debug)
  20. @classmethod
  21. def deserialize(cls, data, memo, callbacks, debug=False):
  22. inst = cls.__new__(cls)
  23. inst._parse_table = IntParseTable.deserialize(data, memo)
  24. inst.parser = _Parser(inst._parse_table, callbacks, debug)
  25. return inst
  26. def serialize(self, memo):
  27. return self._parse_table.serialize(memo)
  28. def parse(self, *args):
  29. return self.parser.parse(*args)
  30. class ParseConf(object):
  31. __slots__ = 'parse_table', 'callbacks', 'start', 'start_state', 'end_state', 'states'
  32. def __init__(self, parse_table, callbacks, start):
  33. self.parse_table = parse_table
  34. self.start_state = self.parse_table.start_states[start]
  35. self.end_state = self.parse_table.end_states[start]
  36. self.states = self.parse_table.states
  37. self.callbacks = callbacks
  38. self.start = start
  39. class ParserState(object):
  40. __slots__ = 'parse_conf', 'lexer', 'state_stack', 'value_stack'
  41. def __init__(self, parse_conf, lexer, state_stack=None, value_stack=None):
  42. self.parse_conf = parse_conf
  43. self.lexer = lexer
  44. self.state_stack = state_stack or [self.parse_conf.start_state]
  45. self.value_stack = value_stack or []
  46. @property
  47. def position(self):
  48. return self.state_stack[-1]
  49. # Necessary for match_examples() to work
  50. def __eq__(self, other):
  51. if not isinstance(other, ParserState):
  52. return False
  53. return self.position == other.position
  54. def __copy__(self):
  55. return type(self)(
  56. self.parse_conf,
  57. self.lexer, # XXX copy
  58. copy(self.state_stack),
  59. deepcopy(self.value_stack),
  60. )
  61. def copy(self):
  62. return copy(self)
  63. def feed_token(self, token, is_end=False):
  64. state_stack = self.state_stack
  65. value_stack = self.value_stack
  66. states = self.parse_conf.states
  67. end_state = self.parse_conf.end_state
  68. callbacks = self.parse_conf.callbacks
  69. while True:
  70. state = state_stack[-1]
  71. try:
  72. action, arg = states[state][token.type]
  73. except KeyError:
  74. expected = {s for s in states[state].keys() if s.isupper()}
  75. raise UnexpectedToken(token, expected, state=self, puppet=None)
  76. assert arg != end_state
  77. if action is Shift:
  78. # shift once and return
  79. assert not is_end
  80. state_stack.append(arg)
  81. value_stack.append(token)
  82. return
  83. else:
  84. # reduce+shift as many times as necessary
  85. rule = arg
  86. size = len(rule.expansion)
  87. if size:
  88. s = value_stack[-size:]
  89. del state_stack[-size:]
  90. del value_stack[-size:]
  91. else:
  92. s = []
  93. value = callbacks[rule](s)
  94. _action, new_state = states[state_stack[-1]][rule.origin.name]
  95. assert _action is Shift
  96. state_stack.append(new_state)
  97. value_stack.append(value)
  98. if is_end and state_stack[-1] == end_state:
  99. return value_stack[-1]
  100. class _Parser(object):
  101. def __init__(self, parse_table, callbacks, debug=False):
  102. self.parse_table = parse_table
  103. self.callbacks = callbacks
  104. self.debug = debug
  105. def parse(self, lexer, start, value_stack=None, state_stack=None):
  106. parse_conf = ParseConf(self.parse_table, self.callbacks, start)
  107. parser_state = ParserState(parse_conf, lexer, state_stack, value_stack)
  108. return self.parse_from_state(parser_state)
  109. def parse_from_state(self, state):
  110. # Main LALR-parser loop
  111. try:
  112. token = None
  113. for token in state.lexer.lex(state):
  114. state.feed_token(token)
  115. token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
  116. return state.feed_token(token, True)
  117. except UnexpectedInput as e:
  118. try:
  119. e.puppet = ParserPuppet(self, state, state.lexer)
  120. except NameError:
  121. pass
  122. raise e
  123. except Exception as e:
  124. if self.debug:
  125. print("")
  126. print("STATE STACK DUMP")
  127. print("----------------")
  128. for i, s in enumerate(state.state_stack):
  129. print('%d)' % i , s)
  130. print("")
  131. raise
  132. ###}