This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

203 lines
6.9 KiB

  1. """This module implements a LALR(1) Parser
  2. """
  3. # Author: Erez Shinan (2017)
  4. # Email : erezshin@gmail.com
  5. from copy import deepcopy, copy
  6. from ..exceptions import UnexpectedInput, UnexpectedToken
  7. from ..lexer import Token
  8. from ..utils import Serialize
  9. from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
  10. from .lalr_puppet import ParserPuppet
  11. from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken
  12. ###{standalone
  13. class LALR_Parser(Serialize):
  14. def __init__(self, parser_conf, debug=False):
  15. analysis = LALR_Analyzer(parser_conf, debug=debug)
  16. analysis.compute_lalr()
  17. callbacks = parser_conf.callbacks
  18. self._parse_table = analysis.parse_table
  19. self.parser_conf = parser_conf
  20. self.parser = _Parser(analysis.parse_table, callbacks, debug)
  21. @classmethod
  22. def deserialize(cls, data, memo, callbacks, debug=False):
  23. inst = cls.__new__(cls)
  24. inst._parse_table = IntParseTable.deserialize(data, memo)
  25. inst.parser = _Parser(inst._parse_table, callbacks, debug)
  26. return inst
  27. def serialize(self, memo):
  28. return self._parse_table.serialize(memo)
  29. def get_puppet(self, lexer, start):
  30. return self.parser.get_puppet(lexer, start)
  31. def parse(self, lexer, start, on_error=None):
  32. try:
  33. return self.parser.parse(lexer, start)
  34. except UnexpectedInput as e:
  35. if on_error is None:
  36. raise
  37. while True:
  38. if isinstance(e, UnexpectedCharacters):
  39. s = e.puppet.lexer_state.state
  40. p = s.line_ctr.char_pos
  41. if not on_error(e):
  42. raise e
  43. if isinstance(e, UnexpectedCharacters):
  44. # If user didn't change the character position, then we should
  45. if p == s.line_ctr.char_pos:
  46. s.line_ctr.feed(s.text[p:p+1])
  47. try:
  48. return e.puppet.resume_parse()
  49. except UnexpectedToken as e2:
  50. if isinstance(e, UnexpectedToken) and e.token.type == e2.token.type == '$END' and e.puppet == e2.puppet:
  51. # Prevent infinite loop
  52. raise e2
  53. e = e2
  54. except UnexpectedCharacters as e2:
  55. e = e2
  56. class ParseConf(object):
  57. __slots__ = 'parse_table', 'callbacks', 'start', 'start_state', 'end_state', 'states'
  58. def __init__(self, parse_table, callbacks, start):
  59. self.parse_table = parse_table
  60. self.start_state = self.parse_table.start_states[start]
  61. self.end_state = self.parse_table.end_states[start]
  62. self.states = self.parse_table.states
  63. self.callbacks = callbacks
  64. self.start = start
  65. class ParserState(object):
  66. __slots__ = 'parse_conf', 'lexer', 'state_stack', 'value_stack'
  67. def __init__(self, parse_conf, lexer, state_stack=None, value_stack=None):
  68. self.parse_conf = parse_conf
  69. self.lexer = lexer
  70. self.state_stack = state_stack or [self.parse_conf.start_state]
  71. self.value_stack = value_stack or []
  72. @property
  73. def position(self):
  74. return self.state_stack[-1]
  75. # Necessary for match_examples() to work
  76. def __eq__(self, other):
  77. if not isinstance(other, ParserState):
  78. return NotImplemented
  79. return len(self.state_stack) == len(other.state_stack) and self.position == other.position
  80. def __copy__(self):
  81. return type(self)(
  82. self.parse_conf,
  83. self.lexer, # XXX copy
  84. copy(self.state_stack),
  85. deepcopy(self.value_stack),
  86. )
  87. def copy(self):
  88. return copy(self)
  89. def feed_token(self, token, is_end=False):
  90. state_stack = self.state_stack
  91. value_stack = self.value_stack
  92. states = self.parse_conf.states
  93. end_state = self.parse_conf.end_state
  94. callbacks = self.parse_conf.callbacks
  95. while True:
  96. state = state_stack[-1]
  97. try:
  98. action, arg = states[state][token.type]
  99. except KeyError:
  100. expected = {s for s in states[state].keys() if s.isupper()}
  101. raise UnexpectedToken(token, expected, state=self, puppet=None)
  102. assert arg != end_state
  103. if action is Shift:
  104. # shift once and return
  105. assert not is_end
  106. state_stack.append(arg)
  107. value_stack.append(token if token.type not in callbacks else callbacks[token.type](token))
  108. return
  109. else:
  110. # reduce+shift as many times as necessary
  111. rule = arg
  112. size = len(rule.expansion)
  113. if size:
  114. s = value_stack[-size:]
  115. del state_stack[-size:]
  116. del value_stack[-size:]
  117. else:
  118. s = []
  119. value = callbacks[rule](s)
  120. _action, new_state = states[state_stack[-1]][rule.origin.name]
  121. assert _action is Shift
  122. state_stack.append(new_state)
  123. value_stack.append(value)
  124. if is_end and state_stack[-1] == end_state:
  125. return value_stack[-1]
  126. class _Parser(object):
  127. def __init__(self, parse_table, callbacks, debug=False):
  128. self.parse_table = parse_table
  129. self.callbacks = callbacks
  130. self.debug = debug
  131. def get_puppet(self, lexer, start, value_stack=None, state_stack=None):
  132. parse_conf = ParseConf(self.parse_table, self.callbacks, start)
  133. parser_state = ParserState(parse_conf, lexer, state_stack, value_stack)
  134. return ParserPuppet(self, parser_state, parser_state.lexer)
  135. def parse(self, lexer, start, value_stack=None, state_stack=None):
  136. parse_conf = ParseConf(self.parse_table, self.callbacks, start)
  137. parser_state = ParserState(parse_conf, lexer, state_stack, value_stack)
  138. return self.parse_from_state(parser_state)
  139. def parse_from_state(self, state):
  140. # Main LALR-parser loop
  141. try:
  142. token = None
  143. for token in state.lexer.lex(state):
  144. state.feed_token(token)
  145. token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
  146. return state.feed_token(token, True)
  147. except UnexpectedInput as e:
  148. try:
  149. e.puppet = ParserPuppet(self, state, state.lexer)
  150. except NameError:
  151. pass
  152. raise e
  153. except Exception as e:
  154. if self.debug:
  155. print("")
  156. print("STATE STACK DUMP")
  157. print("----------------")
  158. for i, s in enumerate(state.state_stack):
  159. print('%d)' % i , s)
  160. print("")
  161. raise
  162. ###}