This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

150 lines
4.8 KiB

  1. """This module implements a LALR(1) Parser
  2. """
  3. # Author: Erez Shinan (2017)
  4. # Email : erezshin@gmail.com
  5. from copy import deepcopy, copy
  6. from ..exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken
  7. from ..lexer import Token
  8. from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
  9. from .lalr_puppet import ParserPuppet
  10. ###{standalone
  11. class LALR_Parser(object):
  12. def __init__(self, parser_conf, debug=False):
  13. analysis = LALR_Analyzer(parser_conf, debug=debug)
  14. analysis.compute_lalr()
  15. callbacks = parser_conf.callbacks
  16. self._parse_table = analysis.parse_table
  17. self.parser_conf = parser_conf
  18. self.parser = _Parser(analysis.parse_table, callbacks, debug)
  19. @classmethod
  20. def deserialize(cls, data, memo, callbacks, debug=False):
  21. inst = cls.__new__(cls)
  22. inst._parse_table = IntParseTable.deserialize(data, memo)
  23. inst.parser = _Parser(inst._parse_table, callbacks, debug)
  24. return inst
  25. def serialize(self, memo):
  26. return self._parse_table.serialize(memo)
  27. def parse(self, *args):
  28. return self.parser.parse(*args)
  29. class ParserState:
  30. __slots__ = 'parse_table', 'lexer', 'callbacks', 'start', 'state_stack', 'value_stack', 'start_state', 'end_state', 'states'
  31. def __init__(self, parse_table, lexer, callbacks, start, state_stack=None, value_stack=None):
  32. self.parse_table = parse_table
  33. self.start_state = self.parse_table.start_states[start]
  34. self.end_state = self.parse_table.end_states[start]
  35. self.states = self.parse_table.states
  36. self.lexer = lexer
  37. self.callbacks = callbacks
  38. self.start = start
  39. self.state_stack = state_stack or [self.start_state]
  40. self.value_stack = value_stack or []
  41. @property
  42. def position(self):
  43. return self.state_stack[-1]
  44. def __copy__(self):
  45. return type(self)(
  46. self.parse_table,
  47. self.lexer, # XXX copy
  48. self.callbacks,
  49. self.start,
  50. copy(self.state_stack),
  51. deepcopy(self.value_stack),
  52. )
  53. def copy(self):
  54. return copy(self)
  55. def feed_token(self, token, is_end=False):
  56. state_stack = self.state_stack
  57. value_stack = self.value_stack
  58. states = self.states
  59. while True:
  60. state = state_stack[-1]
  61. try:
  62. action, arg = states[state][token.type]
  63. except KeyError:
  64. expected = {s for s in states[state].keys() if s.isupper()}
  65. raise UnexpectedToken(token, expected, state=state, puppet=None)
  66. assert arg != self.end_state
  67. if action is Shift:
  68. # shift once and return
  69. assert not is_end
  70. state_stack.append(arg)
  71. value_stack.append(token)
  72. return arg
  73. else:
  74. # reduce+shift as many times as necessary
  75. rule = arg
  76. size = len(rule.expansion)
  77. if size:
  78. s = value_stack[-size:]
  79. del state_stack[-size:]
  80. del value_stack[-size:]
  81. else:
  82. s = []
  83. value = self.callbacks[rule](s)
  84. _action, new_state = states[state_stack[-1]][rule.origin.name]
  85. assert _action is Shift
  86. state_stack.append(new_state)
  87. value_stack.append(value)
  88. if is_end and state_stack[-1] == self.end_state:
  89. return value_stack[-1]
  90. class _Parser:
  91. def __init__(self, parse_table, callbacks, debug=False):
  92. self.parse_table = parse_table
  93. self.callbacks = callbacks
  94. self.debug = debug
  95. def parse(self, lexer, start, value_stack=None, state_stack=None):
  96. parser_state = ParserState(self.parse_table, lexer, self.callbacks, start, state_stack, value_stack)
  97. return self.parse_from_state(parser_state)
  98. def parse_from_state(self, state):
  99. # Main LALR-parser loop
  100. try:
  101. token = None
  102. for token in state.lexer.lex(state):
  103. state.feed_token(token)
  104. token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
  105. return state.feed_token(token, True)
  106. except UnexpectedInput as e:
  107. try:
  108. e.puppet = ParserPuppet(self, state, state.lexer)
  109. except NameError:
  110. pass
  111. raise e
  112. except Exception as e:
  113. if self.debug:
  114. print("")
  115. print("STATE STACK DUMP")
  116. print("----------------")
  117. for i, s in enumerate(state.state_stack):
  118. print('%d)' % i , s)
  119. print("")
  120. raise
  121. ###}