This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

151 lines
4.9 KiB

  1. """This module implements a LALR(1) Parser
  2. """
  3. # Author: Erez Shinan (2017)
  4. # Email : erezshin@gmail.com
  5. from copy import deepcopy
  6. from ..exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken
  7. from ..lexer import Token
  8. from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
  9. from .lalr_puppet import ParserPuppet
  10. ###{standalone
  11. class LALR_Parser(object):
  12. def __init__(self, parser_conf, debug=False):
  13. analysis = LALR_Analyzer(parser_conf, debug=debug)
  14. analysis.compute_lalr()
  15. callbacks = parser_conf.callbacks
  16. self._parse_table = analysis.parse_table
  17. self.parser_conf = parser_conf
  18. self.parser = _Parser(analysis.parse_table, callbacks, debug)
  19. @classmethod
  20. def deserialize(cls, data, memo, callbacks, debug=False):
  21. inst = cls.__new__(cls)
  22. inst._parse_table = IntParseTable.deserialize(data, memo)
  23. inst.parser = _Parser(inst._parse_table, callbacks, debug)
  24. return inst
  25. def serialize(self, memo):
  26. return self._parse_table.serialize(memo)
  27. def parse(self, *args):
  28. return self.parser.parse(*args)
  29. class ParserState:
  30. __slots__ = 'parse_table', 'lexer', 'callbacks', 'start', 'state_stack', 'value_stack', 'start_state', 'end_state', 'states'
  31. def __init__(self, parse_table, lexer, callbacks, start, state_stack=None, value_stack=None):
  32. self.parse_table = parse_table
  33. self.start_state = self.parse_table.start_states[start]
  34. self.end_state = self.parse_table.end_states[start]
  35. self.states = self.parse_table.states
  36. self.lexer = lexer
  37. self.callbacks = callbacks
  38. self.start = start
  39. self.state_stack = state_stack or [self.start_state]
  40. self.value_stack = value_stack or []
  41. @property
  42. def position(self):
  43. return self.state_stack[-1]
  44. def __copy__(self):
  45. return type(self)(
  46. self.parse_table,
  47. self.lexer, # XXX copy
  48. self.callbacks,
  49. self.start,
  50. list(self.state_stack),
  51. deepcopy(self.value_stack),
  52. )
  53. def feed_token(self, token, is_end=False):
  54. state_stack = self.state_stack
  55. value_stack = self.value_stack
  56. states = self.states
  57. while True:
  58. state = state_stack[-1]
  59. try:
  60. action, arg = states[state][token.type]
  61. except KeyError:
  62. expected = {s for s in states[state].keys() if s.isupper()}
  63. raise UnexpectedToken(token, expected, state=state, puppet=None)
  64. assert arg != self.end_state
  65. if action is Shift:
  66. # shift once and return
  67. assert not is_end
  68. state_stack.append(arg)
  69. value_stack.append(token)
  70. return arg
  71. else:
  72. # reduce+shift as many times as necessary
  73. rule = arg
  74. size = len(rule.expansion)
  75. if size:
  76. s = value_stack[-size:]
  77. del state_stack[-size:]
  78. del value_stack[-size:]
  79. else:
  80. s = []
  81. value = self.callbacks[rule](s)
  82. _action, new_state = states[state_stack[-1]][rule.origin.name]
  83. assert _action is Shift
  84. state_stack.append(new_state)
  85. value_stack.append(value)
  86. if is_end and state_stack[-1] == self.end_state:
  87. return value_stack[-1]
  88. class _Parser:
  89. def __init__(self, parse_table, callbacks, debug=False):
  90. self.parse_table = parse_table
  91. self.callbacks = callbacks
  92. self.debug = debug
  93. def parse(self, lexer, start, value_stack=None, state_stack=None):
  94. parser_state = ParserState(self.parse_table, lexer, self.callbacks, start, state_stack, value_stack)
  95. return self.parse_from_state(parser_state)
  96. def parse_from_state(self, state):
  97. # Main LALR-parser loop
  98. try:
  99. token = None
  100. for token in state.lexer.lex(state):
  101. state.feed_token(token)
  102. token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
  103. return state.feed_token(token, True)
  104. except UnexpectedInput as e:
  105. try:
  106. e.puppet = ParserPuppet(self, state, state.lexer)
  107. except NameError:
  108. pass
  109. if isinstance(e, UnexpectedCharacters):
  110. s = state.lexer.state
  111. p = s.line_ctr.char_pos
  112. s.line_ctr.feed(s.text[p:p+1])
  113. raise e
  114. except Exception as e:
  115. if self.debug:
  116. print("")
  117. print("STATE STACK DUMP")
  118. print("----------------")
  119. for i, s in enumerate(state.state_stack):
  120. print('%d)' % i , s)
  121. print("")
  122. raise
  123. ###}