"""This module implements a LALR(1) Parser """ # Author: Erez Shinan (2017) # Email : erezshin@gmail.com from ..exceptions import UnexpectedToken from ..lexer import Token from ..grammar import Rule from ..utils import Enumerator from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable class Parser(object): def __init__(self, parser_conf, debug=False): assert all(r.options is None or r.options.priority is None for r in parser_conf.rules), "LALR doesn't yet support prioritization" analysis = LALR_Analyzer(parser_conf, debug=debug) analysis.compute_lookahead() callbacks = parser_conf.callbacks self._parse_table = analysis.parse_table self.parser_conf = parser_conf self.parser = _Parser(analysis.parse_table, callbacks) self.parse = self.parser.parse def serialize(self): tokens = Enumerator() rules = Enumerator() states = { state: {tokens.get(token): ((1, rules.get(arg)) if action is Reduce else (0, arg)) for token, (action, arg) in actions.items()} for state, actions in self._parse_table.states.items() } return { 'tokens': tokens.reversed(), 'rules': {idx: r.serialize() for idx, r in rules.reversed().items()}, 'states': states, 'start_state': self._parse_table.start_state, 'end_state': self._parse_table.end_state, } @classmethod def deserialize(cls, data, callbacks): tokens = data['tokens'] rules = {idx: Rule.deserialize(r) for idx, r in data['rules'].items()} states = { state: {tokens[token]: ((Reduce, rules[arg]) if action==1 else (Shift, arg)) for token, (action, arg) in actions.items()} for state, actions in data['states'].items() } parse_table = IntParseTable(states, data['start_state'], data['end_state']) inst = cls.__new__(cls) inst.parser = _Parser(parse_table, callbacks) inst.parse = inst.parser.parse return inst ###{standalone class _Parser: def __init__(self, parse_table, callbacks): self.states = parse_table.states self.start_state = parse_table.start_state self.end_state = parse_table.end_state self.callbacks = callbacks def parse(self, seq, set_state=None): token = None stream = iter(seq) states = self.states state_stack = [self.start_state] value_stack = [] if set_state: set_state(self.start_state) def get_action(token): state = state_stack[-1] try: return states[state][token.type] except KeyError: expected = [s for s in states[state].keys() if s.isupper()] raise UnexpectedToken(token, expected, state=state) def reduce(rule): size = len(rule.expansion) if size: s = value_stack[-size:] del state_stack[-size:] del value_stack[-size:] else: s = [] value = self.callbacks[rule](s) _action, new_state = states[state_stack[-1]][rule.origin.name] assert _action is Shift state_stack.append(new_state) value_stack.append(value) # Main LALR-parser loop for token in stream: while True: action, arg = get_action(token) assert arg != self.end_state if action is Shift: state_stack.append(arg) value_stack.append(token) if set_state: set_state(arg) break # next token else: reduce(arg) token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1) while True: _action, arg = get_action(token) if _action is Shift: assert arg == self.end_state val ,= value_stack return val else: reduce(arg) ###}