From 0ee80e675a74720a65bd5f637328a73d48e38503 Mon Sep 17 00:00:00 2001
From: Erez Shinan
Date: Sat, 6 Jan 2018 18:49:24 +0200
Subject: [PATCH] Refactoring for LALR, added the ParseTable class

---
 lark/parser_frontends.py         |  2 +-
 lark/parsers/grammar_analysis.py |  2 +-
 lark/parsers/lalr_analysis.py    | 61 +++++++++++++++++++++++---------
 lark/parsers/lalr_parser.py      | 29 +++++++--------
 4 files changed, 61 insertions(+), 33 deletions(-)

diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py
index 718a0f9..ad5017b 100644
--- a/lark/parser_frontends.py
+++ b/lark/parser_frontends.py
@@ -38,7 +38,7 @@ class LALR_ContextualLexer:
 
         self.parser = lalr_parser.Parser(parser_conf)
 
-        d = {idx:t.keys() for idx, t in self.parser.analysis.states_idx.items()}
+        d = {idx:t.keys() for idx, t in self.parser.analysis.parse_table.states.items()}
         always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else ()
         self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, always_accept=always_accept)
 
diff --git a/lark/parsers/grammar_analysis.py b/lark/parsers/grammar_analysis.py
index 9250c47..391e3dd 100644
--- a/lark/parsers/grammar_analysis.py
+++ b/lark/parsers/grammar_analysis.py
@@ -125,7 +125,7 @@ class GrammarAnalyzer(object):
                 if not (is_terminal(sym) or sym in self.rules_by_origin):
                     raise GrammarError("Using an undefined rule: %s" % sym)
 
-        self.init_state = self.expand_rule('$root')
+        self.start_state = self.expand_rule('$root')
 
         self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(self.rules)
 
diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py
index e763b08..3f2d30f 100644
--- a/lark/parsers/lalr_analysis.py
+++ b/lark/parsers/lalr_analysis.py
@@ -14,7 +14,41 @@ from ..common import GrammarError, is_terminal
 from .grammar_analysis import GrammarAnalyzer
 
-ACTION_SHIFT = 0
+class Action:
+    def __str__(self):
+        return self.__name__
+    def __repr__(self):
+        return str(self)
+
+class Shift(Action): pass
+class Reduce(Action): pass
+
+class ParseTable:
+    def __init__(self, states, start_state, end_state):
+        self.states = states
+        self.start_state = start_state
+        self.end_state = end_state
+
+class IntParseTable(ParseTable):
+
+    @classmethod
+    def from_ParseTable(cls, parse_table):
+        enum = list(parse_table.states)
+        state_to_idx = {s:i for i,s in enumerate(enum)}
+        int_states = {}
+
+        for s, la in parse_table.states.items():
+            la = {k:(v[0], state_to_idx[v[1]]) if v[0] is Shift else v
+                  for k,v in la.items()}
+            int_states[ state_to_idx[s] ] = la
+
+
+        start_state = state_to_idx[parse_table.start_state]
+        end_state = state_to_idx[parse_table.end_state]
+        return cls(int_states, start_state, end_state)
+
+
+
 
 class LALR_Analyzer(GrammarAnalyzer):
@@ -27,7 +61,7 @@ class LALR_Analyzer(GrammarAnalyzer):
             sat, unsat = classify_bool(state, lambda rp: rp.is_satisfied)
             for rp in sat:
                 for term in self.FOLLOW.get(rp.rule.origin, ()):
-                    lookahead[term].append(('reduce', rp.rule))
+                    lookahead[term].append((Reduce, rp.rule))
 
             d = classify(unsat, lambda rp: rp.next)
             for sym, rps in d.items():
@@ -38,7 +72,7 @@ class LALR_Analyzer(GrammarAnalyzer):
                         rps |= self.expand_rule(rp.next)
 
                 new_state = fzset(rps)
-                lookahead[sym].append(('shift', new_state))
+                lookahead[sym].append((Shift, new_state))
                 if sym == '$end':
                     self.end_states.append( new_state )
                 yield fzset(rps)
@@ -50,7 +84,7 @@ class LALR_Analyzer(GrammarAnalyzer):
                     for x in v:
                         # XXX resolving shift/reduce into shift, like PLY
                         # Give a proper warning
-                        if x[0] == 'shift':
+                        if x[0] is Shift:
                             lookahead[k] = [x]
 
             for k, v in lookahead.items():
@@ -59,22 +93,15 @@ class LALR_Analyzer(GrammarAnalyzer):
 
             self.states[state] = {k:v[0] for k, v in lookahead.items()}
 
-        for _ in bfs([self.init_state], step):
+        for _ in bfs([self.start_state], step):
             pass
 
         self.end_state ,= self.end_states
 
-        # --
-        self.enum = list(self.states)
-        self.enum_rev = {s:i for i,s in enumerate(self.enum)}
-        self.states_idx = {}
-
-        for s, la in self.states.items():
-            la = {k:(ACTION_SHIFT, self.enum_rev[v[1]]) if v[0]=='shift'
-                     else (v[0], (v[1], len(v[1].expansion)))   # Reduce
-                  for k,v in la.items()}
-            self.states_idx[ self.enum_rev[s] ] = la
+        self._parse_table = ParseTable(self.states, self.start_state, self.end_state)
+        if self.debug:
+            self.parse_table = self._parse_table
+        else:
+            self.parse_table = IntParseTable.from_ParseTable(self._parse_table)
 
-        self.init_state_idx = self.enum_rev[self.init_state]
-        self.end_state_idx = self.enum_rev[self.end_state]
diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py
index f224bec..c913661 100644
--- a/lark/parsers/lalr_parser.py
+++ b/lark/parsers/lalr_parser.py
@@ -5,7 +5,7 @@
 
 from ..common import ParseError, UnexpectedToken
 
-from .lalr_analysis import LALR_Analyzer, ACTION_SHIFT
+from .lalr_analysis import LALR_Analyzer, Shift
 
 class FinalReduce:
     def __init__(self, value):
@@ -19,14 +19,14 @@ class Parser:
         callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None)
                      for rule in analysis.rules}
 
-        self.parser = _Parser(analysis.states_idx, analysis.init_state_idx, analysis.end_state_idx, callbacks)
+        self.parser = _Parser(analysis.parse_table, callbacks)
         self.parse = self.parser.parse
 
 class _Parser:
-    def __init__(self, states, init_state, end_state, callbacks):
-        self.states = states
-        self.init_state = init_state
-        self.end_state = end_state
+    def __init__(self, parse_table, callbacks):
+        self.states = parse_table.states
+        self.start_state = parse_table.start_state
+        self.end_state = parse_table.end_state
         self.callbacks = callbacks
 
     def parse(self, seq, set_state=None):
@@ -35,10 +35,10 @@ class _Parser:
         stream = iter(seq)
         states = self.states
 
-        state_stack = [self.init_state]
+        state_stack = [self.start_state]
         value_stack = []
 
-        if set_state: set_state(self.init_state)
+        if set_state: set_state(self.start_state)
 
         def get_action(key):
             state = state_stack[-1]
@@ -49,7 +49,8 @@ class _Parser:
                 raise UnexpectedToken(token, expected, seq, i)
 
-        def reduce(rule, size):
+        def reduce(rule):
+            size = len(rule.expansion)
             if size:
                 s = value_stack[-size:]
                 del state_stack[-size:]
@@ -60,7 +61,7 @@ class _Parser:
             value = self.callbacks[rule](s)
 
            _action, new_state = get_action(rule.origin)
-            assert _action == ACTION_SHIFT
+            assert _action is Shift
             state_stack.append(new_state)
             value_stack.append(value)
@@ -72,22 +73,22 @@ class _Parser:
                 action, arg = get_action(token.type)
                 assert arg != self.end_state
 
-                if action == ACTION_SHIFT:
+                if action is Shift:
                     state_stack.append(arg)
                     value_stack.append(token)
                     if set_state: set_state(arg)
                     token = next(stream)
                     i += 1
                 else:
-                    reduce(*arg)
+                    reduce(arg)
         except StopIteration:
             pass
 
         while True:
             _action, arg = get_action('$end')
-            if _action == ACTION_SHIFT:
+            if _action is Shift:
                 assert arg == self.end_state
                 val ,= value_stack
                 return val
             else:
-                reduce(*arg)
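
Illustration (not part of the patch): with this change applied, the analyzer exposes a single parse_table instead of the old states_idx / init_state_idx / end_state_idx attributes. The sketch below builds a toy ParseTable by hand and converts it with IntParseTable.from_ParseTable; the frozenset keys and the None in the Reduce entry are hypothetical stand-ins for the analyzer's real item sets and Rule objects.

# Minimal sketch, assuming this patch is applied to lark.parsers.lalr_analysis.
from lark.parsers.lalr_analysis import ParseTable, IntParseTable, Shift, Reduce

s0 = frozenset(['<item set 0>'])   # stand-in for a frozenset of RulePtrs
s1 = frozenset(['<item set 1>'])

states = {
    s0: {'A': (Shift, s1)},         # on terminal A, shift into s1
    s1: {'$end': (Reduce, None)},   # the real table stores a Rule here, not None
}

table = ParseTable(states, s0, s1)
int_table = IntParseTable.from_ParseTable(table)

# The frozenset keys and every Shift target are replaced by integer indices;
# Reduce entries are carried over unchanged.
assert int_table.states[int_table.start_state]['A'] == (Shift, int_table.end_state)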