From 5f5acca40bb17663110c9a75484ffc6106fdbbdb Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sun, 24 Dec 2017 20:52:10 +0200 Subject: [PATCH] Cleaned up the LALR(1) parser --- lark/common.py | 2 +- lark/parsers/grammar_analysis.py | 2 +- lark/parsers/lalr_analysis.py | 9 ++++++++- lark/parsers/lalr_parser.py | 32 ++++++++++++++------------------ 4 files changed, 24 insertions(+), 21 deletions(-) diff --git a/lark/common.py b/lark/common.py index d7a5e71..55e9d28 100644 --- a/lark/common.py +++ b/lark/common.py @@ -33,7 +33,7 @@ class UnexpectedToken(ParseError): def is_terminal(sym): - return isinstance(sym, Terminal) or sym.isupper() or sym[0] == '$' + return isinstance(sym, Terminal) or sym.isupper() or sym == '$end' class LexerConf: diff --git a/lark/parsers/grammar_analysis.py b/lark/parsers/grammar_analysis.py index f1b276f..9250c47 100644 --- a/lark/parsers/grammar_analysis.py +++ b/lark/parsers/grammar_analysis.py @@ -125,7 +125,7 @@ class GrammarAnalyzer(object): if not (is_terminal(sym) or sym in self.rules_by_origin): raise GrammarError("Using an undefined rule: %s" % sym) - self.init_state = self.expand_rule(start_symbol) + self.init_state = self.expand_rule('$root') self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(self.rules) diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py index caa41c9..e763b08 100644 --- a/lark/parsers/lalr_analysis.py +++ b/lark/parsers/lalr_analysis.py @@ -19,6 +19,7 @@ ACTION_SHIFT = 0 class LALR_Analyzer(GrammarAnalyzer): def compute_lookahead(self): + self.end_states = [] self.states = {} def step(state): @@ -36,7 +37,10 @@ class LALR_Analyzer(GrammarAnalyzer): if not rp.is_satisfied and not is_terminal(rp.next): rps |= self.expand_rule(rp.next) - lookahead[sym].append(('shift', fzset(rps))) + new_state = fzset(rps) + lookahead[sym].append(('shift', new_state)) + if sym == '$end': + self.end_states.append( new_state ) yield fzset(rps) for k, v in lookahead.items(): @@ -58,6 +62,8 @@ class LALR_Analyzer(GrammarAnalyzer): for _ in bfs([self.init_state], step): pass + self.end_state ,= self.end_states + # -- self.enum = list(self.states) self.enum_rev = {s:i for i,s in enumerate(self.enum)} @@ -71,3 +77,4 @@ class LALR_Analyzer(GrammarAnalyzer): self.init_state_idx = self.enum_rev[self.init_state] + self.end_state_idx = self.enum_rev[self.end_state] diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index 5b6f336..f224bec 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -19,14 +19,14 @@ class Parser: callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None) for rule in analysis.rules} - self.parser = _Parser(analysis.states_idx, analysis.init_state_idx, analysis.start_symbol, callbacks) + self.parser = _Parser(analysis.states_idx, analysis.init_state_idx, analysis.end_state_idx, callbacks) self.parse = self.parser.parse class _Parser: - def __init__(self, states, init_state, start_symbol, callbacks): + def __init__(self, states, init_state, end_state, callbacks): self.states = states self.init_state = init_state - self.start_symbol = start_symbol + self.end_state = end_state self.callbacks = callbacks def parse(self, seq, set_state=None): @@ -49,7 +49,7 @@ class _Parser: raise UnexpectedToken(token, expected, seq, i) - def reduce(rule, size, end=False): + def reduce(rule, size): if size: s = value_stack[-size:] del state_stack[-size:] @@ -57,15 +57,12 @@ class _Parser: else: s = [] - res = self.callbacks[rule](s) - - if end and len(state_stack) == 1 and rule.origin == self.start_symbol: - return FinalReduce(res) + value = self.callbacks[rule](s) _action, new_state = get_action(rule.origin) assert _action == ACTION_SHIFT state_stack.append(new_state) - value_stack.append(res) + value_stack.append(value) # Main LALR-parser loop try: @@ -73,6 +70,7 @@ class _Parser: i += 1 while True: action, arg = get_action(token.type) + assert arg != self.end_state if action == ACTION_SHIFT: state_stack.append(arg) @@ -86,12 +84,10 @@ class _Parser: pass while True: - _action, rule = get_action('$end') - assert _action == 'reduce' - res = reduce(*rule, end=True) - if isinstance(res, FinalReduce): - assert state_stack == [self.init_state] and not value_stack, len(state_stack) - return res.value - - - + _action, arg = get_action('$end') + if _action == ACTION_SHIFT: + assert arg == self.end_state + val ,= value_stack + return val + else: + reduce(*arg)