From 0c59cba3f5329381fc75a1a37a8426c15165b230 Mon Sep 17 00:00:00 2001 From: Raekye Date: Fri, 9 Aug 2019 03:26:27 -0400 Subject: [PATCH] implement DeRemer and Pennello's lookahead algorithm for LALR(1) --- lark/grammar.py | 4 +- lark/parsers/grammar_analysis.py | 110 +------- lark/parsers/lalr_analysis.py | 432 +++++++++++-------------------- lark/parsers/lalr_parser.py | 27 +- 4 files changed, 169 insertions(+), 404 deletions(-) diff --git a/lark/grammar.py b/lark/grammar.py index f90cce4..3480651 100644 --- a/lark/grammar.py +++ b/lark/grammar.py @@ -28,7 +28,7 @@ class Symbol(Serialize): class Terminal(Symbol): - __serialize_fields__ = 'name', 'filter_out' + __serialize_fields__ = 'name', 'filter_out', '_hash' is_term = True @@ -44,7 +44,7 @@ class Terminal(Symbol): class NonTerminal(Symbol): - __serialize_fields__ = 'name', + __serialize_fields__ = 'name', '_hash' is_term = False diff --git a/lark/parsers/grammar_analysis.py b/lark/parsers/grammar_analysis.py index 71a7bc5..b32f62f 100644 --- a/lark/parsers/grammar_analysis.py +++ b/lark/parsers/grammar_analysis.py @@ -5,37 +5,18 @@ from ..exceptions import GrammarError from ..grammar import Rule, Terminal, NonTerminal import time -t_firsts = 0 -t_xy = 0 -t_call = 0 -cache_hits = 0 -cache_misses = 0 - -# used to be just a tuple (rp, la) -# but by making it an object, -# the hash and equality become trivial -# (slightly faster for sets which are hashtables?) -class RulePtrLookahead(object): - __slots__ = 'rp', 'la' - - def __init__(self, rp, la): - self.rp = rp - self.la = la +# optimizations were made so that there should never be two distinct equal RulePtrs +# to help with hashtable lookup class RulePtr(object): - __slots__ = ('rule', 'index', '_advance', '_lookaheads', '_next_rules_by_origin', '_first') + __slots__ = ('rule', 'index', '_advance') def __init__(self, rule, index): assert isinstance(rule, Rule) assert index <= len(rule.expansion) self.rule = rule self.index = index - #self._hash = hash((self.rule, self.index)) - #self._hash = None self._advance = None - self._lookaheads = {} - self._next_rules_by_origin = None - self._first = None def __repr__(self): before = [x.name for x in self.rule.expansion[:self.index]] @@ -59,89 +40,16 @@ class RulePtr(object): def is_satisfied(self): return self.index == len(self.rule.expansion) - def lookahead(self, la): - rp_la = self._lookaheads.get(la, None) - if rp_la is None: - rp_la = RulePtrLookahead(self, la) - self._lookaheads[la] = rp_la - return rp_la - - def next_rules_by_origin(self, rules_by_origin): - n = self._next_rules_by_origin - if n is None: - n = rules_by_origin[self.next] - self._next_rules_by_origin = n - return n - - # recursive form of lalr_analyis.py:343 (which is easier to understand IMO) - # normally avoid recursion but this allows us to cache - # each intermediate step in a corresponding RulePtr - def first(self, i, firsts, nullable, t): - global cache_hits - global cache_misses - global t_firsts - global t_xy - global t_call - t_call += time.time() - t - n = len(self.rule.expansion) - if i == n: - return ([], True) - x = self._first - t_x = time.time() - if x is None: - t0 = time.time() - t_y = time.time() - cache_misses += 1 - s = self.rule.expansion[i] - l = list(firsts.get(s, [])) - b = (s in nullable) - if b: - t1 = time.time() - t_firsts += t1 - t0 - l_b_2 = self.advance(s).first(i + 1, firsts, nullable, time.time()) - #l_b_2 = first(self.advance(self.next), i + 1, firsts, nullable, time.time()) - t0 = time.time() - l.extend(l_b_2[0]) - b = l_b_2[1] - x = 
(l, b) - self._first = x - t1 = time.time() - t_firsts += t1 - t0 - else: - t_y = time.time() - cache_hits += 1 - t_xy += t_y - t_x - return x - - # optimizations were made so that there should never be - # two distinct equal RulePtrs - # should help set/hashtable lookups? - ''' - def __eq__(self, other): - return self.rule == other.rule and self.index == other.index - def __hash__(self): - return self._hash - ''' - +# state generation ensures no duplicate LR0ItemSets class LR0ItemSet(object): - __slots__ = ('kernel', 'closure', 'transitions', 'lookaheads', '_hash') + __slots__ = ('kernel', 'closure', 'transitions', 'lookaheads') def __init__(self, kernel, closure): self.kernel = fzset(kernel) self.closure = fzset(closure) self.transitions = {} self.lookaheads = defaultdict(set) - #self._hash = hash(self.kernel) - - # state generation ensures no duplicate LR0ItemSets - ''' - def __eq__(self, other): - return self.kernel == other.kernel - - def __hash__(self): - return self._hash - ''' def __repr__(self): return '{%s | %s}' % (', '.join([repr(r) for r in self.kernel]), ', '.join([repr(r) for r in self.closure])) @@ -258,9 +166,11 @@ class GrammarAnalyzer(object): self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules) - # unused, did not help - self.lr1_cache = {} - self.lr1_cache2 = {} + self.nonterminal_transitions = [] + self.directly_reads = defaultdict(set) + self.reads = defaultdict(set) + self.includes = defaultdict(set) + self.lookback = defaultdict(set) def expand_rule(self, source_rule, rules_by_origin=None): "Returns all init_ptrs accessible by rule (recursive)" diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py index eb87e7a..4104713 100644 --- a/lark/parsers/lalr_analysis.py +++ b/lark/parsers/lalr_analysis.py @@ -12,9 +12,8 @@ from collections import defaultdict, deque from ..utils import classify, classify_bool, bfs, fzset, Serialize, Enumerator from ..exceptions import GrammarError -from .grammar_analysis import GrammarAnalyzer, Terminal, RulePtr, LR0ItemSet +from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet from ..grammar import Rule -from . 
import grammar_analysis import time @@ -31,15 +30,6 @@ class Action: Shift = Action('Shift') Reduce = Action('Reduce') -t_set_0 = 0 -t_set_1 = 0 -t_expand = 0 -t_rules = 0 -t_append = 0 -t_z = 0 -t_begin = 0 -t_count = 0 -t_call = 0 class ParseTable: def __init__(self, states, start_states, end_states): @@ -95,9 +85,60 @@ class IntParseTable(ParseTable): ###} + +# digraph and traverse, see The Theory and Practice of Compiler Writing + +# computes F(x) = G(x) union (union { G(y) | x R y }) +# X: nodes +# R: relation (function mapping node -> list of nodes that satisfy the relation) +# G: set valued function +def digraph(X, R, G): + F = {} + S = [] + N = {} + for x in X: + N[x] = 0 + for x in X: + # this is always true for the first iteration, but N[x] may be updated in traverse below + if N[x] == 0: + traverse(x, S, N, X, R, G, F) + return F + +# x: single node +# S: stack +# N: weights +# X: nodes +# R: relation (see above) +# G: set valued function +# F: set valued function we are computing (map of input -> output) +def traverse(x, S, N, X, R, G, F): + S.append(x) + d = len(S) + N[x] = d + F[x] = G(x) + for y in R(x): + if N[y] == 0: + traverse(y, S, N, X, R, G, F) + n_x = N[x] + assert(n_x > 0) + n_y = N[y] + assert(n_y != 0) + if (n_y > 0) and (n_y < n_x): + N[x] = n_y + F[x].update(F[y]) + if N[x] == d: + f_x = F[x] + while True: + z = S.pop() + N[z] = -1 + F[z] = f_x + if z == x: + break + + class LALR_Analyzer(GrammarAnalyzer): - def generate_lr0_states(self): + def compute_lr0_states(self): self.states = set() # map of kernels to LR0ItemSets cache = {} @@ -125,297 +166,118 @@ class LALR_Analyzer(GrammarAnalyzer): for _ in bfs(self.lr0_start_states.values(), step): pass - def discover_lookaheads(self): - # lookaheads is now a member of LR0ItemSet, so don't need to look up a dictionary here - # state -> rule -> set of lookaheads - #self.lookaheads = defaultdict(lambda: defaultdict(set)) - # state -> rule -> list of (set of lookaheads) to propagate to - #self.propagates = defaultdict(lambda: defaultdict(list)) - self.propagates = {} - - t0 = time.time() - - t = Terminal('$END') - for s in self.lr0_start_states.values(): - for rp in s.kernel: - #self.lookaheads[s][rp].add(Terminal('$END')) - s.lookaheads[rp].add(t) - - t_closure = 0 - - # There is a 1 to 1 correspondance between LR0 and LALR1 states. - # We calculate the lookaheads for LALR1 kernel items from the LR0 kernel items. 
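# ---------------------------------------------------------------------------
# Illustrative aside (not part of the patch): digraph/traverse added above is
# the generic solver from DeRemer & Pennello, reused for both the Read and
# Follow computations further down.  For every node x it computes
#     F(x) = G(x)  union  union{ F(y) | x R y }
# and collapses cycles of R so that all nodes of a strongly connected
# component share one result set.  Below is a condensed copy of the two
# functions plus a toy driver; the node names, relation and base sets are
# invented purely for illustration.

def digraph(X, R, G):
    F, S = {}, []
    N = {x: 0 for x in X}
    for x in X:
        if N[x] == 0:
            traverse(x, S, N, X, R, G, F)
    return F

def traverse(x, S, N, X, R, G, F):
    S.append(x)
    d = len(S)
    N[x] = d
    F[x] = G(x)                        # start from the node's own base set
    for y in R(x):
        if N[y] == 0:
            traverse(y, S, N, X, R, G, F)
        if 0 < N[y] < N[x]:
            N[x] = N[y]                # Tarjan-style low-link update
        F[x].update(F[y])              # union in everything reachable via y
    if N[x] == d:                      # x is the root of its component
        while True:
            z = S.pop()
            N[z] = -1                  # mark as finished
            F[z] = F[x]                # the whole component shares one set
            if z == x:
                break

if __name__ == '__main__':
    edges = {'a': ['b'], 'b': ['c'], 'c': ['b'], 'd': []}   # b <-> c is a cycle
    base = {'a': {1}, 'b': {2}, 'c': {3}, 'd': {4}}
    F = digraph(list(edges), lambda x: edges[x], lambda x: set(base[x]))
    print(F)   # {'a': {1, 2, 3}, 'b': {2, 3}, 'c': {2, 3}, 'd': {4}}
# ---------------------------------------------------------------------------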
- # use a terminal that does not exist in the grammar - t = Terminal('$#') - for s in self.states: - p = {} - self.propagates[s] = p - for rp in s.kernel: - q = [] - p[rp] = q - t2 = time.time() - z = self.generate_lr1_closure([rp.lookahead(t)], time.time()) - t3 = time.time() - t_closure += t3 - t2 - #for rp2, la in self.generate_lr1_closure([(rp, t)], time.time()): - for rp2_la in z: - rp2 = rp2_la.rp - la = rp2_la.la + def compute_reads_relations(self): + # handle start state + for root in self.lr0_start_states.values(): + assert(len(root.kernel) == 1) + for rp in root.kernel: + assert(rp.index == 0) + self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ]) + + for state in self.states: + seen = set() + for rp in state.closure: + if rp.is_satisfied: + continue + s = rp.next + # if s is a not a nonterminal + if not s in self.lr0_rules_by_origin: + continue + if s in seen: + continue + seen.add(s) + nt = (state, s) + self.nonterminal_transitions.append(nt) + dr = self.directly_reads[nt] + r = self.reads[nt] + next_state = state.transitions[s] + for rp2 in next_state.closure: if rp2.is_satisfied: continue - next_symbol = rp2.next - next_state = s.transitions[next_symbol] - rp3 = rp2.advance(next_symbol) - assert(rp3 in next_state.kernel) - #x = self.lookaheads[next_state][rp3] - x = next_state.lookaheads[rp3] - if la == t: - # we must propagate rp's lookaheads to rp3's lookahead set - q.append(x) + s2 = rp2.next + # if s2 is a terminal + if not s2 in self.lr0_rules_by_origin: + dr.add(s2) + if s2 in self.NULLABLE: + r.add((next_state, s2)) + + def compute_read_sets(self): + R = lambda nt: self.reads[nt] + G = lambda nt: self.directly_reads[nt] + self.read_sets = digraph(self.nonterminal_transitions, R, G) + + def compute_includes_lookback(self): + for nt in self.nonterminal_transitions: + state, nonterminal = nt + includes = [] + lookback = self.lookback[nt] + for rp in state.closure: + if rp.rule.origin != nonterminal: + continue + # traverse the states for rp(.rule) + state2 = state + for i in range(rp.index, len(rp.rule.expansion)): + s = rp.rule.expansion[i] + nt2 = (state2, s) + state2 = state2.transitions[s] + if not nt2 in self.reads: + continue + j = i + 1 + for j in range(i + 1, len(rp.rule.expansion)): + if not rp.rule.expansion[j] in self.NULLABLE: + break else: - # this lookahead is "generated spontaneously" for rp3 - x.add(la) - - t1 = time.time() - print('Discovering took {:.3f} (generating closure), {:.3f} (total)'.format(t_closure, t1 - t0)) - - def propagate_lookaheads(self): - changed = True - while changed: - changed = False - for s in self.states: - for rp in s.kernel: - # from (from is a keyword) - #f = self.lookaheads[s][rp] - f = s.lookaheads[rp] - # to - t = self.propagates[s][rp] - for x in t: - old = len(x) - x |= f - changed = changed or (len(x) != old) - - def generate_lalr1_states(self): - t0 = time.time() - # 1 to 1 correspondance between LR0 and LALR1 states - # We must fetch the lookaheads we calculated, - # to create the LALR1 kernels from the LR0 kernels. - # Then, we generate the LALR1 states by taking the LR1 closure of the new kernel items. 
- # map of LR0 states to LALR1 states + includes.append(nt2) + # state2 is at the final state for rp.rule + if rp.index == 0: + for rp2 in state2.closure: + if (rp2.rule == rp.rule) and rp2.is_satisfied: + lookback.add((state2, rp2.rule)) + for nt2 in includes: + self.includes[nt2].add(nt) + + def compute_follow_sets(self): + R = lambda nt: self.includes[nt] + G = lambda nt: self.read_sets[nt] + self.follow_sets = digraph(self.nonterminal_transitions, R, G) + + def compute_lookaheads(self): + for nt, lookbacks in self.lookback.items(): + for state, rule in lookbacks: + for s in self.follow_sets[nt]: + state.lookaheads[s].add(rule) + + def compute_lalr1_states(self): m = {} - t_closure = 0 - z = 0 - for s in self.states: - z = max(z, len(s.closure)) - kernel = [] - for rp in s.kernel: - #las = self.lookaheads[s][rp] - las = s.lookaheads[rp] - assert(len(las) > 0) - for la in las: - kernel.append(rp.lookahead(la)) - t0_0 = time.time() - m[s] = self.generate_lr1_closure(kernel, time.time()) - t0_1 = time.time() - t_closure += t0_1 - t0_0 - - print('Generating lalr1 closure for lalr kernels took {:.3f}'.format(t_closure)) - print('Max lr0 state size was {}'.format(z)) - - t1 = time.time() - - self.states = {} - for s, v in m.items(): + for state in self.states: actions = {} - for la, next_state in s.transitions.items(): + for la, next_state in state.transitions.items(): actions[la] = (Shift, next_state.closure) - - sat, _ = classify_bool(v, lambda x: x.rp.is_satisfied) - reductions = classify(sat, lambda x: x.la, lambda x: x.rp) - for la, rps in reductions.items(): - if len(rps) > 1: - raise GrammarError("Collision in %s: %s" % (la, ', '.join([ str(r.rule) for r in rps ]))) + for la, rules in state.lookaheads.items(): + if len(rules) > 1: + raise GrammarError('Collision in %s: %s' % (la, ', '.join([ str(r) for r in rules ]))) if la in actions: if self.debug: - logging.warning("Shift/reduce conflict for terminal %s: (resolving as shift)", la.name) - logging.warning(' * %s', str(rps[0])) + logging.warning('Shift/reduce conflict for terminal %s: (resolving as shift)', la.name) + logging.warning(' * %s', list(rules)[0]) else: - actions[la] = (Reduce, rps[0].rule) + actions[la] = (Reduce, list(rules)[0]) + m[state] = { k.name: v for k, v in actions.items() } - self.states[s.closure] = {k.name: v for k, v in actions.items()} - - t2 = time.time() + self.states = { k.closure: v for k, v in m.items() } + # compute end states end_states = {} - for s in self.states: - for rp in s: + for state in self.states: + for rp in state: for start in self.lr0_start_states: if rp.rule.origin.name == ('$root_' + start) and rp.is_satisfied: assert(not start in end_states) - end_states[start] = s - - t3 = time.time() + end_states[start] = state - self._parse_table = ParseTable(self.states, {start: state.closure for start, state in self.lr0_start_states.items()}, end_states) - - t4 = time.time() + self._parse_table = ParseTable(self.states, { start: state.closure for start, state in self.lr0_start_states.items() }, end_states) if self.debug: self.parse_table = self._parse_table else: self.parse_table = IntParseTable.from_ParseTable(self._parse_table) - - t5 = time.time() - - print(('Generating lalr1 states took ' + ', '.join([ '{:.3f}' ] * 5)).format(t1 - t0, t2 - t1, t3 - t2, t4 - t3, t5 - t4)) - print('Generating firsts took {:.3f} (time actually calculating), {:.3f} (end to end), {:.3f} (just function call)'.format(grammar_analysis.t_firsts, grammar_analysis.t_xy, grammar_analysis.t_call)) - - def 
generate_lr1_closure(self, kernel, t_caller): - global t_call - global t_set_0 - global t_set_1 - global t_expand - global t_rules - global t_append - global t_z - global t_begin - global t_count - - t_start = time.time() - t_call += t_start - t_caller - - # cache the results of this function - # not many hits, no noticeable performance improvement - ''' - k = fzset(kernel) - cached = self.lr1_cache.get(k, None) - if not cached is None: - return cached - ''' - - closure = set() - closure_hash = {} - - y = 0 - - q = list(kernel) - while len(q) > 0: - t_a = time.time() - rp_la = q.pop() - #rp_la_hash = hash(rp_la) - t0 = time.time() - t_begin += t0 - t_a - # try to manually maintain hashtable, - # as a set of just hashes (ints) was notably faster - ''' - if rp_la_hash in closure_hash: - if rp_la in closure_hash[rp_la_hash]: - t0_0 = time.time() - t_set_0 += t0_0 - t0 - continue - t0_0 = time.time() - t_set_0 += t0_0 - t0 - else: - closure_hash[rp_la_hash] = [] - ''' - if rp_la in closure: - t0_0 = time.time() - t_set_0 += t0_0 - t0 - continue - t0_0 = time.time() - closure.add(rp_la) - #closure_hash[rp_la_hash].append(rp_la) - t1 = time.time() - t_set_0 += t0_0 - t0 - t_set_1 += t1 - t0_0 - rp = rp_la.rp - la = rp_la.la - - if rp.is_satisfied: - continue - if rp.next.is_term: - continue - - t2 = time.time() - - # cache these calculations inside each RulePtr - # see grammar_analysis.py:79 - l = [] - ''' - i = rp.index + 1 - n = len(rp.rule.expansion) - l2_i = self.lr1_cache2.get((rp.rule, i), None) - l2 = [] - if l2_i is None: - while i < n: - s = rp.rule.expansion[i] - l2.extend(self.FIRST.get(s, [])) - if not s in self.NULLABLE: - break - i += 1 - self.lr1_cache2[(rp.rule, i)] = (l2, i) - else: - l2 = l2_i[0] - i = l2_i[1] - - l.extend(l2) - ''' - # this function call seems really slow (see grammar_analysis.t_call above) - # tried making it not a method call so don't need to look up vtable - # still equally slow - l2, nullable = rp.first(rp.index + 1, self.FIRST, self.NULLABLE, time.time()) - #l2, nullable = grammar_analysis.first(rp, rp.index + 1, self.FIRST, self.NULLABLE, time.time()) - #l.extend(l2) - l = l2 - t3 = time.time() - - t_expand += t3 - t2 - - # if we don't modify l2 and add an extra check in the loop below, - # we don't have to copy it - # if all of rp.rule.expansion[rp.index + 1:] were nullable: - #if nullable: - # l.append(la) - - t4 = time.time() - x = rp.next_rules_by_origin(self.lr0_rules_by_origin) - t5 = time.time() - - # usually between 20-60? seen as high as ~175 - y = max(y, len(x) * len(l)) - #print('adding {} * {} rules to closure max {}'.format(len(x), len(l), y)) - for r in x: - for s in l: - # cache RulePtr(r, 0) in r (no duplicate RulePtr objects) - # cache r._rp in _rp (1 less object property lookup?) 
- _rp = r._rp - if _rp is None: - _rp = RulePtr(r, 0) - r._rp = _rp - q.append(_rp.lookahead(s)) - #q.append((r._rp, s)) - if nullable: - _rp = r._rp - if _rp is None: - _rp = RulePtr(r, 0) - r._rp = _rp - q.append(_rp.lookahead(la)) - #q.append((r._rp, la)) - - t6 = time.time() - t_rules += t5 - t4 - t_append += t6 - t5 - - #self.lr1_cache[k] = closure - - t_end = time.time() - t_z += t_end - t_start - - t_count += 1 - - if t_count % 1000 == 0: - print('\tGenerating lr1 closure took begin {:.3f}, set contains {:.3f}, set add {:.3f}, get first {:.3f}'.format(t_begin, t_set_0, t_set_1, t_expand)) - print('\tget next rules {:.3f}, append rules {:.3f}, total {:.3f}, call time {:.3f}, count {}'.format(t_rules, t_append, t_z, t_call, t_count)) - print('\tmax number of appends {}'.format(y)) - - return closure diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index b3985ae..657e795 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -17,20 +17,13 @@ class LALR_Parser(object): assert all(r.options is None or r.options.priority is None for r in parser_conf.rules), "LALR doesn't yet support prioritization" analysis = LALR_Analyzer(parser_conf, debug=debug) - t0 = time.time() - analysis.generate_lr0_states() - t1 = time.time() - analysis.discover_lookaheads() - t2 = time.time() - analysis.propagate_lookaheads() - t3 = time.time() - analysis.generate_lalr1_states() - t4 = time.time() - print('Generating lr0 states took {:.3f}'.format(t1 - t0)) - print('Discovering lookaheads took {:.3f}'.format(t2 - t1)) - print('Propagating lookaheads took took {:.3f}'.format(t3 - t2)) - print('Generating lalr states (closure) took {:.3f}'.format(t4 - t3)) - print('-' * 32) + analysis.compute_lr0_states() + analysis.compute_reads_relations() + analysis.compute_read_sets() + analysis.compute_includes_lookback() + analysis.compute_follow_sets() + analysis.compute_lookaheads() + analysis.compute_lalr1_states() callbacks = parser_conf.callbacks self._parse_table = analysis.parse_table @@ -80,9 +73,6 @@ class _Parser: raise UnexpectedToken(token, expected, state=state) def reduce(rule): - if state_stack[-1] == end_state: - return True - size = len(rule.expansion) if size: s = value_stack[-size:] @@ -98,6 +88,9 @@ class _Parser: state_stack.append(new_state) value_stack.append(value) + if state_stack[-1] == end_state: + return True + return False # Main LALR-parser loop
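Taken together, the methods wired into LALR_Parser above implement the DeRemer and Pennello construction: compute_reads_relations builds the DR sets and the reads relation over nonterminal transitions, compute_read_sets solves

    Read(p, A) = DR(p, A) union union{ Read(r, C) | (p, A) reads (r, C) }

with digraph, compute_includes_lookback builds the includes and lookback relations, compute_follow_sets solves

    Follow(p, A) = Read(p, A) union union{ Follow(p', B) | (p, A) includes (p', B) }

with a second digraph pass, and compute_lookaheads distributes

    LA(q, A -> w) = union{ Follow(p, A) | (q, A -> w) lookback (p, A) }

onto each state before compute_lalr1_states folds shifts and reductions into the parse table. A quick way to exercise the whole pipeline end to end is to build any LALR parser through the public API. The grammar and input below are invented for illustration and assume only that the lark package from this branch is importable:

from lark import Lark

# A small left-recursive grammar; constructing its LALR(1) table runs the full
# compute_lr0_states ... compute_lalr1_states sequence shown above.
calc = Lark(r'''
    start: expr
    expr: expr "+" term
        | term
    term: NUMBER
    %import common.NUMBER
    %import common.WS
    %ignore WS
''', parser='lalr')

print(calc.parse("1 + 2 + 3").pretty())

Parsing itself still runs through the same _Parser loop; the patch changes how the ParseTable is built (plus the small reordering of the end-state check in reduce), replacing the old per-item LR(1) closure of generate_lr1_closure with the relation-based lookahead computation.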