@@ -12,7 +12,7 @@ from collections import defaultdict
 from ..utils import classify, classify_bool, bfs, fzset, Serialize, Enumerator
 from ..exceptions import GrammarError
 
-from .grammar_analysis import GrammarAnalyzer, Terminal
+from .grammar_analysis import GrammarAnalyzer, Terminal, RulePtr, LR0ItemSet
 from ..grammar import Rule
 
 ###{standalone
@@ -84,53 +84,158 @@ class IntParseTable(ParseTable):
 
 class LALR_Analyzer(GrammarAnalyzer):
 
-    def compute_lookahead(self):
+    def generate_lr0_states(self):
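+        # Build the LR(0) item sets (kernel + closure) and the goto transitions
+        # between them, walking the automaton breadth-first from the start states.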
+        self.states = set()
 
-        self.states = {}
         def step(state):
-            lookahead = defaultdict(list)
-            sat, unsat = classify_bool(state, lambda rp: rp.is_satisfied)
-            for rp in sat:
-                for term in self.FOLLOW.get(rp.rule.origin, ()):
-                    lookahead[term].append((Reduce, rp.rule))
+            _, unsat = classify_bool(state.closure, lambda rp: rp.is_satisfied)
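+            # Only unsatisfied items (dot not at the end) produce transitions here;
+            # reduce actions are added later, in generate_lalr1_states, once the
+            # LALR(1) lookaheads are known.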
 
             d = classify(unsat, lambda rp: rp.next)
             for sym, rps in d.items():
-                rps = {rp.advance(sym) for rp in rps}
+                kernel = {rp.advance(sym) for rp in rps}
+                closure = set(kernel)
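+                # kernel: the items advanced over sym; closure: the kernel plus the
+                # expansion of every non-terminal that now appears after the dot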
 
-                for rp in set(rps):
+                for rp in kernel:
                     if not rp.is_satisfied and not rp.next.is_term:
-                        rps |= self.expand_rule(rp.next)
+                        closure |= self.expand_rule(rp.next, self.lr0_rules_by_origin)
 
-                new_state = fzset(rps)
-                lookahead[sym].append((Shift, new_state))
+                new_state = LR0ItemSet(kernel, closure)
+                state.transitions[sym] = new_state
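+                # the recorded transitions form the LR(0) goto graph that
+                # discover_lookaheads() walks below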
                 yield new_state
 
-            for k, v in lookahead.items():
-                if len(v) > 1:
-                    if self.debug:
-                        logging.warning("Shift/reduce conflict for terminal %s: (resolving as shift)", k.name)
-                        for act, arg in v:
-                            logging.warning(' * %s: %s', act, arg)
-                    for x in v:
-                        # XXX resolving shift/reduce into shift, like PLY
-                        # Give a proper warning
-                        if x[0] is Shift:
-                            lookahead[k] = [x]
-
-            for k, v in lookahead.items():
-                if not len(v) == 1:
-                    raise GrammarError("Collision in %s: %s" %(k, ', '.join(['\n * %s: %s' % x for x in v])))
-
-            self.states[state] = {k.name:v[0] for k, v in lookahead.items()}
-
-        for _ in bfs(self.start_states.values(), step):
+            self.states.add(state)
+
+        for _ in bfs(self.lr0_start_states.values(), step):
             pass
 
-        self._parse_table = ParseTable(self.states, self.start_states, self.end_states)
+    def discover_lookaheads(self):
+        # state -> rule -> set of lookaheads
+        self.lookaheads = defaultdict(lambda: defaultdict(set))
+        # state -> rule -> list of (set of lookaheads) to propagate to
+        self.propagates = defaultdict(lambda: defaultdict(list))
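+        # A lookahead is either "generated spontaneously" for a kernel item or
+        # propagated to it from another item; the two tables above record both cases.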
+
+        for s in self.lr0_start_states.values():
+            for rp in s.kernel:
+                self.lookaheads[s][rp].add(Terminal('$END'))
+
+        # There is a 1 to 1 correspondence between LR0 and LALR1 states.
+        # We calculate the lookaheads for LALR1 kernel items from the LR0 kernel items.
+        # use a terminal that does not exist in the grammar
+        t = Terminal('$#')
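+        # '$#' is a dummy lookahead: if it survives the LR(1) closure of a kernel
+        # item, that item's real lookaheads must be propagated; any other terminal
+        # that shows up was generated spontaneously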
+        for s in self.states:
+            for rp in s.kernel:
+                for rp2, la in self.generate_lr1_closure([(rp, t)]):
+                    if rp2.is_satisfied:
+                        continue
+                    next_symbol = rp2.next
+                    next_state = s.transitions[next_symbol]
+                    rp3 = rp2.advance(next_symbol)
+                    assert(rp3 in next_state.kernel)
+                    x = self.lookaheads[next_state][rp3]
+                    if la == t:
+                        # we must propagate rp's lookaheads to rp3's lookahead set
+                        self.propagates[s][rp].append(x)
+                    else:
+                        # this lookahead is "generated spontaneously" for rp3
+                        x.add(la)
+
+    def propagate_lookaheads(self):
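+        # Fixed-point iteration: keep unioning each kernel item's lookaheads into
+        # the sets it propagates to, until no set grows.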
+        changed = True
+        while changed:
+            changed = False
+            for s in self.states:
+                for rp in s.kernel:
+                    # from (from is a keyword)
+                    f = self.lookaheads[s][rp]
+                    # to
+                    t = self.propagates[s][rp]
+                    for x in t:
+                        old = len(x)
+                        x |= f
+                        changed = changed or (len(x) != old)
+
+    def generate_lalr1_states(self):
+        # 1 to 1 correspondence between LR0 and LALR1 states
+        # We must fetch the lookaheads we calculated,
+        # to create the LALR1 kernels from the LR0 kernels.
+        # Then, we generate the LALR1 states by taking the LR1 closure of the new kernel items.
+        # map of LR0 states to LALR1 states
+        m = {}
+        for s in self.states:
+            kernel = []
+            for rp in s.kernel:
+                las = self.lookaheads[s][rp]
+                assert(len(las) > 0)
+                for la in las:
+                    kernel.append((rp, la))
+            m[s] = self.generate_lr1_closure(kernel)
+
+        self.states = {}
+        for s, v in m.items():
+            actions = {}
+            for la, next_state in s.transitions.items():
+                actions[la] = (Shift, next_state.closure)
+
+            sat, _ = classify_bool(v, lambda x: x[0].is_satisfied)
+            reductions = classify(sat, lambda x: x[1], lambda x: x[0])
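+            # satisfied items, grouped by lookahead: more than one rule for the same
+            # lookahead is a reduce/reduce collision; a lookahead that also has a
+            # shift action is resolved as a shift (like PLY), below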
+            for la, rps in reductions.items():
+                if len(rps) > 1:
+                    raise GrammarError("Collision in %s: %s" % (la, ', '.join([ str(r.rule) for r in rps ])))
+                if la in actions:
+                    if self.debug:
+                        logging.warning("Shift/reduce conflict for terminal %s: (resolving as shift)", la.name)
+                        logging.warning(' * %s', str(rps[0]))
+                else:
+                    actions[la] = (Reduce, rps[0].rule)
+
+            self.states[s.closure] = {k.name: v for k, v in actions.items()}
+
+        end_states = {}
+        for s in self.states:
+            for rp in s:
+                for start in self.lr0_start_states:
+                    if rp.rule.origin.name == ('$root_' + start) and rp.is_satisfied:
+                        assert(not start in end_states)
+                        end_states[start] = s
+
+        self._parse_table = ParseTable(self.states, {start: state.closure for start, state in self.lr0_start_states.items()}, end_states)
 
         if self.debug:
             self.parse_table = self._parse_table
         else:
             self.parse_table = IntParseTable.from_ParseTable(self._parse_table)
+
+    def generate_lr1_closure(self, kernel):
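+        # Standard LR(1) closure: for an item (A -> a.Bb, la), add an item
+        # (B -> .g, x) for every rule B -> g and every terminal x in FIRST(b la).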
+        closure = set()
+
+        q = list(kernel)
+        while len(q) > 0:
+            rp, la = q.pop()
+            if (rp, la) in closure:
+                continue
+            closure.add((rp, la))
+
+            if rp.is_satisfied:
+                continue
+            if rp.next.is_term:
+                continue
+
+            l = []
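+            # l collects FIRST of everything that follows rp.next in the rule,
+            # plus la itself if that suffix is nullable (see below)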
+            i = rp.index + 1
+            n = len(rp.rule.expansion)
+            while i < n:
+                s = rp.rule.expansion[i]
+                l.extend(self.FIRST.get(s, []))
+                if not s in self.NULLABLE:
+                    break
+                i += 1
+
+            # if all of rp.rule.expansion[rp.index + 1:] were nullable:
+            if i == n:
+                l.append(la)
+
+            for r in self.lr0_rules_by_origin[rp.next]:
+                for s in l:
+                    q.append((RulePtr(r, 0), s))
+
+        return closure
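
The new analysis is driven in four passes instead of the old single compute_lookahead() call. A minimal sketch of the intended call order (the call site, presumably in lalr_parser.py, is not part of this hunk, so the names used there are assumed):

    analyzer = LALR_Analyzer(parser_conf, debug=debug)
    analyzer.generate_lr0_states()      # LR(0) item sets and goto graph
    analyzer.discover_lookaheads()      # spontaneous lookaheads + propagation links
    analyzer.propagate_lookaheads()     # fixed point over the propagation links
    analyzer.generate_lalr1_states()    # LR(1) closures, actions, parse table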