@@ -7,13 +7,16 @@ For now, shift/reduce conflicts are automatically resolved as shifts.
 # Email : erezshin@gmail.com

 import logging
-from collections import defaultdict
+from collections import defaultdict, deque

 from ..utils import classify, classify_bool, bfs, fzset, Serialize, Enumerator
 from ..exceptions import GrammarError

 from .grammar_analysis import GrammarAnalyzer, Terminal, RulePtr, LR0ItemSet
 from ..grammar import Rule
+from . import grammar_analysis

+import time

 ###{standalone
@@ -28,6 +31,16 @@ class Action:
 Shift = Action('Shift')
 Reduce = Action('Reduce')
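
+# Timing buckets for the profiling printouts below: each t_* accumulates
+# the seconds spent in one phase of generate_lr1_closure.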
+t_set_0 = 0
+t_set_1 = 0
+t_expand = 0
+t_rules = 0
+t_append = 0
+t_z = 0
+t_begin = 0
+t_count = 0
+t_call = 0

 class ParseTable:
     def __init__(self, states, start_states, end_states):
         self.states = states
@@ -86,20 +99,24 @@ class LALR_Analyzer(GrammarAnalyzer):

     def generate_lr0_states(self):
         self.states = set()
         # map of kernels to LR0ItemSets
         cache = {}

         def step(state):
             _, unsat = classify_bool(state.closure, lambda rp: rp.is_satisfied)

             d = classify(unsat, lambda rp: rp.next)
             for sym, rps in d.items():
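+                # Intern item sets by kernel: if two transitions produce the
+                # same kernel, reuse the existing LR0ItemSet instead of
+                # re-expanding its closure (a simple memoization).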
-                kernel = {rp.advance(sym) for rp in rps}
-                closure = set(kernel)
-
-                for rp in kernel:
-                    if not rp.is_satisfied and not rp.next.is_term:
-                        closure |= self.expand_rule(rp.next, self.lr0_rules_by_origin)
-
-                new_state = LR0ItemSet(kernel, closure)
+                kernel = fzset({rp.advance(sym) for rp in rps})
+                new_state = cache.get(kernel, None)
+                if new_state is None:
+                    closure = set(kernel)
+                    for rp in kernel:
+                        if not rp.is_satisfied and not rp.next.is_term:
+                            closure |= self.expand_rule(rp.next, self.lr0_rules_by_origin)
+                    new_state = LR0ItemSet(kernel, closure)
+                    cache[kernel] = new_state
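+                # new_state's closure = the kernel items plus the expansion
+                # of every nonterminal that appears right after a dot.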

                 state.transitions[sym] = new_state
                 yield new_state
@@ -109,36 +126,59 @@ class LALR_Analyzer(GrammarAnalyzer):
         pass

     def discover_lookaheads(self):
+        # lookaheads is now a member of LR0ItemSet, so don't need to look up a dictionary here
         # state -> rule -> set of lookaheads
-        self.lookaheads = defaultdict(lambda: defaultdict(set))
+        #self.lookaheads = defaultdict(lambda: defaultdict(set))
         # state -> rule -> list of (set of lookaheads) to propagate to
-        self.propagates = defaultdict(lambda: defaultdict(list))
+        #self.propagates = defaultdict(lambda: defaultdict(list))
+        self.propagates = {}

+        t0 = time.time()

+        t = Terminal('$END')
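+        # Seed: every kernel item of a start state begins with the
+        # end-of-input lookahead.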
         for s in self.lr0_start_states.values():
             for rp in s.kernel:
-                self.lookaheads[s][rp].add(Terminal('$END'))
+                #self.lookaheads[s][rp].add(Terminal('$END'))
+                s.lookaheads[rp].add(t)

+        t_closure = 0

+        # There is a 1 to 1 correspondence between LR0 and LALR1 states.
+        # We calculate the lookaheads for LALR1 kernel items from the LR0 kernel items.
         # use a terminal that does not exist in the grammar
         t = Terminal('$#')
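+        # Classic lookahead discovery (Dragon Book 4.7.5, sketched here):
+        # close each kernel item against the dummy '$#'. A '$#' that
+        # survives means "depends on the kernel item's own lookaheads",
+        # so it must be propagated; any concrete terminal was generated
+        # spontaneously and is recorded right away.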
         for s in self.states:
+            p = {}
+            self.propagates[s] = p
             for rp in s.kernel:
-                for rp2, la in self.generate_lr1_closure([(rp, t)]):
+                q = []
+                p[rp] = q
+                t2 = time.time()
+                z = self.generate_lr1_closure([rp.lookahead(t)], time.time())
+                t3 = time.time()
+                t_closure += t3 - t2
+                #for rp2, la in self.generate_lr1_closure([(rp, t)], time.time()):
+                for rp2_la in z:
+                    rp2 = rp2_la.rp
+                    la = rp2_la.la
                     if rp2.is_satisfied:
                         continue
                     next_symbol = rp2.next
                     next_state = s.transitions[next_symbol]
                     rp3 = rp2.advance(next_symbol)
                     assert(rp3 in next_state.kernel)
-                    x = self.lookaheads[next_state][rp3]
+                    #x = self.lookaheads[next_state][rp3]
+                    x = next_state.lookaheads[rp3]
                     if la == t:
                         # we must propagate rp's lookaheads to rp3's lookahead set
-                        self.propagates[s][rp].append(x)
+                        q.append(x)
                     else:
                         # this lookahead is "generated spontaneously" for rp3
                         x.add(la)

+        t1 = time.time()
+        print('Discovering took {:.3f} (generating closure), {:.3f} (total)'.format(t_closure, t1 - t0))

     def propagate_lookaheads(self):
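+        # A simple fixpoint: keep unioning each kernel item's lookahead set
+        # into every set it propagates to, until nothing grows any further.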
         changed = True
         while changed:
@@ -146,7 +186,8 @@ class LALR_Analyzer(GrammarAnalyzer):
             for s in self.states:
                 for rp in s.kernel:
                     # from (from is a keyword)
-                    f = self.lookaheads[s][rp]
+                    #f = self.lookaheads[s][rp]
+                    f = s.lookaheads[rp]
                     # to
                     t = self.propagates[s][rp]
                     for x in t:
@@ -155,20 +196,33 @@ class LALR_Analyzer(GrammarAnalyzer):
                     changed = changed or (len(x) != old)

     def generate_lalr1_states(self):
+        t0 = time.time()
+        # 1 to 1 correspondence between LR0 and LALR1 states
+        # We must fetch the lookaheads we calculated,
+        # to create the LALR1 kernels from the LR0 kernels.
+        # Then, we generate the LALR1 states by taking the LR1 closure of the new kernel items.
         # map of LR0 states to LALR1 states
         m = {}
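+        # Sketch of the step: each LR0 kernel item rp, paired with each of
+        # its discovered lookaheads la, becomes one LR1 item; the LR1
+        # closure of those pairs is the LALR1 state.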
+        t_closure = 0
+        z = 0
         for s in self.states:
+            z = max(z, len(s.closure))
             kernel = []
             for rp in s.kernel:
-                las = self.lookaheads[s][rp]
+                #las = self.lookaheads[s][rp]
+                las = s.lookaheads[rp]
                 assert(len(las) > 0)
                 for la in las:
-                    kernel.append((rp, la))
-            m[s] = self.generate_lr1_closure(kernel)
+                    kernel.append(rp.lookahead(la))
+            t0_0 = time.time()
+            m[s] = self.generate_lr1_closure(kernel, time.time())
+            t0_1 = time.time()
+            t_closure += t0_1 - t0_0

+        print('Generating lalr1 closure for lalr kernels took {:.3f}'.format(t_closure))
+        print('Max lr0 state size was {}'.format(z))

+        t1 = time.time()

         self.states = {}
         for s, v in m.items():
@@ -176,8 +230,8 @@ class LALR_Analyzer(GrammarAnalyzer):
             for la, next_state in s.transitions.items():
                 actions[la] = (Shift, next_state.closure)
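
+            # v is this state's LALR1 item set: every satisfied item (dot at
+            # the end) becomes a Reduce keyed by its lookahead; two different
+            # rules reducing on the same lookahead is the reduce/reduce
+            # collision reported below.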
-            sat, _ = classify_bool(v, lambda x: x[0].is_satisfied)
-            reductions = classify(sat, lambda x: x[1], lambda x: x[0])
+            sat, _ = classify_bool(v, lambda x: x.rp.is_satisfied)
+            reductions = classify(sat, lambda x: x.la, lambda x: x.rp)
             for la, rps in reductions.items():
                 if len(rps) > 1:
                     raise GrammarError("Collision in %s: %s" % (la, ', '.join([ str(r.rule) for r in rps ])))
@@ -190,6 +244,8 @@ class LALR_Analyzer(GrammarAnalyzer):

             self.states[s.closure] = {k.name: v for k, v in actions.items()}

+        t2 = time.time()

         end_states = {}
         for s in self.states:
             for rp in s:
@@ -198,44 +254,168 @@ class LALR_Analyzer(GrammarAnalyzer):
                     assert(not start in end_states)
                     end_states[start] = s

+        t3 = time.time()

         self._parse_table = ParseTable(self.states, {start: state.closure for start, state in self.lr0_start_states.items()}, end_states)

+        t4 = time.time()

         if self.debug:
             self.parse_table = self._parse_table
         else:
             self.parse_table = IntParseTable.from_ParseTable(self._parse_table)

-    def generate_lr1_closure(self, kernel):
+        t5 = time.time()

+        print(('Generating lalr1 states took ' + ', '.join([ '{:.3f}' ] * 5)).format(t1 - t0, t2 - t1, t3 - t2, t4 - t3, t5 - t4))
+        print('Generating firsts took {:.3f} (time actually calculating), {:.3f} (end to end), {:.3f} (just function call)'.format(grammar_analysis.t_firsts, grammar_analysis.t_xy, grammar_analysis.t_call))

+    def generate_lr1_closure(self, kernel, t_caller):
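+        # Standard LR1 closure over a worklist: for each item (A -> a.Bb, x),
+        # add (B -> .g, y) for every rule B -> g and every terminal y in
+        # FIRST(b); when b is nullable, x itself is a lookahead too.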
+        global t_call
+        global t_set_0
+        global t_set_1
+        global t_expand
+        global t_rules
+        global t_append
+        global t_z
+        global t_begin
+        global t_count

+        t_start = time.time()
+        t_call += t_start - t_caller

+        # cache the results of this function
+        # not many hits, no noticeable performance improvement
+        '''
+        k = fzset(kernel)
+        cached = self.lr1_cache.get(k, None)
+        if not cached is None:
+            return cached
+        '''

         closure = set()
+        closure_hash = {}

+        y = 0

         q = list(kernel)
         while len(q) > 0:
-            rp, la = q.pop()
-            if (rp, la) in closure:
+            t_a = time.time()
+            rp_la = q.pop()
+            #rp_la_hash = hash(rp_la)
+            t0 = time.time()
+            t_begin += t0 - t_a
+            # try to manually maintain hashtable,
+            # as a set of just hashes (ints) was notably faster
+            '''
+            if rp_la_hash in closure_hash:
+                if rp_la in closure_hash[rp_la_hash]:
+                    t0_0 = time.time()
+                    t_set_0 += t0_0 - t0
+                    continue
+                t0_0 = time.time()
+                t_set_0 += t0_0 - t0
+            else:
+                closure_hash[rp_la_hash] = []
+            '''
+            if rp_la in closure:
+                t0_0 = time.time()
+                t_set_0 += t0_0 - t0
                 continue
-            closure.add((rp, la))
+            t0_0 = time.time()
+            closure.add(rp_la)
+            #closure_hash[rp_la_hash].append(rp_la)
+            t1 = time.time()
+            t_set_0 += t0_0 - t0
+            t_set_1 += t1 - t0_0
+            rp = rp_la.rp
+            la = rp_la.la

             if rp.is_satisfied:
                 continue
             if rp.next.is_term:
                 continue
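
+            # rp.next is a nonterminal here: FIRST of the rest of the rule
+            # after it (plus the item's own lookahead when that rest is
+            # nullable) supplies the lookaheads for the new items.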
+            t2 = time.time()

+            # cache these calculations inside each RulePtr
+            # see grammar_analysis.py:79
             l = []
+            '''
             i = rp.index + 1
             n = len(rp.rule.expansion)
             while i < n:
                 s = rp.rule.expansion[i]
                 l.extend(self.FIRST.get(s, []))
                 if not s in self.NULLABLE:
                     break
                 i += 1

+            l2_i = self.lr1_cache2.get((rp.rule, i), None)
+            l2 = []
+            if l2_i is None:
+                while i < n:
+                    s = rp.rule.expansion[i]
+                    l2.extend(self.FIRST.get(s, []))
+                    if not s in self.NULLABLE:
+                        break
+                    i += 1
+                self.lr1_cache2[(rp.rule, i)] = (l2, i)
+            else:
+                l2 = l2_i[0]
+                i = l2_i[1]

+            l.extend(l2)
+            '''
+            # this function call seems really slow (see grammar_analysis.t_call above)
+            # tried making it not a method call so don't need to look up vtable
+            # still equally slow
+            l2, nullable = rp.first(rp.index + 1, self.FIRST, self.NULLABLE, time.time())
+            #l2, nullable = grammar_analysis.first(rp, rp.index + 1, self.FIRST, self.NULLABLE, time.time())
+            #l.extend(l2)
+            l = l2
+            t3 = time.time()

+            t_expand += t3 - t2

+            # if we don't modify l2 and add an extra check in the loop below,
+            # we don't have to copy it
             # if all of rp.rule.expansion[rp.index + 1:] were nullable:
-            if i == n:
-                l.append(la)
+            #if nullable:
+            # l.append(la)

+            t4 = time.time()
+            x = rp.next_rules_by_origin(self.lr0_rules_by_origin)
+            t5 = time.time()
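+            # x holds the rules whose origin is the nonterminal rp.next; each
+            # gets a fresh dot-at-0 item, paired with every lookahead in l.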

-            for r in self.lr0_rules_by_origin[rp.next]:
+            # usually between 20-60? seen as high as ~175
+            y = max(y, len(x) * len(l))
+            #print('adding {} * {} rules to closure max {}'.format(len(x), len(l), y))
+            for r in x:
                 for s in l:
-                    q.append((RulePtr(r, 0), s))
+                    # cache RulePtr(r, 0) in r (no duplicate RulePtr objects)
+                    # cache r._rp in _rp (1 less object property lookup?)
+                    _rp = r._rp
+                    if _rp is None:
+                        _rp = RulePtr(r, 0)
+                        r._rp = _rp
+                    q.append(_rp.lookahead(s))
+                    #q.append((r._rp, s))
+                if nullable:
+                    _rp = r._rp
+                    if _rp is None:
+                        _rp = RulePtr(r, 0)
+                        r._rp = _rp
+                    q.append(_rp.lookahead(la))
+                    #q.append((r._rp, la))

+            t6 = time.time()
+            t_rules += t5 - t4
+            t_append += t6 - t5

+        #self.lr1_cache[k] = closure

+        t_end = time.time()
+        t_z += t_end - t_start

+        t_count += 1

+        if t_count % 1000 == 0:
+            print('\tGenerating lr1 closure took begin {:.3f}, set contains {:.3f}, set add {:.3f}, get first {:.3f}'.format(t_begin, t_set_0, t_set_1, t_expand))
+            print('\tget next rules {:.3f}, append rules {:.3f}, total {:.3f}, call time {:.3f}, count {}'.format(t_rules, t_append, t_z, t_call, t_count))
+            print('\tmax number of appends {}'.format(y))

         return closure