--- a/lark/grammar.py
+++ b/lark/grammar.py
@@ -71,7 +71,7 @@ class Rule(Serialize):
     expansion : a list of symbols
     order : index of this expansion amongst all rules of the same name
     """
-    __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash', '_rp')
+    __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash')

     __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options'
     __serialize_namespace__ = Terminal, NonTerminal, RuleOptions
@@ -83,7 +83,6 @@ class Rule(Serialize):
         self.order = order
         self.options = options
         self._hash = hash((self.origin, tuple(self.expansion)))
-        self._rp = None

     def _deserialize(self):
         self._hash = hash((self.origin, tuple(self.expansion)))
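Dropping `_rp` removes the per-`Rule` cache of its initial `RulePtr`. Since `RulePtr` now compares and hashes by value (see the `grammar_analysis.py` hunks below), duplicate instances collapse in sets and dicts, so the cache was a micro-optimization rather than a correctness requirement, and it no longer has to be rebuilt after deserialization the way `_hash` is. A minimal check, assuming the constructor defaults of this version:

```python
from lark.grammar import Rule, NonTerminal, Terminal
from lark.parsers.grammar_analysis import RulePtr

rule = Rule(NonTerminal('a'), [Terminal('X')])
# Two distinct objects, one set entry: value-based __eq__/__hash__ make the
# removed identity cache unnecessary for hashtable lookups.
assert len({RulePtr(rule, 0), RulePtr(rule, 0)}) == 1
```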
--- a/lark/parsers/grammar_analysis.py
+++ b/lark/parsers/grammar_analysis.py
@@ -3,20 +3,16 @@ from collections import Counter, defaultdict
 from ..utils import bfs, fzset, classify
 from ..exceptions import GrammarError
 from ..grammar import Rule, Terminal, NonTerminal
-import time

-# optimizations were made so that there should never be two distinct equal RulePtrs
-# to help with hashtable lookup
 class RulePtr(object):
-    __slots__ = ('rule', 'index', '_advance')
+    __slots__ = ('rule', 'index')

     def __init__(self, rule, index):
         assert isinstance(rule, Rule)
         assert index <= len(rule.expansion)
         self.rule = rule
         self.index = index
-        self._advance = None

     def __repr__(self):
         before = [x.name for x in self.rule.expansion[:self.index]]
@@ -27,19 +23,19 @@ class RulePtr(object):
     def next(self):
         return self.rule.expansion[self.index]

-    # don't create duplicate RulePtrs
     def advance(self, sym):
         assert self.next == sym
-        a = self._advance
-        if a is None:
-            a = RulePtr(self.rule, self.index + 1)
-            self._advance = a
-        return a
+        return RulePtr(self.rule, self.index+1)

     @property
     def is_satisfied(self):
         return self.index == len(self.rule.expansion)

+    def __eq__(self, other):
+        return self.rule == other.rule and self.index == other.index
+
+    def __hash__(self):
+        return hash((self.rule, self.index))

-# state generation ensures no duplicate LR0ItemSets
 class LR0ItemSet(object):
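For readers less familiar with the data structure: a `RulePtr` is an LR(0) item, i.e. a rule with a dot position. `advance` now simply builds a fresh item one position to the right, and value-based `__eq__`/`__hash__` keep equal items interchangeable wherever they end up in sets or dict keys. A small illustration, assuming the constructors shown in this diff (the rule itself is an arbitrary example):

```python
from lark.grammar import Rule, NonTerminal, Terminal
from lark.parsers.grammar_analysis import RulePtr

expr, term, plus = NonTerminal('expr'), NonTerminal('term'), Terminal('PLUS')
rule = Rule(expr, [expr, plus, term])          # expr -> expr PLUS term

rp = RulePtr(rule, 1)                          # item: expr -> expr . PLUS term
assert rp.next == plus                         # symbol to the right of the dot
rp2 = rp.advance(plus)                         # fresh object on every call now
assert rp2 == RulePtr(rule, 2)                 # ...but still equal by value
assert RulePtr(rule, 3).is_satisfied           # dot at the end: ready to reduce
```

The trade is a few more short-lived allocations in exchange for much simpler invariants: there is no cached object graph to keep consistent across copies and serialization.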
@@ -159,19 +155,11 @@ class GrammarAnalyzer(object):
         self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin)

-        # cache RulePtr(r, 0) in r (no duplicate RulePtr objects)
-        for root_rule in lr0_root_rules.values():
-            root_rule._rp = RulePtr(root_rule, 0)
-
-        self.lr0_start_states = {start: LR0ItemSet([root_rule._rp], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
+        self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
                                  for start, root_rule in lr0_root_rules.items()}

         self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)

-        self.nonterminal_transitions = []
-        self.directly_reads = defaultdict(set)
-        self.reads = defaultdict(set)
-        self.includes = defaultdict(set)
-        self.lookback = defaultdict(set)
-
     def expand_rule(self, source_rule, rules_by_origin=None):
         "Returns all init_ptrs accessible by rule (recursive)"
@@ -183,11 +171,7 @@ class GrammarAnalyzer(object):
             assert not rule.is_term, rule
             for r in rules_by_origin[rule]:
-                # don't create duplicate RulePtr objects
-                init_ptr = r._rp
-                if init_ptr is None:
-                    init_ptr = RulePtr(r, 0)
-                    r._rp = init_ptr
-
+                init_ptr = RulePtr(r, 0)
                 init_ptrs.add(init_ptr)

                 if r.expansion: # if not empty rule
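`expand_rule` computes the LR(0) closure seeds: every initial item reachable from a nonterminal by repeatedly expanding leading nonterminals. A simplified, self-contained rendition of the same traversal (`expand_rule_sketch` is a hypothetical name; the real method drives this through lark's `bfs` utility and returns a frozen set):

```python
from lark.parsers.grammar_analysis import RulePtr

def expand_rule_sketch(origin, rules_by_origin):
    """Collect RulePtr(r, 0) for every rule reachable from `origin`
    through leading nonterminals (a plain-loop stand-in for expand_rule)."""
    init_ptrs, pending, seen = set(), [origin], set()
    while pending:
        nt = pending.pop()
        if nt in seen:
            continue
        seen.add(nt)
        for r in rules_by_origin[nt]:
            init_ptrs.add(RulePtr(r, 0))        # duplicates collapse by value
            if r.expansion and not r.expansion[0].is_term:
                pending.append(r.expansion[0])  # expand the leading nonterminal
    return init_ptrs
```

With `rules_by_origin = classify(rules, lambda r: r.origin)`, as the analyzer builds it above, this mirrors what the `bfs`-driven version returns.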
--- a/lark/parsers/lalr_analysis.py
+++ b/lark/parsers/lalr_analysis.py
@@ -15,8 +15,6 @@ from ..exceptions import GrammarError
 from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet
 from ..grammar import Rule
-import time
-
 ###{standalone

 class Action:
@@ -115,8 +113,8 @@ def traverse(x, S, N, X, R, G, F):
     S.append(x)
     d = len(S)
     N[x] = d
-    F[x] = G(x)
-    for y in R(x):
+    F[x] = G[x]
+    for y in R[x]:
         if N[y] == 0:
             traverse(y, S, N, X, R, G, F)
         n_x = N[x]
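The `G(x)`/`R(x)` to `G[x]`/`R[x]` change goes hand in hand with passing the relation and the seed sets as plain (default)dicts instead of wrapping them in lambdas (see `compute_lookaheads` below). `traverse` is the inner loop of DeRemer and Pennello's digraph algorithm: it computes the smallest `F` satisfying `F(x) = G[x] ∪ ⋃{ F(y) : y ∈ R[x] }`, sharing one result set per strongly connected component. A toy run, assuming the module path shown in this diff (note that `digraph` aliases the seed sets in `G` and updates them in place):

```python
from collections import defaultdict
from lark.parsers.lalr_analysis import digraph

X = ['a', 'b', 'c']                                         # nodes
R = defaultdict(set, {'a': {'b'}, 'b': {'a'}, 'c': {'a'}})  # 'a', 'b' form a cycle
G = defaultdict(set, {'a': {1}, 'b': {2}, 'c': {3}})        # per-node seed sets

F = digraph(X, R, G)
assert F['a'] == F['b'] == {1, 2}   # members of one SCC share one merged set
assert F['c'] == {1, 2, 3}          # 'c' pulls in everything it can reach
```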
@@ -137,9 +135,17 @@ def traverse(x, S, N, X, R, G, F):

 class LALR_Analyzer(GrammarAnalyzer):
+    def __init__(self, parser_conf, debug=False):
+        GrammarAnalyzer.__init__(self, parser_conf, debug)
+        self.nonterminal_transitions = []
+        self.directly_reads = defaultdict(set)
+        self.reads = defaultdict(set)
+        self.includes = defaultdict(set)
+        self.lookback = defaultdict(set)
+
     def compute_lr0_states(self):
-        self.states = set()
+        self.lr0_states = set()
         # map of kernels to LR0ItemSets
         cache = {}
@@ -161,7 +167,7 @@ class LALR_Analyzer(GrammarAnalyzer):
                 state.transitions[sym] = new_state
                 yield new_state

-            self.states.add(state)
+            self.lr0_states.add(state)

         for _ in bfs(self.lr0_start_states.values(), step):
             pass
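Two things happen here: the relation containers move from `GrammarAnalyzer` into the only class that uses them, and `self.states` is renamed `self.lr0_states`, making it explicit that these are the LR(0) item sets rather than the final parse-table states built in `compute_lalr1_states` below. The kernel-keyed `cache` is what still guarantees each item set is materialized exactly once, now that `RulePtr` objects themselves are no longer deduplicated. A simplified sketch of that pattern, with hypothetical helper names:

```python
from lark.parsers.grammar_analysis import LR0ItemSet

# Hypothetical, simplified stand-in for the memoization inside
# compute_lr0_states: kernels (sets of RulePtrs) are frozen into hashable
# keys so each distinct item set becomes exactly one LR0ItemSet object.
def get_state(kernel, cache, closure_of):
    key = frozenset(kernel)             # RulePtr hashes by (rule, index)
    state = cache.get(key)
    if state is None:
        state = LR0ItemSet(kernel, closure_of(kernel))
        cache[key] = state
    return state
```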
@@ -174,14 +180,14 @@ class LALR_Analyzer(GrammarAnalyzer):
                 assert(rp.index == 0)
                 self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ])

-        for state in self.states:
+        for state in self.lr0_states:
             seen = set()
             for rp in state.closure:
                 if rp.is_satisfied:
                     continue
                 s = rp.next
-                # if s is a not a nonterminal
-                if not s in self.lr0_rules_by_origin:
+                # if s is not a nonterminal
+                if s not in self.lr0_rules_by_origin:
                     continue
                 if s in seen:
                     continue
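`compute_reads_relations` seeds `directly_reads` with the terminals that can be shifted immediately after a nonterminal transition, and records in `reads` the nullable nonterminal transitions that lookahead can flow through. In DeRemer-Pennello terms: `DR(p, A)` is the set of terminals shiftable in `goto(p, A)`, and `(p, A) reads (r, C)` iff `C` is nullable and shiftable in `r = goto(p, A)`. A toy instance, with plain tuples standing in for the analyzer's state objects:

```python
from collections import defaultdict
from lark.parsers.lalr_analysis import digraph

# Grammar fragment: S -> A B 'x', with B nullable. Shifting A from state p
# lands in q, where only the nullable B precedes 'x', so (p, A) reads (q, B).
ntA = ('p', 'A')
ntB = ('q', 'B')

directly_reads = defaultdict(set, {ntA: set(), ntB: {'x'}})
reads = defaultdict(set, {ntA: {ntB}})

read_sets = digraph([ntA, ntB], reads, directly_reads)
assert read_sets[ntA] == {'x'}      # 'x' is read through the nullable B
```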
@@ -201,11 +207,6 @@ class LALR_Analyzer(GrammarAnalyzer):
                         if s2 in self.NULLABLE:
                             r.add((next_state, s2))

-    def compute_read_sets(self):
-        R = lambda nt: self.reads[nt]
-        G = lambda nt: self.directly_reads[nt]
-        self.read_sets = digraph(self.nonterminal_transitions, R, G)
-
     def compute_includes_lookback(self):
         for nt in self.nonterminal_transitions:
             state, nonterminal = nt
@@ -220,9 +221,8 @@ class LALR_Analyzer(GrammarAnalyzer):
                     s = rp.rule.expansion[i]
                     nt2 = (state2, s)
                     state2 = state2.transitions[s]
-                    if not nt2 in self.reads:
+                    if nt2 not in self.reads:
                         continue
-                    j = i + 1
                     for j in range(i + 1, len(rp.rule.expansion)):
                         if not rp.rule.expansion[j] in self.NULLABLE:
                             break
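The dead `j = i + 1` initializer can go because nothing reads `j` after the loop: the `for`/`else` idiom that follows fires exactly when every symbol after position `i` is nullable, which is the "includes" condition. For reference, `(p, A) includes (p', B)` iff `B -> β A γ` with `γ` nullable and `goto(p', β) = p`; Follow information then propagates along that relation. A toy instance in the same style as before:

```python
from collections import defaultdict
from lark.parsers.lalr_analysis import digraph

# Rules: B -> A (empty suffix after A, trivially nullable) and S -> B 'y'.
# Whatever follows B also follows A, so Follow flows from (p, B) into (p, A).
ntA = ('p', 'A')
ntB = ('p', 'B')

read_sets = defaultdict(set, {ntA: set(), ntB: {'y'}})  # 'y' is read after B
includes = defaultdict(set, {ntA: {ntB}})               # (p, A) includes (p, B)

follow_sets = digraph([ntA, ntB], includes, read_sets)
assert follow_sets[ntA] == {'y'}
```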
@@ -236,20 +236,18 @@ class LALR_Analyzer(GrammarAnalyzer):
             for nt2 in includes:
                 self.includes[nt2].add(nt)

-    def compute_follow_sets(self):
-        R = lambda nt: self.includes[nt]
-        G = lambda nt: self.read_sets[nt]
-        self.follow_sets = digraph(self.nonterminal_transitions, R, G)
-
     def compute_lookaheads(self):
+        read_sets = digraph(self.nonterminal_transitions, self.reads, self.directly_reads)
+        follow_sets = digraph(self.nonterminal_transitions, self.includes, read_sets)
+
         for nt, lookbacks in self.lookback.items():
             for state, rule in lookbacks:
-                for s in self.follow_sets[nt]:
+                for s in follow_sets[nt]:
                     state.lookaheads[s].add(rule)

     def compute_lalr1_states(self):
         m = {}
-        for state in self.states:
+        for state in self.lr0_states:
             actions = {}
             for la, next_state in state.transitions.items():
                 actions[la] = (Shift, next_state.closure)
@@ -281,3 +279,10 @@ class LALR_Analyzer(GrammarAnalyzer):
             self.parse_table = self._parse_table
         else:
             self.parse_table = IntParseTable.from_ParseTable(self._parse_table)
+
+    def compute_lalr(self):
+        self.compute_lr0_states()
+        self.compute_reads_relations()
+        self.compute_includes_lookback()
+        self.compute_lookaheads()
+        self.compute_lalr1_states()
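With `read_sets` and `follow_sets` demoted to locals of `compute_lookaheads`, the two dedicated methods disappear and `compute_lalr` becomes the single public driver. The order of the five calls is load-bearing: each phase consumes containers the previous one filled. Roughly, under the names in this diff:

```python
# compute_lr0_states()        -> lr0_states, state.transitions
# compute_reads_relations()   -> nonterminal_transitions, directly_reads, reads
# compute_includes_lookback() -> includes, lookback (tests membership in reads)
# compute_lookaheads()        -> state.lookaheads, via two digraph() passes
# compute_lalr1_states()      -> the final parse table
```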
--- a/lark/parsers/lalr_parser.py
+++ b/lark/parsers/lalr_parser.py
@@ -8,8 +8,6 @@ from ..utils import Enumerator, Serialize
 from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
-import time

 ###{standalone

 class LALR_Parser(object):
@@ -17,13 +15,7 @@ class LALR_Parser(object):
         assert all(r.options is None or r.options.priority is None
                    for r in parser_conf.rules), "LALR doesn't yet support prioritization"
         analysis = LALR_Analyzer(parser_conf, debug=debug)
-        analysis.compute_lr0_states()
-        analysis.compute_reads_relations()
-        analysis.compute_read_sets()
-        analysis.compute_includes_lookback()
-        analysis.compute_follow_sets()
-        analysis.compute_lookaheads()
-        analysis.compute_lalr1_states()
+        analysis.compute_lalr()
         callbacks = parser_conf.callbacks

         self._parse_table = analysis.parse_table
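From the caller's side the whole refactor collapses into one line. An end-to-end check through lark's public API (the grammar is an arbitrary example; `parser='lalr'` drives exactly the code paths changed here):

```python
from lark import Lark

parser = Lark(r"""
    start: NUMBER ("+" NUMBER)*
    %import common.NUMBER
    %ignore " "
""", parser='lalr')

print(parser.parse("1 + 2 + 3").pretty())
```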
@@ -88,11 +80,6 @@ class _Parser:
                 state_stack.append(new_state)
                 value_stack.append(value)

-                if state_stack[-1] == end_state:
-                    return True
-                return False
-
         # Main LALR-parser loop
         for token in stream:
             while True:
@@ -111,7 +98,8 @@ class _Parser:
         while True:
             _action, arg = get_action(token)
             assert(_action is Reduce)
-            if reduce(arg):
+            reduce(arg)
+            if state_stack[-1] == end_state:
                 return value_stack[-1]

 ###}
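These last two hunks change `reduce` from "reduce and report completion" into a pure state-machine step: completion is now detected by the caller, which checks whether the automaton has landed in `end_state` after each reduction on the synthetic `$END` token, and the old code's completion test on every mid-parse reduction disappears. A hypothetical standalone rendition of the new end-of-input contract:

```python
from lark.parsers.lalr_analysis import Reduce

def finish(get_action, reduce, state_stack, value_stack, end_state, end_token):
    """Schematic of the loop above: keep reducing on end-of-input until the
    automaton reaches end_state, then hand back the finished parse value."""
    while True:
        _action, arg = get_action(end_token)
        assert _action is Reduce
        reduce(arg)                       # pure step: no completion flag
        if state_stack[-1] == end_state:  # caller owns the termination test
            return value_stack[-1]
```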