@@ -71,7 +71,7 @@ class Rule(Serialize):
     expansion : a list of symbols
     order : index of this expansion amongst all rules of the same name
     """
-    __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash', '_rp')
+    __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash')

     __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options'
     __serialize_namespace__ = Terminal, NonTerminal, RuleOptions
@@ -83,7 +83,6 @@ class Rule(Serialize):
         self.order = order
         self.options = options
         self._hash = hash((self.origin, tuple(self.expansion)))
-        self._rp = None

     def _deserialize(self):
         self._hash = hash((self.origin, tuple(self.expansion)))
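The two `grammar.py` edits above have to travel together: `Rule` declares `__slots__`, so once `'_rp'` leaves the tuple, the `self._rp = None` assignment in `__init__` would raise at construction time. A minimal standalone sketch of that constraint (toy class, not lark's `Rule`):

```python
class Point:
    __slots__ = ('x', 'y')   # the only attributes instances may have

    def __init__(self, x, y):
        self.x = x
        self.y = y
        # self.cache = None  # would raise AttributeError: no slot named 'cache'
```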
@@ -3,20 +3,16 @@ from collections import Counter, defaultdict
 from ..utils import bfs, fzset, classify
 from ..exceptions import GrammarError
 from ..grammar import Rule, Terminal, NonTerminal
-import time

-# optimizations were made so that there should never be two distinct equal RulePtrs
-# to help with hashtable lookup
 class RulePtr(object):
-    __slots__ = ('rule', 'index', '_advance')
+    __slots__ = ('rule', 'index')

     def __init__(self, rule, index):
         assert isinstance(rule, Rule)
         assert index <= len(rule.expansion)
         self.rule = rule
         self.index = index
-        self._advance = None

     def __repr__(self):
         before = [x.name for x in self.rule.expansion[:self.index]]
@@ -27,19 +23,19 @@ class RulePtr(object):
     def next(self):
         return self.rule.expansion[self.index]

-    # don't create duplicate RulePtrs
     def advance(self, sym):
         assert self.next == sym
-        a = self._advance
-        if a is None:
-            a = RulePtr(self.rule, self.index + 1)
-            self._advance = a
-        return a
+        return RulePtr(self.rule, self.index+1)

     @property
     def is_satisfied(self):
         return self.index == len(self.rule.expansion)

+    def __eq__(self, other):
+        return self.rule == other.rule and self.index == other.index
+
+    def __hash__(self):
+        return hash((self.rule, self.index))

 # state generation ensures no duplicate LR0ItemSets
 class LR0ItemSet(object):
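With the `_advance` cache gone, `advance()` builds a fresh `RulePtr` on every call, so deduplication now happens by value through the new `__eq__`/`__hash__` rather than by object identity. A self-contained sketch of that contract (a plain string stands in for `Rule`):

```python
# Value semantics for RulePtr: distinct objects that denote the same
# dotted rule compare equal and collapse inside sets and dict keys.
class RulePtr:
    __slots__ = ('rule', 'index')
    def __init__(self, rule, index):
        self.rule = rule
        self.index = index
    def advance(self):
        return RulePtr(self.rule, self.index + 1)   # always a fresh object
    def __eq__(self, other):
        return self.rule == other.rule and self.index == other.index
    def __hash__(self):
        return hash((self.rule, self.index))

rule = 'expr -> expr + term'       # any hashable stands in for Rule here
a = RulePtr(rule, 0).advance()
b = RulePtr(rule, 1)
assert a is not b and a == b
assert len({a, b}) == 1            # duplicates collapse in hash containers
```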
@@ -159,19 +155,11 @@ class GrammarAnalyzer(object):
         self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin)

-        # cache RulePtr(r, 0) in r (no duplicate RulePtr objects)
-        for root_rule in lr0_root_rules.values():
-            root_rule._rp = RulePtr(root_rule, 0)
-        self.lr0_start_states = {start: LR0ItemSet([root_rule._rp], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
+        self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
                               for start, root_rule in lr0_root_rules.items()}

         self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)

-        self.nonterminal_transitions = []
-        self.directly_reads = defaultdict(set)
-        self.reads = defaultdict(set)
-        self.includes = defaultdict(set)
-        self.lookback = defaultdict(set)
-
     def expand_rule(self, source_rule, rules_by_origin=None):
         "Returns all init_ptrs accessible by rule (recursive)"
@@ -183,11 +171,7 @@ class GrammarAnalyzer(object):
            assert not rule.is_term, rule

            for r in rules_by_origin[rule]:
-                # don't create duplicate RulePtr objects
-                init_ptr = r._rp
-                if init_ptr is None:
-                    init_ptr = RulePtr(r, 0)
-                    r._rp = init_ptr
+                init_ptr = RulePtr(r, 0)
                init_ptrs.add(init_ptr)

                if r.expansion: # if not empty rule

@@ -15,8 +15,6 @@ from ..exceptions import GrammarError
 from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet
 from ..grammar import Rule

-import time
-
 ###{standalone

 class Action:
@@ -115,8 +113,8 @@ def traverse(x, S, N, X, R, G, F):
     S.append(x)
     d = len(S)
     N[x] = d
-    F[x] = G(x)
-    for y in R(x):
+    F[x] = G[x]
+    for y in R[x]:
         if N[y] == 0:
             traverse(y, S, N, X, R, G, F)
         n_x = N[x]
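`G` and `R` switch here from lambdas to plain mappings, hence `G[x]`/`R[x]`. For context, `traverse` is the SCC-collapsing walk at the heart of DeRemer & Pennello's digraph algorithm. A self-contained sketch of the same scheme using the new dict-based calling convention (toy data; in the real analyzer the keys are nonterminal transitions and the values are symbol sets):

```python
from collections import defaultdict

def digraph(X, R, G):
    # F[x] = G[x] unioned with G[y] for every y reachable from x via R,
    # with strongly connected components collapsed to share one set.
    F, S, N = {}, [], dict.fromkeys(X, 0)
    for x in X:
        if N[x] == 0:
            traverse(x, S, N, X, R, G, F)
    return F

def traverse(x, S, N, X, R, G, F):      # signature mirrors the hunk above
    S.append(x)
    d = len(S)
    N[x] = d
    F[x] = set(G[x])                    # copy, so G's sets stay untouched
    for y in R[x]:
        if N[y] == 0:
            traverse(y, S, N, X, R, G, F)
        if 0 < N[y] < N[x]:             # y still on stack: same SCC as x
            N[x] = N[y]
        F[x] |= F[y]
    if N[x] == d:                       # x is the root of its SCC
        while True:
            z = S.pop()
            N[z] = -1                   # marks z as finished
            F[z] = F[x]                 # the whole SCC shares one result
            if z == x:
                break

# Toy use: a and b form a cycle, c hangs off b.
X = ['a', 'b', 'c']
R = {'a': ['b'], 'b': ['a', 'c'], 'c': []}
G = defaultdict(set, {'a': {1}, 'b': {2}, 'c': {3}})
assert digraph(X, R, G) == {'a': {1, 2, 3}, 'b': {1, 2, 3}, 'c': {3}}
```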
@@ -137,9 +135,17 @@ def traverse(x, S, N, X, R, G, F):

 class LALR_Analyzer(GrammarAnalyzer):
+    def __init__(self, parser_conf, debug=False):
+        GrammarAnalyzer.__init__(self, parser_conf, debug)
+        self.nonterminal_transitions = []
+        self.directly_reads = defaultdict(set)
+        self.reads = defaultdict(set)
+        self.includes = defaultdict(set)
+        self.lookback = defaultdict(set)
+
     def compute_lr0_states(self):
-        self.states = set()
+        self.lr0_states = set()
         # map of kernels to LR0ItemSets
         cache = {}
@@ -161,7 +167,7 @@ class LALR_Analyzer(GrammarAnalyzer):
                 state.transitions[sym] = new_state
                 yield new_state

-            self.states.add(state)
+            self.lr0_states.add(state)

         for _ in bfs(self.lr0_start_states.values(), step):
             pass
@@ -174,14 +180,14 @@ class LALR_Analyzer(GrammarAnalyzer):
                 assert(rp.index == 0)
                 self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ])

-        for state in self.states:
+        for state in self.lr0_states:
             seen = set()
             for rp in state.closure:
                 if rp.is_satisfied:
                     continue
                 s = rp.next
                 # if s is not a nonterminal
-                if not s in self.lr0_rules_by_origin:
+                if s not in self.lr0_rules_by_origin:
                     continue
                 if s in seen:
                     continue
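Informally, what `compute_reads_relations` collects: for each nonterminal transition `(state, A)`, the directly-reads set holds the terminals that can be shifted immediately after traversing `A`, and `reads` edges extend that through nullable nonterminals. A toy illustration of the directly-reads part only (hypothetical `State` stand-in, not `LR0ItemSet`):

```python
class State:                              # toy stand-in for an LR(0) state
    def __init__(self, name):
        self.name = name
        self.transitions = {}             # symbol -> next State

p, r, q = State('p'), State('r'), State('q')
p.transitions['A'] = r                    # p --A--> r   (A is a nonterminal)
r.transitions['+'] = q                    # '+' can be shifted right after A
nonterminals = {'A'}

# DR(p, A): terminal transitions out of the state reached via A
directly_reads = {(p, 'A'): {t for t in r.transitions if t not in nonterminals}}
assert directly_reads[(p, 'A')] == {'+'}
```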
@@ -201,11 +207,6 @@ class LALR_Analyzer(GrammarAnalyzer):
                     if s2 in self.NULLABLE:
                         r.add((next_state, s2))

-    def compute_read_sets(self):
-        R = lambda nt: self.reads[nt]
-        G = lambda nt: self.directly_reads[nt]
-        self.read_sets = digraph(self.nonterminal_transitions, R, G)
-
     def compute_includes_lookback(self):
         for nt in self.nonterminal_transitions:
             state, nonterminal = nt
@@ -220,9 +221,8 @@ class LALR_Analyzer(GrammarAnalyzer):
                     s = rp.rule.expansion[i]
                     nt2 = (state2, s)
                     state2 = state2.transitions[s]
-                    if not nt2 in self.reads:
+                    if nt2 not in self.reads:
                         continue
-                    j = i + 1
                     for j in range(i + 1, len(rp.rule.expansion)):
                         if not rp.rule.expansion[j] in self.NULLABLE:
                             break
@@ -236,20 +236,18 @@ class LALR_Analyzer(GrammarAnalyzer):
             for nt2 in includes:
                 self.includes[nt2].add(nt)

-    def compute_follow_sets(self):
-        R = lambda nt: self.includes[nt]
-        G = lambda nt: self.read_sets[nt]
-        self.follow_sets = digraph(self.nonterminal_transitions, R, G)
-
     def compute_lookaheads(self):
+        read_sets = digraph(self.nonterminal_transitions, self.reads, self.directly_reads)
+        follow_sets = digraph(self.nonterminal_transitions, self.includes, read_sets)
         for nt, lookbacks in self.lookback.items():
             for state, rule in lookbacks:
-                for s in self.follow_sets[nt]:
+                for s in follow_sets[nt]:
                     state.lookaheads[s].add(rule)

     def compute_lalr1_states(self):
         m = {}
-        for state in self.states:
+        for state in self.lr0_states:
             actions = {}
             for la, next_state in state.transitions.items():
                 actions[la] = (Shift, next_state.closure)
@@ -281,3 +279,10 @@ class LALR_Analyzer(GrammarAnalyzer):
             self.parse_table = self._parse_table
         else:
             self.parse_table = IntParseTable.from_ParseTable(self._parse_table)
+
+    def compute_lalr(self):
+        self.compute_lr0_states()
+        self.compute_reads_relations()
+        self.compute_includes_lookback()
+        self.compute_lookaheads()
+        self.compute_lalr1_states()
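The new `compute_lalr` fixes the ordering of the analysis phases (LR(0) states, then reads relations, then includes/lookback, then lookaheads, then the LALR(1) tables) in one place, instead of trusting every caller to repeat the sequence; the intermediate `read_sets`/`follow_sets` become locals of `compute_lookaheads`. A quick way to exercise the whole pipeline through lark's public API, since `parser='lalr'` drives `LALR_Analyzer` internally:

```python
from lark import Lark

# Building a parser with parser='lalr' runs the full compute_lalr() pipeline.
parser = Lark(r"""
    start: NUMBER ("+" NUMBER)*

    %import common.NUMBER
    %import common.WS
    %ignore WS
""", parser='lalr')

print(parser.parse("1 + 2 + 3").pretty())
```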
@@ -8,8 +8,6 @@ from ..utils import Enumerator, Serialize
 from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable

-import time
-
 ###{standalone

 class LALR_Parser(object):
@@ -17,13 +15,7 @@ class LALR_Parser(object):
         assert all(r.options is None or r.options.priority is None
                    for r in parser_conf.rules), "LALR doesn't yet support prioritization"
         analysis = LALR_Analyzer(parser_conf, debug=debug)
-        analysis.compute_lr0_states()
-        analysis.compute_reads_relations()
-        analysis.compute_read_sets()
-        analysis.compute_includes_lookback()
-        analysis.compute_follow_sets()
-        analysis.compute_lookaheads()
-        analysis.compute_lalr1_states()
+        analysis.compute_lalr()
         callbacks = parser_conf.callbacks

         self._parse_table = analysis.parse_table
@@ -88,11 +80,6 @@ class _Parser:
             state_stack.append(new_state)
             value_stack.append(value)

-            if state_stack[-1] == end_state:
-                return True
-            return False
-
         # Main LALR-parser loop
         for token in stream:
             while True:
@@ -111,7 +98,8 @@ class _Parser:
         while True:
             _action, arg = get_action(token)
             assert(_action is Reduce)
-            if reduce(arg):
+            reduce(arg)
+            if state_stack[-1] == end_state:
                 return value_stack[-1]

 ###}
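The parser-loop change moves the termination test out of the `reduce` helper: `reduce` now only mutates the stacks, and the end-of-input loop itself checks whether the automaton has reached `end_state`. A self-contained toy of that control flow, with an invented one-rule table for `S -> 'a'` (none of these names or states are lark's):

```python
# Toy table-driven run of the new end-of-input handling.
Shift, Reduce = 'Shift', 'Reduce'
end_state = 2
actions = {                               # (state, lookahead) -> action
    (0, 'a'):    (Shift, 1),
    (1, '$END'): (Reduce, ('S', 1)),      # reduce S -> a (body length 1)
}
goto = {(0, 'S'): 2}                      # nonterminal transitions

state_stack, value_stack = [0], []
for tok in ['a']:                         # main loop: consume the input
    action, arg = actions[(state_stack[-1], tok)]
    assert action == Shift
    state_stack.append(arg)
    value_stack.append(tok)

while True:                               # end of input: reduce to completion
    action, (origin, size) = actions[(state_stack[-1], '$END')]
    assert action == Reduce
    body = value_stack[-size:]            # this is all "reduce" does now:
    del state_stack[-size:]               # pop the body, push the goto state
    del value_stack[-size:]
    state_stack.append(goto[(state_stack[-1], origin)])
    value_stack.append((origin, body))
    if state_stack[-1] == end_state:      # the loop, not reduce(), tests this
        break

print(value_stack[-1])                    # ('S', ['a'])
```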