@@ -71,7 +71,7 @@ class Rule(Serialize):
         expansion : a list of symbols
         order : index of this expansion amongst all rules of the same name
     """
-    __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash', '_rp')
+    __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash')

     __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options'
     __serialize_namespace__ = Terminal, NonTerminal, RuleOptions
@@ -83,7 +83,6 @@ class Rule(Serialize):
         self.order = order
         self.options = options
         self._hash = hash((self.origin, tuple(self.expansion)))
-        self._rp = None

     def _deserialize(self):
         self._hash = hash((self.origin, tuple(self.expansion)))
@@ -3,20 +3,16 @@ from collections import Counter, defaultdict
 from ..utils import bfs, fzset, classify
 from ..exceptions import GrammarError
 from ..grammar import Rule, Terminal, NonTerminal
-import time

-# optimizations were made so that there should never be two distinct equal RulePtrs
-# to help with hashtable lookup
 class RulePtr(object):
-    __slots__ = ('rule', 'index', '_advance')
+    __slots__ = ('rule', 'index')

     def __init__(self, rule, index):
         assert isinstance(rule, Rule)
         assert index <= len(rule.expansion)
         self.rule = rule
         self.index = index
-        self._advance = None

     def __repr__(self):
         before = [x.name for x in self.rule.expansion[:self.index]]
@@ -27,19 +23,19 @@ class RulePtr(object):
     def next(self):
         return self.rule.expansion[self.index]

-    # don't create duplicate RulePtrs
     def advance(self, sym):
         assert self.next == sym
-        a = self._advance
-        if a is None:
-            a = RulePtr(self.rule, self.index + 1)
-            self._advance = a
-        return a
+        return RulePtr(self.rule, self.index+1)

     @property
     def is_satisfied(self):
         return self.index == len(self.rule.expansion)

     def __eq__(self, other):
         return self.rule == other.rule and self.index == other.index

     def __hash__(self):
         return hash((self.rule, self.index))

-# state generation ensures no duplicate LR0ItemSets
 class LR0ItemSet(object):
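As an aside on why the `_advance` cache can go: RulePtr keeps value-based `__eq__`/`__hash__`, so two independently created pointers for the same (rule, index) are interchangeable as set/dict keys. A minimal sketch, with the import paths and the `Rule(origin, expansion, ...)` constructor signature assumed from the modules touched by this patch:

    from lark.grammar import Rule, Terminal, NonTerminal
    from lark.parsers.grammar_analysis import RulePtr

    rule = Rule(NonTerminal('start'), [Terminal('A'), Terminal('B')])
    rp1 = RulePtr(rule, 0).advance(rule.expansion[0])   # advance() now builds a fresh object each call
    rp2 = RulePtr(rule, 1)
    assert rp1 == rp2 and hash(rp1) == hash(rp2)
    assert len({rp1, rp2}) == 1                         # duplicates collapse in hash tables anyway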
@@ -159,19 +155,11 @@ class GrammarAnalyzer(object):
         self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin)

-        # cache RulePtr(r, 0) in r (no duplicate RulePtr objects)
-        for root_rule in lr0_root_rules.values():
-            root_rule._rp = RulePtr(root_rule, 0)
-
-        self.lr0_start_states = {start: LR0ItemSet([root_rule._rp], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
+        self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
                 for start, root_rule in lr0_root_rules.items()}

         self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)
-
-        self.nonterminal_transitions = []
-        self.directly_reads = defaultdict(set)
-        self.reads = defaultdict(set)
-        self.includes = defaultdict(set)
-        self.lookback = defaultdict(set)

     def expand_rule(self, source_rule, rules_by_origin=None):
         "Returns all init_ptrs accessible by rule (recursive)"
@@ -183,11 +171,7 @@ class GrammarAnalyzer(object):
             assert not rule.is_term, rule

             for r in rules_by_origin[rule]:
-                # don't create duplicate RulePtr objects
-                init_ptr = r._rp
-                if init_ptr is None:
-                    init_ptr = RulePtr(r, 0)
-                    r._rp = init_ptr
+                init_ptr = RulePtr(r, 0)
                 init_ptrs.add(init_ptr)

                 if r.expansion: # if not empty rule
@@ -15,8 +15,6 @@ from ..exceptions import GrammarError

 from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet
 from ..grammar import Rule
-import time

 ###{standalone

 class Action:
@@ -115,8 +113,8 @@ def traverse(x, S, N, X, R, G, F):
     S.append(x)
     d = len(S)
     N[x] = d
-    F[x] = G(x)
-    for y in R(x):
+    F[x] = G[x]
+    for y in R[x]:
         if N[y] == 0:
             traverse(y, S, N, X, R, G, F)
         n_x = N[x]
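This hunk changes `traverse` to index its relations (`R[x]`, `G[x]`) instead of calling lambda wrappers (`R(x)`, `G(x)`), since `reads` and `directly_reads` are now passed in as plain defaultdicts. A toy check of the resulting `digraph` call, with hypothetical nodes and relations (the import path is assumed from this patch's lalr_analysis module):

    from collections import defaultdict
    from lark.parsers.lalr_analysis import digraph

    nodes = ['A', 'B', 'C']                                   # stand-ins for nonterminal transitions
    reads = defaultdict(set, {'A': {'B'}, 'B': {'C'}})        # A reads B, B reads C
    directly_reads = defaultdict(set, {'A': {'a'}, 'B': {'b'}, 'C': {'c'}})

    read_sets = digraph(nodes, reads, directly_reads)         # no lambda wrappers needed
    assert read_sets['A'] == {'a', 'b', 'c'}
    assert read_sets['B'] == {'b', 'c'}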
@@ -137,9 +135,17 @@ def traverse(x, S, N, X, R, G, F):
 class LALR_Analyzer(GrammarAnalyzer):
     def __init__(self, parser_conf, debug=False):
         GrammarAnalyzer.__init__(self, parser_conf, debug)
+        self.nonterminal_transitions = []
+        self.directly_reads = defaultdict(set)
+        self.reads = defaultdict(set)
+        self.includes = defaultdict(set)
+        self.lookback = defaultdict(set)

     def compute_lr0_states(self):
-        self.states = set()
+        self.lr0_states = set()
         # map of kernels to LR0ItemSets
         cache = {}
@@ -161,7 +167,7 @@ class LALR_Analyzer(GrammarAnalyzer):
                 state.transitions[sym] = new_state
                 yield new_state

-            self.states.add(state)
+            self.lr0_states.add(state)

         for _ in bfs(self.lr0_start_states.values(), step):
             pass
@@ -174,14 +180,14 @@ class LALR_Analyzer(GrammarAnalyzer):
                 assert(rp.index == 0)
                 self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ])

-        for state in self.states:
+        for state in self.lr0_states:
             seen = set()
             for rp in state.closure:
                 if rp.is_satisfied:
                     continue
                 s = rp.next
                 # if s is a not a nonterminal
-                if not s in self.lr0_rules_by_origin:
+                if s not in self.lr0_rules_by_origin:
                     continue
                 if s in seen:
                     continue
@@ -201,11 +207,6 @@ class LALR_Analyzer(GrammarAnalyzer):
                     if s2 in self.NULLABLE:
                         r.add((next_state, s2))

-    def compute_read_sets(self):
-        R = lambda nt: self.reads[nt]
-        G = lambda nt: self.directly_reads[nt]
-        self.read_sets = digraph(self.nonterminal_transitions, R, G)
-
     def compute_includes_lookback(self):
         for nt in self.nonterminal_transitions:
             state, nonterminal = nt
@@ -220,9 +221,8 @@ class LALR_Analyzer(GrammarAnalyzer):
                     s = rp.rule.expansion[i]
                     nt2 = (state2, s)
                     state2 = state2.transitions[s]
-                    if not nt2 in self.reads:
+                    if nt2 not in self.reads:
                         continue
-                    j = i + 1
                     for j in range(i + 1, len(rp.rule.expansion)):
                         if not rp.rule.expansion[j] in self.NULLABLE:
                             break
@@ -236,20 +236,18 @@ class LALR_Analyzer(GrammarAnalyzer):
             for nt2 in includes:
                 self.includes[nt2].add(nt)

-    def compute_follow_sets(self):
-        R = lambda nt: self.includes[nt]
-        G = lambda nt: self.read_sets[nt]
-        self.follow_sets = digraph(self.nonterminal_transitions, R, G)
-
     def compute_lookaheads(self):
+        read_sets = digraph(self.nonterminal_transitions, self.reads, self.directly_reads)
+        follow_sets = digraph(self.nonterminal_transitions, self.includes, read_sets)
         for nt, lookbacks in self.lookback.items():
             for state, rule in lookbacks:
-                for s in self.follow_sets[nt]:
+                for s in follow_sets[nt]:
                     state.lookaheads[s].add(rule)

     def compute_lalr1_states(self):
         m = {}
-        for state in self.states:
+        for state in self.lr0_states:
             actions = {}
             for la, next_state in state.transitions.items():
                 actions[la] = (Shift, next_state.closure)
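For background (not part of the patch): these methods solve the standard DeRemer–Pennello fixed-point relations,

    Read(p, A)    = DR(p, A)   ∪  ⋃ { Read(r, C)    : (p, A) reads (r, C) }
    Follow(p, A)  = Read(p, A) ∪  ⋃ { Follow(p', B) : (p, A) includes (p', B) }
    LA(q, A → ω)  = ⋃ { Follow(p, A) : (q, A → ω) lookback (p, A) }

so compute_lookaheads can run digraph twice, once over (reads, directly_reads) and once over (includes, read_sets), and then push each Follow set through lookback onto the reducing states. That is why the separate compute_read_sets and compute_follow_sets methods are dropped here.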
@@ -281,3 +279,10 @@ class LALR_Analyzer(GrammarAnalyzer):
             self.parse_table = self._parse_table
         else:
             self.parse_table = IntParseTable.from_ParseTable(self._parse_table)
+
+    def compute_lalr(self):
+        self.compute_lr0_states()
+        self.compute_reads_relations()
+        self.compute_includes_lookback()
+        self.compute_lookaheads()
+        self.compute_lalr1_states()
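To exercise the new single entry point end-to-end, the public API is enough: building any LALR parser goes through LALR_Parser, which (see the LALR_Parser hunk below) now just calls compute_lalr(). A quick smoke test, assuming a checkout with this patch applied:

    from lark import Lark

    parser = Lark('start: "a"+', parser='lalr')   # drives LALR_Analyzer.compute_lalr() internally
    print(parser.parse('aaa').pretty())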
@@ -8,8 +8,6 @@ from ..utils import Enumerator, Serialize

 from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
-import time

 ###{standalone

 class LALR_Parser(object):
@@ -17,13 +15,7 @@ class LALR_Parser(object):
         assert all(r.options is None or r.options.priority is None
                    for r in parser_conf.rules), "LALR doesn't yet support prioritization"
         analysis = LALR_Analyzer(parser_conf, debug=debug)
-        analysis.compute_lr0_states()
-        analysis.compute_reads_relations()
-        analysis.compute_read_sets()
-        analysis.compute_includes_lookback()
-        analysis.compute_follow_sets()
-        analysis.compute_lookaheads()
-        analysis.compute_lalr1_states()
+        analysis.compute_lalr()
         callbacks = parser_conf.callbacks

         self._parse_table = analysis.parse_table
@@ -88,11 +80,6 @@ class _Parser:
             state_stack.append(new_state)
             value_stack.append(value)

-            if state_stack[-1] == end_state:
-                return True
-            return False
-
         # Main LALR-parser loop
         for token in stream:
             while True:
@@ -111,7 +98,8 @@ class _Parser:
         while True:
             _action, arg = get_action(token)
             assert(_action is Reduce)
-            if reduce(arg):
+            reduce(arg)
+            if state_stack[-1] == end_state:
                 return value_stack[-1]

 ###}