diff --git a/lark/grammar.py b/lark/grammar.py
index d975a19..91435b2 100644
--- a/lark/grammar.py
+++ b/lark/grammar.py
@@ -71,7 +71,7 @@ class Rule(Serialize):
         expansion : a list of symbols
         order : index of this expansion amongst all rules of the same name
     """
-    __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash', '_rp')
+    __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash')

     __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options'
     __serialize_namespace__ = Terminal, NonTerminal, RuleOptions
@@ -83,7 +83,6 @@ class Rule(Serialize):
         self.order = order
         self.options = options
         self._hash = hash((self.origin, tuple(self.expansion)))
-        self._rp = None

     def _deserialize(self):
         self._hash = hash((self.origin, tuple(self.expansion)))
diff --git a/lark/parsers/grammar_analysis.py b/lark/parsers/grammar_analysis.py
index b32f62f..94c32cc 100644
--- a/lark/parsers/grammar_analysis.py
+++ b/lark/parsers/grammar_analysis.py
@@ -3,20 +3,16 @@ from collections import Counter, defaultdict
 from ..utils import bfs, fzset, classify
 from ..exceptions import GrammarError
 from ..grammar import Rule, Terminal, NonTerminal
-import time

-# optimizations were made so that there should never be two distinct equal RulePtrs
-# to help with hashtable lookup
 class RulePtr(object):
-    __slots__ = ('rule', 'index', '_advance')
+    __slots__ = ('rule', 'index')

     def __init__(self, rule, index):
         assert isinstance(rule, Rule)
         assert index <= len(rule.expansion)
         self.rule = rule
         self.index = index
-        self._advance = None

     def __repr__(self):
         before = [x.name for x in self.rule.expansion[:self.index]]
@@ -27,19 +23,19 @@ class RulePtr(object):
     def next(self):
         return self.rule.expansion[self.index]

-    # don't create duplicate RulePtrs
     def advance(self, sym):
         assert self.next == sym
-        a = self._advance
-        if a is None:
-            a = RulePtr(self.rule, self.index + 1)
-            self._advance = a
-        return a
+        return RulePtr(self.rule, self.index+1)

     @property
     def is_satisfied(self):
         return self.index == len(self.rule.expansion)

+    def __eq__(self, other):
+        return self.rule == other.rule and self.index == other.index
+    def __hash__(self):
+        return hash((self.rule, self.index))
+

 # state generation ensures no duplicate LR0ItemSets
 class LR0ItemSet(object):
@@ -159,19 +155,11 @@ class GrammarAnalyzer(object):
         self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin)

-        # cache RulePtr(r, 0) in r (no duplicate RulePtr objects)
-        for root_rule in lr0_root_rules.values():
-            root_rule._rp = RulePtr(root_rule, 0)
-        self.lr0_start_states = {start: LR0ItemSet([root_rule._rp], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
+        self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
                                  for start, root_rule in lr0_root_rules.items()}

         self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)

-        self.nonterminal_transitions = []
-        self.directly_reads = defaultdict(set)
-        self.reads = defaultdict(set)
-        self.includes = defaultdict(set)
-        self.lookback = defaultdict(set)
-
     def expand_rule(self, source_rule, rules_by_origin=None):
         "Returns all init_ptrs accessible by rule (recursive)"
@@ -183,11 +171,7 @@ class GrammarAnalyzer(object):
             assert not rule.is_term, rule

             for r in rules_by_origin[rule]:
-                # don't create duplicate RulePtr objects
-                init_ptr = r._rp
-                if init_ptr is None:
-                    init_ptr = RulePtr(r, 0)
-                    r._rp = init_ptr
+                init_ptr = RulePtr(r, 0)
                 init_ptrs.add(init_ptr)

                 if r.expansion: # if not empty rule
diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py
index 4104713..4af2c24 100644
--- a/lark/parsers/lalr_analysis.py
+++ b/lark/parsers/lalr_analysis.py
@@ -15,8 +15,6 @@ from ..exceptions import GrammarError
 from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet
 from ..grammar import Rule

-import time
-
 ###{standalone

 class Action:
@@ -115,8 +113,8 @@ def traverse(x, S, N, X, R, G, F):
     S.append(x)
     d = len(S)
     N[x] = d
-    F[x] = G(x)
-    for y in R(x):
+    F[x] = G[x]
+    for y in R[x]:
         if N[y] == 0:
             traverse(y, S, N, X, R, G, F)
         n_x = N[x]
@@ -137,9 +135,17 @@ class LALR_Analyzer(GrammarAnalyzer):
+    def __init__(self, parser_conf, debug=False):
+        GrammarAnalyzer.__init__(self, parser_conf, debug)
+        self.nonterminal_transitions = []
+        self.directly_reads = defaultdict(set)
+        self.reads = defaultdict(set)
+        self.includes = defaultdict(set)
+        self.lookback = defaultdict(set)
+
     def compute_lr0_states(self):
-        self.states = set()
+        self.lr0_states = set()
         # map of kernels to LR0ItemSets
         cache = {}
@@ -161,7 +167,7 @@ class LALR_Analyzer(GrammarAnalyzer):
                 state.transitions[sym] = new_state
                 yield new_state

-            self.states.add(state)
+            self.lr0_states.add(state)

         for _ in bfs(self.lr0_start_states.values(), step):
             pass
@@ -174,14 +180,14 @@ class LALR_Analyzer(GrammarAnalyzer):
                 assert(rp.index == 0)
                 self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ])

-        for state in self.states:
+        for state in self.lr0_states:
             seen = set()
             for rp in state.closure:
                 if rp.is_satisfied:
                     continue
                 s = rp.next
                 # if s is a not a nonterminal
-                if not s in self.lr0_rules_by_origin:
+                if s not in self.lr0_rules_by_origin:
                     continue
                 if s in seen:
                     continue
@@ -201,11 +207,6 @@ class LALR_Analyzer(GrammarAnalyzer):
                     if s2 in self.NULLABLE:
                         r.add((next_state, s2))

-    def compute_read_sets(self):
-        R = lambda nt: self.reads[nt]
-        G = lambda nt: self.directly_reads[nt]
-        self.read_sets = digraph(self.nonterminal_transitions, R, G)
-
     def compute_includes_lookback(self):
         for nt in self.nonterminal_transitions:
             state, nonterminal = nt
@@ -220,9 +221,8 @@ class LALR_Analyzer(GrammarAnalyzer):
                     s = rp.rule.expansion[i]
                     nt2 = (state2, s)
                     state2 = state2.transitions[s]
-                    if not nt2 in self.reads:
+                    if nt2 not in self.reads:
                         continue
-                    j = i + 1
                     for j in range(i + 1, len(rp.rule.expansion)):
                         if not rp.rule.expansion[j] in self.NULLABLE:
                             break
@@ -236,20 +236,18 @@ class LALR_Analyzer(GrammarAnalyzer):
             for nt2 in includes:
                 self.includes[nt2].add(nt)

-    def compute_follow_sets(self):
-        R = lambda nt: self.includes[nt]
-        G = lambda nt: self.read_sets[nt]
-        self.follow_sets = digraph(self.nonterminal_transitions, R, G)
-
     def compute_lookaheads(self):
+        read_sets = digraph(self.nonterminal_transitions, self.reads, self.directly_reads)
+        follow_sets = digraph(self.nonterminal_transitions, self.includes, read_sets)
+
         for nt, lookbacks in self.lookback.items():
             for state, rule in lookbacks:
-                for s in self.follow_sets[nt]:
+                for s in follow_sets[nt]:
                     state.lookaheads[s].add(rule)

     def compute_lalr1_states(self):
         m = {}
-        for state in self.states:
+        for state in self.lr0_states:
             actions = {}
             for la, next_state in state.transitions.items():
                 actions[la] = (Shift, next_state.closure)
@@ -281,3 +279,10 @@ class LALR_Analyzer(GrammarAnalyzer):
             self.parse_table = self._parse_table
         else:
             self.parse_table = IntParseTable.from_ParseTable(self._parse_table)
+
+    def compute_lalr(self):
+        self.compute_lr0_states()
+        self.compute_reads_relations()
+        self.compute_includes_lookback()
+        self.compute_lookaheads()
+        self.compute_lalr1_states()
\ No newline at end of file
diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py
index 657e795..82c8bba 100644
--- a/lark/parsers/lalr_parser.py
+++ b/lark/parsers/lalr_parser.py
@@ -8,8 +8,6 @@ from ..utils import Enumerator, Serialize
 from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable

-import time
-
 ###{standalone

 class LALR_Parser(object):
@@ -17,13 +15,7 @@ class LALR_Parser(object):
         assert all(r.options is None or r.options.priority is None
                    for r in parser_conf.rules), "LALR doesn't yet support prioritization"
         analysis = LALR_Analyzer(parser_conf, debug=debug)
-        analysis.compute_lr0_states()
-        analysis.compute_reads_relations()
-        analysis.compute_read_sets()
-        analysis.compute_includes_lookback()
-        analysis.compute_follow_sets()
-        analysis.compute_lookaheads()
-        analysis.compute_lalr1_states()
+        analysis.compute_lalr()
         callbacks = parser_conf.callbacks

         self._parse_table = analysis.parse_table
@@ -88,11 +80,6 @@ class _Parser:
             state_stack.append(new_state)
             value_stack.append(value)

-            if state_stack[-1] == end_state:
-                return True
-
-            return False
-
         # Main LALR-parser loop
         for token in stream:
             while True:
@@ -111,7 +98,8 @@ class _Parser:
         while True:
             _action, arg = get_action(token)
             assert(_action is Reduce)
-            if reduce(arg):
+            reduce(arg)
+            if state_stack[-1] == end_state:
                 return value_stack[-1]

 ###}
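A note on the traverse() change above, for readers of this patch: G and R are now indexed as mappings (G[x], R[x]) rather than called, which is what lets the new compute_lookaheads pass the reads and directly_reads defaultdicts, and the computed read_sets, straight into digraph without wrapping them in lambdas. The sketch below is a minimal, self-contained version of that DeRemer and Pennello style digraph computation, not the patch's code: the toy relation, the symbol names, and the simplified traverse signature (no unused X argument, sets copied from G) are invented here for illustration only.

from collections import defaultdict

def digraph(X, R, G):
    # For every x in X, compute F[x] = G[x] unioned with F[y] for every y reachable through R.
    F = {}
    S = []
    N = dict.fromkeys(X, 0)
    for x in X:
        if N[x] == 0:
            traverse(x, S, N, R, G, F)
    return F

def traverse(x, S, N, R, G, F):
    S.append(x)
    d = len(S)
    N[x] = d
    F[x] = set(G[x])                # G is indexed like a dict, not called
    for y in R[x]:                  # R is indexed like a dict, not called
        if N[y] == 0:
            traverse(y, S, N, R, G, F)
        N[x] = min(N[x], N[y])      # keep the lowest stack depth seen in this SCC
        F[x] |= F[y]
    if N[x] == d:                   # x is the root of its strongly connected component
        while True:
            z = S.pop()
            N[z] = float('inf')     # mark as finished
            F[z] = F[x]
            if z == x:
                break

# Toy "reads"-style relation over two nonterminal transitions:
X = ['A', 'B']
R = defaultdict(set, {'A': {'B'}})              # A reads B
G = defaultdict(set, {'A': {'a'}, 'B': {'b'}})  # directly-read terminals
F = digraph(X, R, G)
assert F['A'] == {'a', 'b'} and F['B'] == {'b'}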