diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py
index 515f018..561bd8b 100644
--- a/lark/parser_frontends.py
+++ b/lark/parser_frontends.py
@@ -143,10 +143,10 @@ class CYK(WithLexer):
         self.init_traditional_lexer(lexer_conf)
 
         self._analysis = GrammarAnalyzer(parser_conf)
-        self._parser = cyk.Parser(self._analysis.rules, parser_conf.start)
+        self._parser = cyk.Parser(parser_conf.rules, parser_conf.start)
 
         self._postprocess = {}
-        for rule in self._analysis.rules:
+        for rule in parser_conf.rules:
             a = rule.alias
             self._postprocess[a] = a if callable(a) else (a and getattr(parser_conf.callback, a))
 
diff --git a/lark/parsers/grammar_analysis.py b/lark/parsers/grammar_analysis.py
index a8c7757..2c76eb0 100644
--- a/lark/parsers/grammar_analysis.py
+++ b/lark/parsers/grammar_analysis.py
@@ -1,5 +1,5 @@
-from ..utils import bfs, fzset
+from ..utils import bfs, fzset, classify
 from ..common import GrammarError, is_terminal
 from ..grammar import Rule
 
 
@@ -34,9 +34,6 @@ class RulePtr(object):
         return hash((self.rule, self.index))
 
 
-def pairs(lst):
-    return zip(lst[:-1], lst[1:])
-
 def update_set(set1, set2):
     copy = set(set1)
     set1 |= set2
@@ -68,6 +65,7 @@ def calculate_sets(rules):
         FIRST[sym]={sym} if is_terminal(sym) else set()
         FOLLOW[sym]=set()
 
+    # Calculate NULLABLE and FIRST
     changed = True
     while changed:
         changed = False
@@ -81,6 +79,14 @@ def calculate_sets(rules):
                 if set(rule.expansion[:i]) <= NULLABLE:
                     if update_set(FIRST[rule.origin], FIRST[sym]):
                         changed = True
+
+    # Calculate FOLLOW
+    changed = True
+    while changed:
+        changed = False
+
+        for rule in rules:
+            for i, sym in enumerate(rule.expansion):
                 if i==len(rule.expansion)-1 or set(rule.expansion[i:]) <= NULLABLE:
                     if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
                         changed = True
@@ -95,29 +101,20 @@
 
 class GrammarAnalyzer(object):
     def __init__(self, parser_conf, debug=False):
-        rules = parser_conf.rules
-        assert len(rules) == len(set(rules))
-
-        self.start_symbol = parser_conf.start
         self.debug = debug
 
-        root_rule = Rule('$root', [self.start_symbol, '$END'])
-
-        self.rules_by_origin = {r.origin: [] for r in rules}
-        for r in rules:
-            self.rules_by_origin[r.origin].append(r)
-
-        self.rules_by_origin[root_rule.origin] = [root_rule]
+        rules = parser_conf.rules + [Rule('$root', [parser_conf.start, '$END'])]
+        self.rules_by_origin = classify(rules, lambda r: r.origin)
+        assert len(rules) == len(set(rules))
 
         for r in rules:
             for sym in r.expansion:
                 if not (is_terminal(sym) or sym in self.rules_by_origin):
-                    raise GrammarError("Using an undefined rule: %s" % sym)
+                    raise GrammarError("Using an undefined rule: %s" % sym)  # TODO test validation
 
         self.start_state = self.expand_rule('$root')
 
-        self.rules = rules
-        self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules + [root_rule])
+        self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)
 
     def expand_rule(self, rule):
         "Returns all init_ptrs accessible by rule (recursive)"
diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py
index 6eb3fdf..e876fcf 100644
--- a/lark/parsers/lalr_analysis.py
+++ b/lark/parsers/lalr_analysis.py
@@ -77,7 +77,7 @@ class LALR_Analyzer(GrammarAnalyzer):
                     lookahead[sym].append((Shift, new_state))
                     if sym == '$END':
                         self.end_states.append( new_state )
-            yield fzset(rps)
+            yield new_state
 
             for k, v in lookahead.items():
                 if len(v) > 1:
diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py
index eafc4ea..0e2b110 100644
--- a/lark/parsers/lalr_parser.py
+++ b/lark/parsers/lalr_parser.py
@@ -14,7 +14,7 @@ class Parser:
         self.analysis = analysis = LALR_Analyzer(parser_conf)
         analysis.compute_lookahead()
         callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None)
-                     for rule in analysis.rules}
+                     for rule in parser_conf.rules}
 
         self.parser_conf = parser_conf
         self.parser = _Parser(analysis.parse_table, callbacks)