diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py
index 54b67bb..9e5c248 100644
--- a/lark/parser_frontends.py
+++ b/lark/parser_frontends.py
@@ -2,7 +2,7 @@ import re
 import sre_parse
 
 from .lexer import Lexer, ContextualLexer
-from .parsers.lalr_analysis import GrammarAnalyzer
+from .parsers.lalr_analysis import LALR_Analyzer
 
 from .common import is_terminal, GrammarError
 from .parsers import lalr_parser, earley
@@ -24,8 +24,8 @@ class LALR(WithLexer):
         WithLexer.__init__(self, lexer_conf)
         self.parser_conf = parser_conf
 
-        analyzer = GrammarAnalyzer(parser_conf.rules, parser_conf.start)
-        analyzer.analyze()
+        analyzer = LALR_Analyzer(parser_conf.rules, parser_conf.start)
+        analyzer.compute_lookahead()
         self.parser = lalr_parser.Parser(analyzer, parser_conf.callback)
 
     def parse(self, text):
@@ -37,8 +37,8 @@ class LALR_ContextualLexer:
         self.lexer_conf = lexer_conf
         self.parser_conf = parser_conf
 
-        self.analyzer = GrammarAnalyzer(parser_conf.rules, parser_conf.start)
-        self.analyzer.analyze()
+        self.analyzer = LALR_Analyzer(parser_conf.rules, parser_conf.start)
+        self.analyzer.compute_lookahead()
 
         d = {idx:t.keys() for idx, t in self.analyzer.states_idx.items()}
         self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore,
diff --git a/lark/parsers/grammar_analysis.py b/lark/parsers/grammar_analysis.py
index bdd6c73..d51700a 100644
--- a/lark/parsers/grammar_analysis.py
+++ b/lark/parsers/grammar_analysis.py
@@ -1,11 +1,7 @@
-import logging
-from collections import defaultdict, deque
-from ..utils import classify, classify_bool, bfs, fzset
+from ..utils import bfs, fzset
 
 from ..common import GrammarError, is_terminal
 
-ACTION_SHIFT = 0
-
 class Rule(object):
     """
         origin : a symbol
@@ -158,55 +154,3 @@ class GrammarAnalyzer(object):
         else:
             return {rp.next for rp in self.expand_rule(r) if is_terminal(rp.next)}
 
-    def analyze(self):
-
-        self.states = {}
-        def step(state):
-            lookahead = defaultdict(list)
-            sat, unsat = classify_bool(state, lambda rp: rp.is_satisfied)
-            for rp in sat:
-                for term in self.FOLLOW.get(rp.rule.origin, ()):
-                    lookahead[term].append(('reduce', rp.rule))
-
-            d = classify(unsat, lambda rp: rp.next)
-            for sym, rps in d.items():
-                rps = {rp.advance(sym) for rp in rps}
-
-                for rp in set(rps):
-                    if not rp.is_satisfied and not is_terminal(rp.next):
-                        rps |= self.expand_rule(rp.next)
-
-                lookahead[sym].append(('shift', fzset(rps)))
-                yield fzset(rps)
-
-            for k, v in lookahead.items():
-                if len(v) > 1:
-                    if self.debug:
-                        logging.warn("Shift/reduce conflict for %s: %s. Resolving as shift.", k, v)
-                    for x in v:
-                        # XXX resolving shift/reduce into shift, like PLY
-                        # Give a proper warning
-                        if x[0] == 'shift':
-                            lookahead[k] = [x]
-
-            for k, v in lookahead.items():
-                assert len(v) == 1, ("Collision", k, v)
-
-            self.states[state] = {k:v[0] for k, v in lookahead.items()}
-
-        for _ in bfs([self.init_state], step):
-            pass
-
-        # --
-        self.enum = list(self.states)
-        self.enum_rev = {s:i for i,s in enumerate(self.enum)}
-        self.states_idx = {}
-
-        for s, la in self.states.items():
-            la = {k:(ACTION_SHIFT, self.enum_rev[v[1]]) if v[0]=='shift'
-                  else (v[0], (v[1], len(v[1].expansion))) # Reduce
-                  for k,v in la.items()}
-            self.states_idx[ self.enum_rev[s] ] = la
-
-
-        self.init_state_idx = self.enum_rev[self.init_state]
diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py
index 4384ca8..8a8365d 100644
--- a/lark/parsers/lalr_analysis.py
+++ b/lark/parsers/lalr_analysis.py
@@ -1 +1,65 @@
-from grammar_analysis import GrammarAnalyzer, ACTION_SHIFT
+import logging
+from collections import defaultdict
+
+from ..utils import classify, classify_bool, bfs, fzset
+from ..common import GrammarError, is_terminal
+
+from .grammar_analysis import GrammarAnalyzer
+
+ACTION_SHIFT = 0
+
+class LALR_Analyzer(GrammarAnalyzer):
+
+    def compute_lookahead(self):
+
+        self.states = {}
+        def step(state):
+            lookahead = defaultdict(list)
+            sat, unsat = classify_bool(state, lambda rp: rp.is_satisfied)
+            for rp in sat:
+                for term in self.FOLLOW.get(rp.rule.origin, ()):
+                    lookahead[term].append(('reduce', rp.rule))
+
+            d = classify(unsat, lambda rp: rp.next)
+            for sym, rps in d.items():
+                rps = {rp.advance(sym) for rp in rps}
+
+                for rp in set(rps):
+                    if not rp.is_satisfied and not is_terminal(rp.next):
+                        rps |= self.expand_rule(rp.next)
+
+                lookahead[sym].append(('shift', fzset(rps)))
+                yield fzset(rps)
+
+            for k, v in lookahead.items():
+                if len(v) > 1:
+                    if self.debug:
+                        logging.warn("Shift/reduce conflict for %s: %s. Resolving as shift.", k, v)
+                    for x in v:
+                        # XXX resolving shift/reduce into shift, like PLY
+                        # Give a proper warning
+                        if x[0] == 'shift':
+                            lookahead[k] = [x]
+
+            for k, v in lookahead.items():
+                if not len(v) == 1:
+                    raise GrammarError("Collision in %s: %s" %(k, v))
+
+            self.states[state] = {k:v[0] for k, v in lookahead.items()}
+
+        for _ in bfs([self.init_state], step):
+            pass
+
+        # --
+        self.enum = list(self.states)
+        self.enum_rev = {s:i for i,s in enumerate(self.enum)}
+        self.states_idx = {}
+
+        for s, la in self.states.items():
+            la = {k:(ACTION_SHIFT, self.enum_rev[v[1]]) if v[0]=='shift'
+                  else (v[0], (v[1], len(v[1].expansion))) # Reduce
+                  for k,v in la.items()}
+            self.states_idx[ self.enum_rev[s] ] = la
+
+
+        self.init_state_idx = self.enum_rev[self.init_state]
diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py
index 313d808..3280d01 100644
--- a/lark/parsers/lalr_parser.py
+++ b/lark/parsers/lalr_parser.py
@@ -1,6 +1,6 @@
-from .lalr_analysis import ACTION_SHIFT
 from ..common import ParseError, UnexpectedToken
+from .lalr_analysis import LALR_Analyzer, ACTION_SHIFT
 
 class Parser(object):
     def __init__(self, analysis, callback):
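
For context, a minimal sketch of the call sequence after this refactor, mirroring the two updated frontends above. The names `rules`, `start`, and `callback` are placeholders standing in for the contents of a real `ParserConf`; they are not defined in this patch:

    # Hypothetical driver code: `rules`, `start`, and `callback` stand in
    # for a ParserConf's contents, not names introduced by this patch.
    from lark.parsers.lalr_analysis import LALR_Analyzer
    from lark.parsers import lalr_parser

    analyzer = LALR_Analyzer(rules, start)   # formerly GrammarAnalyzer(...)
    analyzer.compute_lookahead()             # formerly analyzer.analyze()
    parser = lalr_parser.Parser(analyzer, callback)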