@@ -2,7 +2,7 @@ import re | |||||
import sre_parse | import sre_parse | ||||
from .lexer import Lexer, ContextualLexer | from .lexer import Lexer, ContextualLexer | ||||
from .parsers.lalr_analysis import GrammarAnalyzer | |||||
from .parsers.lalr_analysis import LALR_Analyzer | |||||
from .common import is_terminal, GrammarError | from .common import is_terminal, GrammarError | ||||
from .parsers import lalr_parser, earley | from .parsers import lalr_parser, earley | ||||
@@ -24,8 +24,8 @@ class LALR(WithLexer): | |||||
WithLexer.__init__(self, lexer_conf) | WithLexer.__init__(self, lexer_conf) | ||||
self.parser_conf = parser_conf | self.parser_conf = parser_conf | ||||
analyzer = GrammarAnalyzer(parser_conf.rules, parser_conf.start) | |||||
analyzer.analyze() | |||||
analyzer = LALR_Analyzer(parser_conf.rules, parser_conf.start) | |||||
analyzer.compute_lookahead() | |||||
self.parser = lalr_parser.Parser(analyzer, parser_conf.callback) | self.parser = lalr_parser.Parser(analyzer, parser_conf.callback) | ||||
def parse(self, text): | def parse(self, text): | ||||
@@ -37,8 +37,8 @@ class LALR_ContextualLexer: | |||||
self.lexer_conf = lexer_conf | self.lexer_conf = lexer_conf | ||||
self.parser_conf = parser_conf | self.parser_conf = parser_conf | ||||
self.analyzer = GrammarAnalyzer(parser_conf.rules, parser_conf.start) | |||||
self.analyzer.analyze() | |||||
self.analyzer = LALR_Analyzer(parser_conf.rules, parser_conf.start) | |||||
self.analyzer.compute_lookahead() | |||||
d = {idx:t.keys() for idx, t in self.analyzer.states_idx.items()} | d = {idx:t.keys() for idx, t in self.analyzer.states_idx.items()} | ||||
self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, | self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, | ||||
@@ -1,11 +1,7 @@ | |||||
import logging | |||||
from collections import defaultdict, deque | |||||
from ..utils import classify, classify_bool, bfs, fzset | |||||
from ..utils import bfs, fzset | |||||
from ..common import GrammarError, is_terminal | from ..common import GrammarError, is_terminal | ||||
ACTION_SHIFT = 0 | |||||
class Rule(object): | class Rule(object): | ||||
""" | """ | ||||
origin : a symbol | origin : a symbol | ||||
@@ -158,55 +154,3 @@ class GrammarAnalyzer(object): | |||||
else: | else: | ||||
return {rp.next for rp in self.expand_rule(r) if is_terminal(rp.next)} | return {rp.next for rp in self.expand_rule(r) if is_terminal(rp.next)} | ||||
def analyze(self):
    """Build the parse table for every state reachable from ``self.init_state``.

    Populates the following attributes:

    * ``self.states``: maps each state (a frozen set of rule pointers) to
      ``{symbol: ('shift', next_state) | ('reduce', rule)}``.
    * ``self.enum`` / ``self.enum_rev``: list of states and its inverse
      mapping (state -> integer index).
    * ``self.states_idx``: the same table keyed by integer state index, with
      shift targets replaced by their index and reduce entries expanded to
      ``('reduce', (rule, len(rule.expansion)))``.
    * ``self.init_state_idx``: index of ``self.init_state``.

    Raises:
        GrammarError: if, after preferring shift over reduce, some symbol
            still has more than one candidate action in a state.
    """
    self.states = {}

    def step(state):
        # Candidate actions per symbol; more than one entry means a conflict.
        lookahead = defaultdict(list)
        sat, unsat = classify_bool(state, lambda rp: rp.is_satisfied)

        # Satisfied rule pointers reduce on every terminal in FOLLOW(origin).
        for rp in sat:
            for term in self.FOLLOW.get(rp.rule.origin, ()):
                lookahead[term].append(('reduce', rp.rule))

        # Unsatisfied rule pointers are grouped by the symbol they expect next.
        d = classify(unsat, lambda rp: rp.next)
        for sym, rps in d.items():
            rps = {rp.advance(sym) for rp in rps}

            # Iterate over a copy, because `rps` grows while expanding.
            for rp in set(rps):
                if not rp.is_satisfied and not is_terminal(rp.next):
                    rps |= self.expand_rule(rp.next)

            lookahead[sym].append(('shift', fzset(rps)))
            # Hand the successor state to the traversal driving this generator.
            yield fzset(rps)

        for k, v in lookahead.items():
            if len(v) > 1:
                if self.debug:
                    logging.warning("Shift/reduce conflict for %s: %s. Resolving as shift.", k, v)
                for x in v:
                    # XXX resolving shift/reduce into shift, like PLY
                    # Give a proper warning
                    if x[0] == 'shift':
                        # Rebinding an existing key keeps the dict size
                        # unchanged, so this is safe while iterating.
                        lookahead[k] = [x]

        # Raise instead of assert: asserts vanish under `python -O`, which
        # would silently pick the first action of an unresolved conflict.
        for k, v in lookahead.items():
            if len(v) != 1:
                raise GrammarError("Collision in %s: %s" % (k, v))

        self.states[state] = {k: v[0] for k, v in lookahead.items()}

    # NOTE(review): `bfs` comes from ..utils; presumably it calls `step` on
    # each newly yielded state until no new ones appear - confirm.
    for _ in bfs([self.init_state], step):
        pass

    # --
    self.enum = list(self.states)
    self.enum_rev = {s: i for i, s in enumerate(self.enum)}
    self.states_idx = {}

    for s, la in self.states.items():
        la = {k: (ACTION_SHIFT, self.enum_rev[v[1]]) if v[0] == 'shift'
                 else (v[0], (v[1], len(v[1].expansion)))    # Reduce
              for k, v in la.items()}
        self.states_idx[self.enum_rev[s]] = la

    self.init_state_idx = self.enum_rev[self.init_state]
@@ -1 +1,65 @@ | |||||
from grammar_analysis import GrammarAnalyzer, ACTION_SHIFT | |||||
import logging | |||||
from collections import defaultdict | |||||
from ..utils import classify, classify_bool, bfs, fzset | |||||
from ..common import GrammarError, is_terminal | |||||
from grammar_analysis import GrammarAnalyzer | |||||
ACTION_SHIFT = 0 | |||||
class LALR_Analyzer(GrammarAnalyzer):
    """Grammar analyzer that derives a shift/reduce parse table."""

    def compute_lookahead(self):
        """Build the parse table for every state reachable from ``self.init_state``.

        Populates the following attributes:

        * ``self.states``: maps each state (a frozen set of rule pointers) to
          ``{symbol: ('shift', next_state) | ('reduce', rule)}``.
        * ``self.enum`` / ``self.enum_rev``: list of states and its inverse
          mapping (state -> integer index).
        * ``self.states_idx``: the same table keyed by integer state index,
          with shift targets replaced by their index and reduce entries
          expanded to ``('reduce', (rule, len(rule.expansion)))``.
        * ``self.init_state_idx``: index of ``self.init_state``.

        Raises:
            GrammarError: if, after preferring shift over reduce, some symbol
                still has more than one candidate action in a state.
        """
        self.states = {}

        def step(state):
            # Candidate actions per symbol; more than one entry is a conflict.
            lookahead = defaultdict(list)
            sat, unsat = classify_bool(state, lambda rp: rp.is_satisfied)

            # Satisfied rule pointers reduce on every terminal in FOLLOW(origin).
            for rp in sat:
                for term in self.FOLLOW.get(rp.rule.origin, ()):
                    lookahead[term].append(('reduce', rp.rule))

            # Unsatisfied rule pointers are grouped by the symbol they expect.
            d = classify(unsat, lambda rp: rp.next)
            for sym, rps in d.items():
                rps = {rp.advance(sym) for rp in rps}

                # Iterate over a copy, because `rps` grows while expanding.
                for rp in set(rps):
                    if not rp.is_satisfied and not is_terminal(rp.next):
                        rps |= self.expand_rule(rp.next)

                lookahead[sym].append(('shift', fzset(rps)))
                # Hand the successor state to the traversal driving this generator.
                yield fzset(rps)

            for k, v in lookahead.items():
                if len(v) > 1:
                    if self.debug:
                        # logging.warn is a deprecated alias of logging.warning.
                        logging.warning("Shift/reduce conflict for %s: %s. Resolving as shift.", k, v)
                    for x in v:
                        # XXX resolving shift/reduce into shift, like PLY
                        # Give a proper warning
                        if x[0] == 'shift':
                            # Rebinding an existing key keeps the dict size
                            # unchanged, so this is safe while iterating.
                            lookahead[k] = [x]

            # Anything still ambiguous has no shift to prefer, so it is fatal.
            for k, v in lookahead.items():
                if len(v) != 1:
                    raise GrammarError("Collision in %s: %s" % (k, v))

            self.states[state] = {k: v[0] for k, v in lookahead.items()}

        # NOTE(review): `bfs` comes from ..utils; presumably it calls `step` on
        # each newly yielded state until no new ones appear - confirm.
        for _ in bfs([self.init_state], step):
            pass

        # --
        self.enum = list(self.states)
        self.enum_rev = {s: i for i, s in enumerate(self.enum)}
        self.states_idx = {}

        for s, la in self.states.items():
            la = {k: (ACTION_SHIFT, self.enum_rev[v[1]]) if v[0] == 'shift'
                     else (v[0], (v[1], len(v[1].expansion)))    # Reduce
                  for k, v in la.items()}
            self.states_idx[self.enum_rev[s]] = la

        self.init_state_idx = self.enum_rev[self.init_state]
@@ -1,6 +1,6 @@ | |||||
from .lalr_analysis import ACTION_SHIFT | |||||
from ..common import ParseError, UnexpectedToken | from ..common import ParseError, UnexpectedToken | ||||
from .lalr_analysis import LALR_Analyzer, ACTION_SHIFT | |||||
class Parser(object): | class Parser(object): | ||||
def __init__(self, analysis, callback): | def __init__(self, analysis, callback): | ||||