@@ -38,7 +38,7 @@ class LALR_ContextualLexer: | |||||
self.parser = lalr_parser.Parser(parser_conf) | self.parser = lalr_parser.Parser(parser_conf) | ||||
d = {idx:t.keys() for idx, t in self.parser.analysis.states_idx.items()} | |||||
d = {idx:t.keys() for idx, t in self.parser.analysis.parse_table.states.items()} | |||||
always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else () | always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else () | ||||
self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, always_accept=always_accept) | self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, always_accept=always_accept) | ||||
@@ -125,7 +125,7 @@ class GrammarAnalyzer(object): | |||||
if not (is_terminal(sym) or sym in self.rules_by_origin): | if not (is_terminal(sym) or sym in self.rules_by_origin): | ||||
raise GrammarError("Using an undefined rule: %s" % sym) | raise GrammarError("Using an undefined rule: %s" % sym) | ||||
self.init_state = self.expand_rule('$root') | |||||
self.start_state = self.expand_rule('$root') | |||||
self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(self.rules) | self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(self.rules) | ||||
@@ -14,7 +14,41 @@ from ..common import GrammarError, is_terminal | |||||
from .grammar_analysis import GrammarAnalyzer | from .grammar_analysis import GrammarAnalyzer | ||||
ACTION_SHIFT = 0 | |||||
class Action:
    """Base marker for LALR parser actions stored in the parse table.

    The parse table stores the *classes* ``Shift`` and ``Reduce`` themselves
    as sentinels and compares them with ``is`` (see ``from_ParseTable``), so
    these classes are normally never instantiated.
    """

    def __str__(self):
        # Bug fix: the original returned ``self.__name__``, but ``__name__``
        # exists only on the class object, not on instances, so calling
        # str() on an Action instance raised AttributeError.
        return type(self).__name__

    def __repr__(self):
        return str(self)


class Shift(Action):
    """Sentinel action: shift the token and move to a new state."""


class Reduce(Action):
    """Sentinel action: reduce the stack by a grammar rule."""


class ParseTable:
    """An LALR parse table.

    states      -- dict mapping each state to its lookahead map:
                   {lookahead_symbol: (Shift, next_state) or (Reduce, rule)}
    start_state -- key of the initial state
    end_state   -- key of the accepting state
    """

    def __init__(self, states, start_state, end_state):
        self.states = states
        self.start_state = start_state
        self.end_state = end_state


class IntParseTable(ParseTable):
    """A ParseTable whose state keys are renumbered to consecutive ints.

    Integer keys are cheaper to hash and compare than the item-set keys
    produced by the grammar analyzer.
    """

    @classmethod
    def from_ParseTable(cls, parse_table):
        """Return an IntParseTable equivalent to *parse_table*.

        Shift targets are remapped to the new integer state ids; Reduce
        entries are copied unchanged (their argument is a rule, not a state).
        """
        state_to_idx = {s: i for i, s in enumerate(parse_table.states)}
        int_states = {}
        for state, lookahead in parse_table.states.items():
            remapped = {k: (v[0], state_to_idx[v[1]]) if v[0] is Shift else v
                        for k, v in lookahead.items()}
            int_states[state_to_idx[state]] = remapped
        return cls(int_states,
                   state_to_idx[parse_table.start_state],
                   state_to_idx[parse_table.end_state])
class LALR_Analyzer(GrammarAnalyzer): | class LALR_Analyzer(GrammarAnalyzer): | ||||
@@ -27,7 +61,7 @@ class LALR_Analyzer(GrammarAnalyzer): | |||||
sat, unsat = classify_bool(state, lambda rp: rp.is_satisfied) | sat, unsat = classify_bool(state, lambda rp: rp.is_satisfied) | ||||
for rp in sat: | for rp in sat: | ||||
for term in self.FOLLOW.get(rp.rule.origin, ()): | for term in self.FOLLOW.get(rp.rule.origin, ()): | ||||
lookahead[term].append(('reduce', rp.rule)) | |||||
lookahead[term].append((Reduce, rp.rule)) | |||||
d = classify(unsat, lambda rp: rp.next) | d = classify(unsat, lambda rp: rp.next) | ||||
for sym, rps in d.items(): | for sym, rps in d.items(): | ||||
@@ -38,7 +72,7 @@ class LALR_Analyzer(GrammarAnalyzer): | |||||
rps |= self.expand_rule(rp.next) | rps |= self.expand_rule(rp.next) | ||||
new_state = fzset(rps) | new_state = fzset(rps) | ||||
lookahead[sym].append(('shift', new_state)) | |||||
lookahead[sym].append((Shift, new_state)) | |||||
if sym == '$end': | if sym == '$end': | ||||
self.end_states.append( new_state ) | self.end_states.append( new_state ) | ||||
yield fzset(rps) | yield fzset(rps) | ||||
@@ -50,7 +84,7 @@ class LALR_Analyzer(GrammarAnalyzer): | |||||
for x in v: | for x in v: | ||||
# XXX resolving shift/reduce into shift, like PLY | # XXX resolving shift/reduce into shift, like PLY | ||||
# Give a proper warning | # Give a proper warning | ||||
if x[0] == 'shift': | |||||
if x[0] is Shift: | |||||
lookahead[k] = [x] | lookahead[k] = [x] | ||||
for k, v in lookahead.items(): | for k, v in lookahead.items(): | ||||
@@ -59,22 +93,15 @@ class LALR_Analyzer(GrammarAnalyzer): | |||||
self.states[state] = {k:v[0] for k, v in lookahead.items()} | self.states[state] = {k:v[0] for k, v in lookahead.items()} | ||||
for _ in bfs([self.init_state], step): | |||||
for _ in bfs([self.start_state], step): | |||||
pass | pass | ||||
self.end_state ,= self.end_states | self.end_state ,= self.end_states | ||||
# -- | |||||
self.enum = list(self.states) | |||||
self.enum_rev = {s:i for i,s in enumerate(self.enum)} | |||||
self.states_idx = {} | |||||
for s, la in self.states.items(): | |||||
la = {k:(ACTION_SHIFT, self.enum_rev[v[1]]) if v[0]=='shift' | |||||
else (v[0], (v[1], len(v[1].expansion))) # Reduce | |||||
for k,v in la.items()} | |||||
self.states_idx[ self.enum_rev[s] ] = la | |||||
self._parse_table = ParseTable(self.states, self.start_state, self.end_state) | |||||
if self.debug: | |||||
self.parse_table = self._parse_table | |||||
else: | |||||
self.parse_table = IntParseTable.from_ParseTable(self._parse_table) | |||||
self.init_state_idx = self.enum_rev[self.init_state] | |||||
self.end_state_idx = self.enum_rev[self.end_state] |
@@ -5,7 +5,7 @@ | |||||
from ..common import ParseError, UnexpectedToken | from ..common import ParseError, UnexpectedToken | ||||
from .lalr_analysis import LALR_Analyzer, ACTION_SHIFT | |||||
from .lalr_analysis import LALR_Analyzer, Shift | |||||
class FinalReduce: | class FinalReduce: | ||||
def __init__(self, value): | def __init__(self, value): | ||||
@@ -19,14 +19,14 @@ class Parser: | |||||
callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None) | callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None) | ||||
for rule in analysis.rules} | for rule in analysis.rules} | ||||
self.parser = _Parser(analysis.states_idx, analysis.init_state_idx, analysis.end_state_idx, callbacks) | |||||
self.parser = _Parser(analysis.parse_table, callbacks) | |||||
self.parse = self.parser.parse | self.parse = self.parser.parse | ||||
class _Parser: | class _Parser: | ||||
def __init__(self, states, init_state, end_state, callbacks): | |||||
self.states = states | |||||
self.init_state = init_state | |||||
self.end_state = end_state | |||||
def __init__(self, parse_table, callbacks): | |||||
self.states = parse_table.states | |||||
self.start_state = parse_table.start_state | |||||
self.end_state = parse_table.end_state | |||||
self.callbacks = callbacks | self.callbacks = callbacks | ||||
def parse(self, seq, set_state=None): | def parse(self, seq, set_state=None): | ||||
@@ -35,10 +35,10 @@ class _Parser: | |||||
stream = iter(seq) | stream = iter(seq) | ||||
states = self.states | states = self.states | ||||
state_stack = [self.init_state] | |||||
state_stack = [self.start_state] | |||||
value_stack = [] | value_stack = [] | ||||
if set_state: set_state(self.init_state) | |||||
if set_state: set_state(self.start_state) | |||||
def get_action(key): | def get_action(key): | ||||
state = state_stack[-1] | state = state_stack[-1] | ||||
@@ -49,7 +49,8 @@ class _Parser: | |||||
raise UnexpectedToken(token, expected, seq, i) | raise UnexpectedToken(token, expected, seq, i) | ||||
def reduce(rule, size): | |||||
def reduce(rule): | |||||
size = len(rule.expansion) | |||||
if size: | if size: | ||||
s = value_stack[-size:] | s = value_stack[-size:] | ||||
del state_stack[-size:] | del state_stack[-size:] | ||||
@@ -60,7 +61,7 @@ class _Parser: | |||||
value = self.callbacks[rule](s) | value = self.callbacks[rule](s) | ||||
_action, new_state = get_action(rule.origin) | _action, new_state = get_action(rule.origin) | ||||
assert _action == ACTION_SHIFT | |||||
assert _action is Shift | |||||
state_stack.append(new_state) | state_stack.append(new_state) | ||||
value_stack.append(value) | value_stack.append(value) | ||||
@@ -72,22 +73,22 @@ class _Parser: | |||||
action, arg = get_action(token.type) | action, arg = get_action(token.type) | ||||
assert arg != self.end_state | assert arg != self.end_state | ||||
if action == ACTION_SHIFT: | |||||
if action is Shift: | |||||
state_stack.append(arg) | state_stack.append(arg) | ||||
value_stack.append(token) | value_stack.append(token) | ||||
if set_state: set_state(arg) | if set_state: set_state(arg) | ||||
token = next(stream) | token = next(stream) | ||||
i += 1 | i += 1 | ||||
else: | else: | ||||
reduce(*arg) | |||||
reduce(arg) | |||||
except StopIteration: | except StopIteration: | ||||
pass | pass | ||||
while True: | while True: | ||||
_action, arg = get_action('$end') | _action, arg = get_action('$end') | ||||
if _action == ACTION_SHIFT: | |||||
if _action is Shift: | |||||
assert arg == self.end_state | assert arg == self.end_state | ||||
val ,= value_stack | val ,= value_stack | ||||
return val | return val | ||||
else: | else: | ||||
reduce(*arg) | |||||
reduce(arg) |