@@ -197,14 +197,19 @@ class ContextualLexer: | |||
self.root_lexer = Lexer(tokens, ignore=ignore) | |||
def lex(self, stream, parser): | |||
self.set_parser_state(None) # Needs to be set on the outside | |||
def set_parser_state(self, state):
    """Record the parser state the lexer should use for its next match.

    ``lex`` reads ``self.parser_state`` on every iteration to select the
    per-state lexer from ``self.lexers``, so the parser is expected to call
    this whenever its state changes.

    :param state: key into ``self.lexers``; the value is stored as-is.
    """
    self.parser_state = state
def lex(self, stream): | |||
lex_pos = 0 | |||
line = 1 | |||
col_start_pos = 0 | |||
newline_types = list(self.root_lexer.newline_types) | |||
ignore_types = list(self.root_lexer.ignore_types) | |||
while True: | |||
lexer = self.lexers[parser.state] | |||
lexer = self.lexers[self.parser_state] | |||
for mre, type_from_index in lexer.mres: | |||
m = mre.match(stream, lex_pos) | |||
if m: | |||
@@ -2,7 +2,6 @@ import re | |||
import sre_parse | |||
from .lexer import Lexer, ContextualLexer | |||
from .parsers.lalr_analysis import LALR_Analyzer | |||
from .common import is_terminal, GrammarError | |||
from .parsers import lalr_parser, earley | |||
@@ -22,11 +21,9 @@ class WithLexer: | |||
class LALR(WithLexer): | |||
def __init__(self, lexer_conf, parser_conf):
    """Set up the lexer via ``WithLexer`` and build the LALR parser.

    The parser is constructed directly from ``parser_conf``; no analyzer is
    built here and no callback is passed separately.

    :param lexer_conf: lexer configuration forwarded to ``WithLexer.__init__``.
    :param parser_conf: parser configuration; kept on ``self.parser_conf``
        and passed to ``lalr_parser.Parser``.
    """
    WithLexer.__init__(self, lexer_conf)

    self.parser_conf = parser_conf
    self.parser = lalr_parser.Parser(parser_conf)
def parse(self, text): | |||
tokens = list(self.lex(text)) | |||
@@ -37,21 +34,19 @@ class LALR_ContextualLexer: | |||
self.lexer_conf = lexer_conf | |||
self.parser_conf = parser_conf | |||
self.analyzer = LALR_Analyzer(parser_conf.rules, parser_conf.start) | |||
self.analyzer.compute_lookahead() | |||
self.parser = lalr_parser.Parser(parser_conf) | |||
d = {idx:t.keys() for idx, t in self.analyzer.states_idx.items()} | |||
d = {idx:t.keys() for idx, t in self.parser.analysis.states_idx.items()} | |||
self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, | |||
always_accept=lexer_conf.postlex.always_accept | |||
if lexer_conf.postlex else ()) | |||
def parse(self, text):
    """Lex ``text`` with the contextual lexer and parse the resulting tokens.

    The token stream from ``self.lexer.lex`` is passed through
    ``lexer_conf.postlex.process`` when a post-lexer is configured. The
    lexer's ``set_parser_state`` is handed to the parser as a callback so
    that the parser can report its state back to the lexer.

    :param text: the input to lex and parse.
    :returns: whatever ``self.parser.parse`` returns.
    """
    tokens = self.lexer.lex(text)
    if self.lexer_conf.postlex:
        tokens = self.lexer_conf.postlex.process(tokens)
    return self.parser.parse(tokens, self.lexer.set_parser_state)
@@ -3,13 +3,13 @@ from ..common import ParseError, UnexpectedToken | |||
from .lalr_analysis import LALR_Analyzer, ACTION_SHIFT | |||
class Parser(object): | |||
def __init__(self, parser_conf):
    """Build the LALR analysis and the rule-to-callback table.

    An ``LALR_Analyzer`` is created from ``parser_conf.rules`` and
    ``parser_conf.start``, and ``compute_lookahead()`` is run on it before
    the callback table is built.

    :param parser_conf: configuration object providing ``rules``, ``start``
        and ``callback``.
    """
    self.analysis = LALR_Analyzer(parser_conf.rules, parser_conf.start)
    self.analysis.compute_lookahead()

    # Each rule maps to the attribute of the callback object named after the
    # rule's alias (or its origin when there is no alias); None if missing.
    self.callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None)
                      for rule in self.analysis.rules}
def parse(self, seq, set_state=False): | |||
def parse(self, seq, set_state=None): | |||
i = 0 | |||
stream = iter(seq) | |||
states_idx = self.analysis.states_idx | |||
@@ -17,6 +17,8 @@ class Parser(object): | |||
state_stack = [self.analysis.init_state_idx] | |||
value_stack = [] | |||
if set_state: set_state(self.analysis.init_state_idx) | |||
def get_action(key): | |||
state = state_stack[-1] | |||
try: | |||
@@ -54,7 +56,7 @@ class Parser(object): | |||
if action == ACTION_SHIFT: | |||
state_stack.append(arg) | |||
value_stack.append(token) | |||
if set_state: self.state = arg | |||
if set_state: set_state(arg) | |||
token = next(stream) | |||
i += 1 | |||
else: | |||