From e4d3e74f6ae4431b43fd338b48b2389bd185ff35 Mon Sep 17 00:00:00 2001
From: Erez Shinan
Date: Mon, 20 Feb 2017 00:36:59 +0200
Subject: [PATCH] Finished refactoring

---
 lark/lexer.py               |  9 +++++++--
 lark/parser_frontends.py    | 17 ++++++-----------
 lark/parsers/lalr_parser.py | 16 +++++++++-------
 3 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/lark/lexer.py b/lark/lexer.py
index 301d555..db5dde7 100644
--- a/lark/lexer.py
+++ b/lark/lexer.py
@@ -197,14 +197,19 @@ class ContextualLexer:
 
         self.root_lexer = Lexer(tokens, ignore=ignore)
 
-    def lex(self, stream, parser):
+        self.set_parser_state(None) # Needs to be set on the outside
+
+    def set_parser_state(self, state):
+        self.parser_state = state
+
+    def lex(self, stream):
         lex_pos = 0
         line = 1
         col_start_pos = 0
         newline_types = list(self.root_lexer.newline_types)
         ignore_types = list(self.root_lexer.ignore_types)
         while True:
-            lexer = self.lexers[parser.state]
+            lexer = self.lexers[self.parser_state]
             for mre, type_from_index in lexer.mres:
                 m = mre.match(stream, lex_pos)
                 if m:
diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py
index 9e5c248..1c46d35 100644
--- a/lark/parser_frontends.py
+++ b/lark/parser_frontends.py
@@ -2,7 +2,6 @@ import re
 import sre_parse
 
 from .lexer import Lexer, ContextualLexer
-from .parsers.lalr_analysis import LALR_Analyzer
 from .common import is_terminal, GrammarError
 from .parsers import lalr_parser, earley
 
@@ -22,11 +21,9 @@ class WithLexer:
 
 class LALR(WithLexer):
     def __init__(self, lexer_conf, parser_conf):
         WithLexer.__init__(self, lexer_conf)
-        self.parser_conf = parser_conf
-        analyzer = LALR_Analyzer(parser_conf.rules, parser_conf.start)
-        analyzer.compute_lookahead()
-        self.parser = lalr_parser.Parser(analyzer, parser_conf.callback)
+        self.parser_conf = parser_conf
+        self.parser = lalr_parser.Parser(parser_conf)
 
     def parse(self, text):
         tokens = list(self.lex(text))
@@ -37,21 +34,19 @@ class LALR_ContextualLexer:
         self.lexer_conf = lexer_conf
         self.parser_conf = parser_conf
 
-        self.analyzer = LALR_Analyzer(parser_conf.rules, parser_conf.start)
-        self.analyzer.compute_lookahead()
+        self.parser = lalr_parser.Parser(parser_conf)
 
-        d = {idx:t.keys() for idx, t in self.analyzer.states_idx.items()}
+        d = {idx:t.keys() for idx, t in self.parser.analysis.states_idx.items()}
         self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore,
                                      always_accept=lexer_conf.postlex.always_accept
                                      if lexer_conf.postlex else ())
 
 
     def parse(self, text):
-        parser = lalr_parser.Parser(self.analyzer, self.parser_conf.callback)
-        tokens = self.lexer.lex(text, parser)
+        tokens = self.lexer.lex(text)
         if self.lexer_conf.postlex:
             tokens = self.lexer_conf.postlex.process(tokens)
-        return parser.parse(tokens, True)
+        return self.parser.parse(tokens, self.lexer.set_parser_state)
 
 
 
diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py
index 3280d01..7394f91 100644
--- a/lark/parsers/lalr_parser.py
+++ b/lark/parsers/lalr_parser.py
@@ -3,13 +3,13 @@ from ..common import ParseError, UnexpectedToken
 from .lalr_analysis import LALR_Analyzer, ACTION_SHIFT
 
 class Parser(object):
-    def __init__(self, analysis, callback):
-        self.analysis = analysis
-        self.callbacks = {rule: getattr(callback, rule.alias or rule.origin, None)
-                          for rule in analysis.rules}
-        self.state = self.analysis.init_state_idx
+    def __init__(self, parser_conf):
+        self.analysis = LALR_Analyzer(parser_conf.rules, parser_conf.start)
+        self.analysis.compute_lookahead()
+        self.callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None)
+                          for rule in self.analysis.rules}
 
-    def parse(self, seq, set_state=False):
+    def parse(self, seq, set_state=None):
         i = 0
         stream = iter(seq)
         states_idx = self.analysis.states_idx
@@ -17,6 +17,8 @@ class Parser(object):
         state_stack = [self.analysis.init_state_idx]
         value_stack = []
 
+        if set_state: set_state(self.analysis.init_state_idx)
+
         def get_action(key):
             state = state_stack[-1]
             try:
@@ -54,7 +56,7 @@ class Parser(object):
             if action == ACTION_SHIFT:
                 state_stack.append(arg)
                 value_stack.append(token)
-                if set_state: self.state = arg
+                if set_state: set_state(arg)
                 token = next(stream)
                 i += 1
             else:
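
The heart of the refactor is the direction of state flow. Before, the contextual lexer pulled the current state out of a live Parser object (parser.state), which forced LALR_ContextualLexer.parse() to build a fresh Parser on every call. Now Parser.parse() pushes each state change into the lexer through an optional set_state callback, so the Parser is built once in __init__ and the lexer holds only a plain state value. Below is a minimal, self-contained sketch of that callback protocol; ToyLexer and ToyParser are hypothetical stand-ins for illustration, not lark's real classes.

# ToyLexer/ToyParser are illustrative stand-ins, not lark's classes.
class ToyLexer:
    def __init__(self, tables):
        self.tables = tables           # allowed tokens per parser state
        self.set_parser_state(None)    # needs to be set from the outside

    def set_parser_state(self, state):
        self.parser_state = state

    def lex(self, text):
        # Generator: each char is matched only after the parser has
        # updated parser_state, so the per-state table is always current.
        for ch in text:
            assert ch in self.tables[self.parser_state], ch
            yield ch

class ToyParser:
    def __init__(self, transitions, init_state):
        self.transitions = transitions
        self.init_state = init_state

    def parse(self, seq, set_state=None):
        state = self.init_state
        if set_state: set_state(state)       # seed the lexer's state
        result = []
        for token in seq:
            state = self.transitions[state][token]
            if set_state: set_state(state)   # mirrors the ACTION_SHIFT branch
            result.append(token)
        return result

# Wired together the way LALR_ContextualLexer.parse() now does it:
lexer = ToyLexer({0: {'a'}, 1: {'b'}})
parser = ToyParser({0: {'a': 1}, 1: {'b': 0}}, init_state=0)
tokens = lexer.lex('abab')
print(parser.parse(tokens, lexer.set_parser_state))  # ['a', 'b', 'a', 'b']

Because lex() is a generator, tokens are produced lazily: by the time the parser requests the next token, it has already called set_parser_state() for its latest shift, so the lexer consults the token table of the state the parser is actually in. That laziness is what makes the contextual lexing work, and decoupling the lexer from the Parser object is what lets the patch reuse one Parser (and its LALR analysis) across parses.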