From 284dfe7fd3ea77c4c3bdfe7cecbdf3ec526d1ad8 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Wed, 25 Nov 2020 17:10:12 +0200 Subject: [PATCH 1/4] Refactored parser_frontends. Now significantly simpler --- lark/common.py | 12 +- lark/load_grammar.py | 7 +- lark/parser_frontends.py | 300 +++++++++++++++++------------------- lark/parsers/lalr_parser.py | 3 +- lark/utils.py | 1 + 5 files changed, 156 insertions(+), 167 deletions(-) diff --git a/lark/common.py b/lark/common.py index 4bf04ec..efbab01 100644 --- a/lark/common.py +++ b/lark/common.py @@ -5,7 +5,7 @@ from .lexer import TerminalDef class LexerConf(Serialize): - __serialize_fields__ = 'tokens', 'ignore', 'g_regex_flags', 'use_bytes' + __serialize_fields__ = 'tokens', 'ignore', 'g_regex_flags', 'use_bytes', 'name' __serialize_namespace__ = TerminalDef, def __init__(self, tokens, re_module, ignore=(), postlex=None, callbacks=None, g_regex_flags=0, skip_validation=False, use_bytes=False): @@ -18,12 +18,18 @@ class LexerConf(Serialize): self.skip_validation = skip_validation self.use_bytes = use_bytes -###} + self.name = None + +class ParserConf(Serialize): + __serialize_fields__ = 'rules', 'start', 'name' -class ParserConf: def __init__(self, rules, callbacks, start): assert isinstance(start, list) self.rules = rules self.callbacks = callbacks self.start = start + + self.name = None + +###} diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 70fd7eb..36bf849 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -11,7 +11,7 @@ from .utils import bfs, Py36, logger, classify_bool from .lexer import Token, TerminalDef, PatternStr, PatternRE from .parse_tree_builder import ParseTreeBuilder -from .parser_frontends import LALR_TraditionalLexer +from .parser_frontends import ParsingFrontend from .common import LexerConf, ParserConf from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol from .utils import classify, suppress, dedup_list, Str @@ -883,9 +883,10 @@ class GrammarLoader: callback = ParseTreeBuilder(rules, ST).create_callback() import re lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT']) - parser_conf = ParserConf(rules, callback, ['start']) - self.parser = LALR_TraditionalLexer(lexer_conf, parser_conf) + lexer_conf.name = 'standard' + parser_conf.name = 'lalr' + self.parser = ParsingFrontend(lexer_conf, parser_conf, {}) self.canonize_tree = CanonizeTree() self.global_keep_all_tokens = global_keep_all_tokens diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index 5d32589..4061811 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -4,9 +4,8 @@ from .parsers.grammar_analysis import GrammarAnalyzer from .lexer import LexerThread, TraditionalLexer, ContextualLexer, Lexer, Token, TerminalDef from .parsers import earley, xearley, cyk from .parsers.lalr_parser import LALR_Parser -from .grammar import Rule from .tree import Tree -from .common import LexerConf +from .common import LexerConf, ParserConf try: import regex except ImportError: @@ -27,56 +26,112 @@ def _wrap_lexer(lexer_class): return self.lexer.lex(lexer_state.text) return CustomLexerWrapper + +class MakeParsingFrontend: + def __init__(self, parser, lexer): + self.parser = parser + self.lexer = lexer + + def __call__(self, lexer_conf, parser_conf, options): + assert isinstance(lexer_conf, LexerConf) + assert isinstance(parser_conf, ParserConf) + parser_conf.name = self.parser + lexer_conf.name = self.lexer + return ParsingFrontend(lexer_conf, parser_conf, options) + + @classmethod + def deserialize(cls, 
data, memo, callbacks, options): + lexer_conf = LexerConf.deserialize(data['lexer_conf'], memo) + parser_conf = ParserConf.deserialize(data['parser_conf'], memo) + parser = LALR_Parser.deserialize(data['parser'], memo, callbacks, options.debug) + parser_conf.callbacks = callbacks + + terminals = [item for item in memo.values() if isinstance(item, TerminalDef)] + + lexer_conf.callbacks = _get_lexer_callbacks(options.transformer, terminals) + lexer_conf.re_module = regex if options.regex else re + lexer_conf.use_bytes = options.use_bytes + lexer_conf.g_regex_flags = options.g_regex_flags + lexer_conf.skip_validation = True + lexer_conf.postlex = options.postlex + + return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser) + + + + +class ParsingFrontend(Serialize): + __serialize_fields__ = 'lexer_conf', 'parser_conf', 'parser', 'options' + + def __init__(self, lexer_conf, parser_conf, options, parser=None): + self.parser_conf = parser_conf + self.lexer_conf = lexer_conf + self.options = options + + # Set-up parser + if parser: # From cache + self.parser = parser + else: + create_parser = { + 'lalr': create_lalr_parser, + 'earley': make_early, + 'cyk': CYK_FrontEnd, + }[parser_conf.name] + self.parser = create_parser(lexer_conf, parser_conf, options) + + # Set-up lexer + self.skip_lexer = False + if lexer_conf.name in ('dynamic', 'dynamic_complete'): + self.skip_lexer = True + return + + try: + create_lexer = { + 'standard': create_traditional_lexer, + 'contextual': create_contextual_lexer, + }[lexer_conf.name] + except KeyError: + assert issubclass(lexer_conf.name, Lexer), lexer_conf.name + self.lexer = _wrap_lexer(lexer_conf.name)(lexer_conf) + else: + self.lexer = create_lexer(lexer_conf, self.parser, lexer_conf.postlex) + + if lexer_conf.postlex: + self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex) + + + def _parse(self, start, input, *args): + if start is None: + start = self.parser_conf.start + if len(start) > 1: + raise ConfigurationError("Lark initialized with more than 1 possible start rule. 
Must specify which start rule to parse", start) + start ,= start + return self.parser.parse(input, start, *args) + + def parse(self, text, start=None): + if self.skip_lexer: + return self._parse(start, text) + + lexer = LexerThread(self.lexer, text) + return self._parse(start, lexer) + + def get_frontend(parser, lexer): if parser=='lalr': if lexer is None: raise ConfigurationError('The LALR parser requires use of a lexer') - elif lexer == 'standard': - return LALR_TraditionalLexer - elif lexer == 'contextual': - return LALR_ContextualLexer - elif issubclass(lexer, Lexer): - wrapped = _wrap_lexer(lexer) - class LALR_CustomLexerWrapper(LALR_WithLexer): - def init_lexer(self): - self.lexer = wrapped(self.lexer_conf) - return LALR_CustomLexerWrapper - else: + if lexer not in ('standard' ,'contextual') and not issubclass(lexer, Lexer): raise ConfigurationError('Unknown lexer: %s' % lexer) elif parser=='earley': - if lexer=='standard': - return Earley_Traditional - elif lexer=='dynamic': - return XEarley - elif lexer=='dynamic_complete': - return XEarley_CompleteLex - elif lexer=='contextual': + if lexer=='contextual': raise ConfigurationError('The Earley parser does not support the contextual parser') - elif issubclass(lexer, Lexer): - wrapped = _wrap_lexer(lexer) - class Earley_CustomLexerWrapper(Earley_WithLexer): - def init_lexer(self, **kw): - self.lexer = wrapped(self.lexer_conf) - return Earley_CustomLexerWrapper - else: - raise ConfigurationError('Unknown lexer: %s' % lexer) elif parser == 'cyk': - if lexer == 'standard': - return CYK - else: + if lexer != 'standard': raise ConfigurationError('CYK parser requires using standard parser.') else: raise ConfigurationError('Unknown parser: %s' % parser) - -class _ParserFrontend(Serialize): - def _parse(self, start, input, *args): - if start is None: - start = self.start - if len(start) > 1: - raise ConfigurationError("Lark initialized with more than 1 possible start rule. 
Must specify which start rule to parse", start) - start ,= start - return self.parser.parse(input, start, *args) + return MakeParsingFrontend(parser, lexer) def _get_lexer_callbacks(transformer, terminals): @@ -100,119 +155,26 @@ class PostLexConnector: return self.postlexer.process(i) -class WithLexer(_ParserFrontend): - lexer = None - parser = None - lexer_conf = None - start = None - - __serialize_fields__ = 'parser', 'lexer_conf', 'start' - __serialize_namespace__ = LexerConf, - - def __init__(self, lexer_conf, parser_conf, options=None): - self.lexer_conf = lexer_conf - self.start = parser_conf.start - self.postlex = lexer_conf.postlex - - @classmethod - def deserialize(cls, data, memo, callbacks, options): - inst = super(WithLexer, cls).deserialize(data, memo) - - inst.postlex = options.postlex - inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks, options.debug) - - terminals = [item for item in memo.values() if isinstance(item, TerminalDef)] - inst.lexer_conf.callbacks = _get_lexer_callbacks(options.transformer, terminals) - inst.lexer_conf.re_module = regex if options.regex else re - inst.lexer_conf.use_bytes = options.use_bytes - inst.lexer_conf.g_regex_flags = options.g_regex_flags - inst.lexer_conf.skip_validation = True - inst.init_lexer() - - return inst - - def _serialize(self, data, memo): - data['parser'] = data['parser'].serialize(memo) - - def make_lexer(self, text): - lexer = self.lexer - if self.postlex: - lexer = PostLexConnector(self.lexer, self.postlex) - return LexerThread(lexer, text) - - def parse(self, text, start=None): - return self._parse(start, self.make_lexer(text)) - - def init_traditional_lexer(self): - self.lexer = TraditionalLexer(self.lexer_conf) -class LALR_WithLexer(WithLexer): - def __init__(self, lexer_conf, parser_conf, options=None): - debug = options.debug if options else False - self.parser = LALR_Parser(parser_conf, debug=debug) - WithLexer.__init__(self, lexer_conf, parser_conf, options) +def create_traditional_lexer(lexer_conf, parser, postlex): + return TraditionalLexer(lexer_conf) - self.init_lexer() +def create_contextual_lexer(lexer_conf, parser, postlex): + states = {idx:list(t.keys()) for idx, t in parser._parse_table.states.items()} + always_accept = postlex.always_accept if postlex else () + return ContextualLexer(lexer_conf, states, always_accept=always_accept) - def init_lexer(self, **kw): - raise NotImplementedError() +def create_lalr_parser(lexer_conf, parser_conf, options=None): + debug = options.debug if options else False + return LALR_Parser(parser_conf, debug=debug) -class LALR_TraditionalLexer(LALR_WithLexer): - def init_lexer(self): - self.init_traditional_lexer() - -class LALR_ContextualLexer(LALR_WithLexer): - def init_lexer(self): - states = {idx:list(t.keys()) for idx, t in self.parser._parse_table.states.items()} - always_accept = self.postlex.always_accept if self.postlex else () - self.lexer = ContextualLexer(self.lexer_conf, states, always_accept=always_accept) +make_early = NotImplemented +CYK_FrontEnd = NotImplemented ###} - -class Earley_WithLexer(WithLexer): - def __init__(self, lexer_conf, parser_conf, options=None): - WithLexer.__init__(self, lexer_conf, parser_conf, options) - self.init_lexer() - - resolve_ambiguity = options.ambiguity == 'resolve' - debug = options.debug if options else False - tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None - self.parser = earley.Parser(parser_conf, self.match, resolve_ambiguity=resolve_ambiguity, debug=debug, 
tree_class=tree_class) - - def match(self, term, token): - return term.name == token.type - - def init_lexer(self, **kw): - raise NotImplementedError() - -class Earley_Traditional(Earley_WithLexer): - def init_lexer(self, **kw): - self.init_traditional_lexer() - - -class XEarley(_ParserFrontend): - def __init__(self, lexer_conf, parser_conf, options=None, **kw): - self.token_by_name = {t.name:t for t in lexer_conf.tokens} - self.start = parser_conf.start - - self._prepare_match(lexer_conf) - resolve_ambiguity = options.ambiguity == 'resolve' - debug = options.debug if options else False - tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None - self.parser = xearley.Parser(parser_conf, - self.match, - ignore=lexer_conf.ignore, - resolve_ambiguity=resolve_ambiguity, - debug=debug, - tree_class=tree_class, - **kw - ) - - def match(self, term, text, index=0): - return self.regexps[term.name].match(text, index) - - def _prepare_match(self, lexer_conf): +class EarleyRegexpMatcher: + def __init__(self, lexer_conf): self.regexps = {} for t in lexer_conf.tokens: if t.priority != 1: @@ -230,31 +192,49 @@ class XEarley(_ParserFrontend): self.regexps[t.name] = lexer_conf.re_module.compile(regexp, lexer_conf.g_regex_flags) - def parse(self, text, start): - return self._parse(start, text) + def match(self, term, text, index=0): + return self.regexps[term.name].match(text, index) -class XEarley_CompleteLex(XEarley): - def __init__(self, *args, **kw): - XEarley.__init__(self, *args, complete_lex=True, **kw) +def make_xearley(lexer_conf, parser_conf, options=None, **kw): + earley_matcher = EarleyRegexpMatcher(lexer_conf) + return xearley.Parser(parser_conf, earley_matcher.match, ignore=lexer_conf.ignore, **kw) +def _match_earley_basic(term, token): + return term.name == token.type -class CYK(WithLexer): +def make_early_basic(lexer_conf, parser_conf, options, **kw): + return earley.Parser(parser_conf, _match_earley_basic, **kw) - def __init__(self, lexer_conf, parser_conf, options=None): - WithLexer.__init__(self, lexer_conf, parser_conf, options) - self.init_traditional_lexer() +def make_early(lexer_conf, parser_conf, options): + resolve_ambiguity = options.ambiguity == 'resolve' + debug = options.debug if options else False + tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None + + extra = {} + if lexer_conf.name == 'dynamic': + f = make_xearley + elif lexer_conf.name == 'dynamic_complete': + extra['complete_lex'] =True + f = make_xearley + else: + f = make_early_basic + return f(lexer_conf, parser_conf, options, resolve_ambiguity=resolve_ambiguity, debug=debug, tree_class=tree_class, **extra) + + + +class CYK_FrontEnd: + def __init__(self, lexer_conf, parser_conf, options=None): self._analysis = GrammarAnalyzer(parser_conf) self.parser = cyk.Parser(parser_conf.rules) self.callbacks = parser_conf.callbacks - def parse(self, text, start): - tokens = list(self.make_lexer(text).lex(None)) - parse = self._parse(start, tokens) - parse = self._transform(parse) - return parse + def parse(self, lexer, start): + tokens = list(lexer.lex(None)) + tree = self.parser.parse(tokens, start) + return self._transform(tree) def _transform(self, tree): subtrees = list(tree.iter_subtrees()) diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index 3d006e7..f7ff8fe 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -5,13 +5,14 @@ from copy import deepcopy, copy from ..exceptions import UnexpectedInput, UnexpectedToken from 
..lexer import Token +from ..utils import Serialize from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable from .lalr_puppet import ParserPuppet ###{standalone -class LALR_Parser(object): +class LALR_Parser(Serialize): def __init__(self, parser_conf, debug=False): analysis = LALR_Analyzer(parser_conf, debug=debug) analysis.compute_lalr() diff --git a/lark/utils.py b/lark/utils.py index 366922b..3b5b8a8 100644 --- a/lark/utils.py +++ b/lark/utils.py @@ -302,4 +302,5 @@ def _serialize(value, memo): return list(value) # TODO reversible? elif isinstance(value, dict): return {key:_serialize(elem, memo) for key, elem in value.items()} + # assert value is None or isinstance(value, (int, float, str, tuple)), value return value From 712df517b22e0394f6321b90d8e1fd4e29b7fdfb Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 26 Nov 2020 16:22:18 +0200 Subject: [PATCH 2/4] Fixes for PR. Custom lexer now works with CYK --- lark/exceptions.py | 7 +++++++ lark/lark.py | 6 +----- lark/parser_frontends.py | 39 +++++++++++++++++---------------------- tests/test_parser.py | 1 + 4 files changed, 26 insertions(+), 27 deletions(-) diff --git a/lark/exceptions.py b/lark/exceptions.py index 72f6c6f..46740ed 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -11,6 +11,11 @@ class ConfigurationError(LarkError, ValueError): pass +def assert_config(value, options, msg='Got %r, expected one of %s'): + if value not in options: + raise ConfigurationError(msg % (value, options)) + + class GrammarError(LarkError): pass @@ -198,4 +203,6 @@ class VisitError(LarkError): message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc) super(VisitError, self).__init__(message) + + ###} diff --git a/lark/lark.py b/lark/lark.py index b94f26b..842df5f 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -1,5 +1,5 @@ from __future__ import absolute_import -from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken, ConfigurationError +from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken, ConfigurationError, assert_config import sys, os, pickle, hashlib from io import open @@ -24,10 +24,6 @@ except ImportError: ###{standalone -def assert_config(value, options, msg='Got %r, expected one of %s'): - if value not in options: - raise ConfigurationError(msg % (value, options)) - class LarkOptions(Serialize): """Specifies the options for Lark diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index 4061811..e329dfa 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -1,4 +1,4 @@ -from .exceptions import ConfigurationError, GrammarError +from .exceptions import ConfigurationError, GrammarError, assert_config from .utils import get_regexp_width, Serialize from .parsers.grammar_analysis import GrammarAnalyzer from .lexer import LexerThread, TraditionalLexer, ContextualLexer, Lexer, Token, TerminalDef @@ -74,7 +74,7 @@ class ParsingFrontend(Serialize): else: create_parser = { 'lalr': create_lalr_parser, - 'earley': make_early, + 'earley': create_earley_parser, 'cyk': CYK_FrontEnd, }[parser_conf.name] self.parser = create_parser(lexer_conf, parser_conf, options) @@ -117,19 +117,14 @@ class ParsingFrontend(Serialize): def get_frontend(parser, lexer): - if parser=='lalr': - if lexer is None: - raise ConfigurationError('The LALR parser requires use of a lexer') - if lexer not in ('standard' ,'contextual') and not issubclass(lexer, Lexer): - raise ConfigurationError('Unknown lexer: %s' % lexer) - elif parser=='earley': - if 
lexer=='contextual': - raise ConfigurationError('The Earley parser does not support the contextual parser') - elif parser == 'cyk': - if lexer != 'standard': - raise ConfigurationError('CYK parser requires using standard parser.') - else: - raise ConfigurationError('Unknown parser: %s' % parser) + assert_config(parser, ('lalr', 'earley', 'cyk')) + if not isinstance(lexer, type): # not custom lexer? + expected = { + 'lalr': ('standard', 'contextual'), + 'earley': ('standard', 'dynamic', 'dynamic_complete'), + 'cyk': ('standard', ), + }[parser] + assert_config(lexer, expected, 'Parser %r does not support lexer %%r, expected one of %%s' % parser) return MakeParsingFrontend(parser, lexer) @@ -169,7 +164,7 @@ def create_lalr_parser(lexer_conf, parser_conf, options=None): return LALR_Parser(parser_conf, debug=debug) -make_early = NotImplemented +create_earley_parser = NotImplemented CYK_FrontEnd = NotImplemented ###} @@ -196,29 +191,29 @@ class EarleyRegexpMatcher: return self.regexps[term.name].match(text, index) -def make_xearley(lexer_conf, parser_conf, options=None, **kw): +def create_earley_parser__dynamic(lexer_conf, parser_conf, options=None, **kw): earley_matcher = EarleyRegexpMatcher(lexer_conf) return xearley.Parser(parser_conf, earley_matcher.match, ignore=lexer_conf.ignore, **kw) def _match_earley_basic(term, token): return term.name == token.type -def make_early_basic(lexer_conf, parser_conf, options, **kw): +def create_earley_parser__basic(lexer_conf, parser_conf, options, **kw): return earley.Parser(parser_conf, _match_earley_basic, **kw) -def make_early(lexer_conf, parser_conf, options): +def create_earley_parser(lexer_conf, parser_conf, options): resolve_ambiguity = options.ambiguity == 'resolve' debug = options.debug if options else False tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None extra = {} if lexer_conf.name == 'dynamic': - f = make_xearley + f = create_earley_parser__dynamic elif lexer_conf.name == 'dynamic_complete': extra['complete_lex'] =True - f = make_xearley + f = create_earley_parser__dynamic else: - f = make_early_basic + f = create_earley_parser__basic return f(lexer_conf, parser_conf, options, resolve_ambiguity=resolve_ambiguity, debug=debug, tree_class=tree_class, **extra) diff --git a/tests/test_parser.py b/tests/test_parser.py index 86a6be1..0b836ad 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -2471,6 +2471,7 @@ _TO_TEST = [ ('contextual', 'lalr'), ('custom_new', 'lalr'), + ('custom_new', 'cyk'), ('custom_old', 'earley'), ] From 679c415673ae90ef55665e97201ee76ede8ee5a5 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 26 Nov 2020 17:24:40 +0200 Subject: [PATCH 3/4] Small refactor to adjust PR --- lark/common.py | 8 ++++---- lark/load_grammar.py | 4 ++-- lark/parser_frontends.py | 40 ++++++++++++++++++++-------------------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/lark/common.py b/lark/common.py index efbab01..e217063 100644 --- a/lark/common.py +++ b/lark/common.py @@ -5,7 +5,7 @@ from .lexer import TerminalDef class LexerConf(Serialize): - __serialize_fields__ = 'tokens', 'ignore', 'g_regex_flags', 'use_bytes', 'name' + __serialize_fields__ = 'tokens', 'ignore', 'g_regex_flags', 'use_bytes', 'lexer_type' __serialize_namespace__ = TerminalDef, def __init__(self, tokens, re_module, ignore=(), postlex=None, callbacks=None, g_regex_flags=0, skip_validation=False, use_bytes=False): @@ -18,11 +18,11 @@ class LexerConf(Serialize): self.skip_validation = skip_validation self.use_bytes = 
use_bytes - self.name = None + self.lexer_type = None class ParserConf(Serialize): - __serialize_fields__ = 'rules', 'start', 'name' + __serialize_fields__ = 'rules', 'start', 'parser_type' def __init__(self, rules, callbacks, start): assert isinstance(start, list) @@ -30,6 +30,6 @@ class ParserConf(Serialize): self.callbacks = callbacks self.start = start - self.name = None + self.parser_type = None ###} diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 36bf849..76834f4 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -884,8 +884,8 @@ class GrammarLoader: import re lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT']) parser_conf = ParserConf(rules, callback, ['start']) - lexer_conf.name = 'standard' - parser_conf.name = 'lalr' + lexer_conf.lexer_type = 'standard' + parser_conf.parser_type = 'lalr' self.parser = ParsingFrontend(lexer_conf, parser_conf, {}) self.canonize_tree = CanonizeTree() diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index e329dfa..0dd21a0 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -28,15 +28,15 @@ def _wrap_lexer(lexer_class): class MakeParsingFrontend: - def __init__(self, parser, lexer): - self.parser = parser - self.lexer = lexer + def __init__(self, parser_type, lexer_type): + self.parser_type = parser_type + self.lexer_type = lexer_type def __call__(self, lexer_conf, parser_conf, options): assert isinstance(lexer_conf, LexerConf) assert isinstance(parser_conf, ParserConf) - parser_conf.name = self.parser - lexer_conf.name = self.lexer + parser_conf.parser_type = self.parser_type + lexer_conf.lexer_type = self.lexer_type return ParsingFrontend(lexer_conf, parser_conf, options) @classmethod @@ -76,12 +76,14 @@ class ParsingFrontend(Serialize): 'lalr': create_lalr_parser, 'earley': create_earley_parser, 'cyk': CYK_FrontEnd, - }[parser_conf.name] + }[parser_conf.parser_type] self.parser = create_parser(lexer_conf, parser_conf, options) # Set-up lexer + lexer_type = lexer_conf.lexer_type + lexer_type = lexer_conf.lexer_type self.skip_lexer = False - if lexer_conf.name in ('dynamic', 'dynamic_complete'): + if lexer_type in ('dynamic', 'dynamic_complete'): self.skip_lexer = True return @@ -89,10 +91,10 @@ class ParsingFrontend(Serialize): create_lexer = { 'standard': create_traditional_lexer, 'contextual': create_contextual_lexer, - }[lexer_conf.name] + }[lexer_type] except KeyError: - assert issubclass(lexer_conf.name, Lexer), lexer_conf.name - self.lexer = _wrap_lexer(lexer_conf.name)(lexer_conf) + assert issubclass(lexer_type, Lexer), lexer_type + self.lexer = _wrap_lexer(lexer_type)(lexer_conf) else: self.lexer = create_lexer(lexer_conf, self.parser, lexer_conf.postlex) @@ -100,20 +102,18 @@ class ParsingFrontend(Serialize): self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex) - def _parse(self, start, input, *args): + def parse(self, text, start=None): if start is None: start = self.parser_conf.start if len(start) > 1: raise ConfigurationError("Lark initialized with more than 1 possible start rule. 
Must specify which start rule to parse", start) start ,= start - return self.parser.parse(input, start, *args) - def parse(self, text, start=None): if self.skip_lexer: - return self._parse(start, text) + return self.parser.parse(text, start) - lexer = LexerThread(self.lexer, text) - return self._parse(start, lexer) + lexer_thread = LexerThread(self.lexer, text) + return self.parser.parse(lexer_thread, start) def get_frontend(parser, lexer): @@ -207,9 +207,9 @@ def create_earley_parser(lexer_conf, parser_conf, options): tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None extra = {} - if lexer_conf.name == 'dynamic': + if lexer_conf.lexer_type == 'dynamic': f = create_earley_parser__dynamic - elif lexer_conf.name == 'dynamic_complete': + elif lexer_conf.lexer_type == 'dynamic_complete': extra['complete_lex'] =True f = create_earley_parser__dynamic else: @@ -226,8 +226,8 @@ class CYK_FrontEnd: self.callbacks = parser_conf.callbacks - def parse(self, lexer, start): - tokens = list(lexer.lex(None)) + def parse(self, lexer_thread, start): + tokens = list(lexer_thread.lex(None)) tree = self.parser.parse(tokens, start) return self._transform(tree) From ed065eeb76774f8b562ef8f28b5b820b1a4b1a79 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 26 Nov 2020 17:28:53 +0200 Subject: [PATCH 4/4] Removed duplicate line --- lark/parser_frontends.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index 0dd21a0..5cffdb1 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -81,7 +81,6 @@ class ParsingFrontend(Serialize): # Set-up lexer lexer_type = lexer_conf.lexer_type - lexer_type = lexer_conf.lexer_type self.skip_lexer = False if lexer_type in ('dynamic', 'dynamic_complete'): self.skip_lexer = True
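
Below are a few illustrative sketches of the refactored API. They are simplified stand-ins written against the diffs above, not lark source; anything not shown in the patches (helper class names, the plain ValueError used in place of ConfigurationError) is assumed only to keep the examples self-contained.

First, the validation-plus-dispatch shape that get_frontend takes after patch 2. The helper and the expected-lexers table mirror the diff; the real function returns MakeParsingFrontend(parser, lexer) rather than a tuple:

    def assert_config(value, options, msg='Got %r, expected one of %s'):
        # Same behavior as the helper patch 2 moves into lark/exceptions.py,
        # but raising plain ValueError here to keep the sketch dependency-free
        # (the real helper raises ConfigurationError).
        if value not in options:
            raise ValueError(msg % (value, options))

    def get_frontend(parser, lexer):
        # Validate the parser name first, then check the lexer against the
        # per-parser table. The real code lets custom Lexer subclasses
        # (detected with isinstance(lexer, type)) bypass the table.
        assert_config(parser, ('lalr', 'earley', 'cyk'))
        expected = {
            'lalr': ('standard', 'contextual'),
            'earley': ('standard', 'dynamic', 'dynamic_complete'),
            'cyk': ('standard',),
        }[parser]
        assert_config(lexer, expected,
                      'Parser %r does not support lexer %%r, expected one of %%s' % parser)
        return parser, lexer   # real code: return MakeParsingFrontend(parser, lexer)

    get_frontend('lalr', 'contextual')           # passes silently
    try:
        get_frontend('earley', 'contextual')     # earley has no contextual lexer
    except ValueError as e:
        print(e)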
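
Patch 2's subject notes that a custom lexer now works with CYK; the mechanism is that CYK_FrontEnd.parse() consumes a LexerThread instead of building its own TraditionalLexer, so whatever lexer ParsingFrontend constructed — including a wrapped custom Lexer subclass — flows through unchanged. A minimal stand-in, with classes reduced to just the interfaces exercised here (UpperCaseLexer is hypothetical):

    class LexerThread:
        # Reduced stand-in for lark.lexer.LexerThread: binds one lexer to one text.
        def __init__(self, lexer, text):
            self.lexer = lexer
            self.text = text

        def lex(self, parser_state):
            return self.lexer.lex(self.text)

    class UpperCaseLexer:
        # Hypothetical custom lexer. The real requirement is a Lexer subclass
        # whose lex() yields tokens; _wrap_lexer() adapts it to the thread API.
        def lex(self, text):
            for word in text.split():
                yield word.upper()

    class CYK_FrontEnd_sketch:
        # Mirrors the patched CYK_FrontEnd.parse(): drain the lexer thread,
        # then hand the token list to the CYK parser (elided here).
        def parse(self, lexer_thread, start):
            tokens = list(lexer_thread.lex(None))
            return start, tokens

    frontend = CYK_FrontEnd_sketch()
    print(frontend.parse(LexerThread(UpperCaseLexer(), 'a b c'), 'start'))
    # -> ('start', ['A', 'B', 'C'])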
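
Finally, the stamping flow that replaces the per-combination classes deleted in patch 1 (LALR_TraditionalLexer, XEarley_CompleteLex, and so on): MakeParsingFrontend records the chosen algorithm names on the conf objects, using the parser_type/lexer_type attributes that patch 3 settles on, and ParsingFrontend later dispatches on them. The conf classes here are bare attribute holders, not lark's real ones:

    class LexerConf:
        lexer_type = None    # stand-in; the real class also carries tokens, callbacks, ...

    class ParserConf:
        parser_type = None   # stand-in; the real class carries rules, start, callbacks

    class MakeParsingFrontend:
        def __init__(self, parser_type, lexer_type):
            self.parser_type = parser_type
            self.lexer_type = lexer_type

        def __call__(self, lexer_conf, parser_conf, options):
            # As in patch 3: record the choices on the confs, then construct.
            parser_conf.parser_type = self.parser_type
            lexer_conf.lexer_type = self.lexer_type
            return lexer_conf, parser_conf   # real code: ParsingFrontend(lexer_conf, parser_conf, options)

    make = MakeParsingFrontend('lalr', 'contextual')
    lexer_conf, parser_conf = make(LexerConf(), ParserConf(), options=None)
    print(parser_conf.parser_type, lexer_conf.lexer_type)   # lalr contextual

Because the real conf classes list these attributes in __serialize_fields__, the choice survives a serialize/deserialize round trip, which is what lets MakeParsingFrontend.deserialize rebuild a frontend from a cached parser.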