From 284dfe7fd3ea77c4c3bdfe7cecbdf3ec526d1ad8 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Wed, 25 Nov 2020 17:10:12 +0200 Subject: [PATCH 1/4] Refactored parser_frontends. Now significantly simpler --- lark/common.py | 12 +- lark/load_grammar.py | 7 +- lark/parser_frontends.py | 300 +++++++++++++++++------------------- lark/parsers/lalr_parser.py | 3 +- lark/utils.py | 1 + 5 files changed, 156 insertions(+), 167 deletions(-) diff --git a/lark/common.py b/lark/common.py index 4bf04ec..efbab01 100644 --- a/lark/common.py +++ b/lark/common.py @@ -5,7 +5,7 @@ from .lexer import TerminalDef class LexerConf(Serialize): - __serialize_fields__ = 'tokens', 'ignore', 'g_regex_flags', 'use_bytes' + __serialize_fields__ = 'tokens', 'ignore', 'g_regex_flags', 'use_bytes', 'name' __serialize_namespace__ = TerminalDef, def __init__(self, tokens, re_module, ignore=(), postlex=None, callbacks=None, g_regex_flags=0, skip_validation=False, use_bytes=False): @@ -18,12 +18,18 @@ class LexerConf(Serialize): self.skip_validation = skip_validation self.use_bytes = use_bytes -###} + self.name = None + +class ParserConf(Serialize): + __serialize_fields__ = 'rules', 'start', 'name' -class ParserConf: def __init__(self, rules, callbacks, start): assert isinstance(start, list) self.rules = rules self.callbacks = callbacks self.start = start + + self.name = None + +###} diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 70fd7eb..36bf849 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -11,7 +11,7 @@ from .utils import bfs, Py36, logger, classify_bool from .lexer import Token, TerminalDef, PatternStr, PatternRE from .parse_tree_builder import ParseTreeBuilder -from .parser_frontends import LALR_TraditionalLexer +from .parser_frontends import ParsingFrontend from .common import LexerConf, ParserConf from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol from .utils import classify, suppress, dedup_list, Str @@ -883,9 +883,10 @@ class GrammarLoader: callback = ParseTreeBuilder(rules, ST).create_callback() import re lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT']) - parser_conf = ParserConf(rules, callback, ['start']) - self.parser = LALR_TraditionalLexer(lexer_conf, parser_conf) + lexer_conf.name = 'standard' + parser_conf.name = 'lalr' + self.parser = ParsingFrontend(lexer_conf, parser_conf, {}) self.canonize_tree = CanonizeTree() self.global_keep_all_tokens = global_keep_all_tokens diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index 5d32589..4061811 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -4,9 +4,8 @@ from .parsers.grammar_analysis import GrammarAnalyzer from .lexer import LexerThread, TraditionalLexer, ContextualLexer, Lexer, Token, TerminalDef from .parsers import earley, xearley, cyk from .parsers.lalr_parser import LALR_Parser -from .grammar import Rule from .tree import Tree -from .common import LexerConf +from .common import LexerConf, ParserConf try: import regex except ImportError: @@ -27,56 +26,112 @@ def _wrap_lexer(lexer_class): return self.lexer.lex(lexer_state.text) return CustomLexerWrapper + +class MakeParsingFrontend: + def __init__(self, parser, lexer): + self.parser = parser + self.lexer = lexer + + def __call__(self, lexer_conf, parser_conf, options): + assert isinstance(lexer_conf, LexerConf) + assert isinstance(parser_conf, ParserConf) + parser_conf.name = self.parser + lexer_conf.name = self.lexer + return ParsingFrontend(lexer_conf, parser_conf, options) + + @classmethod + def deserialize(cls, 
data, memo, callbacks, options): + lexer_conf = LexerConf.deserialize(data['lexer_conf'], memo) + parser_conf = ParserConf.deserialize(data['parser_conf'], memo) + parser = LALR_Parser.deserialize(data['parser'], memo, callbacks, options.debug) + parser_conf.callbacks = callbacks + + terminals = [item for item in memo.values() if isinstance(item, TerminalDef)] + + lexer_conf.callbacks = _get_lexer_callbacks(options.transformer, terminals) + lexer_conf.re_module = regex if options.regex else re + lexer_conf.use_bytes = options.use_bytes + lexer_conf.g_regex_flags = options.g_regex_flags + lexer_conf.skip_validation = True + lexer_conf.postlex = options.postlex + + return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser) + + + + +class ParsingFrontend(Serialize): + __serialize_fields__ = 'lexer_conf', 'parser_conf', 'parser', 'options' + + def __init__(self, lexer_conf, parser_conf, options, parser=None): + self.parser_conf = parser_conf + self.lexer_conf = lexer_conf + self.options = options + + # Set-up parser + if parser: # From cache + self.parser = parser + else: + create_parser = { + 'lalr': create_lalr_parser, + 'earley': make_early, + 'cyk': CYK_FrontEnd, + }[parser_conf.name] + self.parser = create_parser(lexer_conf, parser_conf, options) + + # Set-up lexer + self.skip_lexer = False + if lexer_conf.name in ('dynamic', 'dynamic_complete'): + self.skip_lexer = True + return + + try: + create_lexer = { + 'standard': create_traditional_lexer, + 'contextual': create_contextual_lexer, + }[lexer_conf.name] + except KeyError: + assert issubclass(lexer_conf.name, Lexer), lexer_conf.name + self.lexer = _wrap_lexer(lexer_conf.name)(lexer_conf) + else: + self.lexer = create_lexer(lexer_conf, self.parser, lexer_conf.postlex) + + if lexer_conf.postlex: + self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex) + + + def _parse(self, start, input, *args): + if start is None: + start = self.parser_conf.start + if len(start) > 1: + raise ConfigurationError("Lark initialized with more than 1 possible start rule. 
Must specify which start rule to parse", start) + start ,= start + return self.parser.parse(input, start, *args) + + def parse(self, text, start=None): + if self.skip_lexer: + return self._parse(start, text) + + lexer = LexerThread(self.lexer, text) + return self._parse(start, lexer) + + def get_frontend(parser, lexer): if parser=='lalr': if lexer is None: raise ConfigurationError('The LALR parser requires use of a lexer') - elif lexer == 'standard': - return LALR_TraditionalLexer - elif lexer == 'contextual': - return LALR_ContextualLexer - elif issubclass(lexer, Lexer): - wrapped = _wrap_lexer(lexer) - class LALR_CustomLexerWrapper(LALR_WithLexer): - def init_lexer(self): - self.lexer = wrapped(self.lexer_conf) - return LALR_CustomLexerWrapper - else: + if lexer not in ('standard' ,'contextual') and not issubclass(lexer, Lexer): raise ConfigurationError('Unknown lexer: %s' % lexer) elif parser=='earley': - if lexer=='standard': - return Earley_Traditional - elif lexer=='dynamic': - return XEarley - elif lexer=='dynamic_complete': - return XEarley_CompleteLex - elif lexer=='contextual': + if lexer=='contextual': raise ConfigurationError('The Earley parser does not support the contextual parser') - elif issubclass(lexer, Lexer): - wrapped = _wrap_lexer(lexer) - class Earley_CustomLexerWrapper(Earley_WithLexer): - def init_lexer(self, **kw): - self.lexer = wrapped(self.lexer_conf) - return Earley_CustomLexerWrapper - else: - raise ConfigurationError('Unknown lexer: %s' % lexer) elif parser == 'cyk': - if lexer == 'standard': - return CYK - else: + if lexer != 'standard': raise ConfigurationError('CYK parser requires using standard parser.') else: raise ConfigurationError('Unknown parser: %s' % parser) - -class _ParserFrontend(Serialize): - def _parse(self, start, input, *args): - if start is None: - start = self.start - if len(start) > 1: - raise ConfigurationError("Lark initialized with more than 1 possible start rule. 
Must specify which start rule to parse", start) - start ,= start - return self.parser.parse(input, start, *args) + return MakeParsingFrontend(parser, lexer) def _get_lexer_callbacks(transformer, terminals): @@ -100,119 +155,26 @@ class PostLexConnector: return self.postlexer.process(i) -class WithLexer(_ParserFrontend): - lexer = None - parser = None - lexer_conf = None - start = None - - __serialize_fields__ = 'parser', 'lexer_conf', 'start' - __serialize_namespace__ = LexerConf, - - def __init__(self, lexer_conf, parser_conf, options=None): - self.lexer_conf = lexer_conf - self.start = parser_conf.start - self.postlex = lexer_conf.postlex - - @classmethod - def deserialize(cls, data, memo, callbacks, options): - inst = super(WithLexer, cls).deserialize(data, memo) - - inst.postlex = options.postlex - inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks, options.debug) - - terminals = [item for item in memo.values() if isinstance(item, TerminalDef)] - inst.lexer_conf.callbacks = _get_lexer_callbacks(options.transformer, terminals) - inst.lexer_conf.re_module = regex if options.regex else re - inst.lexer_conf.use_bytes = options.use_bytes - inst.lexer_conf.g_regex_flags = options.g_regex_flags - inst.lexer_conf.skip_validation = True - inst.init_lexer() - - return inst - - def _serialize(self, data, memo): - data['parser'] = data['parser'].serialize(memo) - - def make_lexer(self, text): - lexer = self.lexer - if self.postlex: - lexer = PostLexConnector(self.lexer, self.postlex) - return LexerThread(lexer, text) - - def parse(self, text, start=None): - return self._parse(start, self.make_lexer(text)) - - def init_traditional_lexer(self): - self.lexer = TraditionalLexer(self.lexer_conf) -class LALR_WithLexer(WithLexer): - def __init__(self, lexer_conf, parser_conf, options=None): - debug = options.debug if options else False - self.parser = LALR_Parser(parser_conf, debug=debug) - WithLexer.__init__(self, lexer_conf, parser_conf, options) +def create_traditional_lexer(lexer_conf, parser, postlex): + return TraditionalLexer(lexer_conf) - self.init_lexer() +def create_contextual_lexer(lexer_conf, parser, postlex): + states = {idx:list(t.keys()) for idx, t in parser._parse_table.states.items()} + always_accept = postlex.always_accept if postlex else () + return ContextualLexer(lexer_conf, states, always_accept=always_accept) - def init_lexer(self, **kw): - raise NotImplementedError() +def create_lalr_parser(lexer_conf, parser_conf, options=None): + debug = options.debug if options else False + return LALR_Parser(parser_conf, debug=debug) -class LALR_TraditionalLexer(LALR_WithLexer): - def init_lexer(self): - self.init_traditional_lexer() - -class LALR_ContextualLexer(LALR_WithLexer): - def init_lexer(self): - states = {idx:list(t.keys()) for idx, t in self.parser._parse_table.states.items()} - always_accept = self.postlex.always_accept if self.postlex else () - self.lexer = ContextualLexer(self.lexer_conf, states, always_accept=always_accept) +make_early = NotImplemented +CYK_FrontEnd = NotImplemented ###} - -class Earley_WithLexer(WithLexer): - def __init__(self, lexer_conf, parser_conf, options=None): - WithLexer.__init__(self, lexer_conf, parser_conf, options) - self.init_lexer() - - resolve_ambiguity = options.ambiguity == 'resolve' - debug = options.debug if options else False - tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None - self.parser = earley.Parser(parser_conf, self.match, resolve_ambiguity=resolve_ambiguity, debug=debug, 
tree_class=tree_class) - - def match(self, term, token): - return term.name == token.type - - def init_lexer(self, **kw): - raise NotImplementedError() - -class Earley_Traditional(Earley_WithLexer): - def init_lexer(self, **kw): - self.init_traditional_lexer() - - -class XEarley(_ParserFrontend): - def __init__(self, lexer_conf, parser_conf, options=None, **kw): - self.token_by_name = {t.name:t for t in lexer_conf.tokens} - self.start = parser_conf.start - - self._prepare_match(lexer_conf) - resolve_ambiguity = options.ambiguity == 'resolve' - debug = options.debug if options else False - tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None - self.parser = xearley.Parser(parser_conf, - self.match, - ignore=lexer_conf.ignore, - resolve_ambiguity=resolve_ambiguity, - debug=debug, - tree_class=tree_class, - **kw - ) - - def match(self, term, text, index=0): - return self.regexps[term.name].match(text, index) - - def _prepare_match(self, lexer_conf): +class EarleyRegexpMatcher: + def __init__(self, lexer_conf): self.regexps = {} for t in lexer_conf.tokens: if t.priority != 1: @@ -230,31 +192,49 @@ class XEarley(_ParserFrontend): self.regexps[t.name] = lexer_conf.re_module.compile(regexp, lexer_conf.g_regex_flags) - def parse(self, text, start): - return self._parse(start, text) + def match(self, term, text, index=0): + return self.regexps[term.name].match(text, index) -class XEarley_CompleteLex(XEarley): - def __init__(self, *args, **kw): - XEarley.__init__(self, *args, complete_lex=True, **kw) +def make_xearley(lexer_conf, parser_conf, options=None, **kw): + earley_matcher = EarleyRegexpMatcher(lexer_conf) + return xearley.Parser(parser_conf, earley_matcher.match, ignore=lexer_conf.ignore, **kw) +def _match_earley_basic(term, token): + return term.name == token.type -class CYK(WithLexer): +def make_early_basic(lexer_conf, parser_conf, options, **kw): + return earley.Parser(parser_conf, _match_earley_basic, **kw) - def __init__(self, lexer_conf, parser_conf, options=None): - WithLexer.__init__(self, lexer_conf, parser_conf, options) - self.init_traditional_lexer() +def make_early(lexer_conf, parser_conf, options): + resolve_ambiguity = options.ambiguity == 'resolve' + debug = options.debug if options else False + tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None + + extra = {} + if lexer_conf.name == 'dynamic': + f = make_xearley + elif lexer_conf.name == 'dynamic_complete': + extra['complete_lex'] =True + f = make_xearley + else: + f = make_early_basic + return f(lexer_conf, parser_conf, options, resolve_ambiguity=resolve_ambiguity, debug=debug, tree_class=tree_class, **extra) + + + +class CYK_FrontEnd: + def __init__(self, lexer_conf, parser_conf, options=None): self._analysis = GrammarAnalyzer(parser_conf) self.parser = cyk.Parser(parser_conf.rules) self.callbacks = parser_conf.callbacks - def parse(self, text, start): - tokens = list(self.make_lexer(text).lex(None)) - parse = self._parse(start, tokens) - parse = self._transform(parse) - return parse + def parse(self, lexer, start): + tokens = list(lexer.lex(None)) + tree = self.parser.parse(tokens, start) + return self._transform(tree) def _transform(self, tree): subtrees = list(tree.iter_subtrees()) diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index 3d006e7..f7ff8fe 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -5,13 +5,14 @@ from copy import deepcopy, copy from ..exceptions import UnexpectedInput, UnexpectedToken from 
..lexer import Token +from ..utils import Serialize from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable from .lalr_puppet import ParserPuppet ###{standalone -class LALR_Parser(object): +class LALR_Parser(Serialize): def __init__(self, parser_conf, debug=False): analysis = LALR_Analyzer(parser_conf, debug=debug) analysis.compute_lalr() diff --git a/lark/utils.py b/lark/utils.py index 366922b..3b5b8a8 100644 --- a/lark/utils.py +++ b/lark/utils.py @@ -302,4 +302,5 @@ def _serialize(value, memo): return list(value) # TODO reversible? elif isinstance(value, dict): return {key:_serialize(elem, memo) for key, elem in value.items()} + # assert value is None or isinstance(value, (int, float, str, tuple)), value return value From 712df517b22e0394f6321b90d8e1fd4e29b7fdfb Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 26 Nov 2020 16:22:18 +0200 Subject: [PATCH 2/4] Fixes for PR. Custom lexer now works with CYK --- lark/exceptions.py | 7 +++++++ lark/lark.py | 6 +----- lark/parser_frontends.py | 39 +++++++++++++++++---------------------- tests/test_parser.py | 1 + 4 files changed, 26 insertions(+), 27 deletions(-) diff --git a/lark/exceptions.py b/lark/exceptions.py index 72f6c6f..46740ed 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -11,6 +11,11 @@ class ConfigurationError(LarkError, ValueError): pass +def assert_config(value, options, msg='Got %r, expected one of %s'): + if value not in options: + raise ConfigurationError(msg % (value, options)) + + class GrammarError(LarkError): pass @@ -198,4 +203,6 @@ class VisitError(LarkError): message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc) super(VisitError, self).__init__(message) + + ###} diff --git a/lark/lark.py b/lark/lark.py index b94f26b..842df5f 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -1,5 +1,5 @@ from __future__ import absolute_import -from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken, ConfigurationError +from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken, ConfigurationError, assert_config import sys, os, pickle, hashlib from io import open @@ -24,10 +24,6 @@ except ImportError: ###{standalone -def assert_config(value, options, msg='Got %r, expected one of %s'): - if value not in options: - raise ConfigurationError(msg % (value, options)) - class LarkOptions(Serialize): """Specifies the options for Lark diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index 4061811..e329dfa 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -1,4 +1,4 @@ -from .exceptions import ConfigurationError, GrammarError +from .exceptions import ConfigurationError, GrammarError, assert_config from .utils import get_regexp_width, Serialize from .parsers.grammar_analysis import GrammarAnalyzer from .lexer import LexerThread, TraditionalLexer, ContextualLexer, Lexer, Token, TerminalDef @@ -74,7 +74,7 @@ class ParsingFrontend(Serialize): else: create_parser = { 'lalr': create_lalr_parser, - 'earley': make_early, + 'earley': create_earley_parser, 'cyk': CYK_FrontEnd, }[parser_conf.name] self.parser = create_parser(lexer_conf, parser_conf, options) @@ -117,19 +117,14 @@ class ParsingFrontend(Serialize): def get_frontend(parser, lexer): - if parser=='lalr': - if lexer is None: - raise ConfigurationError('The LALR parser requires use of a lexer') - if lexer not in ('standard' ,'contextual') and not issubclass(lexer, Lexer): - raise ConfigurationError('Unknown lexer: %s' % lexer) - elif parser=='earley': - if 
lexer=='contextual': - raise ConfigurationError('The Earley parser does not support the contextual parser') - elif parser == 'cyk': - if lexer != 'standard': - raise ConfigurationError('CYK parser requires using standard parser.') - else: - raise ConfigurationError('Unknown parser: %s' % parser) + assert_config(parser, ('lalr', 'earley', 'cyk')) + if not isinstance(lexer, type): # not custom lexer? + expected = { + 'lalr': ('standard', 'contextual'), + 'earley': ('standard', 'dynamic', 'dynamic_complete'), + 'cyk': ('standard', ), + }[parser] + assert_config(lexer, expected, 'Parser %r does not support lexer %%r, expected one of %%s' % parser) return MakeParsingFrontend(parser, lexer) @@ -169,7 +164,7 @@ def create_lalr_parser(lexer_conf, parser_conf, options=None): return LALR_Parser(parser_conf, debug=debug) -make_early = NotImplemented +create_earley_parser = NotImplemented CYK_FrontEnd = NotImplemented ###} @@ -196,29 +191,29 @@ class EarleyRegexpMatcher: return self.regexps[term.name].match(text, index) -def make_xearley(lexer_conf, parser_conf, options=None, **kw): +def create_earley_parser__dynamic(lexer_conf, parser_conf, options=None, **kw): earley_matcher = EarleyRegexpMatcher(lexer_conf) return xearley.Parser(parser_conf, earley_matcher.match, ignore=lexer_conf.ignore, **kw) def _match_earley_basic(term, token): return term.name == token.type -def make_early_basic(lexer_conf, parser_conf, options, **kw): +def create_earley_parser__basic(lexer_conf, parser_conf, options, **kw): return earley.Parser(parser_conf, _match_earley_basic, **kw) -def make_early(lexer_conf, parser_conf, options): +def create_earley_parser(lexer_conf, parser_conf, options): resolve_ambiguity = options.ambiguity == 'resolve' debug = options.debug if options else False tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None extra = {} if lexer_conf.name == 'dynamic': - f = make_xearley + f = create_earley_parser__dynamic elif lexer_conf.name == 'dynamic_complete': extra['complete_lex'] =True - f = make_xearley + f = create_earley_parser__dynamic else: - f = make_early_basic + f = create_earley_parser__basic return f(lexer_conf, parser_conf, options, resolve_ambiguity=resolve_ambiguity, debug=debug, tree_class=tree_class, **extra) diff --git a/tests/test_parser.py b/tests/test_parser.py index 86a6be1..0b836ad 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -2471,6 +2471,7 @@ _TO_TEST = [ ('contextual', 'lalr'), ('custom_new', 'lalr'), + ('custom_new', 'cyk'), ('custom_old', 'earley'), ] From 679c415673ae90ef55665e97201ee76ede8ee5a5 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 26 Nov 2020 17:24:40 +0200 Subject: [PATCH 3/4] Small refactor to adjust PR --- lark/common.py | 8 ++++---- lark/load_grammar.py | 4 ++-- lark/parser_frontends.py | 40 ++++++++++++++++++++-------------------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/lark/common.py b/lark/common.py index efbab01..e217063 100644 --- a/lark/common.py +++ b/lark/common.py @@ -5,7 +5,7 @@ from .lexer import TerminalDef class LexerConf(Serialize): - __serialize_fields__ = 'tokens', 'ignore', 'g_regex_flags', 'use_bytes', 'name' + __serialize_fields__ = 'tokens', 'ignore', 'g_regex_flags', 'use_bytes', 'lexer_type' __serialize_namespace__ = TerminalDef, def __init__(self, tokens, re_module, ignore=(), postlex=None, callbacks=None, g_regex_flags=0, skip_validation=False, use_bytes=False): @@ -18,11 +18,11 @@ class LexerConf(Serialize): self.skip_validation = skip_validation self.use_bytes = 
use_bytes - self.name = None + self.lexer_type = None class ParserConf(Serialize): - __serialize_fields__ = 'rules', 'start', 'name' + __serialize_fields__ = 'rules', 'start', 'parser_type' def __init__(self, rules, callbacks, start): assert isinstance(start, list) @@ -30,6 +30,6 @@ class ParserConf(Serialize): self.callbacks = callbacks self.start = start - self.name = None + self.parser_type = None ###} diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 36bf849..76834f4 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -884,8 +884,8 @@ class GrammarLoader: import re lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT']) parser_conf = ParserConf(rules, callback, ['start']) - lexer_conf.name = 'standard' - parser_conf.name = 'lalr' + lexer_conf.lexer_type = 'standard' + parser_conf.parser_type = 'lalr' self.parser = ParsingFrontend(lexer_conf, parser_conf, {}) self.canonize_tree = CanonizeTree() diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index e329dfa..0dd21a0 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -28,15 +28,15 @@ def _wrap_lexer(lexer_class): class MakeParsingFrontend: - def __init__(self, parser, lexer): - self.parser = parser - self.lexer = lexer + def __init__(self, parser_type, lexer_type): + self.parser_type = parser_type + self.lexer_type = lexer_type def __call__(self, lexer_conf, parser_conf, options): assert isinstance(lexer_conf, LexerConf) assert isinstance(parser_conf, ParserConf) - parser_conf.name = self.parser - lexer_conf.name = self.lexer + parser_conf.parser_type = self.parser_type + lexer_conf.lexer_type = self.lexer_type return ParsingFrontend(lexer_conf, parser_conf, options) @classmethod @@ -76,12 +76,14 @@ class ParsingFrontend(Serialize): 'lalr': create_lalr_parser, 'earley': create_earley_parser, 'cyk': CYK_FrontEnd, - }[parser_conf.name] + }[parser_conf.parser_type] self.parser = create_parser(lexer_conf, parser_conf, options) # Set-up lexer + lexer_type = lexer_conf.lexer_type + lexer_type = lexer_conf.lexer_type self.skip_lexer = False - if lexer_conf.name in ('dynamic', 'dynamic_complete'): + if lexer_type in ('dynamic', 'dynamic_complete'): self.skip_lexer = True return @@ -89,10 +91,10 @@ class ParsingFrontend(Serialize): create_lexer = { 'standard': create_traditional_lexer, 'contextual': create_contextual_lexer, - }[lexer_conf.name] + }[lexer_type] except KeyError: - assert issubclass(lexer_conf.name, Lexer), lexer_conf.name - self.lexer = _wrap_lexer(lexer_conf.name)(lexer_conf) + assert issubclass(lexer_type, Lexer), lexer_type + self.lexer = _wrap_lexer(lexer_type)(lexer_conf) else: self.lexer = create_lexer(lexer_conf, self.parser, lexer_conf.postlex) @@ -100,20 +102,18 @@ class ParsingFrontend(Serialize): self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex) - def _parse(self, start, input, *args): + def parse(self, text, start=None): if start is None: start = self.parser_conf.start if len(start) > 1: raise ConfigurationError("Lark initialized with more than 1 possible start rule. 
Must specify which start rule to parse", start) start ,= start - return self.parser.parse(input, start, *args) - def parse(self, text, start=None): if self.skip_lexer: - return self._parse(start, text) + return self.parser.parse(text, start) - lexer = LexerThread(self.lexer, text) - return self._parse(start, lexer) + lexer_thread = LexerThread(self.lexer, text) + return self.parser.parse(lexer_thread, start) def get_frontend(parser, lexer): @@ -207,9 +207,9 @@ def create_earley_parser(lexer_conf, parser_conf, options): tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None extra = {} - if lexer_conf.name == 'dynamic': + if lexer_conf.lexer_type == 'dynamic': f = create_earley_parser__dynamic - elif lexer_conf.name == 'dynamic_complete': + elif lexer_conf.lexer_type == 'dynamic_complete': extra['complete_lex'] =True f = create_earley_parser__dynamic else: @@ -226,8 +226,8 @@ class CYK_FrontEnd: self.callbacks = parser_conf.callbacks - def parse(self, lexer, start): - tokens = list(lexer.lex(None)) + def parse(self, lexer_thread, start): + tokens = list(lexer_thread.lex(None)) tree = self.parser.parse(tokens, start) return self._transform(tree) From ed065eeb76774f8b562ef8f28b5b820b1a4b1a79 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 26 Nov 2020 17:28:53 +0200 Subject: [PATCH 4/4] Removed duplicate line --- lark/parser_frontends.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index 0dd21a0..5cffdb1 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -81,7 +81,6 @@ class ParsingFrontend(Serialize): # Set-up lexer lexer_type = lexer_conf.lexer_type - lexer_type = lexer_conf.lexer_type self.skip_lexer = False if lexer_type in ('dynamic', 'dynamic_complete'): self.skip_lexer = True
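
Below are a few illustrative sketches of the refactored API. They are simplified stand-ins written against the diffs above, not lark source; anything not shown in the patches (helper class names, the plain ValueError used in place of ConfigurationError) is assumed only to keep the examples self-contained.

First, the validation-plus-dispatch shape that get_frontend takes after patch 2. The helper and the expected-lexers table mirror the diff; the real function returns MakeParsingFrontend(parser, lexer) rather than a tuple:

    def assert_config(value, options, msg='Got %r, expected one of %s'):
        # Same behavior as the helper patch 2 moves into lark/exceptions.py,
        # but raising plain ValueError here to keep the sketch dependency-free
        # (the real helper raises ConfigurationError).
        if value not in options:
            raise ValueError(msg % (value, options))

    def get_frontend(parser, lexer):
        # Validate the parser name first, then check the lexer against the
        # per-parser table. The real code lets custom Lexer subclasses
        # (detected with isinstance(lexer, type)) bypass the table.
        assert_config(parser, ('lalr', 'earley', 'cyk'))
        expected = {
            'lalr': ('standard', 'contextual'),
            'earley': ('standard', 'dynamic', 'dynamic_complete'),
            'cyk': ('standard',),
        }[parser]
        assert_config(lexer, expected,
                      'Parser %r does not support lexer %%r, expected one of %%s' % parser)
        return parser, lexer   # real code: return MakeParsingFrontend(parser, lexer)

    get_frontend('lalr', 'contextual')           # passes silently
    try:
        get_frontend('earley', 'contextual')     # earley has no contextual lexer
    except ValueError as e:
        print(e)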
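
Patch 2's subject notes that a custom lexer now works with CYK; the mechanism is that CYK_FrontEnd.parse() consumes a LexerThread instead of building its own TraditionalLexer, so whatever lexer ParsingFrontend constructed — including a wrapped custom Lexer subclass — flows through unchanged. A minimal stand-in, with classes reduced to just the interfaces exercised here (UpperCaseLexer is hypothetical):

    class LexerThread:
        # Reduced stand-in for lark.lexer.LexerThread: binds one lexer to one text.
        def __init__(self, lexer, text):
            self.lexer = lexer
            self.text = text

        def lex(self, parser_state):
            return self.lexer.lex(self.text)

    class UpperCaseLexer:
        # Hypothetical custom lexer. The real requirement is a Lexer subclass
        # whose lex() yields tokens; _wrap_lexer() adapts it to the thread API.
        def lex(self, text):
            for word in text.split():
                yield word.upper()

    class CYK_FrontEnd_sketch:
        # Mirrors the patched CYK_FrontEnd.parse(): drain the lexer thread,
        # then hand the token list to the CYK parser (elided here).
        def parse(self, lexer_thread, start):
            tokens = list(lexer_thread.lex(None))
            return start, tokens

    frontend = CYK_FrontEnd_sketch()
    print(frontend.parse(LexerThread(UpperCaseLexer(), 'a b c'), 'start'))
    # -> ('start', ['A', 'B', 'C'])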
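
Finally, the stamping flow that replaces the per-combination classes deleted in patch 1 (LALR_TraditionalLexer, XEarley_CompleteLex, and so on): MakeParsingFrontend records the chosen algorithm names on the conf objects, using the parser_type/lexer_type attributes that patch 3 settles on, and ParsingFrontend later dispatches on them. The conf classes here are bare attribute holders, not lark's real ones:

    class LexerConf:
        lexer_type = None    # stand-in; the real class also carries tokens, callbacks, ...

    class ParserConf:
        parser_type = None   # stand-in; the real class carries rules, start, callbacks

    class MakeParsingFrontend:
        def __init__(self, parser_type, lexer_type):
            self.parser_type = parser_type
            self.lexer_type = lexer_type

        def __call__(self, lexer_conf, parser_conf, options):
            # As in patch 3: record the choices on the confs, then construct.
            parser_conf.parser_type = self.parser_type
            lexer_conf.lexer_type = self.lexer_type
            return lexer_conf, parser_conf   # real code: ParsingFrontend(lexer_conf, parser_conf, options)

    make = MakeParsingFrontend('lalr', 'contextual')
    lexer_conf, parser_conf = make(LexerConf(), ParserConf(), options=None)
    print(parser_conf.parser_type, lexer_conf.lexer_type)   # lalr contextual

Because the real conf classes list these attributes in __serialize_fields__, the choice survives a serialize/deserialize round trip, which is what lets MakeParsingFrontend.deserialize rebuild a frontend from a cached parser.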