Added custom lexer to earley.

4 years ago · cf25c06420
--- a/lark/parser_frontends.py
+++ b/lark/parser_frontends.py
@@ -28,10 +28,7 @@ def get_frontend(parser, lexer):
                    self.lexer = lexer(lexer_conf)
                def lex(self, lexer_state, parser_state):
                    return self.lexer.lex(lexer_state.text)

            class LALR_CustomLexerWrapper(LALR_WithLexer):
                def __init__(self, lexer_conf, parser_conf, options=None):
                    super(LALR_CustomLexerWrapper, self).__init__(lexer_conf, parser_conf, options=options)
                def init_lexer(self):
                    future_interface = getattr(lexer, '__future_interface__', False)
                    if future_interface:
@@ -44,13 +41,19 @@ def get_frontend(parser, lexer):
            raise ValueError('Unknown lexer: %s' % lexer)
    elif parser=='earley':
        if lexer=='standard':
            return Earley
            return Earley_Traditional
        elif lexer=='dynamic':
            return XEarley
        elif lexer=='dynamic_complete':
            return XEarley_CompleteLex
        elif lexer=='contextual':
            raise ValueError('The Earley parser does not support the contextual parser')
        elif issubclass(lexer, Lexer):
            assert not getattr(lexer, '__future_interface__', False), "Earley doesn't support the future interface right now"
            class Earley_CustomLexerWrapper(Earley_WithLexer):
                """Earley front-end that lexes with the caller-supplied ``lexer`` class.

                ``lexer`` is the argument of the enclosing ``get_frontend`` call; this
                branch is only reached when it is a ``Lexer`` subclass.
                """
                def init_lexer(self, **kw):
                    """Instantiate the custom lexer class with this front-end's ``lexer_conf``.

                    ``**kw`` is accepted but not used here.
                    """
                    # `lexer` is closed over from get_frontend; it is not an attribute of self.
                    self.lexer = lexer(self.lexer_conf)
            return Earley_CustomLexerWrapper
        else:
            raise ValueError('Unknown lexer: %s' % lexer)
    elif parser == 'cyk':
@@ -163,10 +166,10 @@ class LALR_ContextualLexer(LALR_WithLexer):
 ###}


 class Earley(WithLexer):
 class Earley_WithLexer(WithLexer):
    def __init__(self, lexer_conf, parser_conf, options=None):
        WithLexer.__init__(self, lexer_conf, parser_conf, options)
        self.init_traditional_lexer()
        self.init_lexer()

        resolve_ambiguity = options.ambiguity == 'resolve'
        debug = options.debug if options else False
@@ -179,6 +182,13 @@ class Earley(WithLexer):
    def match(self, term, token):
        """Return True when ``token.type`` equals ``term.name``."""
        return term.name == token.type

    def init_lexer(self, **kw):
        """Lexer set-up hook; this base version always raises.

        Subclasses are expected to override it.

        Raises:
            NotImplementedError: always, when not overridden.
        """
        raise NotImplementedError()

 class Earley_Traditional(Earley_WithLexer):
    """Earley front-end whose lexer set-up delegates to ``init_traditional_lexer``."""
    def init_lexer(self, **kw):
        """Set up the lexer via ``init_traditional_lexer``; ``**kw`` is ignored."""
        self.init_traditional_lexer()


 class XEarley(_ParserFrontend):
    def __init__(self, lexer_conf, parser_conf, options=None, **kw):
--- a/tests/main.py
+++ b/tests/main.py
@@ -20,20 +20,7 @@ except ImportError:

 from .test_logger import Testlogger

 from .test_parser import (
        TestLalrStandard,
        TestEarleyStandard,
        TestCykStandard,
        TestLalrContextual,
        TestEarleyDynamic,
        TestLalrCustom,

        # TestFullEarleyStandard,
        TestFullEarleyDynamic,
        TestFullEarleyDynamic_complete,

        TestParsers,
        )
 from .test_parser import * # We define __all__ to list which TestSuites to run

 logger.setLevel(logging.INFO)

--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -39,8 +39,7 @@ from lark.grammar import Rule
 from lark.lexer import TerminalDef, Lexer, TraditionalLexer
 from lark.indenter import Indenter

 logger.setLevel(logging.INFO)

 __all__ = ['TestParsers']

 __path__ = os.path.dirname(__file__)
 def _read(n, *args):
@@ -856,8 +855,9 @@ def _make_full_earley_test(LEXER):
    _NAME = "TestFullEarley" + LEXER.capitalize()
    _TestFullEarley.__name__ = _NAME
    globals()[_NAME] = _TestFullEarley
    __all__.append(_NAME)

 class CustomLexer(Lexer):
 class CustomLexerNew(Lexer):
    """
    Purpose of this custom lexer is to test the integration,
    so it uses the traditional lexer as its implementation, without custom lexing behaviour.
@@ -868,6 +868,18 @@ class CustomLexer(Lexer):
        return self.lexer.lex(*args, **kwargs)
    
    __future_interface__ = True
    
 class CustomLexerOld(Lexer):
    """
    Custom lexer used to test integration of the old (non-future) lexer interface.

    It adds no custom lexing behaviour: every call is delegated to a
    TraditionalLexer built from a copy of the given lexer configuration.
    """
    def __init__(self, lexer_conf):
        # Wrap a TraditionalLexer built from a copy of the configuration.
        self.lexer = TraditionalLexer(copy(lexer_conf))
    def lex(self, *args, **kwargs):
        # Pure pass-through to the wrapped lexer.
        return self.lexer.lex(*args, **kwargs)
    
    # Explicitly opt out of the future interface; get_frontend's Earley branch
    # asserts that this flag is falsy for custom lexers.
    __future_interface__ = False

 def _tree_structure_check(a, b):
    """
@@ -941,12 +953,18 @@ class DualBytesLark:
            self.bytes_lark.load(f)

 def _make_parser_test(LEXER, PARSER):
    lexer_class_or_name = CustomLexer if LEXER == 'custom' else LEXER
    if LEXER == 'custom_new':
        lexer_class_or_name = CustomLexerNew
    elif LEXER == 'custom_old':
        lexer_class_or_name = CustomLexerOld
    else:
        lexer_class_or_name = LEXER
    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)
    def _Lark_open(gfilename, **kwargs):
        return Lark.open(gfilename, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)


    class _TestParser(unittest.TestCase):
        def test_basic1(self):
            g = _Lark("""start: a+ b a* "b" a*
@@ -1992,7 +2010,7 @@ def _make_parser_test(LEXER, PARSER):



        @unittest.skipIf(PARSER != 'earley' or LEXER == 'standard', "Currently only Earley supports priority sum in rules")
        @unittest.skipIf(PARSER != 'earley' or 'dynamic' not in LEXER, "Currently only Earley supports priority sum in rules")
        def test_prioritization_sum(self):
            "Tests effect of priority on result"

@@ -2296,7 +2314,7 @@ def _make_parser_test(LEXER, PARSER):
            parser = _Lark(grammar)


        @unittest.skipIf(PARSER!='lalr' or LEXER=='custom', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
        @unittest.skipIf(PARSER!='lalr' or 'custom' in LEXER, "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
        def test_serialize(self):
            grammar = """
                start: _ANY b "C"
@@ -2400,17 +2418,21 @@ def _make_parser_test(LEXER, PARSER):
    _TestParser.__name__ = _NAME
    _TestParser.__qualname__ = "tests.test_parser." + _NAME
    globals()[_NAME] = _TestParser
    __all__.append(_NAME)

 # Note: You still have to import them in __main__ for the tests to run
 _TO_TEST = [
        ('standard', 'earley'),
        ('standard', 'cyk'),
        ('standard', 'lalr'),
    
        ('dynamic', 'earley'),
        ('dynamic_complete', 'earley'),
        ('standard', 'lalr'),
    
        ('contextual', 'lalr'),
        ('custom', 'lalr'),
        # (None, 'earley'),
    
        ('custom_new', 'lalr'),
        ('custom_old', 'earley'),
 ]

 for _LEXER, _PARSER in _TO_TEST: