From cf9d0d915e325426268667d67f058d7c59d1bcd4 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Mon, 23 Nov 2020 23:26:50 +0100 Subject: [PATCH] Fixes for custom lexer. --- lark/parser_frontends.py | 29 ++++++++++++++++------------- tests/test_parser.py | 36 ++++++++++++++++++------------------ 2 files changed, 34 insertions(+), 31 deletions(-) diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index 4e18363..65135bb 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -14,6 +14,18 @@ import re ###{standalone +def _wrap_lexer(lexer_class): + future_interface = getattr(lexer_class, '__future_interface__', False) + if future_interface: + return lexer_class + else: + class CustomLexerWrapper(Lexer): + def __init__(self, lexer_conf): + self.lexer = lexer_class(lexer_conf) + def lex(self, lexer_state, parser_state): + return self.lexer.lex(lexer_state.text) + return CustomLexerWrapper + def get_frontend(parser, lexer): if parser=='lalr': if lexer is None: @@ -23,19 +35,10 @@ def get_frontend(parser, lexer): elif lexer == 'contextual': return LALR_ContextualLexer elif issubclass(lexer, Lexer): - class CustomLexerWrapper(Lexer): - def __init__(self, lexer_conf): - self.lexer = lexer(lexer_conf) - def lex(self, lexer_state, parser_state): - return self.lexer.lex(lexer_state.text) + wrapped = _wrap_lexer(lexer) class LALR_CustomLexerWrapper(LALR_WithLexer): def init_lexer(self): - future_interface = getattr(lexer, '__future_interface__', False) - if future_interface: - self.lexer = lexer(self.lexer_conf) - else: - self.lexer = CustomLexerWrapper(self.lexer_conf) - + self.lexer = wrapped(self.lexer_conf) return LALR_CustomLexerWrapper else: raise ValueError('Unknown lexer: %s' % lexer) @@ -49,10 +52,10 @@ def get_frontend(parser, lexer): elif lexer=='contextual': raise ValueError('The Earley parser does not support the contextual parser') elif issubclass(lexer, Lexer): - assert not getattr(lexer, '__future_interface__', False), "Earley doesn't support the future interface right now" + wrapped = _wrap_lexer(lexer) class Earley_CustomLexerWrapper(Earley_WithLexer): def init_lexer(self, **kw): - self.lexer = lexer(self.lexer_conf) + self.lexer = wrapped(self.lexer_conf) return Earley_CustomLexerWrapper else: raise ValueError('Unknown lexer: %s' % lexer) diff --git a/tests/test_parser.py b/tests/test_parser.py index bae5ae9..b7de71d 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -864,8 +864,8 @@ class CustomLexerNew(Lexer): """ def __init__(self, lexer_conf): self.lexer = TraditionalLexer(copy(lexer_conf)) - def lex(self, *args, **kwargs): - return self.lexer.lex(*args, **kwargs) + def lex(self, lexer_state, parser_state): + return self.lexer.lex(lexer_state, parser_state) __future_interface__ = True @@ -876,8 +876,9 @@ class CustomLexerOld(Lexer): """ def __init__(self, lexer_conf): self.lexer = TraditionalLexer(copy(lexer_conf)) - def lex(self, *args, **kwargs): - return self.lexer.lex(*args, **kwargs) + def lex(self, text): + ls = self.lexer.make_lexer_state(text) + return self.lexer.lex(ls, None) __future_interface__ = False @@ -1520,7 +1521,7 @@ def _make_parser_test(LEXER, PARSER): %s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items()))) def test_float_without_lexer(self): - expected_error = UnexpectedCharacters if LEXER.startswith('dynamic') else UnexpectedToken + expected_error = UnexpectedCharacters if 'dynamic' in LEXER else UnexpectedToken if PARSER == 'cyk': expected_error = ParseError @@ -1653,13 +1654,13 @@ def _make_parser_test(LEXER, PARSER): self.assertEqual(d.line, 2) self.assertEqual(d.column, 2) - if LEXER != 'dynamic': - self.assertEqual(a.end_line, 1) - self.assertEqual(a.end_column, 2) - self.assertEqual(bc.end_line, 2) - self.assertEqual(bc.end_column, 2) - self.assertEqual(d.end_line, 2) - self.assertEqual(d.end_column, 3) + # if LEXER != 'dynamic': + self.assertEqual(a.end_line, 1) + self.assertEqual(a.end_column, 2) + self.assertEqual(bc.end_line, 2) + self.assertEqual(bc.end_column, 2) + self.assertEqual(d.end_line, 2) + self.assertEqual(d.end_column, 3) @@ -1890,7 +1891,7 @@ def _make_parser_test(LEXER, PARSER): """ self.assertRaises(IOError, _Lark, grammar) - @unittest.skipIf(LEXER=='dynamic', "%declare/postlex doesn't work with dynamic") + @unittest.skipIf('dynamic' in LEXER, "%declare/postlex doesn't work with dynamic") def test_postlex_declare(self): # Note: this test does a lot. maybe split it up? class TestPostLexer: def process(self, stream): @@ -1913,7 +1914,7 @@ def _make_parser_test(LEXER, PARSER): tree = parser.parse(test_file) self.assertEqual(tree.children, [Token('B', 'A')]) - @unittest.skipIf(LEXER=='dynamic', "%declare/postlex doesn't work with dynamic") + @unittest.skipIf('dynamic' in LEXER, "%declare/postlex doesn't work with dynamic") def test_postlex_indenter(self): class CustomIndenter(Indenter): NL_type = 'NEWLINE' @@ -2221,9 +2222,9 @@ def _make_parser_test(LEXER, PARSER): self.assertEqual(tok, text) self.assertEqual(tok.line, 1) self.assertEqual(tok.column, 1) - if _LEXER != 'dynamic': - self.assertEqual(tok.end_line, 2) - self.assertEqual(tok.end_column, 6) + # if _LEXER != 'dynamic': + self.assertEqual(tok.end_line, 2) + self.assertEqual(tok.end_column, 6) @unittest.skipIf(PARSER=='cyk', "Empty rules") def test_empty_end(self): @@ -2420,7 +2421,6 @@ def _make_parser_test(LEXER, PARSER): globals()[_NAME] = _TestParser __all__.append(_NAME) -# Note: You still have to import them in __main__ for the tests to run _TO_TEST = [ ('standard', 'earley'), ('standard', 'cyk'),