From 09e80a5c9ef45214340708a48bc1f0edad6efd06 Mon Sep 17 00:00:00 2001 From: julienmalard Date: Sun, 28 Jun 2020 13:46:22 -0400 Subject: [PATCH] Fixed tests --- lark-stubs/lexer.pyi | 1 + lark/lexer.py | 14 +++++++------- tests/__main__.py | 2 +- tests/test_parser.py | 20 ++++++++++++++++++++ tests/test_regex.py | 37 ------------------------------------- 5 files changed, 29 insertions(+), 45 deletions(-) delete mode 100644 tests/test_regex.py diff --git a/lark-stubs/lexer.pyi b/lark-stubs/lexer.pyi index 1ae861d..ae7d68a 100644 --- a/lark-stubs/lexer.pyi +++ b/lark-stubs/lexer.pyi @@ -107,6 +107,7 @@ class TraditionalLexer(Lexer): user_callbacks: Dict[str, _Callback] callback: Dict[str, _Callback] mres: List[Tuple[REPattern, Dict[int, str]]] + re: ModuleType def __init__( self, diff --git a/lark/lexer.py b/lark/lexer.py index 4d5c498..9a0fc65 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -251,13 +251,13 @@ def _create_unless(terminals, g_regex_flags, re_): if strtok.pattern.flags <= retok.pattern.flags: embedded_strs.add(strtok) if unless: - callback[retok.name] = UnlessCallback(build_mres(unless, g_regex_flags, match_whole=True)) + callback[retok.name] = UnlessCallback(build_mres(unless, g_regex_flags, re_, match_whole=True)) terminals = [t for t in terminals if t not in embedded_strs] return terminals, callback -def _build_mres(terminals, max_size, g_regex_flags, match_whole): +def _build_mres(terminals, max_size, g_regex_flags, match_whole, re_): # Python sets an unreasonable group limit (currently 100) in its re module # Worse, the only way to know we reached it is by catching an AssertionError! # This function recursively tries less and less groups until it's successful. @@ -265,17 +265,17 @@ def _build_mres(terminals, max_size, g_regex_flags, match_whole): mres = [] while terminals: try: - mre = re.compile(u'|'.join(u'(?P<%s>%s)'%(t.name, t.pattern.to_regexp()+postfix) for t in terminals[:max_size]), g_regex_flags) + mre = re_.compile(u'|'.join(u'(?P<%s>%s)'%(t.name, t.pattern.to_regexp()+postfix) for t in terminals[:max_size]), g_regex_flags) except AssertionError: # Yes, this is what Python provides us.. :/ - return _build_mres(terminals, max_size//2, g_regex_flags, match_whole) + return _build_mres(terminals, max_size//2, g_regex_flags, match_whole, re_) # terms_from_name = {t.name: t for t in terminals[:max_size]} mres.append((mre, {i:n for n,i in mre.groupindex.items()} )) terminals = terminals[max_size:] return mres -def build_mres(terminals, g_regex_flags, match_whole=False): - return _build_mres(terminals, len(terminals), g_regex_flags, match_whole) +def build_mres(terminals, g_regex_flags, re_, match_whole=False): + return _build_mres(terminals, len(terminals), g_regex_flags, match_whole, re_) def _regexp_has_newline(r): r"""Expressions that may indicate newlines in a regexp: @@ -336,7 +336,7 @@ class TraditionalLexer(Lexer): else: self.callback[type_] = f - self.mres = build_mres(terminals, g_regex_flags) + self.mres = build_mres(terminals, g_regex_flags, self.re) def match(self, stream, pos): for mre, type_from_index in self.mres: diff --git a/tests/__main__.py b/tests/__main__.py index 6b8f513..cb26eb4 100644 --- a/tests/__main__.py +++ b/tests/__main__.py @@ -7,7 +7,7 @@ from .test_trees import TestTrees from .test_tools import TestStandalone from .test_cache import TestCache from .test_reconstructor import TestReconstructor -from .test_regex import TestRegex + try: from .test_nearley.test_nearley import TestNearley except ImportError: diff --git a/tests/test_parser.py b/tests/test_parser.py index f8f37df..ac84c61 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1787,6 +1787,26 @@ def _make_parser_test(LEXER, PARSER): self.assertEqual(a.line, 1) self.assertEqual(b.line, 2) + @unittest.skipIf(sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.') + def test_unicode_class(self): + "Tests that character classes from the `regex` module work correctly." + g = _Lark(r""" + ?start: NAME + NAME: ID_START ID_CONTINUE* + ID_START: /[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}_]+/ + ID_CONTINUE: ID_START | /[\p{Mn}\p{Mc}\p{Nd}\p{Pc}·]+/ + """, regex=True) + + self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்') + + @unittest.skipIf(sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.') + def test_unicode_word(self): + "Tests that a persistent bug in the `re` module works when `regex` is enabled." + g = _Lark(r""" + ?start: NAME + NAME: /[\w]+/ + """, regex=True) + self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்') _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize() _TestParser.__name__ = _NAME diff --git a/tests/test_regex.py b/tests/test_regex.py deleted file mode 100644 index d20a8bf..0000000 --- a/tests/test_regex.py +++ /dev/null @@ -1,37 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import - -import logging -import sys -import unittest - -logging.basicConfig(level=logging.INFO) - -from lark.lark import Lark - - -class TestRegex(unittest.TestCase): - @unittest.skipIf(sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.') - def test_unicode_class(self): - "Tests that character classes from the `regex` module work correctly." - g = Lark(r""" - ?start: NAME - NAME: ID_START ID_CONTINUE* - ID_START: /[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}_]+/ - ID_CONTINUE: ID_START | /[\p{Mn}\p{Mc}\p{Nd}\p{Pc}·]+/ - """, regex=True) - - self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்') - - @unittest.skipIf(sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.') - def test_unicode_word(self): - "Tests that a persistent bug in the `re` module works when `regex` is enabled." - g = Lark(r""" - ?start: NAME - NAME: /[\w]+/ - """, regex=True) - self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்') - - -if __name__ == '__main__': - unittest.main()