From 09e80a5c9ef45214340708a48bc1f0edad6efd06 Mon Sep 17 00:00:00 2001
From: julienmalard <julien.malard@mail.mcgill.ca>
Date: Sun, 28 Jun 2020 13:46:22 -0400
Subject: [PATCH] Fixed tests: pass the regex module into build_mres and move regex tests into test_parser

---
 lark-stubs/lexer.pyi |  1 +
 lark/lexer.py        | 14 +++++++-------
 tests/__main__.py    |  2 +-
 tests/test_parser.py | 20 ++++++++++++++++++++
 tests/test_regex.py  | 37 -------------------------------------
 5 files changed, 29 insertions(+), 45 deletions(-)
 delete mode 100644 tests/test_regex.py

diff --git a/lark-stubs/lexer.pyi b/lark-stubs/lexer.pyi
index 1ae861d..ae7d68a 100644
--- a/lark-stubs/lexer.pyi
+++ b/lark-stubs/lexer.pyi
@@ -107,6 +107,7 @@ class TraditionalLexer(Lexer):
     user_callbacks: Dict[str, _Callback]
     callback: Dict[str, _Callback]
     mres: List[Tuple[REPattern, Dict[int, str]]]
+    re: ModuleType
 
     def __init__(
         self,
diff --git a/lark/lexer.py b/lark/lexer.py
index 4d5c498..9a0fc65 100644
--- a/lark/lexer.py
+++ b/lark/lexer.py
@@ -251,13 +251,13 @@ def _create_unless(terminals, g_regex_flags, re_):
                 if strtok.pattern.flags <= retok.pattern.flags:
                     embedded_strs.add(strtok)
         if unless:
-            callback[retok.name] = UnlessCallback(build_mres(unless, g_regex_flags, match_whole=True))
+            callback[retok.name] = UnlessCallback(build_mres(unless, g_regex_flags, re_, match_whole=True))
 
     terminals = [t for t in terminals if t not in embedded_strs]
     return terminals, callback
 
 
-def _build_mres(terminals, max_size, g_regex_flags, match_whole):
+def _build_mres(terminals, max_size, g_regex_flags, match_whole, re_):
     # Python sets an unreasonable group limit (currently 100) in its re module
     # Worse, the only way to know we reached it is by catching an AssertionError!
     # This function recursively tries less and less groups until it's successful.
@@ -265,17 +265,17 @@ def _build_mres(terminals, max_size, g_regex_flags, match_whole):
     mres = []
     while terminals:
         try:
-            mre = re.compile(u'|'.join(u'(?P<%s>%s)'%(t.name, t.pattern.to_regexp()+postfix) for t in terminals[:max_size]), g_regex_flags)
+            mre = re_.compile(u'|'.join(u'(?P<%s>%s)'%(t.name, t.pattern.to_regexp()+postfix) for t in terminals[:max_size]), g_regex_flags)
         except AssertionError:  # Yes, this is what Python provides us.. :/
-            return _build_mres(terminals, max_size//2, g_regex_flags, match_whole)
+            return _build_mres(terminals, max_size//2, g_regex_flags, match_whole, re_)
 
         # terms_from_name = {t.name: t for t in terminals[:max_size]}
         mres.append((mre, {i:n for n,i in mre.groupindex.items()} ))
         terminals = terminals[max_size:]
     return mres
 
-def build_mres(terminals, g_regex_flags, match_whole=False):
-    return _build_mres(terminals, len(terminals), g_regex_flags, match_whole)
+def build_mres(terminals, g_regex_flags, re_, match_whole=False):
+    return _build_mres(terminals, len(terminals), g_regex_flags, match_whole, re_)
 
 def _regexp_has_newline(r):
     r"""Expressions that may indicate newlines in a regexp:
@@ -336,7 +336,7 @@ class TraditionalLexer(Lexer):
             else:
                 self.callback[type_] = f
 
-        self.mres = build_mres(terminals, g_regex_flags)
+        self.mres = build_mres(terminals, g_regex_flags, self.re)
 
     def match(self, stream, pos):
         for mre, type_from_index in self.mres:
diff --git a/tests/__main__.py b/tests/__main__.py
index 6b8f513..cb26eb4 100644
--- a/tests/__main__.py
+++ b/tests/__main__.py
@@ -7,7 +7,7 @@ from .test_trees import TestTrees
 from .test_tools import TestStandalone
 from .test_cache import TestCache
 from .test_reconstructor import TestReconstructor
-from .test_regex import TestRegex
+
 try:
     from .test_nearley.test_nearley import TestNearley
 except ImportError:
diff --git a/tests/test_parser.py b/tests/test_parser.py
index f8f37df..ac84c61 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -1787,6 +1787,26 @@ def _make_parser_test(LEXER, PARSER):
                 self.assertEqual(a.line, 1)
                 self.assertEqual(b.line, 2)
 
+        @unittest.skipIf(sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.')
+        def test_unicode_class(self):
+            "Tests that character classes from the `regex` module work correctly."
+            g = _Lark(r"""
+                            ?start: NAME
+                            NAME: ID_START ID_CONTINUE*
+                            ID_START: /[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}_]+/
+                            ID_CONTINUE: ID_START | /[\p{Mn}\p{Mc}\p{Nd}\p{Pc}·]+/
+                        """, regex=True)
+
+            self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்')
+
+        @unittest.skipIf(sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.')
+        def test_unicode_word(self):
+            "Tests that input affected by a persistent bug in the `re` module parses correctly when `regex` is enabled."
+            g = _Lark(r"""
+                            ?start: NAME
+                            NAME: /[\w]+/
+                        """, regex=True)
+            self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்')
 
     _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize()
     _TestParser.__name__ = _NAME
diff --git a/tests/test_regex.py b/tests/test_regex.py
deleted file mode 100644
index d20a8bf..0000000
--- a/tests/test_regex.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-
-import logging
-import sys
-import unittest
-
-logging.basicConfig(level=logging.INFO)
-
-from lark.lark import Lark
-
-
-class TestRegex(unittest.TestCase):
-    @unittest.skipIf(sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.')
-    def test_unicode_class(self):
-        "Tests that character classes from the `regex` module work correctly."
-        g = Lark(r"""
-                    ?start: NAME
-                    NAME: ID_START ID_CONTINUE*
-                    ID_START: /[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}_]+/
-                    ID_CONTINUE: ID_START | /[\p{Mn}\p{Mc}\p{Nd}\p{Pc}·]+/
-                """, regex=True)
-
-        self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்')
-
-    @unittest.skipIf(sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.')
-    def test_unicode_word(self):
-        "Tests that a persistent bug in the `re` module works when `regex` is enabled."
-        g = Lark(r"""
-                    ?start: NAME
-                    NAME: /[\w]+/
-                """, regex=True)
-        self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்')
-
-
-if __name__ == '__main__':
-    unittest.main()