Browse Source

Fixed tests

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.9.0
julienmalard 4 years ago
parent
commit
09e80a5c9e
5 changed files with 29 additions and 45 deletions
  1. +1
    -0
      lark-stubs/lexer.pyi
  2. +7
    -7
      lark/lexer.py
  3. +1
    -1
      tests/__main__.py
  4. +20
    -0
      tests/test_parser.py
  5. +0
    -37
      tests/test_regex.py

+ 1
- 0
lark-stubs/lexer.pyi View File

@@ -107,6 +107,7 @@ class TraditionalLexer(Lexer):
user_callbacks: Dict[str, _Callback]
callback: Dict[str, _Callback]
mres: List[Tuple[REPattern, Dict[int, str]]]
re: ModuleType

def __init__(
self,


+ 7
- 7
lark/lexer.py View File

@@ -251,13 +251,13 @@ def _create_unless(terminals, g_regex_flags, re_):
if strtok.pattern.flags <= retok.pattern.flags:
embedded_strs.add(strtok)
if unless:
callback[retok.name] = UnlessCallback(build_mres(unless, g_regex_flags, match_whole=True))
callback[retok.name] = UnlessCallback(build_mres(unless, g_regex_flags, re_, match_whole=True))

terminals = [t for t in terminals if t not in embedded_strs]
return terminals, callback


def _build_mres(terminals, max_size, g_regex_flags, match_whole):
def _build_mres(terminals, max_size, g_regex_flags, match_whole, re_):
# Python sets an unreasonable group limit (currently 100) in its re module
# Worse, the only way to know we reached it is by catching an AssertionError!
# This function recursively tries less and less groups until it's successful.
@@ -265,17 +265,17 @@ def _build_mres(terminals, max_size, g_regex_flags, match_whole):
mres = []
while terminals:
try:
mre = re.compile(u'|'.join(u'(?P<%s>%s)'%(t.name, t.pattern.to_regexp()+postfix) for t in terminals[:max_size]), g_regex_flags)
mre = re_.compile(u'|'.join(u'(?P<%s>%s)'%(t.name, t.pattern.to_regexp()+postfix) for t in terminals[:max_size]), g_regex_flags)
except AssertionError: # Yes, this is what Python provides us.. :/
return _build_mres(terminals, max_size//2, g_regex_flags, match_whole)
return _build_mres(terminals, max_size//2, g_regex_flags, match_whole, re_)

# terms_from_name = {t.name: t for t in terminals[:max_size]}
mres.append((mre, {i:n for n,i in mre.groupindex.items()} ))
terminals = terminals[max_size:]
return mres

def build_mres(terminals, g_regex_flags, match_whole=False):
return _build_mres(terminals, len(terminals), g_regex_flags, match_whole)
def build_mres(terminals, g_regex_flags, re_, match_whole=False):
return _build_mres(terminals, len(terminals), g_regex_flags, match_whole, re_)

def _regexp_has_newline(r):
r"""Expressions that may indicate newlines in a regexp:
@@ -336,7 +336,7 @@ class TraditionalLexer(Lexer):
else:
self.callback[type_] = f

self.mres = build_mres(terminals, g_regex_flags)
self.mres = build_mres(terminals, g_regex_flags, self.re)

def match(self, stream, pos):
for mre, type_from_index in self.mres:


+ 1
- 1
tests/__main__.py View File

@@ -7,7 +7,7 @@ from .test_trees import TestTrees
from .test_tools import TestStandalone
from .test_cache import TestCache
from .test_reconstructor import TestReconstructor
from .test_regex import TestRegex
try:
from .test_nearley.test_nearley import TestNearley
except ImportError:


+ 20
- 0
tests/test_parser.py View File

@@ -1787,6 +1787,26 @@ def _make_parser_test(LEXER, PARSER):
self.assertEqual(a.line, 1)
self.assertEqual(b.line, 2)

@unittest.skipIf(sys.version_info[0] == 2, 'Unicode and Python 2 do not play nicely together.')
def test_unicode_class(self):
    "Tests that character classes from the `regex` module work correctly."
    # The terminals below use \p{...} Unicode property classes, so the
    # grammar is built with regex=True.
    g = _Lark(r"""
?start: NAME
NAME: ID_START ID_CONTINUE*
ID_START: /[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}_]+/
ID_CONTINUE: ID_START | /[\p{Mn}\p{Mc}\p{Nd}\p{Pc}·]+/
""", regex=True)

    # The whole Tamil word must come back as a single NAME token.
    self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்')

@unittest.skipIf(sys.version_info[0] == 2, 'Unicode and Python 2 do not play nicely together.')
def test_unicode_word(self):
    r"Tests that `\w` matches a whole Unicode word when `regex` is enabled (works around a persistent bug in the `re` module)."
    g = _Lark(r"""
?start: NAME
NAME: /[\w]+/
""", regex=True)
    # The whole Tamil word must come back as a single NAME token.
    self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்')

_NAME = "Test" + PARSER.capitalize() + LEXER.capitalize()
_TestParser.__name__ = _NAME


+ 0
- 37
tests/test_regex.py View File

@@ -1,37 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import

import logging
import sys
import unittest

logging.basicConfig(level=logging.INFO)

from lark.lark import Lark


class TestRegex(unittest.TestCase):
    """Checks that grammars built with ``regex=True`` parse Unicode input."""

    @unittest.skipIf(sys.version_info[0] == 2, 'Unicode and Python 2 do not play nicely together.')
    def test_unicode_class(self):
        "Tests that character classes from the `regex` module work correctly."
        # The terminals below use \p{...} Unicode property classes, so the
        # grammar is built with regex=True.
        g = Lark(r"""
?start: NAME
NAME: ID_START ID_CONTINUE*
ID_START: /[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}_]+/
ID_CONTINUE: ID_START | /[\p{Mn}\p{Mc}\p{Nd}\p{Pc}·]+/
""", regex=True)

        # The whole Tamil word must come back as a single NAME token.
        self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்')

    @unittest.skipIf(sys.version_info[0] == 2, 'Unicode and Python 2 do not play nicely together.')
    def test_unicode_word(self):
        r"Tests that `\w` matches a whole Unicode word when `regex` is enabled (works around a persistent bug in the `re` module)."
        g = Lark(r"""
?start: NAME
NAME: /[\w]+/
""", regex=True)
        # The whole Tamil word must come back as a single NAME token.
        self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்')


# Run this module's tests when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()

Loading…
Cancel
Save