Browse Source

Small fixes

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.9.0
Erez Sh 4 years ago
parent
commit
12d95c37af
2 changed files with 14 additions and 12 deletions
  1. +6
    -6
      lark/utils.py
  2. +8
    -6
      tests/test_parser.py

+ 6
- 6
lark/utils.py View File

@@ -176,20 +176,20 @@ Py36 = (sys.version_info[:2] >= (3, 6))
import sre_parse
import sre_constants
categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')
# NOTE(review): this is a rendered diff hunk with the +/- markers and the
# indentation stripped. Each line that uses `regexp` appears to be the removed
# version, immediately followed by its replacement using `expr` (a rename of
# the parameter); lines shown once are unchanged context. Confirm against the
# commit before treating this span as runnable code.
#
# get_regexp_width: return the possible match lengths of a regular expression
# as a list of ints, taken from `sre_parse.parse(...).getwidth()`.
# Raises ImportError when the pattern contains a `\p{...}` category and
# `regex` is falsy; raises ValueError when `sre_parse` rejects the pattern.
def get_regexp_width(regexp):
def get_regexp_width(expr):
# `regex` is defined outside this hunk — presumably the optional third-party
# `regex` module, falsy when it is not installed; verify in lark/utils.py.
if regex:
# Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with
# a simple letter, which makes no difference as we are only trying to get the possible lengths of the regex
# match here below.
regexp_final = re.sub(categ_pattern, 'A', regexp)
regexp_final = re.sub(categ_pattern, 'A', expr)
else:
# Without `regex`, a pattern containing a `\p{...}` category is rejected up front.
if re.search(categ_pattern, regexp):
raise ImportError('`regex` module must be installed in order to use Unicode categories.', regexp)
regexp_final = regexp
if re.search(categ_pattern, expr):
raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr)
regexp_final = expr
try:
# Each value reported by getwidth() is converted to int.
return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]
except sre_constants.error:
# The parse error is re-raised as ValueError carrying the pattern as passed in.
raise ValueError(regexp)
raise ValueError(expr)

###}



+ 8
- 6
tests/test_parser.py View File

@@ -1,10 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import

try:
import regex as re
except ImportError:
import re
import re
import unittest
import logging
import os
@@ -23,6 +20,11 @@ from io import (

logging.basicConfig(level=logging.INFO)

try:
import regex
except ImportError:
regex = None

from lark.lark import Lark
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
from lark.tree import Tree
@@ -1787,7 +1789,7 @@ def _make_parser_test(LEXER, PARSER):
self.assertEqual(a.line, 1)
self.assertEqual(b.line, 2)

@unittest.skipIf(sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.')
@unittest.skipIf(not regex or sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.')
def test_unicode_class(self):
"Tests that character classes from the `regex` module work correctly."
g = _Lark(r"""?start: NAME
@@ -1797,7 +1799,7 @@ def _make_parser_test(LEXER, PARSER):

self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்')

@unittest.skipIf(sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.')
@unittest.skipIf(not regex or sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.')
def test_unicode_word(self):
"Tests that a persistent bug in the `re` module works when `regex` is enabled."
g = _Lark(r"""?start: NAME


Loading…
Cancel
Save