Small fixes

4 years ago · 12d95c37af
--- a/lark/utils.py
+++ b/lark/utils.py
@@ -176,20 +176,20 @@ Py36 = (sys.version_info[:2] >= (3, 6))
 import sre_parse
 import sre_constants
 categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')
 def get_regexp_width(regexp):
 def get_regexp_width(expr):
    if regex:
        # Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with
        # a simple letter, which makes no difference as we are only trying to get the possible lengths of the regex
        # match here below.
        regexp_final = re.sub(categ_pattern, 'A', regexp)
        regexp_final = re.sub(categ_pattern, 'A', expr)
    else:
        if re.search(categ_pattern, regexp):
            raise ImportError('`regex` module must be installed in order to use Unicode categories.', regexp)
        regexp_final = regexp
        if re.search(categ_pattern, expr):
            raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr)
        regexp_final = expr
    try:
        return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]
    except sre_constants.error:
        raise ValueError(regexp)
        raise ValueError(expr)

 ###}

--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -1,10 +1,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import

 try:
    import regex as re
 except ImportError:
    import re
 import re
 import unittest
 import logging
 import os
@@ -23,6 +20,11 @@ from io import (

 logging.basicConfig(level=logging.INFO)

 try:
    import regex
 except ImportError:
    regex = None

 from lark.lark import Lark
 from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
 from lark.tree import Tree
@@ -1787,7 +1789,7 @@ def _make_parser_test(LEXER, PARSER):
                self.assertEqual(a.line, 1)
                self.assertEqual(b.line, 2)

        @unittest.skipIf(sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.')
        @unittest.skipIf(not regex or sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.')
        def test_unicode_class(self):
            "Tests that character classes from the `regex` module work correctly."
            g = _Lark(r"""?start: NAME
@@ -1797,7 +1799,7 @@ def _make_parser_test(LEXER, PARSER):

            self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்')

        @unittest.skipIf(sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.')
        @unittest.skipIf(not regex or sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.')
        def test_unicode_word(self):
            "Tests that a persistent bug in the `re` module works when `regex` is enabled."
            g = _Lark(r"""?start: NAME