Browse Source

Small fixes

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.9.0
Erez Sh 4 years ago
parent
commit
12d95c37af
2 changed files with 14 additions and 12 deletions
  1. +6
    -6
      lark/utils.py
  2. +8
    -6
      tests/test_parser.py

+ 6
- 6
lark/utils.py View File

@@ -176,20 +176,20 @@ Py36 = (sys.version_info[:2] >= (3, 6))
import sre_parse import sre_parse
import sre_constants import sre_constants
categ_pattern = re.compile(r'\\p{[A-Za-z_]+}') categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')
def get_regexp_width(regexp):
def get_regexp_width(expr):
if regex: if regex:
# Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with # Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with
# a simple letter, which makes no difference as we are only trying to get the possible lengths of the regex # a simple letter, which makes no difference as we are only trying to get the possible lengths of the regex
# match here below. # match here below.
regexp_final = re.sub(categ_pattern, 'A', regexp)
regexp_final = re.sub(categ_pattern, 'A', expr)
else: else:
if re.search(categ_pattern, regexp):
raise ImportError('`regex` module must be installed in order to use Unicode categories.', regexp)
regexp_final = regexp
if re.search(categ_pattern, expr):
raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr)
regexp_final = expr
try: try:
return [int(x) for x in sre_parse.parse(regexp_final).getwidth()] return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]
except sre_constants.error: except sre_constants.error:
raise ValueError(regexp)
raise ValueError(expr)


###} ###}




+ 8
- 6
tests/test_parser.py View File

@@ -1,10 +1,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import absolute_import from __future__ import absolute_import


try:
import regex as re
except ImportError:
import re
import re
import unittest import unittest
import logging import logging
import os import os
@@ -23,6 +20,11 @@ from io import (


logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)


try:
import regex
except ImportError:
regex = None

from lark.lark import Lark from lark.lark import Lark
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
from lark.tree import Tree from lark.tree import Tree
@@ -1787,7 +1789,7 @@ def _make_parser_test(LEXER, PARSER):
self.assertEqual(a.line, 1) self.assertEqual(a.line, 1)
self.assertEqual(b.line, 2) self.assertEqual(b.line, 2)


@unittest.skipIf(sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.')
@unittest.skipIf(not regex or sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.')
def test_unicode_class(self): def test_unicode_class(self):
"Tests that character classes from the `regex` module work correctly." "Tests that character classes from the `regex` module work correctly."
g = _Lark(r"""?start: NAME g = _Lark(r"""?start: NAME
@@ -1797,7 +1799,7 @@ def _make_parser_test(LEXER, PARSER):


self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்') self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்')


@unittest.skipIf(sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.')
@unittest.skipIf(not regex or sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.')
def test_unicode_word(self): def test_unicode_word(self):
"Tests that a persistent bug in the `re` module works when `regex` is enabled." "Tests that a persistent bug in the `re` module works when `regex` is enabled."
g = _Lark(r"""?start: NAME g = _Lark(r"""?start: NAME


Loading…
Cancel
Save