Procházet zdrojové kódy

Added fallback to `get_regexp_width` to not fail on unsupported regex features.

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.12.0
MegaIng1 před 3 roky
rodič
revize
2e0e55ea2a
2 změnil soubory, kde provedl 28 přidání a 3 odebrání
  1. +11
    -2
      lark/utils.py
  2. +17
    -1
      tests/test_parser.py

+ 11
- 2
lark/utils.py Zobrazit soubor

@@ -175,8 +175,17 @@ def get_regexp_width(expr):
try:
return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]
except sre_constants.error:
raise ValueError(expr)

if not regex:
raise ValueError(expr)
else:
# sre_parse does not support the new features in regex. Rather than failing completely in that case,
# we manually test for the most important information (whether the empty string is matched)
c = regex.compile(regexp_final)
if c.match('') is None:
return 1, sre_constants.MAXREPEAT
else:
return 0, sre_constants.MAXREPEAT
###}




+ 17
- 1
tests/test_parser.py Zobrazit soubor

@@ -10,7 +10,7 @@ from copy import copy, deepcopy

from lark.utils import Py36, isascii

from lark import Token, Transformer_NonRecursive
from lark import Token, Transformer_NonRecursive, LexError

try:
from cStringIO import StringIO as cStringIO
@@ -2394,6 +2394,22 @@ def _make_parser_test(LEXER, PARSER):
NAME: /[\w]+/
""", regex=True)
self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்')
# Exercises terminal-width computation for a pattern written with regex-module-only syntax.
# Skipped when the optional `regex` module is not available.
@unittest.skipIf(not regex, "regex not installed")
def test_regex_width_fallback(self):
# First grammar: NAME uses a conditional on a lookahead -- `\d+` when the next
# character is a digit, `\w+` otherwise -- so it never matches the empty string.
g = r"""
start: NAME NAME?
NAME: /(?(?=\d)\d+|\w+)/
"""
# Without regex=True, building the parser is expected to raise one of these errors
# (presumably because the stdlib `re` engine rejects this syntax -- TODO confirm).
self.assertRaises((GrammarError, LexError, re.error), _Lark, g)
# With regex=True the grammar must load and split the input into two NAME tokens.
p = _Lark(g, regex=True)
self.assertEqual(p.parse("123abc"), Tree('start', ['123', 'abc']))
# Second grammar: the else-branch is `\w*`, so NAME can match the empty string.
g = r"""
start: NAME NAME?
NAME: /(?(?=\d)\d+|\w*)/
"""
# Even with regex=True, construction is expected to raise here
# (presumably zero-width terminals are rejected -- verify against the lexer).
self.assertRaises((GrammarError, LexError, re.error), _Lark, g, regex=True)

@unittest.skipIf(PARSER!='lalr', "interactive_parser is only implemented for LALR at the moment")
def test_parser_interactive_parser(self):


Načítá se…
Zrušit
Uložit