Ver a proveniência

Merge pull request #878 from MegaIng/better-regex-support

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.12.0
Erez Shinan há 3 anos
committed by GitHub
ascendente
cometimento
1a16989c8a
Não foi encontrada, na base de dados, uma chave conhecida para esta assinatura. ID da chave GPG: 4AEE18F83AFDEB23
2 ficheiros alterados com 28 adições e 3 eliminações
  1. +11
    -2
      lark/utils.py
  2. +17
    -1
      tests/test_parser.py

+ 11
- 2
lark/utils.py Ver ficheiro

@@ -177,8 +177,17 @@ def get_regexp_width(expr):
try:
return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]
except sre_constants.error:
raise ValueError(expr)

if not regex:
raise ValueError(expr)
else:
# sre_parse does not support the new features in regex. To not completely fail in that case,
# we manually test for the most important info (whether the empty string is matched)
c = regex.compile(regexp_final)
if c.match('') is None:
return 1, sre_constants.MAXREPEAT
else:
return 0, sre_constants.MAXREPEAT
###}




+ 17
- 1
tests/test_parser.py Ver ficheiro

@@ -10,7 +10,7 @@ from copy import copy, deepcopy

from lark.utils import Py36, isascii

from lark import Token, Transformer_NonRecursive
from lark import Token, Transformer_NonRecursive, LexError

try:
from cStringIO import StringIO as cStringIO
@@ -2409,6 +2409,22 @@ def _make_parser_test(LEXER, PARSER):
NAME: /[\w]+/
""", regex=True)
self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்')
@unittest.skipIf(not regex, "regex not installed")
def test_regex_width_fallback(self):
    """Exercise terminals whose pattern uses a conditional group.

    Building the parser without ``regex=True`` must raise for such a
    pattern, while building it with ``regex=True`` must succeed as long as
    the terminal cannot match the empty string.
    """
    construction_errors = (GrammarError, LexError, re.error)

    # Conditional pattern whose branches (\d+ or \w+) both need >= 1 char.
    nonempty_grammar = r"""
start: NAME NAME?
NAME: /(?(?=\d)\d+|\w+)/
"""
    # Default construction (no regex=True) is expected to be rejected.
    with self.assertRaises(construction_errors):
        _Lark(nonempty_grammar)

    # With regex=True the grammar loads and the input splits into two NAMEs.
    parser = _Lark(nonempty_grammar, regex=True)
    self.assertEqual(parser.parse("123abc"), Tree('start', ['123', 'abc']))

    # Same terminal, except the \w* branch can match the empty string.
    # NOTE(review): presumably rejected as a zero-width terminal -- confirm.
    maybe_empty_grammar = r"""
start: NAME NAME?
NAME: /(?(?=\d)\d+|\w*)/
"""
    with self.assertRaises(construction_errors):
        _Lark(maybe_empty_grammar, regex=True)

@unittest.skipIf(PARSER!='lalr', "interactive_parser is only implemented for LALR at the moment")
def test_parser_interactive_parser(self):


Carregando…
Cancelar
Guardar