diff --git a/lark/common.py b/lark/common.py index 1717fe7..f745018 100644 --- a/lark/common.py +++ b/lark/common.py @@ -1,7 +1,8 @@ import re -import sre_parse import sys +from .utils import get_regexp_width + Py36 = (sys.version_info[:2] >= (3, 6)) @@ -95,10 +96,10 @@ class PatternRE(Pattern): @property def min_width(self): - return sre_parse.parse(self.to_regexp()).getwidth()[0] + return get_regexp_width(self.to_regexp())[0] @property def max_width(self): - return sre_parse.parse(self.to_regexp()).getwidth()[1] + return get_regexp_width(self.to_regexp())[1] class TokenDef(object): def __init__(self, name, pattern, priority=1): diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index e8e7ab8..db6cdcc 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -1,5 +1,5 @@ import re -import sre_parse +from .utils import get_regexp_width from .lexer import Lexer, ContextualLexer, Token @@ -77,7 +77,7 @@ class Earley_NoLex: self.regexps = {} for t in lexer_conf.tokens: regexp = t.pattern.to_regexp() - width = sre_parse.parse(regexp).getwidth() + width = get_regexp_width(regexp) if width != (1,1): raise GrammarError('Scanless parsing (lexer=None) requires all tokens to have a width of 1 (terminal %s: %s is %s)' % (sym, regexp, width)) self.regexps[t.name] = re.compile(regexp) @@ -121,7 +121,10 @@ class XEarley: self.regexps = {} for t in lexer_conf.tokens: regexp = t.pattern.to_regexp() - assert sre_parse.parse(regexp).getwidth() + try: + assert get_regexp_width(regexp) + except ValueError: + raise ValueError("Bad regexp in token %s: %s" % (t.name, regexp)) self.regexps[t.name] = re.compile(regexp) def parse(self, text): diff --git a/lark/utils.py b/lark/utils.py index 01c70a1..abe036f 100644 --- a/lark/utils.py +++ b/lark/utils.py @@ -112,3 +112,10 @@ except NameError: return -1 +import sre_parse +import sre_constants +def get_regexp_width(regexp): + try: + return sre_parse.parse(regexp).getwidth() + except sre_constants.error: + raise ValueError(regexp)