From 9d6b496f3e23ec9d4cac42b3dd4ee19de11f753d Mon Sep 17 00:00:00 2001
From: Erez Shinan
Date: Thu, 9 Mar 2017 18:41:31 +0200
Subject: [PATCH] Fixed bug in unless: Now uses regexp, not string matching

---
 lark/lexer.py | 52 +++++++++++++++++++++++++++++++--------------------
 1 file changed, 32 insertions(+), 20 deletions(-)

diff --git a/lark/lexer.py b/lark/lexer.py
index d0339d9..92decb9 100644
--- a/lark/lexer.py
+++ b/lark/lexer.py
@@ -49,9 +49,15 @@ def _regexp_has_newline(r):
     return '\n' in r or '\\n' in r or ('(?s)' in r and '.' in r)
 
 def _create_unless_callback(strs):
+    mres = build_mres(strs, match_whole=True)
     def unless_callback(t):
-        if t in strs:
-            t.type = strs[t]
+        # if t in strs:
+        #     t.type = strs[t]
+        for mre, type_from_index in mres:
+            m = mre.match(t.value)
+            if m:
+                value = m.group(0)
+                t.type = type_from_index[m.lastindex]
         return t
     return unless_callback
 
@@ -61,13 +67,14 @@ def _create_unless(tokens):
     embedded_strs = set()
     callback = {}
     for retok in tokens_by_type.get(PatternRE, []):
-        unless = {}
+        unless = [] # {}
         for strtok in tokens_by_type.get(PatternStr, []):
             s = strtok.pattern.value
             m = re.match(retok.pattern.value, s)
             if m and m.group(0) == s:
                 embedded_strs.add(strtok.name)
-                unless[s] = strtok.name
+                #unless[s] = strtok.name
+                unless.append(strtok)
         if unless:
             callback[retok.name] = _create_unless_callback(unless)
 
@@ -75,6 +82,26 @@ def _create_unless(tokens):
     return tokens, callback
 
 
+def _build_mres(tokens, max_size, match_whole):
+    # Python sets an unreasonable group limit (currently 100) in its re module
+    # Worse, the only way to know we reached it is by catching an AssertionError!
+    # This function recursively tries less and less groups until it's successful.
+    postfix = '$' if match_whole else ''
+    mres = []
+    while tokens:
+        try:
+            mre = re.compile(u'|'.join(u'(?P<%s>%s)'%(t.name, t.pattern.to_regexp()) for t in tokens[:max_size])+postfix)
+        except AssertionError:  # Yes, this is what Python provides us.. :/
+            return _build_mres(tokens, max_size//2, match_whole)
+
+        mres.append((mre, {i:n for n,i in mre.groupindex.items()} ))
+        tokens = tokens[max_size:]
+    return mres
+
+def build_mres(tokens, match_whole=False):
+    return _build_mres(tokens, len(tokens), match_whole)
+
+
 class Lexer(object):
     def __init__(self, tokens, ignore=()):
         assert all(isinstance(t, TokenDef) for t in tokens), tokens
@@ -110,23 +137,8 @@ class Lexer(object):
 
         self.tokens = tokens
 
-        self.mres = self._build_mres(tokens, len(tokens))
-
-
-    def _build_mres(self, tokens, max_size):
-        # Python sets an unreasonable group limit (currently 100) in its re module
-        # Worse, the only way to know we reached it is by catching an AssertionError!
-        # This function recursively tries less and less groups until it's successful.
-        mres = []
-        while tokens:
-            try:
-                mre = re.compile(u'|'.join(u'(?P<%s>%s)'%(t.name, t.pattern.to_regexp()) for t in tokens[:max_size]))
-            except AssertionError: # Yes, this is what Python provides us.. :/
-                return self._build_mres(tokens, max_size//2)
+        self.mres = build_mres(tokens)
 
-            mres.append((mre, {i:n for n,i in mre.groupindex.items()} ))
-            tokens = tokens[max_size:]
-        return mres
 
     def lex(self, stream):
         lex_pos = 0