@@ -49,9 +49,15 @@ def _regexp_has_newline(r):
     return '\n' in r or '\\n' in r or ('(?s)' in r and '.' in r)
 
 def _create_unless_callback(strs):
+    mres = build_mres(strs, match_whole=True)
     def unless_callback(t):
-        if t in strs:
-            t.type = strs[t]
+        # if t in strs:
+        #     t.type = strs[t]
+        for mre, type_from_index in mres:
+            m = mre.match(t.value)
+            if m:
+                value = m.group(0)
+                t.type = type_from_index[m.lastindex]
         return t
     return unless_callback
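The rewritten callback re-types a token by matching its value against a combined regexp of named groups and mapping re's lastindex back to a terminal name. A minimal standalone sketch of that mechanism, not the library's actual code: Tok and the IF/ELSE/NAME names are made up for illustration, and the '$' anchoring stands in for what match_whole=True is meant to achieve.

    import re

    class Tok:
        def __init__(self, type_, value):
            self.type = type_
            self.value = value

    # One alternation of named groups; '$' inside each group forces a whole-value match.
    mre = re.compile('(?P<IF>if$)|(?P<ELSE>else$)')
    type_from_index = {i: n for n, i in mre.groupindex.items()}

    def unless_callback(t):
        m = mre.match(t.value)
        if m:
            t.type = type_from_index[m.lastindex]
        return t

    print(unless_callback(Tok('NAME', 'if')).type)    # IF   -- the keyword wins
    print(unless_callback(Tok('NAME', 'iffy')).type)  # NAME -- no full match, type unchanged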
@@ -61,13 +67,14 @@ def _create_unless(tokens):
     embedded_strs = set()
     callback = {}
     for retok in tokens_by_type.get(PatternRE, []):
-        unless = {}
+        unless = [] # {}
         for strtok in tokens_by_type.get(PatternStr, []):
             s = strtok.pattern.value
             m = re.match(retok.pattern.value, s)
             if m and m.group(0) == s:
                 embedded_strs.add(strtok.name)
-                unless[s] = strtok.name
+                #unless[s] = strtok.name
+                unless.append(strtok)
         if unless:
             callback[retok.name] = _create_unless_callback(unless)
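The embedded-string test above boils down to: a string terminal is folded into a regexp terminal's unless callback when the regexp matches the whole literal. A quick illustration with plain re; the NAME pattern and the literals are assumed examples, not taken from any particular grammar.

    import re

    name_pattern = r'[a-zA-Z_]\w*'   # stand-in for a PatternRE terminal such as NAME

    for literal in ['if', 'else', '<=']:
        m = re.match(name_pattern, literal)
        if m and m.group(0) == literal:
            print(literal, '-> embedded; handled by the NAME unless callback')
        else:
            print(literal, '-> not embedded; keeps its own terminal')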
@@ -75,6 +82,26 @@ def _create_unless(tokens):
     return tokens, callback
 
+def _build_mres(tokens, max_size, match_whole):
+    # Python sets an unreasonable group limit (currently 100) in its re module
+    # Worse, the only way to know we reached it is by catching an AssertionError!
+    # This function recursively tries less and less groups until it's successful.
+    postfix = '$' if match_whole else ''
+    mres = []
+    while tokens:
+        try:
+            mre = re.compile(u'|'.join(u'(?P<%s>%s)'%(t.name, t.pattern.to_regexp()) for t in tokens[:max_size])+postfix)
+        except AssertionError: # Yes, this is what Python provides us.. :/
+            return _build_mres(tokens, max_size//2, match_whole)
+
+        mres.append((mre, {i:n for n,i in mre.groupindex.items()} ))
+        tokens = tokens[max_size:]
+    return mres
+
+def build_mres(tokens, match_whole=False):
+    return _build_mres(tokens, len(tokens), match_whole)
+
 class Lexer(object):
     def __init__(self, tokens, ignore=()):
         assert all(isinstance(t, TokenDef) for t in tokens), tokens
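These helpers now live at module level so both the Lexer and the unless callbacks can share them. The return value is a list of (compiled regexp, {group index: terminal name}) pairs; on older Python versions, where re refused more than 100 named groups with an AssertionError, the max_size//2 retry splits the terminals across several such pairs. A small sketch of calling build_mres as defined above, where FakeTok and FakePattern are stand-ins for the library's token-definition and pattern objects, not real classes:

    import re

    class FakePattern:
        def __init__(self, regexp):
            self.regexp = regexp
        def to_regexp(self):
            return self.regexp

    class FakeTok:
        def __init__(self, name, regexp):
            self.name = name
            self.pattern = FakePattern(regexp)

    tokens = [FakeTok('NUMBER', r'\d+'), FakeTok('NAME', r'[a-zA-Z_]\w*')]

    mres = build_mres(tokens)        # [(compiled regexp, {1: 'NUMBER', 2: 'NAME'})]
    for mre, type_from_index in mres:
        m = mre.match('42')
        if m:
            print(type_from_index[m.lastindex], m.group(0))   # NUMBER 42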
@@ -110,23 +137,8 @@ class Lexer(object):
         self.tokens = tokens
-        self.mres = self._build_mres(tokens, len(tokens))
-
-    def _build_mres(self, tokens, max_size):
-        # Python sets an unreasonable group limit (currently 100) in its re module
-        # Worse, the only way to know we reached it is by catching an AssertionError!
-        # This function recursively tries less and less groups until it's successful.
-        mres = []
-        while tokens:
-            try:
-                mre = re.compile(u'|'.join(u'(?P<%s>%s)'%(t.name, t.pattern.to_regexp()) for t in tokens[:max_size]))
-            except AssertionError: # Yes, this is what Python provides us.. :/
-                return self._build_mres(tokens, max_size//2)
-
-            mres.append((mre, {i:n for n,i in mre.groupindex.items()} ))
-            tokens = tokens[max_size:]
-        return mres
+        self.mres = build_mres(tokens)
 
     def lex(self, stream):
         lex_pos = 0
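The body of lex is not part of this hunk. As a rough, hypothetical sketch of how a list of (regexp, index-to-name) pairs like self.mres typically drives such a scan loop; the patterns, the WS terminal, and the toy_lex name are illustrative only and do not reflect the library's actual implementation:

    import re

    mre = re.compile(r'(?P<NUMBER>\d+)|(?P<NAME>[a-zA-Z_]\w*)|(?P<WS>\s+)')
    mres = [(mre, {i: n for n, i in mre.groupindex.items()})]

    def toy_lex(stream):
        lex_pos = 0
        while lex_pos < len(stream):
            for mre, type_from_index in mres:
                m = mre.match(stream, lex_pos)
                if m:
                    yield type_from_index[m.lastindex], m.group(0)
                    lex_pos = m.end()
                    break
            else:
                raise SyntaxError('no terminal matches at position %d' % lex_pos)

    print(list(toy_lex('foo 42')))   # [('NAME', 'foo'), ('WS', ' '), ('NUMBER', '42')]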