diff --git a/lark/common.py b/lark/common.py
index 2c940bd..f9b0990 100644
--- a/lark/common.py
+++ b/lark/common.py
@@ -1,4 +1,5 @@
 import re
+import sre_parse
 
 class GrammarError(Exception):
     pass
@@ -57,9 +58,9 @@ class Pattern(object):
 
     # Pattern Hashing assumes all subclasses have a different priority!
     def __hash__(self):
-        return hash((self.priority, self.value))
+        return hash((type(self), self.value))
     def __eq__(self, other):
-        return self.priority == other.priority and self.value == other.value
+        return type(self) == type(other) and self.value == other.value
 
     def _get_flags(self):
         if self.flags:
@@ -71,13 +72,21 @@ class PatternStr(Pattern):
     def to_regexp(self):
         return self._get_flags() + re.escape(self.value)
 
-    priority = 0
+    @property
+    def min_width(self):
+        return len(self.value)
+    max_width = min_width
 
 class PatternRE(Pattern):
     def to_regexp(self):
         return self._get_flags() + self.value
 
-    priority = 1
+    @property
+    def min_width(self):
+        return sre_parse.parse(self.to_regexp()).getwidth()[0]
+    @property
+    def max_width(self):
+        return sre_parse.parse(self.to_regexp()).getwidth()[1]
 
 class TokenDef(object):
     def __init__(self, name, pattern):
diff --git a/lark/lexer.py b/lark/lexer.py
index 86d976f..4e6d5b9 100644
--- a/lark/lexer.py
+++ b/lark/lexer.py
@@ -1,7 +1,6 @@
 ## Lexer Implementation
 
 import re
-import sre_parse
 
 from .utils import Str, classify
 from .common import is_terminal, PatternStr, PatternRE, TokenDef
@@ -120,8 +119,7 @@ class Lexer(object):
             except:
                 raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern))
 
-            width = sre_parse.parse(t.pattern.to_regexp()).getwidth()
-            if width[0] == 0:
+            if t.pattern.min_width == 0:
                 raise LexError("Lexer does not allow zero-width tokens. (%s: %s)" % (t.name, t.pattern))
 
         token_names = {t.name for t in tokens}
@@ -133,7 +131,7 @@ class Lexer(object):
         self.newline_types = [t.name for t in tokens if _regexp_has_newline(t.pattern.to_regexp())]
         self.ignore_types = [t for t in ignore]
 
-        tokens.sort(key=lambda x:(x.pattern.priority, len(x.pattern.value)), reverse=True)
+        tokens.sort(key=lambda x:x.pattern.max_width, reverse=True)
 
         tokens, self.callback = _create_unless(tokens)
         assert all(self.callback.values())
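Side note on the width computation this change relies on (a minimal sketch, not part of the diff itself): sre_parse is an undocumented internal helper of the standard re module, and the SubPattern object returned by sre_parse.parse() has a getwidth() method that reports a (min, max) tuple of character widths for the pattern. On Python 3.11+ importing sre_parse emits a DeprecationWarning (the module now lives at re._parser), but the call behaves the same:

    import sre_parse

    # getwidth() reports (min_width, max_width) in characters for a pattern.
    print(sre_parse.parse(r"abc").getwidth())   # (3, 3)  fixed-width literal
    print(sre_parse.parse(r"a|bc").getwidth())  # (1, 2)  shortest/longest branch
    print(sre_parse.parse(r"a*").getwidth())    # min is 0; max is a large sentinel.
                                                # A min width of 0 is exactly what the
                                                # lexer above rejects as a zero-width token.

Sorting tokens by max_width in descending order puts the tokens that can match the longest input first, which the old (priority, len(value)) key only approximated: regexps (priority 1) before strings (priority 0), then longer pattern sources first.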