@@ -1,4 +1,5 @@
 import re
+import sre_parse
 
 class GrammarError(Exception):
     pass
@@ -57,9 +58,9 @@ class Pattern(object):
 
     # Pattern Hashing assumes all subclasses have a different priority!
     def __hash__(self):
-        return hash((self.priority, self.value))
+        return hash((type(self), self.value))
     def __eq__(self, other):
-        return self.priority == other.priority and self.value == other.value
+        return type(self) == type(other) and self.value == other.value
 
     def _get_flags(self):
         if self.flags:
@@ -71,13 +72,21 @@ class PatternStr(Pattern):
     def to_regexp(self):
         return self._get_flags() + re.escape(self.value)
 
-    priority = 0
+    @property
+    def min_width(self):
+        return len(self.value)
+    max_width = min_width
 
 class PatternRE(Pattern):
     def to_regexp(self):
         return self._get_flags() + self.value
 
-    priority = 1
+    @property
+    def min_width(self):
+        return sre_parse.parse(self.to_regexp()).getwidth()[0]
+    @property
+    def max_width(self):
+        return sre_parse.parse(self.to_regexp()).getwidth()[1]
 
 class TokenDef(object):
     def __init__(self, name, pattern):
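
For reference, a quick sketch of what the new min_width/max_width properties compute. It is not part of the patch; it only demonstrates the (min, max) match widths that the undocumented stdlib sre_parse module reports for a parsed pattern, which PatternRE now exposes and PatternStr derives from len(self.value):

    # Illustrative only -- shows sre_parse.parse(...).getwidth(), the call
    # the new PatternRE.min_width / max_width properties rely on.
    import sre_parse

    for regexp in ['abc', 'a?b*', '[0-9]+']:
        min_w, max_w = sre_parse.parse(regexp).getwidth()
        print(regexp, min_w, max_w)

    # 'abc'     -> 3, 3
    # 'a?b*'    -> 0, <large sentinel>   (unbounded repetition; sre caps the width at MAXREPEAT)
    # '[0-9]+'  -> 1, <large sentinel>
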
@@ -1,7 +1,6 @@
 ## Lexer Implementation
 
 import re
-import sre_parse
 
 from .utils import Str, classify
 from .common import is_terminal, PatternStr, PatternRE, TokenDef
@@ -120,8 +119,7 @@ class Lexer(object):
             except:
                 raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern))
 
-            width = sre_parse.parse(t.pattern.to_regexp()).getwidth()
-            if width[0] == 0:
+            if t.pattern.min_width == 0:
                 raise LexError("Lexer does not allow zero-width tokens. (%s: %s)" % (t.name, t.pattern))
 
         token_names = {t.name for t in tokens}
@@ -133,7 +131,7 @@ class Lexer(object):
         self.newline_types = [t.name for t in tokens if _regexp_has_newline(t.pattern.to_regexp())]
         self.ignore_types = [t for t in ignore]
 
-        tokens.sort(key=lambda x:(x.pattern.priority, len(x.pattern.value)), reverse=True)
+        tokens.sort(key=lambda x:x.pattern.max_width, reverse=True)
 
         tokens, self.callback = _create_unless(tokens)
         assert all(self.callback.values())
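
A small, self-contained sketch of the effect of the new sort key. It is illustrative only and uses plain (name, regexp) tuples instead of the real TokenDef/Pattern objects (whose constructors are not shown in this diff): tokens whose patterns can match the most text are moved to the front, mirroring tokens.sort(key=lambda x: x.pattern.max_width, reverse=True) above.

    # Illustrative only -- stand-in data, not the library's own classes.
    import sre_parse

    tokens = [('PLUS', r'\+'), ('NUMBER', r'[0-9]+'), ('IF', r'if')]

    def max_width(regexp):
        # Upper bound on how many characters the pattern can consume.
        return sre_parse.parse(regexp).getwidth()[1]

    # Longest-possible-match patterns first, as in the patched Lexer.__init__.
    tokens.sort(key=lambda t: max_width(t[1]), reverse=True)
    print([name for name, _ in tokens])   # ['NUMBER', 'IF', 'PLUS']
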