@@ -1,7 +1,6 @@
 ## Lexer Implementation
 
 import re
-import sre_parse
 
 from .utils import Str, classify
 from .common import is_terminal, PatternStr, PatternRE, TokenDef
@@ -120,8 +119,7 @@ class Lexer(object):
             except:
                 raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern))
 
-            width = sre_parse.parse(t.pattern.to_regexp()).getwidth()
-            if width[0] == 0:
+            if t.pattern.min_width == 0:
                 raise LexError("Lexer does not allow zero-width tokens. (%s: %s)" % (t.name, t.pattern))
 
         token_names = {t.name for t in tokens}
@@ -133,7 +131,7 @@ class Lexer(object):
         self.newline_types = [t.name for t in tokens if _regexp_has_newline(t.pattern.to_regexp())]
         self.ignore_types = [t for t in ignore]
 
-        tokens.sort(key=lambda x:(x.pattern.priority, len(x.pattern.value)), reverse=True)
+        tokens.sort(key=lambda x:x.pattern.max_width, reverse=True)
 
         tokens, self.callback = _create_unless(tokens)
         assert all(self.callback.values())
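For context on the removed call: `sre_parse` is CPython's undocumented internal regex parser, and `getwidth()` on a parsed pattern reports the minimum and maximum number of characters the pattern can match. A minimal sketch of what the old code computed for each token (assuming a CPython where `sre_parse` is still importable; it became a deprecated alias of `re._parser` in 3.11):

```python
import sre_parse  # CPython internal; deprecated alias of re._parser in 3.11+

# getwidth() -> (min, max) possible match widths of the parsed pattern.
# The removed Lexer code called this on every token's regexp and rejected
# tokens whose minimum width is 0.
print(sre_parse.parse(r"ab?").getwidth())  # (1, 2)
print(sre_parse.parse(r"a*").getwidth())   # min width is 0 -- the zero-width
                                           # case the Lexer raises LexError for
```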
|
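The replacement line `t.pattern.min_width` implies that the pattern objects (`PatternStr`/`PatternRE` from `.common`) now carry their own width information. That code is not shown in this diff; a hypothetical sketch of such a property, wrapping the same `getwidth()` call, might look like:

```python
import sre_parse

class Pattern(object):
    # Hypothetical sketch, not lark's actual common.py code: caching the
    # width on the pattern object lets the Lexer drop its direct
    # sre_parse dependency, as the import removal above suggests.
    def __init__(self, value):
        self.value = value

    def to_regexp(self):
        raise NotImplementedError

    def _get_width(self):
        return sre_parse.parse(self.to_regexp()).getwidth()

    @property
    def min_width(self):
        return self._get_width()[0]

    @property
    def max_width(self):
        return self._get_width()[1]
```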
|
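The new sort key orders token definitions by descending `max_width`, so tokens that can potentially match more text are tried first, replacing the old priority-then-literal-length ordering. A toy illustration (not lark's API, using pattern length as a stand-in for `max_width`) of why candidate order matters when the first match wins:

```python
import re

# Two toy tokens where one literal is a prefix of the other.
tokens = [("ASSIGN", r"="), ("EQ", r"==")]

def lex_first(text, tokens):
    # Try candidates in order; the first pattern that matches wins.
    for name, pattern in tokens:
        m = re.match(pattern, text)
        if m:
            return name, m.group()

print(lex_first("== 1", tokens))  # ('ASSIGN', '=') -- the wrong, shorter token

# Trying wider-matching patterns first resolves the ambiguity.
tokens.sort(key=lambda t: len(t[1]), reverse=True)
print(lex_first("== 1", tokens))  # ('EQ', '==')
```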
|