Browse Source

Fixed main bug in test_token_collision2.

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 7 years ago
parent
commit
188386cf04
2 changed files with 15 additions and 8 deletions
  1. lark/common.py (+13, -4)
  2. lark/lexer.py (+2, -4)

+ 13
- 4
lark/common.py View File

@@ -1,4 +1,5 @@
import re import re
import sre_parse


class GrammarError(Exception): class GrammarError(Exception):
pass pass
@@ -57,9 +58,9 @@ class Pattern(object):


# Pattern Hashing assumes all subclasses have a different priority! # Pattern Hashing assumes all subclasses have a different priority!
def __hash__(self): def __hash__(self):
return hash((self.priority, self.value))
return hash((type(self), self.value))
def __eq__(self, other): def __eq__(self, other):
return self.priority == other.priority and self.value == other.value
return type(self) == type(other) and self.value == other.value


def _get_flags(self): def _get_flags(self):
if self.flags: if self.flags:
@@ -71,13 +72,21 @@ class PatternStr(Pattern):
def to_regexp(self): def to_regexp(self):
return self._get_flags() + re.escape(self.value) return self._get_flags() + re.escape(self.value)


priority = 0
@property
def min_width(self):
return len(self.value)
max_width = min_width


class PatternRE(Pattern): class PatternRE(Pattern):
def to_regexp(self): def to_regexp(self):
return self._get_flags() + self.value return self._get_flags() + self.value


priority = 1
@property
def min_width(self):
return sre_parse.parse(self.to_regexp()).getwidth()[0]
@property
def max_width(self):
return sre_parse.parse(self.to_regexp()).getwidth()[1]


class TokenDef(object): class TokenDef(object):
def __init__(self, name, pattern): def __init__(self, name, pattern):


+ 2
- 4
lark/lexer.py View File

@@ -1,7 +1,6 @@
## Lexer Implementation ## Lexer Implementation


import re import re
import sre_parse


from .utils import Str, classify from .utils import Str, classify
from .common import is_terminal, PatternStr, PatternRE, TokenDef from .common import is_terminal, PatternStr, PatternRE, TokenDef
@@ -120,8 +119,7 @@ class Lexer(object):
except: except:
raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern)) raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern))


width = sre_parse.parse(t.pattern.to_regexp()).getwidth()
if width[0] == 0:
if t.pattern.min_width == 0:
raise LexError("Lexer does not allow zero-width tokens. (%s: %s)" % (t.name, t.pattern)) raise LexError("Lexer does not allow zero-width tokens. (%s: %s)" % (t.name, t.pattern))


token_names = {t.name for t in tokens} token_names = {t.name for t in tokens}
@@ -133,7 +131,7 @@ class Lexer(object):
self.newline_types = [t.name for t in tokens if _regexp_has_newline(t.pattern.to_regexp())] self.newline_types = [t.name for t in tokens if _regexp_has_newline(t.pattern.to_regexp())]
self.ignore_types = [t for t in ignore] self.ignore_types = [t for t in ignore]


tokens.sort(key=lambda x:(x.pattern.priority, len(x.pattern.value)), reverse=True)
tokens.sort(key=lambda x:x.pattern.max_width, reverse=True)


tokens, self.callback = _create_unless(tokens) tokens, self.callback = _create_unless(tokens)
assert all(self.callback.values()) assert all(self.callback.values())


Loading…
Cancel
Save