瀏覽代碼

More flags work

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 7 年之前
父節點
當前提交
794a1c4965
共有 4 個檔案被更改,包括 32 行新增、9 行刪除
  1. +1
    -1
      lark/common.py
  2. +9
    -6
      lark/lexer.py
  3. +5
    -2
      lark/load_grammar.py
  4. +17
    -0
      tests/test_parser.py

+ 1
- 1
lark/common.py 查看文件

@@ -53,7 +53,7 @@ class Pattern(object):
self.flags = flags

def __repr__(self):
    # Include the flags prefix in the repr so two patterns that share a
    # value but differ in flags (e.g. case-insensitive "i") are
    # distinguishable when debugging. The flag-less form was superseded
    # by this one in the same commit; keeping both left dead code.
    return repr(self._get_flags() + self.value)

# Pattern Hashing assumes all subclasses have a different priority!
def __hash__(self):


+ 9
- 6
lark/lexer.py 查看文件

@@ -58,6 +58,7 @@ def _create_unless_callback(strs):
if m:
value = m.group(0)
t.type = type_from_index[m.lastindex]
break
return t
return unless_callback

@@ -65,20 +66,22 @@ def _create_unless(tokens):
tokens_by_type = classify(tokens, lambda t: type(t.pattern))
assert len(tokens_by_type) <= 2, tokens_by_type.keys()
embedded_strs = set()
delayed_strs = []
callback = {}
for retok in tokens_by_type.get(PatternRE, []):
unless = [] # {}
for strtok in tokens_by_type.get(PatternStr, []):
s = strtok.pattern.value
m = re.match(retok.pattern.value, s)
m = re.match(retok.pattern.to_regexp(), s)
if m and m.group(0) == s:
if strtok.pattern.flags:
delayed_strs.append(strtok)
embedded_strs.add(strtok.name)
#unless[s] = strtok.name
unless.append(strtok)
if unless:
callback[retok.name] = _create_unless_callback(unless)

tokens = [t for t in tokens if t.name not in embedded_strs]
tokens = [t for t in tokens if t.name not in embedded_strs] + delayed_strs
return tokens, callback


@@ -90,7 +93,7 @@ def _build_mres(tokens, max_size, match_whole):
mres = []
while tokens:
try:
mre = re.compile(u'|'.join(u'(?P<%s>%s)'%(t.name, t.pattern.to_regexp()) for t in tokens[:max_size])+postfix)
mre = re.compile(u'|'.join(u'(?P<%s>%s)'%(t.name, t.pattern.to_regexp()+postfix) for t in tokens[:max_size]))
except AssertionError: # Yes, this is what Python provides us.. :/
return _build_mres(tokens, max_size//2, match_whole)

@@ -130,11 +133,11 @@ class Lexer(object):
self.newline_types = [t.name for t in tokens if _regexp_has_newline(t.pattern.to_regexp())]
self.ignore_types = [t for t in ignore]

tokens.sort(key=lambda x:(x.pattern.priority, len(x.pattern.value)), reverse=True)

tokens, self.callback = _create_unless(tokens)
assert all(self.callback.values())

tokens.sort(key=lambda x:(x.pattern.priority, len(x.pattern.value)), reverse=True)

self.tokens = tokens

self.mres = build_mres(tokens)


+ 5
- 2
lark/load_grammar.py 查看文件

@@ -324,16 +324,19 @@ class TokenTreeToPattern(Transformer):
def expansion(self, items):
    """Concatenate a sequence of token patterns into one regexp pattern.

    A single item is returned unchanged. Otherwise the items' regexps
    are joined, carrying over the (shared) flags of the first item.

    Raises:
        GrammarError: if the items carry conflicting regexp flags,
            since the joined pattern can only hold one flag set.
    """
    if len(items) == 1:
        return items[0]
    # All joined sub-patterns must agree on flags; a mixed set would
    # silently apply one item's flags to all of them.
    if len(set(i.flags for i in items)) > 1:
        raise GrammarError("Lark doesn't support joining tokens with conflicting flags!")
    return PatternRE(''.join(i.to_regexp() for i in items), items[0].flags)

def expansions(self, exps):
    """Combine alternative token patterns into one alternation regexp."""
    if len(exps) == 1:
        return exps[0]
    # Flag handling for alternations is not supported here: every
    # branch must be flag-less before they can be merged.
    assert all(e.flags is None for e in exps)
    alternation = '|'.join(e.to_regexp() for e in exps)
    return PatternRE('(?:%s)' % alternation)

def expr(self, args):
    """Apply a regexp operator (e.g. ``*``, ``+``, ``?``) to a pattern.

    Wraps the inner pattern in a non-capturing group so the operator
    binds to the whole pattern, and propagates the inner pattern's
    flags onto the result. The earlier flag-dropping return was
    superseded in the same commit and left as unreachable code.
    """
    inner, op = args
    return PatternRE('(?:%s)%s' % (inner.to_regexp(), op), inner.flags)


def interleave(l, item):


+ 17
- 0
tests/test_parser.py 查看文件

@@ -525,6 +525,23 @@ def _make_parser_test(LEXER, PARSER):
"""
self.assertRaises(GrammarError, _Lark, g)

g = """start: NAME "," "a"
NAME: /[a-z_]/i /[a-z0-9_]/i*
"""
l = _Lark(g)
tree = l.parse('ab,a')
self.assertEqual(tree.children, ['ab'])
tree = l.parse('AB,a')
self.assertEqual(tree.children, ['AB'])

def test_token_flags2(self):
    """A case-insensitive literal inside an alternation should match
    either case, while the plain /a/ /b/? branch stays case-sensitive
    — parsing 'aA' must yield the two tokens unchanged."""
    g = """!start: ("a"i | /a/ /b/?)+
"""
    parser = _Lark(g)
    result = parser.parse('aA')
    self.assertEqual(result.children, ['a', 'A'])







Loading…
取消
儲存