Browse Source

Fixed placeholder code, should work as expected (Issue #285)

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.66
Erez Shinan 6 years ago
parent
commit
467483553b
3 changed files with 81 additions and 40 deletions
  1. +5
    -3
      lark/load_grammar.py
  2. +60
    -28
      lark/parse_tree_builder.py
  3. +16
    -9
      tests/test_parser.py

+ 5
- 3
lark/load_grammar.py View File

@@ -179,6 +179,8 @@ class EBNF_to_BNF(Transformer_InPlace):
if isinstance(rule, Terminal) and rule.filter_out and not (
self.rule_options and self.rule_options.keep_all_tokens):
empty = ST('expansion', [])
elif isinstance(rule, NonTerminal) and rule.name.startswith('_'):
empty = ST('expansion', [])
else:
empty = _EMPTY
return ST('expansions', [rule, empty])
@@ -506,11 +508,11 @@ class Grammar:
if alias and name.startswith('_'):
raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias))

empty_indices = [i for i, x in enumerate(expansion) if x==_EMPTY]
if empty_indices:
empty_indices = [x==_EMPTY for i, x in enumerate(expansion)]
if any(empty_indices):
assert options
exp_options = copy(options)
exp_options.empty_indices = len(expansion), empty_indices
exp_options.empty_indices = empty_indices
expansion = [x for x in expansion if x!=_EMPTY]
else:
exp_options = options


+ 60
- 28
lark/parse_tree_builder.py View File

@@ -17,24 +17,6 @@ class ExpandSingleChild:
else:
return self.node_builder(children)

class AddMaybePlaceholder:
    """Wrap a node builder and insert ``None`` placeholders into the built node.

    ``empty_indices`` is either falsy (nothing is inserted) or a pair
    ``(exp_len, indices)``: the length of the original expansion and the
    positions at which a ``None`` child is inserted (shifted by an offset,
    see ``__call__``).
    """

    def __init__(self, empty_indices, node_builder):
        self.node_builder = node_builder
        self.empty_indices = empty_indices

    def __call__(self, children):
        """Build the node from ``children``, add the placeholders, and return it."""
        t = self.node_builder(children)
        if not self.empty_indices:
            return t

        exp_len, empty_indices = self.empty_indices
        # Calculate offset to handle repetition correctly
        # e.g. ("a" "b"?)+
        # For non-repetitive rules, offset should be 0
        # NOTE(review): the offset is derived from the number of children the
        # node actually has; presumably it is off when children were filtered
        # out or expanded inline before this wrapper runs -- confirm.
        offset = len(t.children) - (exp_len - len(empty_indices))
        for i in empty_indices:
            # The node's child list is modified in place.
            t.children.insert(i + offset, None)
        return t


class PropagatePositions:
def __init__(self, node_builder):
self.node_builder = node_builder
@@ -77,23 +59,54 @@ class PropagatePositions:


# NOTE(review): this span is a rendered diff hunk with indentation stripped.
# Two variants of some lines appear back to back (two `__init__` signatures,
# two `for` headers); presumably the first of each pair is the removed line
# and the second the added one -- confirm against the real diff.
#
# Wraps a node builder: selects (and optionally expands inline) the children
# listed in `to_include`, then passes the resulting list to `node_builder`.
class ChildFilter:
# Variant without placeholder support.
def __init__(self, to_include, node_builder):
# Variant with placeholder support: `append_none` is the number of `None`
# entries appended after all selected children (see the end of `__call__`).
def __init__(self, to_include, append_none, node_builder):
self.node_builder = node_builder
self.to_include = to_include
self.append_none = append_none

def __call__(self, children):
filtered = []
# Variant iterating over 2-tuples (index, to_expand).
for i, to_expand in self.to_include:

# Variant iterating over 3-tuples (index, to_expand, add_none); `add_none` is
# the number of `None` entries inserted in front of child `i`.
for i, to_expand, add_none in self.to_include:
if add_none:
filtered += [None] * add_none
# An expanded child contributes its own children instead of itself.
if to_expand:
filtered += children[i].children
else:
filtered.append(children[i])

# Trailing placeholders that come after the last selected child.
if self.append_none:
filtered += [None] * self.append_none

return self.node_builder(filtered)

class ChildFilterLALR(ChildFilter):
    """Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)

    Behaves like ``ChildFilter.__call__`` except that, when the first
    contribution comes from an expanded child, that child's own ``children``
    list is reused (and subsequently extended in place) instead of copied.
    """

    def __call__(self, children):
        """Select/expand ``children`` per ``self.to_include`` and build the node."""
        filtered = []
        for i, to_expand, add_none in self.to_include:
            if add_none:
                # Placeholders that belong in front of child ``i``.
                filtered += [None] * add_none

            if not to_expand:
                filtered.append(children[i])
            elif filtered:
                filtered += children[i].children
            else:
                # Optimize for left-recursion: adopt the child's list rather
                # than copying it. Later additions mutate that list.
                filtered = children[i].children

        if self.append_none:
            # Placeholders that come after the last selected child.
            filtered += [None] * self.append_none

        return self.node_builder(filtered)

class ChildFilterLALR_NoPlaceholders(ChildFilter):
"Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
def __init__(self, to_include, node_builder):
self.node_builder = node_builder
self.to_include = to_include

def __call__(self, children):
filtered = []
for i, to_expand in self.to_include:
@@ -110,12 +123,32 @@ class ChildFilterLALR(ChildFilter):
def _should_expand(sym):
return not sym.is_term and sym.name.startswith('_')

def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous):
    """Return a partially-applied child-filter class for ``expansion``, or None.

    A symbol is kept when ``keep_all_tokens`` is true or when it is not a
    filtered-out terminal. A filter is only created when at least one symbol
    is dropped or at least one kept symbol has to be expanded inline;
    otherwise ``None`` is returned.
    """
    to_include = [(i, _should_expand(sym)) for i, sym in enumerate(expansion)
                  if keep_all_tokens or not (sym.is_term and sym.filter_out)]

    if len(to_include) < len(expansion) or any(to_expand for i, to_expand in to_include):
        # ``ChildFilterLALR`` is the in-place variant used when not ambiguous.
        return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include)

    return None
def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices):
    """Return a partially-applied child-filter class for ``expansion``, or None.

    ``_empty_indices`` is either falsy (no placeholders) or a list of booleans
    in which each ``False`` stands for one symbol of ``expansion`` and each
    ``True`` for a ``None`` placeholder at that position.

    Each entry of ``to_include`` is ``(index, should_expand, nones_before)``;
    placeholders after the last kept symbol are passed separately as the
    trailing count. ``None`` is returned when no filtering, expansion or
    placeholder insertion is needed.
    """
    # Prepare empty_indices as: How many Nones to insert at each index?
    if _empty_indices:
        assert _empty_indices.count(False) == len(expansion)
        # Count the run of placeholders in front of every real symbol; the
        # final entry is the run that trails the last symbol.
        empty_indices = [0]
        for is_empty in _empty_indices:
            if is_empty:
                empty_indices[-1] += 1
            else:
                empty_indices.append(0)
        assert len(empty_indices) == len(expansion)+1, (empty_indices, len(expansion))
    else:
        empty_indices = [0] * (len(expansion)+1)

    to_include = []
    nones_to_add = 0
    for i, sym in enumerate(expansion):
        nones_to_add += empty_indices[i]
        if keep_all_tokens or not (sym.is_term and sym.filter_out):
            to_include.append((i, _should_expand(sym), nones_to_add))
            nones_to_add = 0
        # A dropped symbol carries its pending placeholders over to the next
        # kept symbol (or to the trailing count).

    nones_to_add += empty_indices[len(expansion)]

    if _empty_indices or len(to_include) < len(expansion) or any(to_expand for i, to_expand, _ in to_include):
        if _empty_indices or ambiguous:
            return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include, nones_to_add)
        # LALR without placeholders
        return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i, x, _ in to_include])

    return None


class Callback(object):
@@ -150,8 +183,7 @@ class ParseTreeBuilder:

wrapper_chain = filter(None, [
(expand_single_child and not rule.alias) and ExpandSingleChild,
maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous),
self.maybe_placeholders and partial(AddMaybePlaceholder, options.empty_indices if options else None),
maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders and options else None),
self.propagate_positions and PropagatePositions,
])



+ 16
- 9
tests/test_parser.py View File

@@ -1035,7 +1035,7 @@ def _make_parser_test(LEXER, PARSER):
bb_.1: "bb"
"""

l = _Lark(grammar, ambiguity='resolve__antiscore_sum')
l = Lark(grammar, ambiguity='resolve__antiscore_sum')
res = l.parse('abba')
self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

@@ -1048,7 +1048,7 @@ def _make_parser_test(LEXER, PARSER):
bb_: "bb"
"""

l = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum')
l = Lark(grammar, ambiguity='resolve__antiscore_sum')
res = l.parse('abba')
self.assertEqual(''.join(child.data for child in res.children), 'indirection')

@@ -1061,7 +1061,7 @@ def _make_parser_test(LEXER, PARSER):
bb_.3: "bb"
"""

l = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum')
l = Lark(grammar, ambiguity='resolve__antiscore_sum')
res = l.parse('abba')
self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

@@ -1074,7 +1074,7 @@ def _make_parser_test(LEXER, PARSER):
bb_.3: "bb"
"""

l = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum')
l = Lark(grammar, ambiguity='resolve__antiscore_sum')
res = l.parse('abba')
self.assertEqual(''.join(child.data for child in res.children), 'indirection')

@@ -1248,18 +1248,19 @@ def _make_parser_test(LEXER, PARSER):
res = p.parse('B')
self.assertEqual(len(res.children), 3)

@unittest.skipIf(PARSER=='cyk', "Empty rules")
def test_maybe_placeholders(self):
# Anonymous tokens shouldn't count
p = Lark("""start: "a"? "b"? "c"? """, maybe_placeholders=True)
p = _Lark("""start: "a"? "b"? "c"? """, maybe_placeholders=True)
self.assertEqual(p.parse("").children, [])

# Anonymous tokens shouldn't count, other constructs should
p = Lark("""start: A? "b"? _c?
p = _Lark("""start: A? "b"? _c?
A: "a"
_c: "c" """, maybe_placeholders=True)
self.assertEqual(p.parse("").children, [None, None])
self.assertEqual(p.parse("").children, [None])

p = Lark("""!start: "a"? "b"? "c"? """, maybe_placeholders=True)
p = _Lark("""!start: "a"? "b"? "c"? """, maybe_placeholders=True)
self.assertEqual(p.parse("").children, [None, None, None])
self.assertEqual(p.parse("a").children, ['a', None, None])
self.assertEqual(p.parse("b").children, [None, 'b', None])
@@ -1269,7 +1270,7 @@ def _make_parser_test(LEXER, PARSER):
self.assertEqual(p.parse("bc").children, [None, 'b', 'c'])
self.assertEqual(p.parse("abc").children, ['a', 'b', 'c'])

p = Lark("""!start: ("a"? "b" "c"?)+ """, maybe_placeholders=True)
p = _Lark("""!start: ("a"? "b" "c"?)+ """, maybe_placeholders=True)
self.assertEqual(p.parse("b").children, [None, 'b', None])
self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None])
self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c'])
@@ -1280,6 +1281,12 @@ def _make_parser_test(LEXER, PARSER):
'a', 'b', 'c',
None, 'b', None])

p = _Lark("""!start: "a"? "c"? "b"+ "a"? "d"? """, maybe_placeholders=True)
self.assertEqual(p.parse("bb").children, [None, None, 'b', 'b', None, None])
self.assertEqual(p.parse("bd").children, [None, None, 'b', None, 'd'])
self.assertEqual(p.parse("abba").children, ['a', None, 'b', 'b', 'a', None])
self.assertEqual(p.parse("cbbbb").children, [None, 'c', 'b', 'b', 'b', 'b', None, None])



_NAME = "Test" + PARSER.capitalize() + LEXER.capitalize()


Loading…
Cancel
Save