@@ -179,6 +179,8 @@ class EBNF_to_BNF(Transformer_InPlace):
         if isinstance(rule, Terminal) and rule.filter_out and not (
                 self.rule_options and self.rule_options.keep_all_tokens):
            empty = ST('expansion', [])
+        elif isinstance(rule, NonTerminal) and rule.name.startswith('_'):
+            empty = ST('expansion', [])
         else:
            empty = _EMPTY
         return ST('expansions', [rule, empty])
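
Context for this hunk: `rule?` compiles to `expansions(rule, empty)`, and the shape of `empty` decides whether a missing optional later yields a `None` placeholder. The new `elif` gives inlined rules (names starting with `_`) a genuinely empty expansion instead of the `_EMPTY` marker, so their absence leaves no placeholder. A minimal sketch of the observable effect, assuming lark's public API with the `maybe_placeholders` flag this patch introduces:

```python
from lark import Lark

# `_c` is inlined (leading underscore), so `_c?` not matching leaves no
# placeholder behind; an optional *named* terminal like `A?` would.
p = Lark("""start: "a" _c?
            _c: "c"
         """, maybe_placeholders=True)

assert p.parse("a").children == []
```
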
@@ -506,11 +508,11 @@ class Grammar:
             if alias and name.startswith('_'):
                 raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias))

-            empty_indices = [i for i, x in enumerate(expansion) if x==_EMPTY]
-            if empty_indices:
+            empty_indices = [x==_EMPTY for x in expansion]
+            if any(empty_indices):
                 assert options
                 exp_options = copy(options)
-                exp_options.empty_indices = len(expansion), empty_indices
+                exp_options.empty_indices = empty_indices
                 expansion = [x for x in expansion if x!=_EMPTY]
             else:
                 exp_options = options
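
The representation of `empty_indices` changes here: instead of `(original_length, positions)` it becomes a boolean mask over the original expansion, which survives the filtering of `_EMPTY` on the line below it. A standalone illustration (strings stand in for grammar symbols):

```python
expansion = ['A', '_EMPTY', 'B', '_EMPTY']

# Old encoding: (original length, indices of the empties)
old = (len(expansion), [i for i, x in enumerate(expansion) if x == '_EMPTY'])
assert old == (4, [1, 3])

# New encoding: one boolean per slot of the *original* expansion
new = [x == '_EMPTY' for x in expansion]
assert new == [False, True, False, True]
```
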
@@ -17,24 +17,6 @@ class ExpandSingleChild:
         else:
             return self.node_builder(children)

-class AddMaybePlaceholder:
-    def __init__(self, empty_indices, node_builder):
-        self.node_builder = node_builder
-        self.empty_indices = empty_indices
-
-    def __call__(self, children):
-        t = self.node_builder(children)
-        if self.empty_indices:
-            exp_len, empty_indices = self.empty_indices
-            # Calculate offset to handle repetition correctly,
-            #   e.g. ("a" "b"?)+
-            # For non-repetitive rules, offset should be 0
-            offset = len(t.children) - (exp_len - len(empty_indices))
-            for i in empty_indices:
-                t.children.insert(i + offset, None)
-        return t
-

 class PropagatePositions:
     def __init__(self, node_builder):
         self.node_builder = node_builder
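
Removing `AddMaybePlaceholder` is the heart of the patch: its single global `offset` shifts every insertion by the same amount, which breaks whenever a repeated group sits between optionals. A worked failure case (illustration only, mirroring the new test added at the bottom of this diff):

```python
# !start: "a"? "c"? "b"+ "a"? "d"?  parsing "bb": the matched expansion is
# [_EMPTY, _EMPTY, <b+>, _EMPTY, _EMPTY], i.e. exp_len=5 with empties at
# 0, 1, 3, 4, and the inlined "b"+ group contributes *two* children.
children = ['b', 'b']
exp_len, empty_indices = 5, [0, 1, 3, 4]

offset = len(children) - (exp_len - len(empty_indices))   # 2 - 1 = 1
for i in empty_indices:
    children.insert(i + offset, None)

# Every None lands one slot too far right; the correct result is
# [None, None, 'b', 'b', None, None].
assert children == ['b', None, None, 'b', None, None]
```
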
@@ -77,23 +59,54 @@ class PropagatePositions:

 class ChildFilter:
-    def __init__(self, to_include, node_builder):
+    def __init__(self, to_include, append_none, node_builder):
         self.node_builder = node_builder
         self.to_include = to_include
+        self.append_none = append_none

     def __call__(self, children):
         filtered = []

-        for i, to_expand in self.to_include:
+        for i, to_expand, add_none in self.to_include:
+            if add_none:
+                filtered += [None] * add_none
             if to_expand:
                 filtered += children[i].children
             else:
                 filtered.append(children[i])

+        if self.append_none:
+            filtered += [None] * self.append_none
+
         return self.node_builder(filtered)

 class ChildFilterLALR(ChildFilter):
     "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"

     def __call__(self, children):
         filtered = []
-        for i, to_expand in self.to_include:
+        for i, to_expand, add_none in self.to_include:
+            if add_none:
+                filtered += [None] * add_none
             if to_expand:
                 if filtered:
                     filtered += children[i].children
                 else:   # Optimize for left-recursion
                     filtered = children[i].children
             else:
                 filtered.append(children[i])

+        if self.append_none:
+            filtered += [None] * self.append_none
+
         return self.node_builder(filtered)

+class ChildFilterLALR_NoPlaceholders(ChildFilter):
+    "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
+    def __init__(self, to_include, node_builder):
+        self.node_builder = node_builder
+        self.to_include = to_include
+
+    def __call__(self, children):
+        filtered = []
+        for i, to_expand in self.to_include:
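
The `to_include` entries grow a third element: how many `None`s to emit before that child, with `append_none` covering empties after the last kept symbol. The contract in isolation (self-contained sketch, no lark imports; `child_filter` is a stand-in name, not the class above):

```python
def child_filter(to_include, append_none, children):
    filtered = []
    for i, to_expand, add_none in to_include:
        if add_none:
            filtered += [None] * add_none   # placeholders before this child
        if to_expand:
            filtered += children[i].children
        else:
            filtered.append(children[i])
    if append_none:
        filtered += [None] * append_none    # trailing placeholders
    return filtered

# Rule  start: A? B C?  where A? was empty: one None before B (index 0),
# then C (index 1), no trailing Nones.
assert child_filter([(0, False, 1), (1, False, 0)], 0, ['b', 'c']) == [None, 'b', 'c']
```
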
@@ -110,12 +123,32 @@ class ChildFilterLALR(ChildFilter):
 def _should_expand(sym):
     return not sym.is_term and sym.name.startswith('_')

-def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous):
-    to_include = [(i, _should_expand(sym)) for i, sym in enumerate(expansion)
-                  if keep_all_tokens or not (sym.is_term and sym.filter_out)]
-
-    if len(to_include) < len(expansion) or any(to_expand for i, to_expand in to_include):
-        return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include)
+def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices):
+    # Prepare empty_indices as: How many Nones to insert at each index?
+    if _empty_indices:
+        assert _empty_indices.count(False) == len(expansion)
+        s = ''.join(str(int(b)) for b in _empty_indices)
+        empty_indices = [len(ones) for ones in s.split('0')]
+        assert len(empty_indices) == len(expansion)+1, (empty_indices, len(expansion))
+    else:
+        empty_indices = [0] * (len(expansion)+1)
+
+    to_include = []
+    nones_to_add = 0
+    for i, sym in enumerate(expansion):
+        nones_to_add += empty_indices[i]
+        if keep_all_tokens or not (sym.is_term and sym.filter_out):
+            to_include.append((i, _should_expand(sym), nones_to_add))
+            nones_to_add = 0
+
+    nones_to_add += empty_indices[len(expansion)]
+
+    if _empty_indices or len(to_include) < len(expansion) or any(to_expand for i, to_expand, _ in to_include):
+        if _empty_indices or ambiguous:
+            return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include, nones_to_add)
+        else:
+            # LALR without placeholders
+            return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i, x, _ in to_include])

 class Callback(object):
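
The string round-trip above is a compact run-length encoding: the boolean mask becomes "how many `None`s precede each surviving symbol, plus one trailing slot". Traced by hand:

```python
_empty_indices = [True, False, True, True, False]    # _EMPTY at 0, 2 and 3

s = ''.join(str(int(b)) for b in _empty_indices)     # '10110'
empty_indices = [len(ones) for ones in s.split('0')]

# One None before symbol 0, two before symbol 1, none after the end:
assert empty_indices == [1, 2, 0]
assert len(empty_indices) == _empty_indices.count(False) + 1
```
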
@@ -150,8 +183,7 @@ class ParseTreeBuilder:
             wrapper_chain = filter(None, [
                 (expand_single_child and not rule.alias) and ExpandSingleChild,
-                maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous),
-                self.maybe_placeholders and partial(AddMaybePlaceholder, options.empty_indices if options else None),
+                maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders and options else None),
                 self.propagate_positions and PropagatePositions,
             ])
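
For readers new to this file: each entry of `wrapper_chain` receives the previous builder and returns a new callable, and `filter(None, ...)` drops the stages that are disabled (the `and` expressions evaluate to a falsy value). A sketch of the fold, under the assumption that it matches how lark applies the chain; `apply_wrappers` and `callback` are illustrative names, not lark's:

```python
from functools import reduce

def apply_wrappers(wrapper_chain, callback):
    # Later entries wrap earlier ones, so the last wrapper runs outermost.
    return reduce(lambda builder, wrapper: wrapper(builder), wrapper_chain, callback)
```
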
@@ -1035,7 +1035,7 @@ def _make_parser_test(LEXER, PARSER):
                 bb_.1: "bb"
             """

-        l = _Lark(grammar, ambiguity='resolve__antiscore_sum')
+        l = Lark(grammar, ambiguity='resolve__antiscore_sum')
         res = l.parse('abba')
         self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')
@@ -1048,7 +1048,7 @@ def _make_parser_test(LEXER, PARSER):
                 bb_: "bb"
             """

-        l = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum')
+        l = Lark(grammar, ambiguity='resolve__antiscore_sum')
         res = l.parse('abba')
         self.assertEqual(''.join(child.data for child in res.children), 'indirection')
@@ -1061,7 +1061,7 @@ def _make_parser_test(LEXER, PARSER):
                 bb_.3: "bb"
             """

-        l = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum')
+        l = Lark(grammar, ambiguity='resolve__antiscore_sum')
         res = l.parse('abba')
         self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')
@@ -1074,7 +1074,7 @@ def _make_parser_test(LEXER, PARSER):
                 bb_.3: "bb"
             """

-        l = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum')
+        l = Lark(grammar, ambiguity='resolve__antiscore_sum')
         res = l.parse('abba')
         self.assertEqual(''.join(child.data for child in res.children), 'indirection')
@@ -1248,18 +1248,19 @@ def _make_parser_test(LEXER, PARSER):
         res = p.parse('B')
         self.assertEqual(len(res.children), 3)

+    @unittest.skipIf(PARSER=='cyk', "Empty rules")
     def test_maybe_placeholders(self):
         # Anonymous tokens shouldn't count
-        p = Lark("""start: "a"? "b"? "c"? """, maybe_placeholders=True)
+        p = _Lark("""start: "a"? "b"? "c"? """, maybe_placeholders=True)
         self.assertEqual(p.parse("").children, [])

         # Anonymous tokens shouldn't count, other constructs should
-        p = Lark("""start: A? "b"? _c?
+        p = _Lark("""start: A? "b"? _c?
                     A: "a"
                     _c: "c" """, maybe_placeholders=True)
-        self.assertEqual(p.parse("").children, [None, None])
+        self.assertEqual(p.parse("").children, [None])

-        p = Lark("""!start: "a"? "b"? "c"? """, maybe_placeholders=True)
+        p = _Lark("""!start: "a"? "b"? "c"? """, maybe_placeholders=True)
         self.assertEqual(p.parse("").children, [None, None, None])
         self.assertEqual(p.parse("a").children, ['a', None, None])
         self.assertEqual(p.parse("b").children, [None, 'b', None])
@@ -1269,7 +1270,7 @@ def _make_parser_test(LEXER, PARSER):
         self.assertEqual(p.parse("bc").children, [None, 'b', 'c'])
         self.assertEqual(p.parse("abc").children, ['a', 'b', 'c'])

-        p = Lark("""!start: ("a"? "b" "c"?)+ """, maybe_placeholders=True)
+        p = _Lark("""!start: ("a"? "b" "c"?)+ """, maybe_placeholders=True)
         self.assertEqual(p.parse("b").children, [None, 'b', None])
         self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None])
         self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c'])
@@ -1280,6 +1281,12 @@ def _make_parser_test(LEXER, PARSER):
                                                  'a', 'b', 'c',
                                                  None, 'b', None])

+        p = _Lark("""!start: "a"? "c"? "b"+ "a"? "d"? """, maybe_placeholders=True)
+        self.assertEqual(p.parse("bb").children, [None, None, 'b', 'b', None, None])
+        self.assertEqual(p.parse("bd").children, [None, None, 'b', None, 'd'])
+        self.assertEqual(p.parse("abba").children, ['a', None, 'b', 'b', 'a', None])
+        self.assertEqual(p.parse("cbbbb").children, [None, 'c', 'b', 'b', 'b', 'b', None, None])
+
     _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize()