| @@ -179,6 +179,8 @@ class EBNF_to_BNF(Transformer_InPlace): | |||||
| if isinstance(rule, Terminal) and rule.filter_out and not ( | if isinstance(rule, Terminal) and rule.filter_out and not ( | ||||
| self.rule_options and self.rule_options.keep_all_tokens): | self.rule_options and self.rule_options.keep_all_tokens): | ||||
| empty = ST('expansion', []) | empty = ST('expansion', []) | ||||
| elif isinstance(rule, NonTerminal) and rule.name.startswith('_'): | |||||
| empty = ST('expansion', []) | |||||
| else: | else: | ||||
| empty = _EMPTY | empty = _EMPTY | ||||
| return ST('expansions', [rule, empty]) | return ST('expansions', [rule, empty]) | ||||
| @@ -506,11 +508,11 @@ class Grammar: | |||||
| if alias and name.startswith('_'): | if alias and name.startswith('_'): | ||||
| raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias)) | raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias)) | ||||
| empty_indices = [i for i, x in enumerate(expansion) if x==_EMPTY] | |||||
| if empty_indices: | |||||
| empty_indices = [x==_EMPTY for i, x in enumerate(expansion)] | |||||
| if any(empty_indices): | |||||
| assert options | assert options | ||||
| exp_options = copy(options) | exp_options = copy(options) | ||||
| exp_options.empty_indices = len(expansion), empty_indices | |||||
| exp_options.empty_indices = empty_indices | |||||
| expansion = [x for x in expansion if x!=_EMPTY] | expansion = [x for x in expansion if x!=_EMPTY] | ||||
| else: | else: | ||||
| exp_options = options | exp_options = options | ||||
| @@ -17,24 +17,6 @@ class ExpandSingleChild: | |||||
| else: | else: | ||||
| return self.node_builder(children) | return self.node_builder(children) | ||||
class AddMaybePlaceholder:
    """Wrap a node builder and insert ``None`` placeholders into its result.

    ``empty_indices`` is either falsy (nothing to insert) or a pair
    ``(exp_len, indices)``; ``node_builder`` is called with the children
    and must return an object exposing a mutable ``children`` list.
    """

    def __init__(self, empty_indices, node_builder):
        self.node_builder = node_builder
        self.empty_indices = empty_indices

    def __call__(self, children):
        """Build the node, then insert one ``None`` per recorded index."""
        tree = self.node_builder(children)
        if not self.empty_indices:
            return tree

        expansion_len, positions = self.empty_indices
        # The shift is computed once, before any insertion. It is zero when
        # the built node has exactly (expansion_len - len(positions))
        # children, and positive when it has more (the original comment
        # cites repetition such as ("a" "b"?)+ as the reason).
        shift = len(tree.children) - (expansion_len - len(positions))
        for pos in positions:
            tree.children.insert(pos + shift, None)
        return tree
| class PropagatePositions: | class PropagatePositions: | ||||
| def __init__(self, node_builder): | def __init__(self, node_builder): | ||||
| self.node_builder = node_builder | self.node_builder = node_builder | ||||
| @@ -77,23 +59,54 @@ class PropagatePositions: | |||||
class ChildFilter:
    """Select, inline and pad the children handed to a node builder.

    ``to_include`` is a sequence of ``(index, to_expand, add_none)``
    triples: ``index`` picks a child, ``to_expand`` says whether that
    child's own ``children`` are spliced in instead of the child itself,
    and ``add_none`` is the number of ``None`` placeholders emitted just
    before it. ``append_none`` is the number of placeholders emitted
    after the last selected child.
    """

    def __init__(self, to_include, append_none, node_builder):
        self.node_builder = node_builder
        self.to_include = to_include
        self.append_none = append_none

    def __call__(self, children):
        """Return ``node_builder`` applied to the filtered child list."""
        kept = []
        for index, expand_inline, placeholders in self.to_include:
            if placeholders:
                kept.extend([None] * placeholders)
            child = children[index]
            if expand_inline:
                kept.extend(child.children)
            else:
                kept.append(child)

        if self.append_none:
            kept.extend([None] * self.append_none)
        return self.node_builder(kept)
class ChildFilterLALR(ChildFilter):
    """Optimized childfilter for LALR.

    Assumes no duplication in the parse tree, so it is safe to change it:
    when the first thing selected is an inlined child, that child's own
    ``children`` list is reused (and subsequently mutated) as the result
    instead of being copied.
    """

    def __call__(self, children):
        """Return ``node_builder`` applied to the filtered child list."""
        result = []
        for idx, inline, pad in self.to_include:
            if pad:
                result += [None] * pad
            child = children[idx]
            if not inline:
                result.append(child)
            elif result:
                result += child.children
            else:
                # Optimize for left-recursion: adopt the child's list
                # rather than copying it into a fresh one.
                result = child.children

        if self.append_none:
            result += [None] * self.append_none
        return self.node_builder(result)
| class ChildFilterLALR_NoPlaceholders(ChildFilter): | |||||
| "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)" | |||||
| def __init__(self, to_include, node_builder): | |||||
| self.node_builder = node_builder | |||||
| self.to_include = to_include | |||||
| def __call__(self, children): | def __call__(self, children): | ||||
| filtered = [] | filtered = [] | ||||
| for i, to_expand in self.to_include: | for i, to_expand in self.to_include: | ||||
| @@ -110,12 +123,32 @@ class ChildFilterLALR(ChildFilter): | |||||
| def _should_expand(sym): | def _should_expand(sym): | ||||
| return not sym.is_term and sym.name.startswith('_') | return not sym.is_term and sym.name.startswith('_') | ||||
def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous):
    """Return a partially-applied child filter for ``expansion``, or None.

    A symbol is dropped when it is a terminal flagged ``filter_out`` and
    ``keep_all_tokens`` is false. A filter is only produced when at least
    one symbol is dropped or at least one kept symbol must be expanded.
    """
    kept = []
    for pos, sym in enumerate(expansion):
        if not keep_all_tokens and sym.is_term and sym.filter_out:
            continue
        kept.append((pos, _should_expand(sym)))

    dropped_any = len(kept) < len(expansion)
    if not dropped_any and not any(flag for _, flag in kept):
        return None

    filter_cls = ChildFilter if ambiguous else ChildFilterLALR
    return partial(filter_cls, kept)
def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices):
    """Return a partially-applied child filter for ``expansion``, or None.

    ``_empty_indices`` is either falsy (no placeholders) or a sequence of
    booleans in which each ``False`` stands for one symbol of
    ``expansion`` and each ``True`` for a placeholder position between,
    before or after those symbols.

    Returns ``None`` when there are no placeholders, no symbol is dropped
    and no kept symbol needs expanding. Otherwise returns a ``partial``
    of the filter class matching the parser mode, which still expects the
    node builder as its final argument.
    """
    # Prepare empty_indices as: how many Nones to insert at each index?
    # Slot k counts the placeholders directly before symbol k; the extra
    # last slot counts the trailing placeholders.
    if _empty_indices:
        assert _empty_indices.count(False) == len(expansion)
        empty_indices = [0]
        for is_placeholder in _empty_indices:
            if is_placeholder:
                empty_indices[-1] += 1
            else:
                empty_indices.append(0)
        assert len(empty_indices) == len(expansion)+1, (empty_indices, len(expansion))
    else:
        empty_indices = [0] * (len(expansion)+1)

    to_include = []
    nones_to_add = 0
    for i, sym in enumerate(expansion):
        # Placeholders in front of a dropped symbol carry over to the next
        # symbol that is kept.
        nones_to_add += empty_indices[i]
        if keep_all_tokens or not (sym.is_term and sym.filter_out):
            to_include.append((i, _should_expand(sym), nones_to_add))
            nones_to_add = 0

    # Whatever is still pending goes after the last kept child.
    nones_to_add += empty_indices[len(expansion)]

    needs_filter = (
        _empty_indices
        or len(to_include) < len(expansion)
        or any(to_expand for _, to_expand, _ in to_include)
    )
    if not needs_filter:
        return None

    if _empty_indices or ambiguous:
        return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include, nones_to_add)

    # LALR without placeholders
    return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i, x, _ in to_include])
| class Callback(object): | class Callback(object): | ||||
| @@ -150,8 +183,7 @@ class ParseTreeBuilder: | |||||
| wrapper_chain = filter(None, [ | wrapper_chain = filter(None, [ | ||||
| (expand_single_child and not rule.alias) and ExpandSingleChild, | (expand_single_child and not rule.alias) and ExpandSingleChild, | ||||
| maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous), | |||||
| self.maybe_placeholders and partial(AddMaybePlaceholder, options.empty_indices if options else None), | |||||
| maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders and options else None), | |||||
| self.propagate_positions and PropagatePositions, | self.propagate_positions and PropagatePositions, | ||||
| ]) | ]) | ||||
| @@ -1035,7 +1035,7 @@ def _make_parser_test(LEXER, PARSER): | |||||
| bb_.1: "bb" | bb_.1: "bb" | ||||
| """ | """ | ||||
| l = _Lark(grammar, ambiguity='resolve__antiscore_sum') | |||||
| l = Lark(grammar, ambiguity='resolve__antiscore_sum') | |||||
| res = l.parse('abba') | res = l.parse('abba') | ||||
| self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_') | self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_') | ||||
| @@ -1048,7 +1048,7 @@ def _make_parser_test(LEXER, PARSER): | |||||
| bb_: "bb" | bb_: "bb" | ||||
| """ | """ | ||||
| l = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum') | |||||
| l = Lark(grammar, ambiguity='resolve__antiscore_sum') | |||||
| res = l.parse('abba') | res = l.parse('abba') | ||||
| self.assertEqual(''.join(child.data for child in res.children), 'indirection') | self.assertEqual(''.join(child.data for child in res.children), 'indirection') | ||||
| @@ -1061,7 +1061,7 @@ def _make_parser_test(LEXER, PARSER): | |||||
| bb_.3: "bb" | bb_.3: "bb" | ||||
| """ | """ | ||||
| l = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum') | |||||
| l = Lark(grammar, ambiguity='resolve__antiscore_sum') | |||||
| res = l.parse('abba') | res = l.parse('abba') | ||||
| self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_') | self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_') | ||||
| @@ -1074,7 +1074,7 @@ def _make_parser_test(LEXER, PARSER): | |||||
| bb_.3: "bb" | bb_.3: "bb" | ||||
| """ | """ | ||||
| l = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum') | |||||
| l = Lark(grammar, ambiguity='resolve__antiscore_sum') | |||||
| res = l.parse('abba') | res = l.parse('abba') | ||||
| self.assertEqual(''.join(child.data for child in res.children), 'indirection') | self.assertEqual(''.join(child.data for child in res.children), 'indirection') | ||||
| @@ -1248,18 +1248,19 @@ def _make_parser_test(LEXER, PARSER): | |||||
| res = p.parse('B') | res = p.parse('B') | ||||
| self.assertEqual(len(res.children), 3) | self.assertEqual(len(res.children), 3) | ||||
| @unittest.skipIf(PARSER=='cyk', "Empty rules") | |||||
| def test_maybe_placeholders(self): | def test_maybe_placeholders(self): | ||||
| # Anonymous tokens shouldn't count | # Anonymous tokens shouldn't count | ||||
| p = Lark("""start: "a"? "b"? "c"? """, maybe_placeholders=True) | |||||
| p = _Lark("""start: "a"? "b"? "c"? """, maybe_placeholders=True) | |||||
| self.assertEqual(p.parse("").children, []) | self.assertEqual(p.parse("").children, []) | ||||
| # Anonymous tokens shouldn't count, other constructs should | # Anonymous tokens shouldn't count, other constructs should | ||||
| p = Lark("""start: A? "b"? _c? | |||||
| p = _Lark("""start: A? "b"? _c? | |||||
| A: "a" | A: "a" | ||||
| _c: "c" """, maybe_placeholders=True) | _c: "c" """, maybe_placeholders=True) | ||||
| self.assertEqual(p.parse("").children, [None, None]) | |||||
| self.assertEqual(p.parse("").children, [None]) | |||||
| p = Lark("""!start: "a"? "b"? "c"? """, maybe_placeholders=True) | |||||
| p = _Lark("""!start: "a"? "b"? "c"? """, maybe_placeholders=True) | |||||
| self.assertEqual(p.parse("").children, [None, None, None]) | self.assertEqual(p.parse("").children, [None, None, None]) | ||||
| self.assertEqual(p.parse("a").children, ['a', None, None]) | self.assertEqual(p.parse("a").children, ['a', None, None]) | ||||
| self.assertEqual(p.parse("b").children, [None, 'b', None]) | self.assertEqual(p.parse("b").children, [None, 'b', None]) | ||||
| @@ -1269,7 +1270,7 @@ def _make_parser_test(LEXER, PARSER): | |||||
| self.assertEqual(p.parse("bc").children, [None, 'b', 'c']) | self.assertEqual(p.parse("bc").children, [None, 'b', 'c']) | ||||
| self.assertEqual(p.parse("abc").children, ['a', 'b', 'c']) | self.assertEqual(p.parse("abc").children, ['a', 'b', 'c']) | ||||
| p = Lark("""!start: ("a"? "b" "c"?)+ """, maybe_placeholders=True) | |||||
| p = _Lark("""!start: ("a"? "b" "c"?)+ """, maybe_placeholders=True) | |||||
| self.assertEqual(p.parse("b").children, [None, 'b', None]) | self.assertEqual(p.parse("b").children, [None, 'b', None]) | ||||
| self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None]) | self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None]) | ||||
| self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c']) | self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c']) | ||||
| @@ -1280,6 +1281,12 @@ def _make_parser_test(LEXER, PARSER): | |||||
| 'a', 'b', 'c', | 'a', 'b', 'c', | ||||
| None, 'b', None]) | None, 'b', None]) | ||||
| p = _Lark("""!start: "a"? "c"? "b"+ "a"? "d"? """, maybe_placeholders=True) | |||||
| self.assertEqual(p.parse("bb").children, [None, None, 'b', 'b', None, None]) | |||||
| self.assertEqual(p.parse("bd").children, [None, None, 'b', None, 'd']) | |||||
| self.assertEqual(p.parse("abba").children, ['a', None, 'b', 'b', 'a', None]) | |||||
| self.assertEqual(p.parse("cbbbb").children, [None, 'c', 'b', 'b', 'b', 'b', None, None]) | |||||
| _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize() | _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize() | ||||