Browse Source

Fixed placeholder handling; it should now work as expected (Issue #285)

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.66
Erez Shinan 7 years ago
parent
commit
467483553b
3 changed files with 81 additions and 40 deletions
  1. +5
    -3
      lark/load_grammar.py
  2. +60
    -28
      lark/parse_tree_builder.py
  3. +16
    -9
      tests/test_parser.py

+ 5
- 3
lark/load_grammar.py View File

@@ -179,6 +179,8 @@ class EBNF_to_BNF(Transformer_InPlace):
if isinstance(rule, Terminal) and rule.filter_out and not ( if isinstance(rule, Terminal) and rule.filter_out and not (
self.rule_options and self.rule_options.keep_all_tokens): self.rule_options and self.rule_options.keep_all_tokens):
empty = ST('expansion', []) empty = ST('expansion', [])
elif isinstance(rule, NonTerminal) and rule.name.startswith('_'):
empty = ST('expansion', [])
else: else:
empty = _EMPTY empty = _EMPTY
return ST('expansions', [rule, empty]) return ST('expansions', [rule, empty])
@@ -506,11 +508,11 @@ class Grammar:
if alias and name.startswith('_'): if alias and name.startswith('_'):
raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias)) raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias))


empty_indices = [i for i, x in enumerate(expansion) if x==_EMPTY]
if empty_indices:
empty_indices = [x==_EMPTY for i, x in enumerate(expansion)]
if any(empty_indices):
assert options assert options
exp_options = copy(options) exp_options = copy(options)
exp_options.empty_indices = len(expansion), empty_indices
exp_options.empty_indices = empty_indices
expansion = [x for x in expansion if x!=_EMPTY] expansion = [x for x in expansion if x!=_EMPTY]
else: else:
exp_options = options exp_options = options


+ 60
- 28
lark/parse_tree_builder.py View File

@@ -17,24 +17,6 @@ class ExpandSingleChild:
else: else:
return self.node_builder(children) return self.node_builder(children)


class AddMaybePlaceholder:
    """Post-process a built node by re-inserting ``None`` placeholders.

    ``empty_indices`` is either falsy (nothing to do) or a pair of
    ``(expansion_length, positions)`` where ``positions`` are the indices in
    the original expansion that matched the empty rule and must reappear as
    ``None`` children.
    """

    def __init__(self, empty_indices, node_builder):
        self.node_builder = node_builder
        self.empty_indices = empty_indices

    def __call__(self, children):
        tree = self.node_builder(children)
        if not self.empty_indices:
            return tree
        expected_len, hole_positions = self.empty_indices
        # The shift compensates for rules matched more than once (repetition),
        # e.g. ("a" "b"?)+ ; for a single, non-repeated match it is zero.
        shift = len(tree.children) - (expected_len - len(hole_positions))
        for pos in hole_positions:
            tree.children.insert(pos + shift, None)
        return tree


class PropagatePositions: class PropagatePositions:
def __init__(self, node_builder): def __init__(self, node_builder):
self.node_builder = node_builder self.node_builder = node_builder
@@ -77,23 +59,54 @@ class PropagatePositions:




class ChildFilter: class ChildFilter:
def __init__(self, to_include, node_builder):
def __init__(self, to_include, append_none, node_builder):
self.node_builder = node_builder self.node_builder = node_builder
self.to_include = to_include self.to_include = to_include
self.append_none = append_none


def __call__(self, children): def __call__(self, children):
filtered = [] filtered = []
for i, to_expand in self.to_include:

for i, to_expand, add_none in self.to_include:
if add_none:
filtered += [None] * add_none
if to_expand: if to_expand:
filtered += children[i].children filtered += children[i].children
else: else:
filtered.append(children[i]) filtered.append(children[i])


if self.append_none:
filtered += [None] * self.append_none

return self.node_builder(filtered) return self.node_builder(filtered)


class ChildFilterLALR(ChildFilter): class ChildFilterLALR(ChildFilter):
"Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)" "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"


def __call__(self, children):
filtered = []
for i, to_expand, add_none in self.to_include:
if add_none:
filtered += [None] * add_none
if to_expand:
if filtered:
filtered += children[i].children
else: # Optimize for left-recursion
filtered = children[i].children
else:
filtered.append(children[i])

if self.append_none:
filtered += [None] * self.append_none

return self.node_builder(filtered)

class ChildFilterLALR_NoPlaceholders(ChildFilter):
"Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
def __init__(self, to_include, node_builder):
self.node_builder = node_builder
self.to_include = to_include

def __call__(self, children): def __call__(self, children):
filtered = [] filtered = []
for i, to_expand in self.to_include: for i, to_expand in self.to_include:
@@ -110,12 +123,32 @@ class ChildFilterLALR(ChildFilter):
def _should_expand(sym): def _should_expand(sym):
return not sym.is_term and sym.name.startswith('_') return not sym.is_term and sym.name.startswith('_')


def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous):
    """Return a ChildFilter factory for this expansion, or None when no
    filtering or expansion of children is required."""
    to_include = []
    for idx, sym in enumerate(expansion):
        if keep_all_tokens or not (sym.is_term and sym.filter_out):
            to_include.append((idx, _should_expand(sym)))

    # A filter callback is only worthwhile if some child is dropped
    # (filtered out) or some child must be expanded inline.
    if len(to_include) < len(expansion) or any(expand for _idx, expand in to_include):
        filter_cls = ChildFilter if ambiguous else ChildFilterLALR
        return partial(filter_cls, to_include)
def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices):
    """Return a ChildFilter factory for this expansion, or None when no filtering is needed.

    _empty_indices: a list of booleans where True marks a position in the
        original expansion that was reduced to the empty rule and must be
        restored as a None placeholder; one False per remaining symbol.
        Falsy when placeholder handling is disabled for this rule.
    """
    # Prepare empty_indices as: How many Nones to insert at each index?
    if _empty_indices:
        # Sanity check: the False entries correspond 1:1 to the kept symbols.
        assert _empty_indices.count(False) == len(expansion)
        # Run-length encode the Trues: "0" marks a kept symbol, and each run of
        # "1"s counts the placeholders preceding it (the final entry counts
        # placeholders trailing the whole rule).
        s = ''.join(str(int(b)) for b in _empty_indices)
        empty_indices = [len(ones) for ones in s.split('0')]
        assert len(empty_indices) == len(expansion)+1, (empty_indices, len(expansion))
    else:
        empty_indices = [0] * (len(expansion)+1)

    to_include = []
    nones_to_add = 0
    for i, sym in enumerate(expansion):
        # Placeholders due before symbol i accumulate until a symbol is kept.
        nones_to_add += empty_indices[i]
        if keep_all_tokens or not (sym.is_term and sym.filter_out):
            # Entry layout: (child index, expand its children inline?, Nones to insert before it)
            to_include.append((i, _should_expand(sym), nones_to_add))
            nones_to_add = 0

    # Whatever is still pending (plus trailing placeholders) is appended at the end.
    nones_to_add += empty_indices[len(expansion)]

    if _empty_indices or len(to_include) < len(expansion) or any(to_expand for i, to_expand,_ in to_include):
        if _empty_indices or ambiguous:
            return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include, nones_to_add)
        else:
            # LALR without placeholders
            return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include])




class Callback(object): class Callback(object):
@@ -150,8 +183,7 @@ class ParseTreeBuilder:


wrapper_chain = filter(None, [ wrapper_chain = filter(None, [
(expand_single_child and not rule.alias) and ExpandSingleChild, (expand_single_child and not rule.alias) and ExpandSingleChild,
maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous),
self.maybe_placeholders and partial(AddMaybePlaceholder, options.empty_indices if options else None),
maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders and options else None),
self.propagate_positions and PropagatePositions, self.propagate_positions and PropagatePositions,
]) ])




+ 16
- 9
tests/test_parser.py View File

@@ -1035,7 +1035,7 @@ def _make_parser_test(LEXER, PARSER):
bb_.1: "bb" bb_.1: "bb"
""" """


l = _Lark(grammar, ambiguity='resolve__antiscore_sum')
l = Lark(grammar, ambiguity='resolve__antiscore_sum')
res = l.parse('abba') res = l.parse('abba')
self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_') self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')


@@ -1048,7 +1048,7 @@ def _make_parser_test(LEXER, PARSER):
bb_: "bb" bb_: "bb"
""" """


l = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum')
l = Lark(grammar, ambiguity='resolve__antiscore_sum')
res = l.parse('abba') res = l.parse('abba')
self.assertEqual(''.join(child.data for child in res.children), 'indirection') self.assertEqual(''.join(child.data for child in res.children), 'indirection')


@@ -1061,7 +1061,7 @@ def _make_parser_test(LEXER, PARSER):
bb_.3: "bb" bb_.3: "bb"
""" """


l = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum')
l = Lark(grammar, ambiguity='resolve__antiscore_sum')
res = l.parse('abba') res = l.parse('abba')
self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_') self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')


@@ -1074,7 +1074,7 @@ def _make_parser_test(LEXER, PARSER):
bb_.3: "bb" bb_.3: "bb"
""" """


l = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum')
l = Lark(grammar, ambiguity='resolve__antiscore_sum')
res = l.parse('abba') res = l.parse('abba')
self.assertEqual(''.join(child.data for child in res.children), 'indirection') self.assertEqual(''.join(child.data for child in res.children), 'indirection')


@@ -1248,18 +1248,19 @@ def _make_parser_test(LEXER, PARSER):
res = p.parse('B') res = p.parse('B')
self.assertEqual(len(res.children), 3) self.assertEqual(len(res.children), 3)


@unittest.skipIf(PARSER=='cyk', "Empty rules")
def test_maybe_placeholders(self): def test_maybe_placeholders(self):
# Anonymous tokens shouldn't count # Anonymous tokens shouldn't count
p = Lark("""start: "a"? "b"? "c"? """, maybe_placeholders=True)
p = _Lark("""start: "a"? "b"? "c"? """, maybe_placeholders=True)
self.assertEqual(p.parse("").children, []) self.assertEqual(p.parse("").children, [])


# Anonymous tokens shouldn't count, other constructs should # Anonymous tokens shouldn't count, other constructs should
p = Lark("""start: A? "b"? _c?
p = _Lark("""start: A? "b"? _c?
A: "a" A: "a"
_c: "c" """, maybe_placeholders=True) _c: "c" """, maybe_placeholders=True)
self.assertEqual(p.parse("").children, [None, None])
self.assertEqual(p.parse("").children, [None])


p = Lark("""!start: "a"? "b"? "c"? """, maybe_placeholders=True)
p = _Lark("""!start: "a"? "b"? "c"? """, maybe_placeholders=True)
self.assertEqual(p.parse("").children, [None, None, None]) self.assertEqual(p.parse("").children, [None, None, None])
self.assertEqual(p.parse("a").children, ['a', None, None]) self.assertEqual(p.parse("a").children, ['a', None, None])
self.assertEqual(p.parse("b").children, [None, 'b', None]) self.assertEqual(p.parse("b").children, [None, 'b', None])
@@ -1269,7 +1270,7 @@ def _make_parser_test(LEXER, PARSER):
self.assertEqual(p.parse("bc").children, [None, 'b', 'c']) self.assertEqual(p.parse("bc").children, [None, 'b', 'c'])
self.assertEqual(p.parse("abc").children, ['a', 'b', 'c']) self.assertEqual(p.parse("abc").children, ['a', 'b', 'c'])


p = Lark("""!start: ("a"? "b" "c"?)+ """, maybe_placeholders=True)
p = _Lark("""!start: ("a"? "b" "c"?)+ """, maybe_placeholders=True)
self.assertEqual(p.parse("b").children, [None, 'b', None]) self.assertEqual(p.parse("b").children, [None, 'b', None])
self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None]) self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None])
self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c']) self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c'])
@@ -1280,6 +1281,12 @@ def _make_parser_test(LEXER, PARSER):
'a', 'b', 'c', 'a', 'b', 'c',
None, 'b', None]) None, 'b', None])


p = _Lark("""!start: "a"? "c"? "b"+ "a"? "d"? """, maybe_placeholders=True)
self.assertEqual(p.parse("bb").children, [None, None, 'b', 'b', None, None])
self.assertEqual(p.parse("bd").children, [None, None, 'b', None, 'd'])
self.assertEqual(p.parse("abba").children, ['a', None, 'b', 'b', 'a', None])
self.assertEqual(p.parse("cbbbb").children, [None, 'c', 'b', 'b', 'b', 'b', None, None])





_NAME = "Test" + PARSER.capitalize() + LEXER.capitalize() _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize()


Loading…
Cancel
Save