Browse Source

Fixed bug in Earley: A tree builder optimization clashed with explicit ambiguity

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.6
Erez Shinan 6 years ago
parent
commit
25c3c51b1c
3 changed files with 43 additions and 5 deletions
  1. +1
    -1
      lark/lark.py
  2. +18
    -4
      lark/parse_tree_builder.py
  3. +24
    -0
      tests/test_parser.py

+ 1
- 1
lark/lark.py View File

@@ -172,7 +172,7 @@ class Lark:
def _build_parser(self):
self.parser_class = get_frontend(self.options.parser, self.options.lexer)

self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens)
self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr')
callback = self._parse_tree_builder.create_callback(self.options.transformer)
if self.profiler:
for f in dir(callback):


+ 18
- 4
lark/parse_tree_builder.py View File

@@ -57,6 +57,19 @@ class ChildFilter:
self.node_builder = node_builder
self.to_include = to_include

# Build the child list for one matched rule and pass it to self.node_builder.
# self.to_include holds (index, to_expand) pairs; only the listed indices are kept,
# so children whose index is absent from to_include are dropped.
# The result is accumulated in a fresh list, so neither `children` nor any
# child's own `.children` list is modified by this method.
def __call__(self, children):
filtered = []
for i, to_expand in self.to_include:
if to_expand:
# Splice this child's own children into the result in place of the child itself.
filtered += children[i].children
else:
# Keep the child as a single element.
filtered.append(children[i])

return self.node_builder(filtered)

class ChildFilterLALR(ChildFilter):
"Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"

def __call__(self, children):
filtered = []
for i, to_expand in self.to_include:
@@ -73,21 +86,22 @@ class ChildFilter:
# Return True when `sym` is not a terminal (according to is_terminal) and its
# name starts with an underscore; maybe_create_child_filter stores this result
# as the `to_expand` flag for each kept symbol of an expansion.
def _should_expand(sym):
return not is_terminal(sym) and sym.startswith('_')

def maybe_create_child_filter(expansion, filter_out):
def maybe_create_child_filter(expansion, filter_out, ambiguous):
to_include = [(i, _should_expand(sym)) for i, sym in enumerate(expansion) if sym not in filter_out]

if len(to_include) < len(expansion) or any(to_expand for i, to_expand in to_include):
return partial(ChildFilter, to_include)
return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include)


# Empty class: defines no attributes or methods of its own.
# NOTE(review): presumably used as a plain attribute holder for per-rule
# callbacks built by ParseTreeBuilder.create_callback -- confirm, that code is not shown here.
class Callback(object):
pass

class ParseTreeBuilder:
def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False):
def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False):
self.tree_class = tree_class
self.propagate_positions = propagate_positions
self.always_keep_all_tokens = keep_all_tokens
self.ambiguous = ambiguous

self.rule_builders = list(self._init_builders(rules))

@@ -107,7 +121,7 @@ class ParseTreeBuilder:
wrapper_chain = filter(None, [
create_token and partial(CreateToken, create_token),
(expand_single_child and not rule.alias) and ExpandSingleChild,
maybe_create_child_filter(rule.expansion, () if keep_all_tokens else filter_out),
maybe_create_child_filter(rule.expansion, () if keep_all_tokens else filter_out, self.ambiguous),
self.propagate_positions and PropagatePositions,
])



+ 24
- 0
tests/test_parser.py View File

@@ -293,6 +293,30 @@ def _make_full_earley_test(LEXER):
self.assertEqual(res, expected)


# Regression test for ambiguity='explicit': parsing "cat" with `start: NAME+`
# (NAME matches any run of word characters) must produce an '_ambig' root whose
# children together cover every way of splitting the text into NAME tokens.
def test_explicit_ambiguity(self):
grammar = r"""
start: NAME+

NAME: /\w+/

%ignore " "
"""

text = """cat"""

parser = Lark(grammar, start='start', ambiguity='explicit')
tree = parser.parse(text)
# The root node must be the explicit-ambiguity marker.
self.assertEqual(tree.data, '_ambig')

# Collect each alternative as a tuple of token strings; using a set makes the
# comparison independent of the order in which the alternatives appear.
combinations = {tuple(str(s) for s in t.children) for t in tree.children}
self.assertEqual(combinations, {
('cat',),
('ca', 't'),
('c', 'at'),
('c', 'a' ,'t')
})







Loading…
Cancel
Save