From 25c3c51b1c6c096b6e4bcfe63d543fece572b72d Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Thu, 5 Apr 2018 15:40:33 +0300 Subject: [PATCH] Fixed bug in Earley: A tree builder optimization clashed with explicit ambiguity --- lark/lark.py | 2 +- lark/parse_tree_builder.py | 22 ++++++++++++++++++---- tests/test_parser.py | 24 ++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 5 deletions(-) diff --git a/lark/lark.py b/lark/lark.py index fb5e04f..2660bd7 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -172,7 +172,7 @@ class Lark: def _build_parser(self): self.parser_class = get_frontend(self.options.parser, self.options.lexer) - self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens) + self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr') callback = self._parse_tree_builder.create_callback(self.options.transformer) if self.profiler: for f in dir(callback): diff --git a/lark/parse_tree_builder.py b/lark/parse_tree_builder.py index e84b01d..7c74178 100644 --- a/lark/parse_tree_builder.py +++ b/lark/parse_tree_builder.py @@ -57,6 +57,19 @@ class ChildFilter: self.node_builder = node_builder self.to_include = to_include + def __call__(self, children): + filtered = [] + for i, to_expand in self.to_include: + if to_expand: + filtered += children[i].children + else: + filtered.append(children[i]) + + return self.node_builder(filtered) + +class ChildFilterLALR(ChildFilter): + "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)" + def __call__(self, children): filtered = [] for i, to_expand in self.to_include: @@ -73,21 +86,22 @@ class ChildFilter: def _should_expand(sym): return not is_terminal(sym) and sym.startswith('_') -def maybe_create_child_filter(expansion, filter_out): +def maybe_create_child_filter(expansion, filter_out, ambiguous): to_include = [(i, _should_expand(sym)) for i, sym in enumerate(expansion) if sym not in filter_out] if len(to_include) < len(expansion) or any(to_expand for i, to_expand in to_include): - return partial(ChildFilter, to_include) + return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include) class Callback(object): pass class ParseTreeBuilder: - def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False): + def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False): self.tree_class = tree_class self.propagate_positions = propagate_positions self.always_keep_all_tokens = keep_all_tokens + self.ambiguous = ambiguous self.rule_builders = list(self._init_builders(rules)) @@ -107,7 +121,7 @@ class ParseTreeBuilder: wrapper_chain = filter(None, [ create_token and partial(CreateToken, create_token), (expand_single_child and not rule.alias) and ExpandSingleChild, - maybe_create_child_filter(rule.expansion, () if keep_all_tokens else filter_out), + maybe_create_child_filter(rule.expansion, () if keep_all_tokens else filter_out, self.ambiguous), self.propagate_positions and PropagatePositions, ]) diff --git a/tests/test_parser.py b/tests/test_parser.py index a948bd5..47d0e3d 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -293,6 +293,30 @@ def _make_full_earley_test(LEXER): self.assertEqual(res, expected) + def test_explicit_ambiguity(self): + grammar = r""" + start: NAME+ + + NAME: /\w+/ + + %ignore " " + """ + + text = """cat""" + + parser = Lark(grammar, start='start', ambiguity='explicit') + tree = parser.parse(text) + self.assertEqual(tree.data, '_ambig') + + combinations = {tuple(str(s) for s in t.children) for t in tree.children} + self.assertEqual(combinations, { + ('cat',), + ('ca', 't'), + ('c', 'at'), + ('c', 'a' ,'t') + }) + +