diff --git a/lark/lark.py b/lark/lark.py index 9979ab1..005fb4d 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -140,7 +140,7 @@ class Lark: self.options.ambiguity = 'resolve' else: assert self.options.parser == 'earley', "Only Earley supports disambiguation right now" - assert self.options.ambiguity in ('resolve', 'explicit', 'auto') + assert self.options.ambiguity in ('resolve', 'explicit', 'auto', 'resolve__antiscore_sum') # Parse the grammar file and compose the grammars (TODO) self.grammar = load_grammar(grammar, source) diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index 3d29576..c4524ca 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -50,7 +50,9 @@ class LALR_ContextualLexer: def get_ambiguity_resolver(options): if not options or options.ambiguity == 'resolve': - return resolve_ambig.resolve_ambig + return resolve_ambig.standard_resolve_ambig + elif options.ambiguity == 'resolve__antiscore_sum': + return resolve_ambig.antiscore_sum_resolve_ambig elif options.ambiguity == 'explicit': return None raise ValueError(options) diff --git a/lark/parsers/resolve_ambig.py b/lark/parsers/resolve_ambig.py index 302223b..ca0d0f6 100644 --- a/lark/parsers/resolve_ambig.py +++ b/lark/parsers/resolve_ambig.py @@ -3,6 +3,11 @@ from functools import cmp_to_key from ..tree import Tree, Visitor_NoRecurse + +# Standard ambiguity resolver (uses comparison) +# +# Author: Erez Sh + def _compare_rules(rule1, rule2): if rule1.origin != rule2.origin: if rule1.options and rule2.options: @@ -31,9 +36,9 @@ def _compare_drv(tree1, tree2): # when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be # computationally inefficient. So we handle it here. if tree1.data == '_ambig': - _resolve_ambig(tree1) + _standard_resolve_ambig(tree1) if tree2.data == '_ambig': - _resolve_ambig(tree2) + _standard_resolve_ambig(tree2) c = _compare_rules(tree1.rule, tree2.rule) if c: @@ -48,21 +53,54 @@ def _compare_drv(tree1, tree2): return compare(len(tree1.children), len(tree2.children)) -def _resolve_ambig(tree): +def _standard_resolve_ambig(tree): assert tree.data == '_ambig' - best = min(tree.children, key=cmp_to_key(_compare_drv)) assert best.data == 'drv' tree.set('drv', best.children) tree.rule = best.rule # needed for applying callbacks - assert tree.data != '_ambig' +def standard_resolve_ambig(tree): + for ambig in tree.find_data('_ambig'): + _standard_resolve_ambig(ambig) + + return tree + + -class ResolveAmbig(Visitor_NoRecurse): - def _ambig(self, tree): - _resolve_ambig(tree) +# Anti-score Sum +# +# Author: Uriva (https://github.com/uriva) + +def _antiscore_sum_drv(tree): + if not isinstance(tree, Tree): + return 0 + + # XXX These artifacts can appear due to imperfections in the ordering of Visitor_NoRecurse, + # when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be + # computationally inefficient. So we handle it here. + if tree.data == '_ambig': + _antiscore_sum_resolve_ambig(tree) + + try: + priority = tree.rule.options.priority + except AttributeError: + # Probably trees that don't take part in this parse (better way to distinguish?) + priority = None + + return (priority or 0) + sum(map(_antiscore_sum_drv, tree.children), 0) + +def _antiscore_sum_resolve_ambig(tree): + assert tree.data == '_ambig' + + best = min(tree.children, key=_antiscore_sum_drv) + assert best.data == 'drv' + tree.set('drv', best.children) + tree.rule = best.rule # needed for applying callbacks + +def antiscore_sum_resolve_ambig(tree): + for ambig in tree.find_data('_ambig'): + _antiscore_sum_resolve_ambig(ambig) -def resolve_ambig(tree): - ResolveAmbig().visit(tree) return tree diff --git a/lark/tree.py b/lark/tree.py index a0ced5a..290b9a7 100644 --- a/lark/tree.py +++ b/lark/tree.py @@ -1,3 +1,8 @@ +try: + from future_builtins import filter +except ImportError: + pass + from copy import deepcopy from .utils import inline_args @@ -44,13 +49,7 @@ class Tree(object): return hash((self.data, tuple(self.children))) def find_pred(self, pred): - if pred(self): - yield self - - for c in self.children: - if isinstance(c, Tree): - for t in c.find_pred(pred): - yield t + return filter(pred, self.iter_subtrees()) def find_data(self, data): return self.find_pred(lambda t: t.data == data) diff --git a/tests/test_parser.py b/tests/test_parser.py index b50cd50..cce6a37 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -650,6 +650,62 @@ def _make_parser_test(LEXER, PARSER): self.assertEqual(res.children[0].data, 'a') + @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules") + def test_earley_prioritization_sum(self): + "Tests effect of priority on result" + + grammar = """ + start: ab_ b_ a_ | indirection + indirection: a_ bb_ a_ + a_: "a" + b_: "b" + ab_: "ab" + bb_.1: "bb" + """ + + l = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum') + res = l.parse('abba') + self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_') + + grammar = """ + start: ab_ b_ a_ | indirection + indirection: a_ bb_ a_ + a_: "a" + b_: "b" + ab_.1: "ab" + bb_: "bb" + """ + + l = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum') + res = l.parse('abba') + self.assertEqual(''.join(child.data for child in res.children), 'indirection') + + grammar = """ + start: ab_ b_ a_ | indirection + indirection: a_ bb_ a_ + a_.2: "a" + b_.1: "b" + ab_.3: "ab" + bb_.3: "bb" + """ + + l = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum') + res = l.parse('abba') + self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_') + + grammar = """ + start: ab_ b_ a_ | indirection + indirection: a_ bb_ a_ + a_.1: "a" + b_.1: "b" + ab_.4: "ab" + bb_.3: "bb" + """ + + l = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum') + res = l.parse('abba') + self.assertEqual(''.join(child.data for child in res.children), 'indirection') + _NAME = "Test" + PARSER.capitalize() + (LEXER or 'Scanless').capitalize() _TestParser.__name__ = _NAME