From ddae93f92f38e0b513d7e1c43930287c3eed4547 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sun, 10 Dec 2017 19:32:41 +0200 Subject: [PATCH] BUGFIX: Ambiguity resolution now sums priority (Issue #46) --- lark/load_grammar.py | 4 +-- lark/parsers/resolve_ambig.py | 62 +++++++++++++++++++---------------- 2 files changed, 35 insertions(+), 31 deletions(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 0414230..092c118 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -435,9 +435,9 @@ class Grammar: for name, (tree, priority) in term_defs: # TODO transfer priority to rule? if name.startswith('_'): - options = RuleOptions(filter_out=True, priority=priority) + options = RuleOptions(filter_out=True, priority=-priority) else: - options = RuleOptions(keep_all_tokens=True, create_token=name, priority=priority) + options = RuleOptions(keep_all_tokens=True, create_token=name, priority=-priority) name = new_terminal_names[name] inner_name = name + '_inner' diff --git a/lark/parsers/resolve_ambig.py b/lark/parsers/resolve_ambig.py index f1a4431..c965433 100644 --- a/lark/parsers/resolve_ambig.py +++ b/lark/parsers/resolve_ambig.py @@ -9,56 +9,60 @@ from ..tree import Tree, Visitor_NoRecurse # Author: Erez Sh def _compare_rules(rule1, rule2): - if rule1.origin != rule2.origin: - if rule1.options and rule2.options: - if rule1.options.priority is not None and rule2.options.priority is not None: - assert rule1.options.priority != rule2.options.priority, "Priority is the same between both rules: %s == %s" % (rule1, rule2) - return -compare(rule1.options.priority, rule2.options.priority) - - return 0 - - c = compare( len(rule1.expansion), len(rule2.expansion)) - if rule1.origin.startswith('__'): # XXX hack! We need to set priority in parser, not here + c = -compare( len(rule1.expansion), len(rule2.expansion)) + if rule1.origin.startswith('__'): # XXX hack! We should set priority in parser, not here c = -c return c -def _compare_drv(tree1, tree2): - if not (isinstance(tree1, Tree) and isinstance(tree2, Tree)): + +def _sum_priority(tree): + p = 0 + + for n in tree.iter_subtrees(): try: - return -compare(tree1, tree2) - except TypeError: - return 0 + p += n.rule.options.priority or 0 + except AttributeError: + pass + + return p + +def _compare_priority(tree1, tree2): + tree1.iter_subtrees() +def _compare_drv(tree1, tree2): try: rule1, rule2 = tree1.rule, tree2.rule except AttributeError: - # Probably trees that don't take part in this parse (better way to distinguish?) - return -compare(tree1, tree2) + # Probably non-trees, or user trees that weren't created by the parse (better way to distinguish?) + return compare(tree1, tree2) - # XXX These artifacts can appear due to imperfections in the ordering of Visitor_NoRecurse, - # when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be - # computationally inefficient. So we handle it here. - if tree1.data == '_ambig': - _standard_resolve_ambig(tree1) - if tree2.data == '_ambig': - _standard_resolve_ambig(tree2) + assert tree1.data != '_ambig' + assert tree2.data != '_ambig' + + p1 = _sum_priority(tree1) + p2 = _sum_priority(tree2) + c = (p1 or p2) and compare(p1, p2) + if c: + return c c = _compare_rules(tree1.rule, tree2.rule) if c: return c # rules are "equal", so compare trees - for t1, t2 in zip(tree1.children, tree2.children): - c = _compare_drv(t1, t2) - if c: - return c + if len(tree1.children) == len(tree2.children): + for t1, t2 in zip(tree1.children, tree2.children): + c = _compare_drv(t1, t2) + if c: + return c return compare(len(tree1.children), len(tree2.children)) def _standard_resolve_ambig(tree): assert tree.data == '_ambig' - best = min(tree.children, key=cmp_to_key(_compare_drv)) + key_f = cmp_to_key(_compare_drv) + best = max(tree.children, key=key_f) assert best.data == 'drv' tree.set('drv', best.children) tree.rule = best.rule # needed for applying callbacks