@@ -140,7 +140,7 @@ class Lark: | |||||
self.options.ambiguity = 'resolve' | self.options.ambiguity = 'resolve' | ||||
else: | else: | ||||
assert self.options.parser == 'earley', "Only Earley supports disambiguation right now" | assert self.options.parser == 'earley', "Only Earley supports disambiguation right now" | ||||
assert self.options.ambiguity in ('resolve', 'explicit', 'auto') | |||||
assert self.options.ambiguity in ('resolve', 'explicit', 'auto', 'resolve__antiscore_sum') | |||||
# Parse the grammar file and compose the grammars (TODO) | # Parse the grammar file and compose the grammars (TODO) | ||||
self.grammar = load_grammar(grammar, source) | self.grammar = load_grammar(grammar, source) | ||||
@@ -50,7 +50,9 @@ class LALR_ContextualLexer: | |||||
def get_ambiguity_resolver(options):
    """Select the ambiguity resolver named by ``options.ambiguity``.

    Returns:
        ``resolve_ambig.standard_resolve_ambig`` when ``options`` is falsy or
        the mode is ``'resolve'``; ``resolve_ambig.antiscore_sum_resolve_ambig``
        when the mode is ``'resolve__antiscore_sum'``; ``None`` when the mode
        is ``'explicit'``.

    Raises:
        ValueError: for any other ``options.ambiguity`` value; the options
            object itself is the exception argument.
    """
    if not options or options.ambiguity == 'resolve':
        return resolve_ambig.standard_resolve_ambig
    if options.ambiguity == 'resolve__antiscore_sum':
        return resolve_ambig.antiscore_sum_resolve_ambig
    if options.ambiguity == 'explicit':
        # Explicit mode gets no resolver at all.
        return None
    raise ValueError(options)
@@ -3,6 +3,11 @@ from functools import cmp_to_key | |||||
from ..tree import Tree, Visitor_NoRecurse | from ..tree import Tree, Visitor_NoRecurse | ||||
# Standard ambiguity resolver (uses comparison) | |||||
# | |||||
# Author: Erez Sh | |||||
def _compare_rules(rule1, rule2): | def _compare_rules(rule1, rule2): | ||||
if rule1.origin != rule2.origin: | if rule1.origin != rule2.origin: | ||||
if rule1.options and rule2.options: | if rule1.options and rule2.options: | ||||
@@ -31,9 +36,9 @@ def _compare_drv(tree1, tree2): | |||||
# when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be | # when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be | ||||
# computationally inefficient. So we handle it here. | # computationally inefficient. So we handle it here. | ||||
if tree1.data == '_ambig': | if tree1.data == '_ambig': | ||||
_resolve_ambig(tree1) | |||||
_standard_resolve_ambig(tree1) | |||||
if tree2.data == '_ambig': | if tree2.data == '_ambig': | ||||
_resolve_ambig(tree2) | |||||
_standard_resolve_ambig(tree2) | |||||
c = _compare_rules(tree1.rule, tree2.rule) | c = _compare_rules(tree1.rule, tree2.rule) | ||||
if c: | if c: | ||||
@@ -48,21 +53,54 @@ def _compare_drv(tree1, tree2): | |||||
return compare(len(tree1.children), len(tree2.children)) | return compare(len(tree1.children), len(tree2.children)) | ||||
def _standard_resolve_ambig(tree):
    """Rewrite a single ``_ambig`` node in place as its preferred derivation.

    The preferred child is the minimum of ``tree.children`` under the
    ``_compare_drv`` comparison. The node is then rewritten with
    ``tree.set('drv', best.children)`` and given that child's ``rule``.
    """
    assert tree.data == '_ambig'
    best = min(tree.children, key=cmp_to_key(_compare_drv))
    assert best.data == 'drv'
    tree.set('drv', best.children)
    tree.rule = best.rule   # needed for applying callbacks
    # Sanity check: the node must no longer be marked ambiguous.
    assert tree.data != '_ambig'
def standard_resolve_ambig(tree):
    """Resolve every ``_ambig`` node reported by ``tree.find_data`` and return ``tree``."""
    for ambig in tree.find_data('_ambig'):
        _standard_resolve_ambig(ambig)
    return tree
class ResolveAmbig(Visitor_NoRecurse): | |||||
def _ambig(self, tree): | |||||
_resolve_ambig(tree) | |||||
# Anti-score sum ambiguity resolver (picks the derivation whose rule priorities sum to the lowest value)
#
# Author: Uriva (https://github.com/uriva)
def _antiscore_sum_drv(tree):
    """Return the anti-score of ``tree``: its rule priority plus that of all its children.

    Anything that is not a ``Tree`` scores 0. A missing or falsy priority
    also counts as 0.
    """
    if not isinstance(tree, Tree):
        return 0

    # XXX These artifacts can appear due to imperfections in the ordering of Visitor_NoRecurse,
    #     when confronted with duplicate (same-id) nodes.  Fixing this ordering is possible, but would be
    #     computationally inefficient. So we handle it here.
    if tree.data == '_ambig':
        _antiscore_sum_resolve_ambig(tree)

    try:
        priority = tree.rule.options.priority
    except AttributeError:
        # Probably trees that don't take part in this parse (better way to distinguish?)
        priority = None

    return (priority or 0) + sum(map(_antiscore_sum_drv, tree.children), 0)
def _antiscore_sum_resolve_ambig(tree):
    """Rewrite a single ``_ambig`` node in place as its lowest anti-score child.

    Children are ranked by ``_antiscore_sum_drv``. The node is then rewritten
    with ``tree.set('drv', best.children)`` and given that child's ``rule``.
    """
    assert tree.data == '_ambig'
    best = min(tree.children, key=_antiscore_sum_drv)
    assert best.data == 'drv'
    tree.set('drv', best.children)
    tree.rule = best.rule   # needed for applying callbacks
def antiscore_sum_resolve_ambig(tree):
    """Resolve every ``_ambig`` node reported by ``tree.find_data`` by anti-score and return ``tree``."""
    for ambig in tree.find_data('_ambig'):
        _antiscore_sum_resolve_ambig(ambig)
    return tree
@@ -1,3 +1,8 @@ | |||||
try: | |||||
from future_builtins import filter | |||||
except ImportError: | |||||
pass | |||||
from copy import deepcopy | from copy import deepcopy | ||||
from .utils import inline_args | from .utils import inline_args | ||||
@@ -44,13 +49,7 @@ class Tree(object): | |||||
return hash((self.data, tuple(self.children))) | return hash((self.data, tuple(self.children))) | ||||
def find_pred(self, pred):
    """Return the items yielded by ``self.iter_subtrees()`` for which ``pred`` is true."""
    return filter(pred, self.iter_subtrees())
def find_data(self, data):
    """Return what ``find_pred`` selects with the test ``t.data == data``."""
    return self.find_pred(lambda t: t.data == data)
@@ -650,6 +650,62 @@ def _make_parser_test(LEXER, PARSER): | |||||
self.assertEqual(res.children[0].data, 'a') | self.assertEqual(res.children[0].data, 'a') | ||||
@unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
def test_earley_prioritization_sum(self):
    "Tests effect of priority on result"
    # Each case pairs a grammar with the expected concatenation of the
    # `data` of the top-level children when parsing 'abba' under the
    # 'resolve__antiscore_sum' ambiguity mode.
    cases = [
        ("""
        start: ab_ b_ a_ | indirection
        indirection: a_ bb_ a_
        a_: "a"
        b_: "b"
        ab_: "ab"
        bb_.1: "bb"
        """, 'ab_b_a_'),
        ("""
        start: ab_ b_ a_ | indirection
        indirection: a_ bb_ a_
        a_: "a"
        b_: "b"
        ab_.1: "ab"
        bb_: "bb"
        """, 'indirection'),
        ("""
        start: ab_ b_ a_ | indirection
        indirection: a_ bb_ a_
        a_.2: "a"
        b_.1: "b"
        ab_.3: "ab"
        bb_.3: "bb"
        """, 'ab_b_a_'),
        ("""
        start: ab_ b_ a_ | indirection
        indirection: a_ bb_ a_
        a_.1: "a"
        b_.1: "b"
        ab_.4: "ab"
        bb_.3: "bb"
        """, 'indirection'),
    ]

    for grammar, expected in cases:
        parser = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum')
        res = parser.parse('abba')
        # The grammar is passed as the failure message so a failing case is identifiable.
        self.assertEqual(''.join(child.data for child in res.children), expected, grammar)
_NAME = "Test" + PARSER.capitalize() + (LEXER or 'Scanless').capitalize() | _NAME = "Test" + PARSER.capitalize() + (LEXER or 'Scanless').capitalize() | ||||
_TestParser.__name__ = _NAME | _TestParser.__name__ = _NAME | ||||