@@ -140,7 +140,7 @@ class Lark: | |||
self.options.ambiguity = 'resolve' | |||
else: | |||
assert self.options.parser == 'earley', "Only Earley supports disambiguation right now" | |||
assert self.options.ambiguity in ('resolve', 'explicit', 'auto') | |||
assert self.options.ambiguity in ('resolve', 'explicit', 'auto', 'resolve__antiscore_sum') | |||
# Parse the grammar file and compose the grammars (TODO) | |||
self.grammar = load_grammar(grammar, source) | |||
@@ -50,7 +50,9 @@ class LALR_ContextualLexer: | |||
def get_ambiguity_resolver(options):
    """Return the ambiguity-resolution callable selected by ``options.ambiguity``.

    Mapping:
        * falsy ``options`` or ``'resolve'`` -> ``resolve_ambig.standard_resolve_ambig``
        * ``'resolve__antiscore_sum'``       -> ``resolve_ambig.antiscore_sum_resolve_ambig``
        * ``'explicit'``                     -> ``None``

    Raises:
        ValueError: if ``options.ambiguity`` is any other value; the offending
            ``options`` object is passed as the exception argument.
    """
    if not options or options.ambiguity == 'resolve':
        # Exactly one return here: the default resolver under its current name.
        return resolve_ambig.standard_resolve_ambig
    elif options.ambiguity == 'resolve__antiscore_sum':
        return resolve_ambig.antiscore_sum_resolve_ambig
    elif options.ambiguity == 'explicit':
        # NOTE(review): presumably None tells the caller to keep '_ambig'
        # nodes in the result untouched -- confirm against the caller.
        return None
    raise ValueError(options)
@@ -3,6 +3,11 @@ from functools import cmp_to_key | |||
from ..tree import Tree, Visitor_NoRecurse | |||
# Standard ambiguity resolver (uses comparison) | |||
# | |||
# Author: Erez Sh | |||
def _compare_rules(rule1, rule2): | |||
if rule1.origin != rule2.origin: | |||
if rule1.options and rule2.options: | |||
@@ -31,9 +36,9 @@ def _compare_drv(tree1, tree2): | |||
# when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be | |||
# computationally inefficient. So we handle it here. | |||
if tree1.data == '_ambig': | |||
_resolve_ambig(tree1) | |||
_standard_resolve_ambig(tree1) | |||
if tree2.data == '_ambig': | |||
_resolve_ambig(tree2) | |||
_standard_resolve_ambig(tree2) | |||
c = _compare_rules(tree1.rule, tree2.rule) | |||
if c: | |||
@@ -48,21 +53,54 @@ def _compare_drv(tree1, tree2): | |||
return compare(len(tree1.children), len(tree2.children)) | |||
def _resolve_ambig(tree): | |||
def _standard_resolve_ambig(tree):
    """Collapse a single '_ambig' node, in place, into one of its derivations.

    The chosen child is the minimum of ``tree.children`` under the
    ``_compare_drv`` comparison. ``tree`` then takes over that child's
    children and rule and is relabelled 'drv'.
    """
    assert tree.data == '_ambig'

    drv_key = cmp_to_key(_compare_drv)
    winner = min(tree.children, key=drv_key)
    assert winner.data == 'drv'

    tree.set('drv', winner.children)
    tree.rule = winner.rule    # needed for applying callbacks

    assert tree.data != '_ambig'
def standard_resolve_ambig(tree):
    """Resolve every '_ambig' node reported by ``tree.find_data`` and return ``tree``.

    Each node is rewritten in place by ``_standard_resolve_ambig``; the
    object returned is the same ``tree`` that was passed in.
    """
    ambiguous_nodes = tree.find_data('_ambig')
    for node in ambiguous_nodes:
        _standard_resolve_ambig(node)
    return tree
class ResolveAmbig(Visitor_NoRecurse):
    """Visitor whose ``_ambig`` handler collapses the node it is given.

    NOTE(review): presumably ``Visitor_NoRecurse`` dispatches on ``tree.data``,
    so ``_ambig`` runs once per '_ambig' node -- confirm in the visitor class.
    """

    def _ambig(self, tree):
        # The comparison-based helper is named _standard_resolve_ambig in this
        # module; call it under that name.
        _standard_resolve_ambig(tree)
# Anti-score Sum | |||
# | |||
# Author: Uriva (https://github.com/uriva) | |||
def _antiscore_sum_drv(tree):
    """Return the anti-score of a derivation: the sum of its rules' priorities.

    Anything that is not a ``Tree`` scores 0. A tree scores its own rule
    priority (0 when absent or falsy) plus the scores of all its children.
    Any '_ambig' node met on the way is resolved in place first.
    """
    if not isinstance(tree, Tree):
        return 0

    # XXX These artifacts can appear due to imperfections in the ordering of Visitor_NoRecurse,
    #     when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be
    #     computationally inefficient. So we handle it here.
    if tree.data == '_ambig':
        _antiscore_sum_resolve_ambig(tree)

    # A missing link anywhere along tree.rule.options.priority means "no priority".
    # Probably trees that don't take part in this parse (better way to distinguish?)
    rule = getattr(tree, 'rule', None)
    rule_options = getattr(rule, 'options', None)
    own_priority = getattr(rule_options, 'priority', None) or 0

    return own_priority + sum(_antiscore_sum_drv(child) for child in tree.children)
def _antiscore_sum_resolve_ambig(tree):
    """Collapse a single '_ambig' node, in place, into its lowest-scoring derivation.

    Children are ranked by ``_antiscore_sum_drv``; ``tree`` takes over the
    winning child's children and rule and is relabelled 'drv'.
    """
    assert tree.data == '_ambig'

    winner = min(tree.children, key=_antiscore_sum_drv)
    assert winner.data == 'drv'

    tree.set('drv', winner.children)
    tree.rule = winner.rule    # needed for applying callbacks
def antiscore_sum_resolve_ambig(tree):
    """Resolve every '_ambig' node reported by ``tree.find_data`` and return ``tree``.

    Each node is rewritten in place by ``_antiscore_sum_resolve_ambig``.
    The tree is returned, as ``standard_resolve_ambig`` does, so either
    resolver can be used wherever the result of the call is consumed.
    """
    for ambig in tree.find_data('_ambig'):
        _antiscore_sum_resolve_ambig(ambig)
    return tree
def resolve_ambig(tree):
    """Run a ``ResolveAmbig`` visitor over ``tree`` and return the same tree object."""
    visitor = ResolveAmbig()
    visitor.visit(tree)
    return tree
@@ -1,3 +1,8 @@ | |||
try: | |||
from future_builtins import filter | |||
except ImportError: | |||
pass | |||
from copy import deepcopy | |||
from .utils import inline_args | |||
@@ -44,13 +49,7 @@ class Tree(object): | |||
return hash((self.data, tuple(self.children))) | |||
def find_pred(self, pred):
    """Return an iterator over the subtrees for which ``pred(subtree)`` is true.

    Candidates are whatever ``self.iter_subtrees()`` produces, tested in
    that order. The function is a plain ``filter`` call rather than a
    generator, so it contains no ``yield`` alongside its ``return`` value.
    """
    return filter(pred, self.iter_subtrees())
def find_data(self, data):
    """Return the result of ``find_pred`` for subtrees whose ``data`` equals ``data``."""
    def has_data(subtree):
        return subtree.data == data

    return self.find_pred(has_data)
@@ -650,6 +650,62 @@ def _make_parser_test(LEXER, PARSER): | |||
self.assertEqual(res.children[0].data, 'a') | |||
@unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
def test_earley_prioritization_sum(self):
    "Tests effect of priority on result"
    # Each case pairs a grammar with the concatenated child names expected
    # under `start` when parsing 'abba'. The grammars differ only in the
    # priorities attached to the terminal-like rules.
    cases = [
        ("""
        start: ab_ b_ a_ | indirection
        indirection: a_ bb_ a_
        a_: "a"
        b_: "b"
        ab_: "ab"
        bb_.1: "bb"
        """, 'ab_b_a_'),
        ("""
        start: ab_ b_ a_ | indirection
        indirection: a_ bb_ a_
        a_: "a"
        b_: "b"
        ab_.1: "ab"
        bb_: "bb"
        """, 'indirection'),
        ("""
        start: ab_ b_ a_ | indirection
        indirection: a_ bb_ a_
        a_.2: "a"
        b_.1: "b"
        ab_.3: "ab"
        bb_.3: "bb"
        """, 'ab_b_a_'),
        ("""
        start: ab_ b_ a_ | indirection
        indirection: a_ bb_ a_
        a_.1: "a"
        b_.1: "b"
        ab_.4: "ab"
        bb_.3: "bb"
        """, 'indirection'),
    ]

    for grammar, expected in cases:
        parser = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum')
        parsed = parser.parse('abba')
        child_names = ''.join(child.data for child in parsed.children)
        self.assertEqual(child_names, expected)
_NAME = "Test" + PARSER.capitalize() + (LEXER or 'Scanless').capitalize() | |||
_TestParser.__name__ = _NAME | |||