Browse Source

Added resolve__antiscore_sum

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 7 years ago
parent
commit
5d41371fb3
5 changed files with 114 additions and 19 deletions
  1. +1
    -1
      lark/lark.py
  2. +3
    -1
      lark/parser_frontends.py
  3. +48
    -10
      lark/parsers/resolve_ambig.py
  4. +6
    -7
      lark/tree.py
  5. +56
    -0
      tests/test_parser.py

+ 1
- 1
lark/lark.py View File

@@ -140,7 +140,7 @@ class Lark:
self.options.ambiguity = 'resolve'
else:
assert self.options.parser == 'earley', "Only Earley supports disambiguation right now"
assert self.options.ambiguity in ('resolve', 'explicit', 'auto')
assert self.options.ambiguity in ('resolve', 'explicit', 'auto', 'resolve__antiscore_sum')

# Parse the grammar file and compose the grammars (TODO)
self.grammar = load_grammar(grammar, source)


+ 3
- 1
lark/parser_frontends.py View File

@@ -50,7 +50,9 @@ class LALR_ContextualLexer:

def get_ambiguity_resolver(options):
if not options or options.ambiguity == 'resolve':
return resolve_ambig.resolve_ambig
return resolve_ambig.standard_resolve_ambig
elif options.ambiguity == 'resolve__antiscore_sum':
return resolve_ambig.antiscore_sum_resolve_ambig
elif options.ambiguity == 'explicit':
return None
raise ValueError(options)


+ 48
- 10
lark/parsers/resolve_ambig.py View File

@@ -3,6 +3,11 @@ from functools import cmp_to_key

from ..tree import Tree, Visitor_NoRecurse


# Standard ambiguity resolver (uses comparison)
#
# Author: Erez Sh

def _compare_rules(rule1, rule2):
if rule1.origin != rule2.origin:
if rule1.options and rule2.options:
@@ -31,9 +36,9 @@ def _compare_drv(tree1, tree2):
# when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be
# computationally inefficient. So we handle it here.
if tree1.data == '_ambig':
_resolve_ambig(tree1)
_standard_resolve_ambig(tree1)
if tree2.data == '_ambig':
_resolve_ambig(tree2)
_standard_resolve_ambig(tree2)

c = _compare_rules(tree1.rule, tree2.rule)
if c:
@@ -48,21 +53,54 @@ def _compare_drv(tree1, tree2):
return compare(len(tree1.children), len(tree2.children))


def _resolve_ambig(tree):
def _standard_resolve_ambig(tree):
assert tree.data == '_ambig'

best = min(tree.children, key=cmp_to_key(_compare_drv))
assert best.data == 'drv'
tree.set('drv', best.children)
tree.rule = best.rule # needed for applying callbacks

assert tree.data != '_ambig'
def standard_resolve_ambig(tree):
    """Resolve every '_ambig' node under `tree` with the standard resolver.

    Each node yielded by ``tree.find_data('_ambig')`` is handed to
    ``_standard_resolve_ambig``, which rewrites that node in place.

    Returns the same `tree` object that was passed in.
    """
    # Do not materialise the iterator up front: nodes are rewritten while
    # the walk is still in progress, exactly as in a plain for-loop over it.
    ambiguous_nodes = tree.find_data('_ambig')
    for node in ambiguous_nodes:
        _standard_resolve_ambig(node)

    return tree



class ResolveAmbig(Visitor_NoRecurse):
def _ambig(self, tree):
_resolve_ambig(tree)

# Anti-score Sum
#
# Author: Uriva (https://github.com/uriva)

def _antiscore_sum_drv(tree):
    """Return the anti-score of a derivation: the sum of rule priorities in its subtree.

    Children that are not ``Tree`` instances contribute 0.  A node still
    marked '_ambig' is resolved in place first, so it is scored as the
    derivation chosen for it.  A node without a usable
    ``rule.options.priority`` (missing attribute, or a falsy value)
    contributes 0 for itself; its children are still summed.
    """
    if not isinstance(tree, Tree):
        return 0

    # XXX These artifacts can appear due to imperfections in the ordering of Visitor_NoRecurse,
    #     when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be
    #     computationally inefficient. So we handle it here.
    if tree.data == '_ambig':
        _antiscore_sum_resolve_ambig(tree)

    try:
        own_priority = tree.rule.options.priority
    except AttributeError:
        # Probably trees that don't take part in this parse (better way to distinguish?)
        own_priority = None

    # Recurse into the children only after this node's own priority was read.
    children_total = sum(_antiscore_sum_drv(child) for child in tree.children)
    return (own_priority or 0) + children_total

def _antiscore_sum_resolve_ambig(tree):
    """Collapse one '_ambig' node into its lowest anti-score derivation, in place.

    The children of `tree` are the candidate derivations.  The candidate
    with the smallest ``_antiscore_sum_drv`` value is selected (on a tie,
    ``min`` keeps the first one), and `tree` takes over its children and
    rule while being relabelled 'drv'.
    """
    assert tree.data == '_ambig'

    winner = min(tree.children, key=_antiscore_sum_drv)
    assert winner.data == 'drv'

    tree.set('drv', winner.children)
    # The rule is needed later for applying callbacks.
    tree.rule = winner.rule

def antiscore_sum_resolve_ambig(tree):
for ambig in tree.find_data('_ambig'):
_antiscore_sum_resolve_ambig(ambig)

def resolve_ambig(tree):
ResolveAmbig().visit(tree)
return tree

+ 6
- 7
lark/tree.py View File

@@ -1,3 +1,8 @@
try:
from future_builtins import filter
except ImportError:
pass

from copy import deepcopy

from .utils import inline_args
@@ -44,13 +49,7 @@ class Tree(object):
return hash((self.data, tuple(self.children)))

def find_pred(self, pred):
if pred(self):
yield self

for c in self.children:
if isinstance(c, Tree):
for t in c.find_pred(pred):
yield t
return filter(pred, self.iter_subtrees())

def find_data(self, data):
return self.find_pred(lambda t: t.data == data)


+ 56
- 0
tests/test_parser.py View File

@@ -650,6 +650,62 @@ def _make_parser_test(LEXER, PARSER):
self.assertEqual(res.children[0].data, 'a')


@unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
def test_earley_prioritization_sum(self):
    """Tests effect of priority on result

    Every case parses 'abba' with ambiguity='resolve__antiscore_sum'.  The
    input can be derived either directly (ab_ b_ a_) or through
    `indirection` (a_ bb_ a_); the expected result is the alternative whose
    rule priorities add up to the smaller total.
    """
    # (grammar, expected concatenation of the top-level child names).
    # The grammar literals are kept flush-left so their text is unchanged.
    cases = [
        # direct: 0, indirection: 1
        ("""
start: ab_ b_ a_ | indirection
indirection: a_ bb_ a_
a_: "a"
b_: "b"
ab_: "ab"
bb_.1: "bb"
""", 'ab_b_a_'),
        # direct: 1, indirection: 0
        ("""
start: ab_ b_ a_ | indirection
indirection: a_ bb_ a_
a_: "a"
b_: "b"
ab_.1: "ab"
bb_: "bb"
""", 'indirection'),
        # direct: 3 + 1 + 2 = 6, indirection: 2 + 3 + 2 = 7
        ("""
start: ab_ b_ a_ | indirection
indirection: a_ bb_ a_
a_.2: "a"
b_.1: "b"
ab_.3: "ab"
bb_.3: "bb"
""", 'ab_b_a_'),
        # direct: 4 + 1 + 1 = 6, indirection: 1 + 3 + 1 = 5
        ("""
start: ab_ b_ a_ | indirection
indirection: a_ bb_ a_
a_.1: "a"
b_.1: "b"
ab_.4: "ab"
bb_.3: "bb"
""", 'indirection'),
    ]

    for grammar, expected in cases:
        l = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum')
        res = l.parse('abba')
        self.assertEqual(''.join(child.data for child in res.children), expected)


_NAME = "Test" + PARSER.capitalize() + (LEXER or 'Scanless').capitalize()
_TestParser.__name__ = _NAME


Loading…
Cancel
Save