Browse Source

support the cache having a half life...

This will help keep popular tags near the top, while expiring less
used tags
main
John-Mark Gurney 1 year ago
parent
commit
fda8fb6d07
1 changed files with 63 additions and 13 deletions
  1. +63
    -13
      ui/medashare/tags.py

+ 63
- 13
ui/medashare/tags.py View File

@@ -1,8 +1,10 @@
import collections
import itertools
import math
import pathlib
import shutil
import tempfile
import time
import unittest
from .utils import _asn1coder

@@ -23,6 +25,10 @@ class TagCache:

tags is the initial starting state of the cache.

half_life is the half life of a tag, measured in seconds.
Each time a tag is added, its count increases by one, but older
counts are decayed according to the specified half life.

Current use is to create a starter object as such:
cache = TagCache(), and modify count as needed:
cache.count = 10
@@ -35,10 +41,15 @@ class TagCache:
self._cache = collections.OrderedDict((x, None) for x in tags)
self._count = count
self._modified = False
if 'limit' in kwargs:
self._limit = kwargs['limit']
else:
self._limit = self._count

# λ = ln(2) / t1/2
hl = kwargs.pop('half_life', None)
self._lambda = None if not hl else math.log(2) / hl
self._limit = kwargs.pop('limit', self._count)

if kwargs:
raise TypeError('unknown kwarg(s): %s' %
', '.join(kwargs.keys()))

def _limit_count(self):
while len(self._cache) > self._limit:
@@ -83,20 +94,41 @@ class TagCache:

self._modified = True

try:
del self._cache[tag]
except KeyError:
pass
oldtime, oldval = self._cache.pop(tag, (0, 0))

self._cache[tag] = None
t = time.time()

# N(t) = N0 e^(-λt)
if self._lambda:
v = (t, oldval * math.exp(-self._lambda * (t -
oldtime)) + 1)
else:
v = 0, 0

self._cache[tag] = v

self._limit_count()

def _update_values(self):
'''Update the values to match the current time.'''

t = time.time()

for k, (oldtime, oldval) in self._cache.items():
v = (t, oldval * math.exp(-self._lambda * (t -
oldtime)) + 1)
self._cache[k] = v

def tags(self):
'''Returns the sorted list of tags in the cache.'''

return sorted(itertools.islice(self._cache.keys(),
len(self._cache) - self._count, len(self._cache)))
if self._lambda:
self._update_values()
return sorted(sorted(self._cache.keys(), key=lambda x:
self._cache[x][1], reverse=True)[:self._count])
else:
return sorted(itertools.islice(self._cache.keys(),
len(self._cache) - self._count, len(self._cache)))

def __repr__(self):
return 'TagCache(tags=%s, count=%d, limit=%d)' % \
@@ -143,10 +175,28 @@ class _TestTagCache(unittest.TestCase):
shutil.rmtree(self.basetempdir)
self.tempdir = None

def test_extra_kwargs(self):
    '''Unrecognized keyword arguments are rejected with TypeError.'''

    self.assertRaises(TypeError, TagCache, randomkwargs=True)

# Fake clock: every call to time.time() returns the next whole second
# (0.0, 1.0, 2.0, ...).  The counter lives in the lambda's default
# argument so that it persists across calls.
# NOTE(review): this relies on unittest.mock having been imported
# somewhere; confirm the file has "import unittest.mock".
@unittest.mock.patch('time.time', side_effect=lambda cnt=
    itertools.count(): next(cnt) * 1.)
def test_halflife(self, tt):
    '''A tag that is added more often is preferred over newer tags.'''

    tc = TagCache(count=2, limit=10, half_life=10)

    # that when it is added twice
    tc.add(('foo', 'foo'))
    tc.add(('foo', 'foo'))

    # and two others are added
    tc.add(('bar', 'bar'))
    tc.add(('baz', 'baz'))

    # it will have preference
    self.assertEqual(tc.tags(), [ ('baz', 'baz'), ('foo', 'foo'), ])

    # XXX - deal with limit better, that is, drop the small value,
    # not the last

def test_limit(self):
tc = TagCache(count=2, limit=3)


Loading…
Cancel
Save