support the cache having a half life...

This will help keep popular tags near the top, while expiring less used tags
2 years ago · fda8fb6d07
--- a/ui/medashare/tags.py
+++ b/ui/medashare/tags.py
@@ -1,8 +1,10 @@
 import collections
 import itertools
 import math
 import pathlib
 import shutil
 import tempfile
 import time
 import unittest
 from .utils import _asn1coder

@@ -23,6 +25,10 @@ class TagCache:

 	tags is the initial starting state of the cache.

 	half_life is the half life of a tag measured in seconds.
 	Each time a tag is added, it counts as one more, but old
 	values are decayed per the specified half life.

 	Current use is to create a starter object as such:
 	cache = TagCache(), and modify count as needed:
 	cache.count = 10
@@ -35,10 +41,15 @@ class TagCache:
 		self._cache = collections.OrderedDict((x, None) for x in tags)
 		self._count = count
 		self._modified = False
 		if 'limit' in kwargs:
 			self._limit = kwargs['limit']
 		else:
 			self._limit = self._count

 		# λ = ln(2) / t1/2
 		hl = kwargs.pop('half_life', None)
 		self._lambda = None if not hl else math.log(2) / hl
 		self._limit = kwargs.pop('limit', self._count)

 		if kwargs:
 			raise TypeError('unknown kwarg(s): %s' %
 			    ', '.join(kwargs.keys()))

 	def _limit_count(self):
 		while len(self._cache) > self._limit:
@@ -83,20 +94,41 @@ class TagCache:

 		self._modified = True

 		try:
 			del self._cache[tag]
 		except KeyError:
 			pass
 		oldtime, oldval = self._cache.pop(tag, (0, 0))

 		self._cache[tag] = None
 		t = time.time()

 		# N(t) = N0 e^(-λt)
 		if self._lambda:
 			v = (t, oldval * math.exp(-self._lambda * (t -
 			    oldtime)) + 1)
 		else:
 			v = 0, 0

 		self._cache[tag] = v

 		self._limit_count()

 	def _update_values(self):
 		'''Update the values to match the current time.'''

 		t = time.time()

 		for k, (oldtime, oldval) in self._cache.items():
 			v = (t, oldval * math.exp(-self._lambda * (t -
 			    oldtime)) + 1)
 			self._cache[k] = v

 	def tags(self):
 		'''Returns the sorted list of tags in the cache.'''

 		return sorted(itertools.islice(self._cache.keys(),
 		    len(self._cache) - self._count, len(self._cache)))
 		if self._lambda:
 			self._update_values()
 			return sorted(sorted(self._cache.keys(), key=lambda x:
 			    self._cache[x][1], reverse=True)[:self._count])
 		else:
 			return sorted(itertools.islice(self._cache.keys(),
 			    len(self._cache) - self._count, len(self._cache)))

 	def __repr__(self):
 		return 'TagCache(tags=%s, count=%d, limit=%d)' % \
@@ -143,10 +175,28 @@ class _TestTagCache(unittest.TestCase):
 		shutil.rmtree(self.basetempdir)
 		self.tempdir = None

 	def test_extra_kwargs(self):
 		'''Unknown keyword arguments are rejected with a TypeError.'''

 		self.assertRaises(TypeError, TagCache, randomkwargs=True)

 	@unittest.mock.patch('time.time', side_effect=lambda cnt=
 	    itertools.count(): next(cnt) * 1.)
 	def test_halflife(self, tt):
 		'''A tag added more often outranks tags added only once.

 		time.time is patched to return 0., 1., 2., ... on successive
 		calls, so the decay between operations is deterministic.
 		'''

 		tc = TagCache(count=2, limit=10, half_life=10)

 		# when one tag is added twice
 		tc.add(('foo', 'foo'))
 		tc.add(('foo', 'foo'))

 		# and two others are added once each
 		tc.add(('bar', 'bar'))
 		tc.add(('baz', 'baz'))

 		# the tag added twice is kept, along with the most recent
 		# of the others
 		self.assertEqual(tc.tags(), [ ('baz', 'baz'), ('foo', 'foo'), ])

 		# XXX - deal with limit better, that is, drop the small value,
 		# not the last

 	def test_limit(self):
 		tc = TagCache(count=2, limit=3)