From 9b731f51e43899996e893ad7315756d8f9eb3648 Mon Sep 17 00:00:00 2001
From: Thomas Kemmer
Date: Sun, 19 Dec 2021 11:40:51 +0100
Subject: [PATCH] Fix #157: Add TLRU cache implementation.

---
 docs/index.rst             |  42 +++++-
 src/cachetools/__init__.py | 161 ++++++++++++++++++++++
 tests/test_tlru.py         | 267 +++++++++++++++++++++++++++++++++++++
 3 files changed, 469 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_tlru.py

diff --git a/docs/index.rst b/docs/index.rst
index 5673bca..0285e3c 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -20,7 +20,7 @@ method calls.
 .. testsetup:: *
 
    import operator
-   from cachetools import cached, cachedmethod, LRUCache, TTLCache
+   from cachetools import cached, cachedmethod, LRUCache, TLRUCache, TTLCache
    from unittest import mock
 
    urllib = mock.MagicMock()
@@ -140,6 +140,46 @@ computed when the item is inserted into the cache.
       items that have expired by the current value returned by
       :attr:`timer`.
 
+.. autoclass:: TLRUCache(maxsize, ttu, timer=time.monotonic, getsizeof=None)
+   :members: popitem, timer, ttu
+
+   Similar to :class:`TTLCache`, this class also associates an
+   expiration time with each item.  However, for :class:`TLRUCache`
+   items, expiration time is calculated by a user-provided time-to-use
+   (`ttu`) function, which is passed three arguments at the time of
+   insertion: the new item's key and value, as well as the current
+   value of `timer()`.
+
+   .. testcode::
+
+      from datetime import datetime, timedelta
+
+      def my_ttu(_key, value, now):
+          # assume value.ttl contains the item's time-to-live in hours
+          return now + timedelta(hours=value.ttl)
+
+      cache = TLRUCache(maxsize=10, ttu=my_ttu, timer=datetime.now)
+
+   The expression `ttu(key, value, timer())` defines the expiration
+   time of a cache item, and must be comparable against later results
+   of `timer()`.
+
+   Items whose time-to-use has expired are no longer accessible and
+   will eventually be removed.  If there are no expired items to
+   remove, the least recently used items will be discarded first to
+   make space when necessary.
+
+   .. method:: expire(self, time=None)
+
+      Expired items will be removed from a cache only at the next
+      mutating operation, e.g. :meth:`__setitem__` or
+      :meth:`__delitem__`, and therefore may still claim memory.
+      Calling this method removes all items whose time-to-use would
+      have expired by `time`, so garbage collection is free to reuse
+      their memory.  If `time` is :const:`None`, this removes all
+      items that have expired by the current value returned by
+      :attr:`timer`.
+
 
 Extending cache classes
 -----------------------
diff --git a/src/cachetools/__init__.py b/src/cachetools/__init__.py
index dc432c0..85b78ef 100644
--- a/src/cachetools/__init__.py
+++ b/src/cachetools/__init__.py
@@ -7,6 +7,7 @@
     "LRUCache",
     "MRUCache",
     "RRCache",
+    "TLRUCache",
     "TTLCache",
     "cached",
     "cachedmethod",
@@ -17,6 +18,7 @@
 import collections
 import collections.abc
 import functools
+import heapq
 import random
 import time
 
@@ -497,6 +499,165 @@ def __getlink(self, key):
         return value
 
 
+@functools.total_ordering
+class _TLRUItem:
+
+    __slots__ = ("key", "expires", "removed")
+
+    def __init__(self, key=None, expires=None):
+        self.key = key
+        self.expires = expires
+        self.removed = False
+
+    def __lt__(self, other):
+        return self.expires < other.expires
+
+
+class TLRUCache(Cache):
+    """Time aware Least Recently Used (TLRU) cache implementation."""
+
+    def __init__(self, maxsize, ttu, timer=time.monotonic, getsizeof=None):
+        Cache.__init__(self, maxsize, getsizeof)
+        self.__items = collections.OrderedDict()
+        self.__order = []
+        self.__timer = _Timer(timer)
+        self.__ttu = ttu
+
+    def __contains__(self, key):
+        try:
+            item = self.__items[key]  # no reordering
+        except KeyError:
+            return False
+        else:
+            return self.__timer() < item.expires
+
+    def __getitem__(self, key, cache_getitem=Cache.__getitem__):
+        try:
+            item = self.__getitem(key)
+        except KeyError:
+            expired = False
+        else:
+            expired = not (self.__timer() < item.expires)
+        if expired:
+            return self.__missing__(key)
+        else:
+            return cache_getitem(self, key)
+
+    def __setitem__(self, key, value, cache_setitem=Cache.__setitem__):
+        with self.__timer as time:
+            expires = self.__ttu(key, value, time)
+            if not (time < expires):
+                return  # skip expired items
+            self.expire(time)
+            cache_setitem(self, key, value)
+        # removing an existing item would break the heap structure, so
+        # only mark it as removed for now
+        try:
+            self.__getitem(key).removed = True
+        except KeyError:
+            pass
+        self.__items[key] = item = _TLRUItem(key, expires)
+        heapq.heappush(self.__order, item)
+
+    def __delitem__(self, key, cache_delitem=Cache.__delitem__):
+        with self.__timer as time:
+            # no self.expire() for performance reasons, e.g. self.clear() [#67]
+            cache_delitem(self, key)
+            item = self.__items.pop(key)
+            item.removed = True
+            if not (time < item.expires):
+                raise KeyError(key)
+
+    def __iter__(self):
+        for curr in self.__order:
+            # "freeze" time for iterator access
+            with self.__timer as time:
+                if time < curr.expires and not curr.removed:
+                    yield curr.key
+
+    def __len__(self):
+        time = self.__timer()
+        count = 0
+        for curr in self.__order:
+            if time < curr.expires and not curr.removed:
+                count += 1
+        return count
+
+    def __repr__(self, cache_repr=Cache.__repr__):
+        with self.__timer as time:
+            self.expire(time)
+            return cache_repr(self)
+
+    @property
+    def currsize(self):
+        with self.__timer as time:
+            self.expire(time)
+            return super().currsize
+
+    @property
+    def timer(self):
+        """The timer function used by the cache."""
+        return self.__timer
+
+    @property
+    def ttu(self):
+        """The local time-to-use function used by the cache."""
+        return self.__ttu
+
+    def expire(self, time=None):
+        """Remove expired items from the cache."""
+        if time is None:
+            time = self.__timer()
+        items = self.__items
+        order = self.__order
+        # clean up the heap if too many items are marked as removed
+        if len(order) > len(items) * 2:
+            self.__order = order = [item for item in order if not item.removed]
+            heapq.heapify(order)
+        cache_delitem = Cache.__delitem__
+        while order and (order[0].removed or not (time < order[0].expires)):
+            item = heapq.heappop(order)
+            if not item.removed:
+                cache_delitem(self, item.key)
+                del items[item.key]
+
+    def clear(self):
+        with self.__timer as time:
+            self.expire(time)
+            Cache.clear(self)
+
+    def get(self, *args, **kwargs):
+        with self.__timer:
+            return Cache.get(self, *args, **kwargs)
+
+    def pop(self, *args, **kwargs):
+        with self.__timer:
+            return Cache.pop(self, *args, **kwargs)
+
+    def setdefault(self, *args, **kwargs):
+        with self.__timer:
+            return Cache.setdefault(self, *args, **kwargs)
+
+    def popitem(self):
+        """Remove and return the `(key, value)` pair least recently used that
+        has not already expired.
+
+        """
+        with self.__timer as time:
+            self.expire(time)
+            try:
+                key = next(iter(self.__items))
+            except StopIteration:
+                raise KeyError("%s is empty" % self.__class__.__name__) from None
+            else:
+                return (key, self.pop(key))
+
+    def __getitem(self, key):
+        value = self.__items[key]
+        self.__items.move_to_end(key)
+        return value
+
+
 def cached(cache, key=hashkey, lock=None):
     """Decorator to wrap a function with a memoizing callable that saves
     results in a cache.
diff --git a/tests/test_tlru.py b/tests/test_tlru.py
new file mode 100644
index 0000000..e72203f
--- /dev/null
+++ b/tests/test_tlru.py
@@ -0,0 +1,267 @@
+import math
+import unittest
+
+from cachetools import TLRUCache
+
+from . import CacheTestMixin
+
+
+class Timer:
+    def __init__(self, auto=False):
+        self.auto = auto
+        self.time = 0
+
+    def __call__(self):
+        if self.auto:
+            self.time += 1
+        return self.time
+
+    def tick(self):
+        self.time += 1
+
+
+class TLRUTestCache(TLRUCache):
+    def default_ttu(_key, _value, _time):
+        return math.inf
+
+    def __init__(self, maxsize, ttu=default_ttu, **kwargs):
+        TLRUCache.__init__(self, maxsize, ttu, timer=Timer(), **kwargs)
+
+
+class TLRUCacheTest(unittest.TestCase, CacheTestMixin):
+
+    Cache = TLRUTestCache
+
+    def test_ttu(self):
+        cache = TLRUCache(maxsize=6, ttu=lambda _, v, t: t + v + 1, timer=Timer())
+        self.assertEqual(0, cache.timer())
+        self.assertEqual(3, cache.ttu(None, 1, 1))
+
+        cache[1] = 1
+        self.assertEqual({1}, set(cache))
+        self.assertEqual(1, len(cache))
+        self.assertEqual(1, cache[1])
+
+        cache.timer.tick()
+        self.assertEqual({1}, set(cache))
+        self.assertEqual(1, len(cache))
+        self.assertEqual(1, cache[1])
+
+        cache[2] = 2
+        self.assertEqual({1, 2}, set(cache))
+        self.assertEqual(2, len(cache))
+        self.assertEqual(1, cache[1])
+        self.assertEqual(2, cache[2])
+
+        cache.timer.tick()
+        self.assertEqual({2}, set(cache))
+        self.assertEqual(1, len(cache))
+        self.assertNotIn(1, cache)
+        self.assertEqual(2, cache[2])
+
+        cache[3] = 3
+        self.assertEqual({2, 3}, set(cache))
+        self.assertEqual(2, len(cache))
+        self.assertNotIn(1, cache)
+        self.assertEqual(2, cache[2])
+        self.assertEqual(3, cache[3])
+
+        cache.timer.tick()
+        self.assertEqual({2, 3}, set(cache))
+        self.assertEqual(2, len(cache))
+        self.assertNotIn(1, cache)
+        self.assertEqual(2, cache[2])
+        self.assertEqual(3, cache[3])
+
+        cache[1] = 1
+        self.assertEqual({1, 2, 3}, set(cache))
+        self.assertEqual(3, len(cache))
+        self.assertEqual(1, cache[1])
+        self.assertEqual(2, cache[2])
+        self.assertEqual(3, cache[3])
+
+        cache.timer.tick()
+        self.assertEqual({1, 3}, set(cache))
+        self.assertEqual(2, len(cache))
+        self.assertEqual(1, cache[1])
+        self.assertNotIn(2, cache)
+        self.assertEqual(3, cache[3])
+
+        cache.timer.tick()
+        self.assertEqual({3}, set(cache))
+        self.assertEqual(1, len(cache))
+        self.assertNotIn(1, cache)
+        self.assertNotIn(2, cache)
+        self.assertEqual(3, cache[3])
+
+        cache.timer.tick()
+        self.assertEqual(set(), set(cache))
+        self.assertEqual(0, len(cache))
+        self.assertNotIn(1, cache)
+        self.assertNotIn(2, cache)
+        self.assertNotIn(3, cache)
+
+        with self.assertRaises(KeyError):
+            del cache[1]
+        with self.assertRaises(KeyError):
+            cache.pop(2)
+        with self.assertRaises(KeyError):
+            del cache[3]
+
+    def test_ttu_lru(self):
+        cache = TLRUCache(maxsize=2, ttu=lambda k, v, t: t + 1, timer=Timer())
+        self.assertEqual(0, cache.timer())
+        self.assertEqual(2, cache.ttu(None, None, 1))
+
+        cache[1] = 1
+        cache[2] = 2
+        cache[3] = 3
+
+        self.assertEqual(len(cache), 2)
+        self.assertNotIn(1, cache)
+        self.assertEqual(cache[2], 2)
+        self.assertEqual(cache[3], 3)
+
+        cache[2]
+        cache[4] = 4
+        self.assertEqual(len(cache), 2)
+        self.assertNotIn(1, cache)
+        self.assertEqual(cache[2], 2)
+        self.assertNotIn(3, cache)
+        self.assertEqual(cache[4], 4)
+
+        cache[5] = 5
+        self.assertEqual(len(cache), 2)
+        self.assertNotIn(1, cache)
+        self.assertNotIn(2, cache)
+        self.assertNotIn(3, cache)
+        self.assertEqual(cache[4], 4)
+        self.assertEqual(cache[5], 5)
+
+    def test_ttu_expire(self):
+        cache = TLRUCache(maxsize=3, ttu=lambda k, v, t: t + 3, timer=Timer())
+        with cache.timer as time:
+            self.assertEqual(time, cache.timer())
+
+        cache[1] = 1
+        cache.timer.tick()
+        cache[2] = 2
+        cache.timer.tick()
+        cache[3] = 3
+        self.assertEqual(2, cache.timer())
+
+        self.assertEqual({1, 2, 3}, set(cache))
+        self.assertEqual(3, len(cache))
+        self.assertEqual(1, cache[1])
+        self.assertEqual(2, cache[2])
+        self.assertEqual(3, cache[3])
+
+        cache.expire()
+        self.assertEqual({1, 2, 3}, set(cache))
+        self.assertEqual(3, len(cache))
+        self.assertEqual(1, cache[1])
+        self.assertEqual(2, cache[2])
+        self.assertEqual(3, cache[3])
+
+        cache.expire(3)
+        self.assertEqual({2, 3}, set(cache))
+        self.assertEqual(2, len(cache))
+        self.assertNotIn(1, cache)
+        self.assertEqual(2, cache[2])
+        self.assertEqual(3, cache[3])
+
+        cache.expire(4)
+        self.assertEqual({3}, set(cache))
+        self.assertEqual(1, len(cache))
+        self.assertNotIn(1, cache)
+        self.assertNotIn(2, cache)
+        self.assertEqual(3, cache[3])
+
+        cache.expire(5)
+        self.assertEqual(set(), set(cache))
+        self.assertEqual(0, len(cache))
+        self.assertNotIn(1, cache)
+        self.assertNotIn(2, cache)
+        self.assertNotIn(3, cache)
+
+    def test_ttu_expired(self):
+        cache = TLRUCache(maxsize=1, ttu=lambda k, _, t: t + k, timer=Timer())
+        cache[1] = None
+        self.assertEqual(cache[1], None)
+        self.assertEqual(1, len(cache))
+        cache[0] = None
+        self.assertNotIn(0, cache)
+        self.assertEqual(cache[1], None)
+        self.assertEqual(1, len(cache))
+        cache[-1] = None
+        self.assertNotIn(-1, cache)
+        self.assertNotIn(0, cache)
+        self.assertEqual(cache[1], None)
+        self.assertEqual(1, len(cache))
+
+    def test_ttu_atomic(self):
+        cache = TLRUCache(maxsize=1, ttu=lambda k, v, t: t + 2, timer=Timer(auto=True))
+        cache[1] = 1
+        self.assertEqual(1, cache[1])
+        cache[1] = 1
+        self.assertEqual(1, cache.get(1))
+        cache[1] = 1
+        self.assertEqual(1, cache.pop(1))
+        cache[1] = 1
+        self.assertEqual(1, cache.setdefault(1))
+        cache[1] = 1
+        cache.clear()
+        self.assertEqual(0, len(cache))
+
+    def test_ttu_tuple_key(self):
+        cache = TLRUCache(maxsize=1, ttu=lambda k, v, t: t + 1, timer=Timer())
+
+        cache[(1, 2, 3)] = 42
+        self.assertEqual(42, cache[(1, 2, 3)])
+        cache.timer.tick()
+        with self.assertRaises(KeyError):
+            cache[(1, 2, 3)]
+        self.assertNotIn((1, 2, 3), cache)
+
+    def test_ttu_reverse_insert(self):
+        cache = TLRUCache(maxsize=4, ttu=lambda k, v, t: t + v, timer=Timer())
+        self.assertEqual(0, cache.timer())
+
+        cache[3] = 3
+        cache[2] = 2
+        cache[1] = 1
+        cache[0] = 0
+
+        self.assertEqual({1, 2, 3}, set(cache))
+        self.assertEqual(3, len(cache))
+        self.assertNotIn(0, cache)
+        self.assertEqual(1, cache[1])
+        self.assertEqual(2, cache[2])
+        self.assertEqual(3, cache[3])
+
+        cache.timer.tick()
+
+        self.assertEqual({2, 3}, set(cache))
+        self.assertEqual(2, len(cache))
+        self.assertNotIn(0, cache)
+        self.assertNotIn(1, cache)
+        self.assertEqual(2, cache[2])
+        self.assertEqual(3, cache[3])
+
+        cache.timer.tick()
+
+        self.assertEqual({3}, set(cache))
+        self.assertEqual(1, len(cache))
+        self.assertNotIn(0, cache)
+        self.assertNotIn(1, cache)
+        self.assertNotIn(2, cache)
+        self.assertEqual(3, cache[3])
+
+        cache.timer.tick()
+
+        self.assertEqual(set(), set(cache))
+        self.assertEqual(0, len(cache))
+        self.assertNotIn(0, cache)
+        self.assertNotIn(1, cache)
+        self.assertNotIn(2, cache)
+        self.assertNotIn(3, cache)
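
For trying the patch out, here is a minimal usage sketch (not part of the diff above) that drives per-item expiration from the cached value, assuming only the TLRUCache constructor signature (maxsize, ttu, timer) and the cached decorator shown in this change. The my_ttu callback, the fetch function, and the "lifetime" field are hypothetical illustrations, not part of cachetools:

    import time

    from cachetools import TLRUCache, cached


    def my_ttu(_key, value, now):
        # Hypothetical schema: each cached value carries its own lifetime in
        # seconds, so the item expires at insertion time plus that lifetime.
        return now + value["lifetime"]


    # Up to 100 items; each item expires according to my_ttu, and the least
    # recently used entries are evicted first when the cache is full.
    cache = TLRUCache(maxsize=100, ttu=my_ttu, timer=time.monotonic)


    @cached(cache)
    def fetch(name):
        # Stand-in for an expensive lookup returning a value with a lifetime.
        return {"name": name, "lifetime": 30}


    if __name__ == "__main__":
        print(fetch("spam"))  # computed and stored for roughly 30 seconds
        print(fetch("spam"))  # served from the cache until the item expires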