From 9d7e050883c59fc9721e5c30fb78d8c4a3f5bb3f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 14 May 2021 14:00:37 +0100 Subject: [PATCH 1/3] Make `del_multi` independent of `keylen` Changes the way TreeCache works, so that it's easier to tell when you have removed a subtree or a leaf node from the tree - hence `del_multi` doesn't need to care about `keylen`. --- synapse/util/caches/lrucache.py | 6 +- synapse/util/caches/treecache.py | 104 ++++++++++++++++++++----------- tests/util/test_treecache.py | 6 +- 3 files changed, 74 insertions(+), 42 deletions(-) diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index 1be675e014af..826951dcdc9c 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -34,7 +34,7 @@ from synapse.config import cache as cache_config from synapse.util import caches from synapse.util.caches import CacheMetric, register_cache -from synapse.util.caches.treecache import TreeCache +from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry try: from pympler.asizeof import Asizer @@ -403,7 +403,9 @@ def cache_del_multi(key: KT) -> None: popped = cache.pop(key) if popped is None: return - for leaf in enumerate_leaves(popped, keylen - len(cast(tuple, key))): + # for each deleted node, we now need to remove it from the linked list + # and run its callbacks. + for leaf in iterate_tree_cache_entry(popped): delete_node(leaf) @synchronized diff --git a/synapse/util/caches/treecache.py b/synapse/util/caches/treecache.py index eb4d98f683cb..73502a8b061b 100644 --- a/synapse/util/caches/treecache.py +++ b/synapse/util/caches/treecache.py @@ -1,18 +1,43 @@ -from typing import Dict +# Copyright 2016-2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. SENTINEL = object() +class TreeCacheNode(dict): + """The type of nodes in our tree. + + Has its own type so we can distinguish it from real dicts that are stored at the + leaves. + """ + + pass + + class TreeCache: """ Tree-based backing store for LruCache. Allows subtrees of data to be deleted efficiently. Keys must be tuples. + + The data structure is a chain of TreeCacheNodes: + root = {key_1: {key_2: _value}} """ def __init__(self): self.size = 0 - self.root = {} # type: Dict + self.root = TreeCacheNode() def __setitem__(self, key, value): return self.set(key, value) @@ -21,10 +46,23 @@ def __contains__(self, key): return self.get(key, SENTINEL) is not SENTINEL def set(self, key, value): + if isinstance(value, TreeCacheNode): + # this would mean we couldn't tell where our tree ended and the value + # started. + raise ValueError("Cannot store TreeCacheNodes in a TreeCache") + node = self.root for k in key[:-1]: - node = node.setdefault(k, {}) - node[key[-1]] = _Entry(value) + next_node = node.get(k, SENTINEL) + if next_node is SENTINEL: + next_node = node[k] = TreeCacheNode() + elif not isinstance(next_node, TreeCacheNode): + # this suggests that the caller is not being consistent with its key + # length. + raise ValueError("value conflicts with an existing subtree") + node = next_node + + node[key[-1]] = value self.size += 1 def get(self, key, default=None): @@ -33,25 +71,41 @@ def get(self, key, default=None): node = node.get(k, None) if node is None: return default - return node.get(key[-1], _Entry(default)).value + return node.get(key[-1], default) def clear(self): self.size = 0 - self.root = {} + self.root = TreeCacheNode() def pop(self, key, default=None): + """Remove the given key, or subkey, from the cache + + Args: + key: key or subkey to remove. + default: value to return if key is not found + + Returns: + If the key is not found, 'default'. If the key is complete, the removed + value. If the key is partial, the TreeCacheNode corresponding to the part + of the tree that was removed. + """ + # a list of the nodes we have touched on the way down the tree nodes = [] node = self.root for k in key[:-1]: node = node.get(k, None) - nodes.append(node) # don't add the root node if node is None: return default + if not isinstance(node, TreeCacheNode): + # we've gone off the end of the tree + raise ValueError("pop() key too long") + nodes.append(node) # don't add the root node popped = node.pop(key[-1], SENTINEL) if popped is SENTINEL: return default + # working back up the tree, clear out any nodes that are now empty node_and_keys = list(zip(nodes, key)) node_and_keys.reverse() node_and_keys.append((self.root, None)) @@ -61,14 +115,15 @@ def pop(self, key, default=None): if n: break + # found an empty node: remove it from its parent, and loop. node_and_keys[i + 1][0].pop(k) - popped, cnt = _strip_and_count_entires(popped) + cnt = sum(1 for _ in iterate_tree_cache_entry(popped)) self.size -= cnt return popped def values(self): - return list(iterate_tree_cache_entry(self.root)) + return iterate_tree_cache_entry(self.root) def __len__(self): return self.size @@ -78,36 +133,9 @@ def iterate_tree_cache_entry(d): """Helper function to iterate over the leaves of a tree, i.e. a dict of that can contain dicts. """ - if isinstance(d, dict): + if isinstance(d, TreeCacheNode): for value_d in d.values(): for value in iterate_tree_cache_entry(value_d): yield value else: - if isinstance(d, _Entry): - yield d.value - else: - yield d - - -class _Entry: - __slots__ = ["value"] - - def __init__(self, value): - self.value = value - - -def _strip_and_count_entires(d): - """Takes an _Entry or dict with leaves of _Entry's, and either returns the - value or a dictionary with _Entry's replaced by their values. - - Also returns the count of _Entry's - """ - if isinstance(d, dict): - cnt = 0 - for key, value in d.items(): - v, n = _strip_and_count_entires(value) - d[key] = v - cnt += n - return d, cnt - else: - return d.value, 1 + yield d diff --git a/tests/util/test_treecache.py b/tests/util/test_treecache.py index 3b077af27e90..60663720536d 100644 --- a/tests/util/test_treecache.py +++ b/tests/util/test_treecache.py @@ -13,7 +13,7 @@ # limitations under the License. -from synapse.util.caches.treecache import TreeCache +from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry from .. import unittest @@ -64,12 +64,14 @@ def test_pop_mixedlevel(self): cache[("a", "b")] = "AB" cache[("b", "a")] = "BA" self.assertEquals(cache.get(("a", "a")), "AA") - cache.pop(("a",)) + popped = cache.pop(("a",)) self.assertEquals(cache.get(("a", "a")), None) self.assertEquals(cache.get(("a", "b")), None) self.assertEquals(cache.get(("b", "a")), "BA") self.assertEquals(len(cache), 1) + self.assertEquals({"AA", "AB"}, set(iterate_tree_cache_entry(popped))) + def test_clear(self): cache = TreeCache() cache[("a",)] = "A" From 175c8ae9cf61ed33bc74488c1597edb3ee4826a3 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 14 May 2021 14:02:39 +0100 Subject: [PATCH 2/3] Remove redundant `keylen` param on LruCache This is no longer required. --- synapse/replication/slave/storage/client_ips.py | 2 +- synapse/storage/databases/main/client_ips.py | 2 +- synapse/storage/databases/main/devices.py | 2 +- synapse/storage/databases/main/events_worker.py | 1 - synapse/util/caches/deferred_cache.py | 2 -- synapse/util/caches/descriptors.py | 1 - synapse/util/caches/lrucache.py | 4 ---- tests/util/test_lrucache.py | 4 ++-- 8 files changed, 5 insertions(+), 13 deletions(-) diff --git a/synapse/replication/slave/storage/client_ips.py b/synapse/replication/slave/storage/client_ips.py index 8730966380de..13ed87adc4ab 100644 --- a/synapse/replication/slave/storage/client_ips.py +++ b/synapse/replication/slave/storage/client_ips.py @@ -24,7 +24,7 @@ def __init__(self, database: DatabasePool, db_conn, hs): super().__init__(database, db_conn, hs) self.client_ip_last_seen = LruCache( - cache_name="client_ip_last_seen", keylen=4, max_size=50000 + cache_name="client_ip_last_seen", max_size=50000 ) # type: LruCache[tuple, int] async def insert_client_ip(self, user_id, access_token, ip, user_agent, device_id): diff --git a/synapse/storage/databases/main/client_ips.py b/synapse/storage/databases/main/client_ips.py index d60010e942ab..074b077bef91 100644 --- a/synapse/storage/databases/main/client_ips.py +++ b/synapse/storage/databases/main/client_ips.py @@ -436,7 +436,7 @@ class ClientIpStore(ClientIpWorkerStore): def __init__(self, database: DatabasePool, db_conn, hs): self.client_ip_last_seen = LruCache( - cache_name="client_ip_last_seen", keylen=4, max_size=50000 + cache_name="client_ip_last_seen", max_size=50000 ) super().__init__(database, db_conn, hs) diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index a1f98b7e3854..fd87ba71abcb 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1053,7 +1053,7 @@ def __init__(self, database: DatabasePool, db_conn, hs): # Map of (user_id, device_id) -> bool. If there is an entry that implies # the device exists. self.device_id_exists_cache = LruCache( - cache_name="device_id_exists", keylen=2, max_size=10000 + cache_name="device_id_exists", max_size=10000 ) async def store_device( diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 2c823e09cfa6..6963bbf7f484 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -157,7 +157,6 @@ def __init__(self, database: DatabasePool, db_conn, hs): self._get_event_cache = LruCache( cache_name="*getEvent*", - keylen=3, max_size=hs.config.caches.event_cache_size, ) diff --git a/synapse/util/caches/deferred_cache.py b/synapse/util/caches/deferred_cache.py index 484097a48a45..371e7e4dd0de 100644 --- a/synapse/util/caches/deferred_cache.py +++ b/synapse/util/caches/deferred_cache.py @@ -70,7 +70,6 @@ def __init__( self, name: str, max_entries: int = 1000, - keylen: int = 1, tree: bool = False, iterable: bool = False, apply_cache_factor_from_config: bool = True, @@ -101,7 +100,6 @@ def metrics_cb(): # a Deferred. self.cache = LruCache( max_size=max_entries, - keylen=keylen, cache_name=name, cache_type=cache_type, size_callback=(lambda d: len(d) or 1) if iterable else None, diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index 3a4d027095bb..2ac24a2f2536 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -270,7 +270,6 @@ def __get__(self, obj, owner): cache = DeferredCache( name=self.orig.__name__, max_entries=self.max_entries, - keylen=self.num_args, tree=self.tree, iterable=self.iterable, ) # type: DeferredCache[CacheKey, Any] diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index 826951dcdc9c..54df407ff70c 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -160,7 +160,6 @@ def __init__( self, max_size: int, cache_name: Optional[str] = None, - keylen: int = 1, cache_type: Type[Union[dict, TreeCache]] = dict, size_callback: Optional[Callable] = None, metrics_collection_callback: Optional[Callable[[], None]] = None, @@ -173,9 +172,6 @@ def __init__( cache_name: The name of this cache, for the prometheus metrics. If unset, no metrics will be reported on this cache. - keylen: The length of the tuple used as the cache key. Ignored unless - cache_type is `TreeCache`. - cache_type (type): type of underlying cache to be used. Typically one of dict or TreeCache. diff --git a/tests/util/test_lrucache.py b/tests/util/test_lrucache.py index df3e27779fd2..377904e72e6c 100644 --- a/tests/util/test_lrucache.py +++ b/tests/util/test_lrucache.py @@ -59,7 +59,7 @@ def test_pop(self): self.assertEquals(cache.pop("key"), None) def test_del_multi(self): - cache = LruCache(4, keylen=2, cache_type=TreeCache) + cache = LruCache(4, cache_type=TreeCache) cache[("animal", "cat")] = "mew" cache[("animal", "dog")] = "woof" cache[("vehicles", "car")] = "vroom" @@ -165,7 +165,7 @@ def test_del_multi(self): m2 = Mock() m3 = Mock() m4 = Mock() - cache = LruCache(4, keylen=2, cache_type=TreeCache) + cache = LruCache(4, cache_type=TreeCache) cache.set(("a", "1"), "value", callbacks=[m1]) cache.set(("a", "2"), "value", callbacks=[m2]) From c441f0ea1bd4a3398716e0cf8e284e8e6af06cac Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 14 May 2021 14:03:54 +0100 Subject: [PATCH 3/3] changelog --- changelog.d/9993.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/9993.misc diff --git a/changelog.d/9993.misc b/changelog.d/9993.misc new file mode 100644 index 000000000000..0dd92440717d --- /dev/null +++ b/changelog.d/9993.misc @@ -0,0 +1 @@ +Remove `keylen` param on `LruCache`.