Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Remove keylen from LruCache. #9993

Merged
merged 3 commits into from
May 24, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/9993.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Remove `keylen` param on `LruCache`.
2 changes: 1 addition & 1 deletion synapse/replication/slave/storage/client_ips.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def __init__(self, database: DatabasePool, db_conn, hs):
super().__init__(database, db_conn, hs)

self.client_ip_last_seen = LruCache(
cache_name="client_ip_last_seen", keylen=4, max_size=50000
cache_name="client_ip_last_seen", max_size=50000
) # type: LruCache[tuple, int]

async def insert_client_ip(self, user_id, access_token, ip, user_agent, device_id):
Expand Down
2 changes: 1 addition & 1 deletion synapse/storage/databases/main/client_ips.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ class ClientIpStore(ClientIpWorkerStore):
def __init__(self, database: DatabasePool, db_conn, hs):

self.client_ip_last_seen = LruCache(
cache_name="client_ip_last_seen", keylen=4, max_size=50000
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the record, this value should have been 3.

cache_name="client_ip_last_seen", max_size=50000
)

super().__init__(database, db_conn, hs)
Expand Down
2 changes: 1 addition & 1 deletion synapse/storage/databases/main/devices.py
Original file line number Diff line number Diff line change
Expand Up @@ -1053,7 +1053,7 @@ def __init__(self, database: DatabasePool, db_conn, hs):
# Map of (user_id, device_id) -> bool. If there is an entry that implies
# the device exists.
self.device_id_exists_cache = LruCache(
cache_name="device_id_exists", keylen=2, max_size=10000
cache_name="device_id_exists", max_size=10000
)

async def store_device(
Expand Down
1 change: 0 additions & 1 deletion synapse/storage/databases/main/events_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,6 @@ def __init__(self, database: DatabasePool, db_conn, hs):

self._get_event_cache = LruCache(
cache_name="*getEvent*",
keylen=3,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and this one should have been 1.

max_size=hs.config.caches.event_cache_size,
)

Expand Down
2 changes: 0 additions & 2 deletions synapse/util/caches/deferred_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ def __init__(
self,
name: str,
max_entries: int = 1000,
keylen: int = 1,
tree: bool = False,
iterable: bool = False,
apply_cache_factor_from_config: bool = True,
Expand Down Expand Up @@ -101,7 +100,6 @@ def metrics_cb():
# a Deferred.
self.cache = LruCache(
max_size=max_entries,
keylen=keylen,
cache_name=name,
cache_type=cache_type,
size_callback=(lambda d: len(d) or 1) if iterable else None,
Expand Down
1 change: 0 additions & 1 deletion synapse/util/caches/descriptors.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,6 @@ def __get__(self, obj, owner):
cache = DeferredCache(
name=self.orig.__name__,
max_entries=self.max_entries,
keylen=self.num_args,
tree=self.tree,
iterable=self.iterable,
) # type: DeferredCache[CacheKey, Any]
Expand Down
10 changes: 4 additions & 6 deletions synapse/util/caches/lrucache.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from synapse.config import cache as cache_config
from synapse.util import caches
from synapse.util.caches import CacheMetric, register_cache
from synapse.util.caches.treecache import TreeCache
from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry

try:
from pympler.asizeof import Asizer
Expand Down Expand Up @@ -160,7 +160,6 @@ def __init__(
self,
max_size: int,
cache_name: Optional[str] = None,
keylen: int = 1,
cache_type: Type[Union[dict, TreeCache]] = dict,
size_callback: Optional[Callable] = None,
metrics_collection_callback: Optional[Callable[[], None]] = None,
Expand All @@ -173,9 +172,6 @@ def __init__(
cache_name: The name of this cache, for the prometheus metrics. If unset,
no metrics will be reported on this cache.

keylen: The length of the tuple used as the cache key. Ignored unless
cache_type is `TreeCache`.

cache_type (type):
type of underlying cache to be used. Typically one of dict
or TreeCache.
Expand Down Expand Up @@ -403,7 +399,9 @@ def cache_del_multi(key: KT) -> None:
popped = cache.pop(key)
if popped is None:
return
for leaf in enumerate_leaves(popped, keylen - len(cast(tuple, key))):
# for each deleted node, we now need to remove it from the linked list
# and run its callbacks.
for leaf in iterate_tree_cache_entry(popped):
delete_node(leaf)

@synchronized
Expand Down
104 changes: 66 additions & 38 deletions synapse/util/caches/treecache.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,43 @@
from typing import Dict
# Copyright 2016-2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

SENTINEL = object()


class TreeCacheNode(dict):
    """A dict subclass marking the internal nodes of a TreeCache.

    Giving interior nodes their own type lets tree-walking code tell them
    apart from ordinary dicts that callers may have stored as leaf values.
    """


class TreeCache:
"""
Tree-based backing store for LruCache. Allows subtrees of data to be deleted
efficiently.
Keys must be tuples.

The data structure is a chain of TreeCacheNodes:
root = {key_1: {key_2: _value}}
"""

def __init__(self):
    """Create an empty TreeCache.

    # size counts leaf values only; it is incremented in set() and
    # decremented in pop() as leaves are added/removed.
    """
    self.size = 0
    # The root is a TreeCacheNode (not a plain dict) so that subtree nodes
    # can be distinguished from dict values stored at the leaves.
    self.root = TreeCacheNode()

def __setitem__(self, key, value):
    """Support ``cache[key] = value`` by delegating to :meth:`set`."""
    return self.set(key, value)
Expand All @@ -21,10 +46,23 @@ def __contains__(self, key):
return self.get(key, SENTINEL) is not SENTINEL

def set(self, key, value):
    """Store `value` under the tuple `key`, building subtree nodes as needed.

    Args:
        key: a tuple of subkeys identifying the leaf to write.
        value: the value to store. Must not be a TreeCacheNode.

    Raises:
        ValueError: if `value` is a TreeCacheNode, or if a prefix of `key`
            already maps to a leaf value (inconsistent key lengths).
    """
    if isinstance(value, TreeCacheNode):
        # this would mean we couldn't tell where our tree ended and the value
        # started.
        raise ValueError("Cannot store TreeCacheNodes in a TreeCache")

    node = self.root
    for k in key[:-1]:
        # NOTE(review): use get + explicit assignment rather than setdefault,
        # to avoid building a redundant TreeCacheNode on every iteration.
        next_node = node.get(k, SENTINEL)
        if next_node is SENTINEL:
            next_node = node[k] = TreeCacheNode()
        elif not isinstance(next_node, TreeCacheNode):
            # this suggests that the caller is not being consistent with its
            # key length.
            raise ValueError("value conflicts with an existing subtree")
        node = next_node

    node[key[-1]] = value
    self.size += 1

def get(self, key, default=None):
Expand All @@ -33,25 +71,41 @@ def get(self, key, default=None):
node = node.get(k, None)
if node is None:
return default
return node.get(key[-1], _Entry(default)).value
return node.get(key[-1], default)

def clear(self):
    """Drop every entry in the cache.

    Replacing the root with a fresh TreeCacheNode discards all subtrees at
    once; size is reset to match.
    """
    self.size = 0
    self.root = TreeCacheNode()

def pop(self, key, default=None):
"""Remove the given key, or subkey, from the cache

Args:
key: key or subkey to remove.
default: value to return if key is not found

Returns:
If the key is not found, 'default'. If the key is complete, the removed
value. If the key is partial, the TreeCacheNode corresponding to the part
of the tree that was removed.
"""
# a list of the nodes we have touched on the way down the tree
nodes = []

node = self.root
for k in key[:-1]:
node = node.get(k, None)
nodes.append(node) # don't add the root node
if node is None:
return default
if not isinstance(node, TreeCacheNode):
# we've gone off the end of the tree
raise ValueError("pop() key too long")
nodes.append(node) # don't add the root node
popped = node.pop(key[-1], SENTINEL)
if popped is SENTINEL:
return default

# working back up the tree, clear out any nodes that are now empty
node_and_keys = list(zip(nodes, key))
node_and_keys.reverse()
node_and_keys.append((self.root, None))
Expand All @@ -61,14 +115,15 @@ def pop(self, key, default=None):

if n:
break
# found an empty node: remove it from its parent, and loop.
node_and_keys[i + 1][0].pop(k)

popped, cnt = _strip_and_count_entires(popped)
cnt = sum(1 for _ in iterate_tree_cache_entry(popped))
self.size -= cnt
return popped

def values(self):
    """Return a lazy iterator over all leaf values stored in the cache.

    Note: this yields a generator, not a list; callers that need a list
    should wrap it themselves.
    """
    return iterate_tree_cache_entry(self.root)

def __len__(self):
    # Reports the number of leaf values stored, which is maintained
    # incrementally by set() (+1) and pop() (-count of removed leaves).
    return self.size
def iterate_tree_cache_entry(d):
    """Helper function to iterate over the leaves of a tree, i.e. a dict that
    can contain dicts.

    Args:
        d: either a leaf value, or a TreeCacheNode whose descendants hold the
            leaf values.

    Yields:
        Every leaf value beneath `d`; if `d` is not a TreeCacheNode it is
        itself yielded as the single leaf.
    """
    if isinstance(d, TreeCacheNode):
        for value_d in d.values():
            # each child is either a subtree (TreeCacheNode) or a leaf value
            yield from iterate_tree_cache_entry(value_d)
    else:
        yield d
4 changes: 2 additions & 2 deletions tests/util/test_lrucache.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def test_pop(self):
self.assertEquals(cache.pop("key"), None)

def test_del_multi(self):
cache = LruCache(4, keylen=2, cache_type=TreeCache)
cache = LruCache(4, cache_type=TreeCache)
cache[("animal", "cat")] = "mew"
cache[("animal", "dog")] = "woof"
cache[("vehicles", "car")] = "vroom"
Expand Down Expand Up @@ -165,7 +165,7 @@ def test_del_multi(self):
m2 = Mock()
m3 = Mock()
m4 = Mock()
cache = LruCache(4, keylen=2, cache_type=TreeCache)
cache = LruCache(4, cache_type=TreeCache)

cache.set(("a", "1"), "value", callbacks=[m1])
cache.set(("a", "2"), "value", callbacks=[m2])
Expand Down
6 changes: 4 additions & 2 deletions tests/util/test_treecache.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# limitations under the License.


from synapse.util.caches.treecache import TreeCache
from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry

from .. import unittest

Expand Down Expand Up @@ -64,12 +64,14 @@ def test_pop_mixedlevel(self):
cache[("a", "b")] = "AB"
cache[("b", "a")] = "BA"
self.assertEquals(cache.get(("a", "a")), "AA")
cache.pop(("a",))
popped = cache.pop(("a",))
self.assertEquals(cache.get(("a", "a")), None)
self.assertEquals(cache.get(("a", "b")), None)
self.assertEquals(cache.get(("b", "a")), "BA")
self.assertEquals(len(cache), 1)

self.assertEquals({"AA", "AB"}, set(iterate_tree_cache_entry(popped)))

def test_clear(self):
cache = TreeCache()
cache[("a",)] = "A"
Expand Down