This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Track why we're evicting from caches #10829

Merged: 2 commits, Sep 22, 2021
1 change: 1 addition & 0 deletions changelog.d/10829.misc
@@ -0,0 +1 @@
Track cache eviction rates more finely in Prometheus' monitoring.
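The changelog entry above is the whole user-facing description: the eviction gauges gain a `reason` label, so size-driven and expiry-driven evictions show up as separate Prometheus series. A minimal sketch of that labelling pattern with `prometheus_client` (the metric, cache, and variable names here are made up for illustration, not Synapse's):

```python
from prometheus_client import CollectorRegistry, Gauge, generate_latest

registry = CollectorRegistry()

# One gauge with two labels: the cache's name and the reason for eviction.
evicted = Gauge(
    "example_cache_evicted_size",  # hypothetical metric name, not Synapse's
    "Size of entries evicted, by cache and by reason",
    ["name", "reason"],
    registry=registry,
)

# Each (name, reason) combination becomes its own time series.
evicted.labels("some_cache", "size").set(120)
evicted.labels("some_cache", "time").set(35)

print(generate_latest(registry).decode())
# ...
# example_cache_evicted_size{name="some_cache",reason="size"} 120.0
# example_cache_evicted_size{name="some_cache",reason="time"} 35.0
```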
31 changes: 23 additions & 8 deletions synapse/util/caches/__init__.py
@@ -12,8 +12,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import collections
import logging
import typing
from enum import Enum, auto
from sys import intern
from typing import Callable, Dict, Optional, Sized

@@ -34,7 +36,7 @@

cache_size = Gauge("synapse_util_caches_cache:size", "", ["name"])
cache_hits = Gauge("synapse_util_caches_cache:hits", "", ["name"])
cache_evicted = Gauge("synapse_util_caches_cache:evicted_size", "", ["name"])
cache_evicted = Gauge("synapse_util_caches_cache:evicted_size", "", ["name", "reason"])
cache_total = Gauge("synapse_util_caches_cache:total", "", ["name"])
cache_max_size = Gauge("synapse_util_caches_cache_max_size", "", ["name"])
cache_memory_usage = Gauge(
@@ -46,11 +48,16 @@
response_cache_size = Gauge("synapse_util_caches_response_cache:size", "", ["name"])
response_cache_hits = Gauge("synapse_util_caches_response_cache:hits", "", ["name"])
response_cache_evicted = Gauge(
"synapse_util_caches_response_cache:evicted_size", "", ["name"]
"synapse_util_caches_response_cache:evicted_size", "", ["name", "reason"]
)
response_cache_total = Gauge("synapse_util_caches_response_cache:total", "", ["name"])


class EvictionReason(Enum):
size = auto()
time = auto()


@attr.s(slots=True)
class CacheMetric:

@@ -61,7 +68,9 @@ class CacheMetric:

hits = attr.ib(default=0)
misses = attr.ib(default=0)
evicted_size = attr.ib(default=0)
eviction_size_by_reason: typing.Counter[EvictionReason] = attr.ib(
factory=collections.Counter
)
memory_usage = attr.ib(default=None)

def inc_hits(self) -> None:
@@ -70,8 +79,8 @@ def inc_hits(self) -> None:
def inc_misses(self) -> None:
self.misses += 1

def inc_evictions(self, size: int = 1) -> None:
self.evicted_size += size
def inc_evictions(self, reason: EvictionReason, size: int = 1) -> None:
self.eviction_size_by_reason[reason] += size

def inc_memory_usage(self, memory: int) -> None:
if self.memory_usage is None:
@@ -94,14 +103,20 @@ def collect(self) -> None:
if self._cache_type == "response_cache":
response_cache_size.labels(self._cache_name).set(len(self._cache))
response_cache_hits.labels(self._cache_name).set(self.hits)
response_cache_evicted.labels(self._cache_name).set(self.evicted_size)
for reason in EvictionReason:
response_cache_evicted.labels(self._cache_name, reason.name).set(
self.eviction_size_by_reason[reason]
)
response_cache_total.labels(self._cache_name).set(
self.hits + self.misses
)
else:
cache_size.labels(self._cache_name).set(len(self._cache))
cache_hits.labels(self._cache_name).set(self.hits)
cache_evicted.labels(self._cache_name).set(self.evicted_size)
for reason in EvictionReason:
cache_evicted.labels(self._cache_name, reason.name).set(
self.eviction_size_by_reason[reason]
)
cache_total.labels(self._cache_name).set(self.hits + self.misses)
if getattr(self._cache, "max_size", None):
cache_max_size.labels(self._cache_name).set(self._cache.max_size)
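The core of the change is in `CacheMetric`: the single `evicted_size` integer becomes a `collections.Counter` keyed by `EvictionReason`, and `collect()` loops over every member of the enum. Because `Counter` returns 0 for missing keys, a reason a cache has never evicted under is still exported, as an explicit zero. A standalone sketch of that pattern (the `EvictionTracker` class is illustrative, not part of Synapse):

```python
import collections
from enum import Enum, auto
from typing import Dict


class EvictionReason(Enum):
    size = auto()
    time = auto()


class EvictionTracker:
    """Illustrative stand-in for CacheMetric's eviction bookkeeping."""

    def __init__(self) -> None:
        # Counter returns 0 for reasons that were never recorded.
        self.eviction_size_by_reason: collections.Counter = collections.Counter()

    def inc_evictions(self, reason: EvictionReason, size: int = 1) -> None:
        self.eviction_size_by_reason[reason] += size

    def snapshot(self) -> Dict[str, int]:
        # Mirrors collect(): every reason is reported, even when it is zero.
        return {
            reason.name: self.eviction_size_by_reason[reason]
            for reason in EvictionReason
        }


tracker = EvictionTracker()
tracker.inc_evictions(EvictionReason.size, 3)
print(tracker.snapshot())  # {'size': 3, 'time': 0}
```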
10 changes: 5 additions & 5 deletions synapse/util/caches/expiringcache.py
@@ -22,7 +22,7 @@
from synapse.config import cache as cache_config
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.util import Clock
from synapse.util.caches import register_cache
from synapse.util.caches import EvictionReason, register_cache

logger = logging.getLogger(__name__)

@@ -98,9 +98,9 @@ def evict(self) -> None:
while self._max_size and len(self) > self._max_size:
_key, value = self._cache.popitem(last=False)
if self.iterable:
self.metrics.inc_evictions(len(value.value))
self.metrics.inc_evictions(EvictionReason.size, len(value.value))
else:
self.metrics.inc_evictions()
self.metrics.inc_evictions(EvictionReason.size)

def __getitem__(self, key: KT) -> VT:
try:
@@ -175,9 +175,9 @@ def _prune_cache(self) -> None:
for k in keys_to_delete:
value = self._cache.pop(k)
if self.iterable:
self.metrics.inc_evictions(len(value.value))
self.metrics.inc_evictions(EvictionReason.time, len(value.value))
else:
self.metrics.inc_evictions()
self.metrics.inc_evictions(EvictionReason.time)

logger.debug(
"[%s] _prune_cache before: %d, after len: %d",
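In `ExpiringCache`, size-driven trimming in `evict()` and expiry-driven pruning in `_prune_cache()` now report under different reasons. Note the `iterable` branch in both methods: for iterable caches an eviction counts the number of items in the evicted value rather than one per key. A toy illustration of that accounting (the values are made up):

```python
import collections
from enum import Enum, auto


class EvictionReason(Enum):
    # Same two-member enum as in the diff above.
    size = auto()
    time = auto()


evictions: collections.Counter = collections.Counter()

# A non-iterable cache counts one eviction per evicted key...
evictions[EvictionReason.size] += 1

# ...whereas an iterable cache counts the items in the evicted value,
# e.g. a per-key list of three entries pruned because it expired.
expired_value = ["entry1", "entry2", "entry3"]
evictions[EvictionReason.time] += len(expired_value)

print({reason.name: evictions[reason] for reason in EvictionReason})
# {'size': 1, 'time': 3}
```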
4 changes: 2 additions & 2 deletions synapse/util/caches/lrucache.py
@@ -40,7 +40,7 @@
from synapse.config import cache as cache_config
from synapse.metrics.background_process_metrics import wrap_as_background_process
from synapse.util import Clock, caches
from synapse.util.caches import CacheMetric, register_cache
from synapse.util.caches import CacheMetric, EvictionReason, register_cache
from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry
from synapse.util.linked_list import ListNode

@@ -403,7 +403,7 @@ def evict() -> None:
evicted_len = delete_node(node)
cache.pop(node.key, None)
if metrics:
metrics.inc_evictions(evicted_len)
metrics.inc_evictions(EvictionReason.size, evicted_len)

def synchronized(f: FT) -> FT:
@wraps(f)
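In `LruCache`, every eviction exists only to keep the cache under its size budget, so all of them are attributed to `EvictionReason.size`; time-based expiry remains `ExpiringCache`'s job. A toy LRU loop showing that attribution, using `collections.OrderedDict` instead of Synapse's linked-list nodes:

```python
import collections
from enum import Enum, auto


class EvictionReason(Enum):
    # Same two-member enum as in the diff above.
    size = auto()
    time = auto()


MAX_SIZE = 2
cache: "collections.OrderedDict[str, str]" = collections.OrderedDict()
evictions: collections.Counter = collections.Counter()


def cache_set(key: str, value: str) -> None:
    cache[key] = value
    cache.move_to_end(key)
    # Trim least-recently-used entries; every eviction here is size-driven.
    while len(cache) > MAX_SIZE:
        cache.popitem(last=False)
        evictions[EvictionReason.size] += 1


for key in ("a", "b", "c"):
    cache_set(key, key.upper())

print(dict(cache))                     # {'b': 'B', 'c': 'C'}
print(evictions[EvictionReason.size])  # 1
```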