Skip to content

Commit

Permalink
[Metrics] Fix the incorrect object store size from dashboard vs metrics
Browse files Browse the repository at this point in the history
Signed-off-by: SangBin Cho <rkooo567@gmail.com>
Signed-off-by: elliottower <elliot@elliottower.com>
  • Loading branch information
rkooo567 authored and elliottower committed Apr 22, 2023
1 parent f5415d1 commit 6bb9199
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 3 deletions.
6 changes: 3 additions & 3 deletions dashboard/modules/metrics/grafana_dashboard_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,14 +163,14 @@ def max_plus_pending(max_resource, pending_resource):
id=29,
title="Object Store Memory",
description="Object store memory usage by location. The dotted line indicates the object store memory capacity.\n\nLocation: where the memory was allocated, which is MMAP_SHM or MMAP_DISK to indicate memory-mapped page, SPILLED to indicate spillage to disk, and WORKER_HEAP for objects small enough to be inlined in worker memory. Refer to metric_defs.cc for more information.",
unit="gbytes",
unit="bytes",
targets=[
Target(
expr="sum(ray_object_store_memory{{{global_filters}}} / 1e9) by (Location)",
expr="sum(ray_object_store_memory{{{global_filters}}}) by (Location)",
legend="{{Location}}",
),
Target(
expr='sum(ray_resources{{Name="object_store_memory",{global_filters}}} / 1e9)',
expr='sum(ray_resources{{Name="object_store_memory",{global_filters}}})',
legend="MAX",
),
],
Expand Down
32 changes: 32 additions & 0 deletions python/ray/tests/test_object_store_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import numpy as np
import sys

import requests
import ray
from ray._private.test_utils import (
raw_metrics,
Expand Down Expand Up @@ -352,6 +353,37 @@ def test_seal_memory(shutdown_only):
)


def test_object_store_memory_matches_dashboard_obj_memory(shutdown_only):
    """Check that the dashboard and the metrics agree on object store memory.

    Regression test for https://github.com/ray-project/ray/issues/32092.

    Ray is started with a 500 MiB object store. The test then compares the
    sum of the ``ray_resources`` samples whose ``Name`` label is
    ``object_store_memory`` against the ``objectStoreAvailableMemory`` value
    of the first entry returned by the dashboard's ``/nodes?view=summary``
    endpoint. Both values must be equal to each other and to 500 MiB.

    Args:
        shutdown_only: Not used in the body; presumably a pytest fixture that
            shuts Ray down after the test (confirm against conftest).
    """
    ctx = ray.init(
        object_store_memory=500 * MiB,
    )

    def verify():
        # Total of every exported sample for the object store resource.
        object_store_memory_bytes_from_metrics = sum(
            sample.value
            for sample in raw_metrics(ctx)["ray_resources"]
            if sample.labels["Name"] == "object_store_memory"
        )

        # Bound the request so an unresponsive dashboard fails this attempt
        # instead of blocking the test forever.
        r = requests.get(
            f"http://{ctx.dashboard_url}/nodes?view=summary",
            timeout=10,
        )
        object_store_memory_bytes_from_dashboard = int(
            r.json()["data"]["summary"][0]["raylet"]["objectStoreAvailableMemory"]
        )

        assert (
            object_store_memory_bytes_from_dashboard
            == object_store_memory_bytes_from_metrics
        )
        assert object_store_memory_bytes_from_dashboard == 500 * MiB
        return True

    # NOTE(review): wait_for_condition presumably polls verify() until it
    # returns True or times out -- confirm in ray._private.test_utils.
    wait_for_condition(verify)


if __name__ == "__main__":
import sys
import os
Expand Down

0 comments on commit 6bb9199

Please sign in to comment.