From 529a61ba79e8e14d30e6f73307184041d52ca9a4 Mon Sep 17 00:00:00 2001 From: Yi Cheng <74173148+iycheng@users.noreply.github.com> Date: Wed, 5 Jul 2023 11:08:02 -0700 Subject: [PATCH] [core] Add metrics for object size distribution in object store (#37005) ## Why are these changes needed? This PR add the metrics for the object size distribution to help the user understand how the objects are used in the script. ## Related issue number #36923 --- .../object_manager/plasma/stats_collector.cc | 10 +++++++++- src/ray/stats/metric_defs.cc | 19 +++++++++++++++++++ src/ray/stats/metric_defs.h | 1 + src/ray/stats/tag_defs.cc | 2 ++ src/ray/stats/tag_defs.h | 2 ++ 5 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/ray/object_manager/plasma/stats_collector.cc b/src/ray/object_manager/plasma/stats_collector.cc index b9ca600f46b3..e46468ce46e0 100644 --- a/src/ray/object_manager/plasma/stats_collector.cc +++ b/src/ray/object_manager/plasma/stats_collector.cc @@ -35,15 +35,23 @@ void ObjectStatsCollector::OnObjectCreated(const LocalObject &obj) { if (kSource == plasma::flatbuf::ObjectSource::CreatedByWorker) { num_objects_created_by_worker_++; num_bytes_created_by_worker_ += kObjectSize; + ray::stats::STATS_object_store_dist.Record( + kObjectSize, {{ray::stats::SourceKey, "CreatedByWorker"}}); } else if (kSource == plasma::flatbuf::ObjectSource::RestoredFromStorage) { num_objects_restored_++; num_bytes_restored_ += kObjectSize; + ray::stats::STATS_object_store_dist.Record( + kObjectSize, {{ray::stats::SourceKey, "RestoredFromStorage"}}); } else if (kSource == plasma::flatbuf::ObjectSource::ReceivedFromRemoteRaylet) { num_objects_received_++; num_bytes_received_ += kObjectSize; + ray::stats::STATS_object_store_dist.Record( + kObjectSize, {{ray::stats::SourceKey, "ReceivedFromRemoteRaylet"}}); } else if (kSource == plasma::flatbuf::ObjectSource::ErrorStoredByRaylet) { num_objects_errored_++; num_bytes_errored_ += kObjectSize; + ray::stats::STATS_object_store_dist.Record( + kObjectSize, {{ray::stats::SourceKey, "ErrorStoredByRaylet"}}); } RAY_CHECK(!obj.Sealed()); @@ -246,4 +254,4 @@ int64_t ObjectStatsCollector::GetNumObjectsUnsealed() const { return num_objects_unsealed_; } -} // namespace plasma \ No newline at end of file +} // namespace plasma diff --git a/src/ray/stats/metric_defs.cc b/src/ray/stats/metric_defs.cc index 93957e36699e..cbc158e6984c 100644 --- a/src/ray/stats/metric_defs.cc +++ b/src/ray/stats/metric_defs.cc @@ -98,6 +98,25 @@ DEFINE_stats( (), ray::stats::GAUGE); +double operator""_MB(unsigned long long int x) { + return static_cast(1024L * 1024L * x); +} + +DEFINE_stats(object_store_dist, + "The distribution of object size in bytes", + ("Source"), + ({32_MB, + 64_MB, + 128_MB, + 256_MB, + 512_MB, + 1024_MB, + 2048_MB, + 4096_MB, + 8192_MB, + 16384_MB}), + ray::stats::HISTOGRAM); + /// Placement group metrics from the GCS. DEFINE_stats(placement_groups, "Number of placement groups broken down by state.", diff --git a/src/ray/stats/metric_defs.h b/src/ray/stats/metric_defs.h index 4f33de6fd4c3..d6a6dc6ebfef 100644 --- a/src/ray/stats/metric_defs.h +++ b/src/ray/stats/metric_defs.h @@ -108,6 +108,7 @@ DECLARE_stats(gcs_task_manager_task_events_reported); /// Object Store DECLARE_stats(object_store_memory); +DECLARE_stats(object_store_dist); /// Placement Group DECLARE_stats(gcs_placement_group_creation_latency_ms); diff --git a/src/ray/stats/tag_defs.cc b/src/ray/stats/tag_defs.cc index 0753a3dd96d7..65a4c21c2cd1 100644 --- a/src/ray/stats/tag_defs.cc +++ b/src/ray/stats/tag_defs.cc @@ -47,5 +47,7 @@ const TagKeyType NameKey = TagKeyType::Register("Name"); const TagKeyType LocationKey = TagKeyType::Register("Location"); const TagKeyType ObjectStateKey = TagKeyType::Register("ObjectState"); + +const TagKeyType SourceKey = TagKeyType::Register("Source"); } // namespace stats } // namespace ray diff --git a/src/ray/stats/tag_defs.h b/src/ray/stats/tag_defs.h index 2f0edc228718..9abee69ca733 100644 --- a/src/ray/stats/tag_defs.h +++ b/src/ray/stats/tag_defs.h @@ -48,6 +48,8 @@ extern const TagKeyType SessionNameKey; extern const TagKeyType NameKey; +extern const TagKeyType SourceKey; + // Object store memory location tag constants extern const TagKeyType LocationKey; constexpr char kObjectLocMmapShm[] = "MMAP_SHM";