diff --git a/velox/common/base/Counters.cpp b/velox/common/base/Counters.cpp index f47584dd7202..0564a4262c66 100644 --- a/velox/common/base/Counters.cpp +++ b/velox/common/base/Counters.cpp @@ -86,6 +86,23 @@ void registerVeloxMetrics() { DEFINE_METRIC( kMetricArbitratorRequestsCount, facebook::velox::StatType::COUNT); + // The number of arbitration that reclaims the used memory from the query + // which initiates the memory arbitration request itself. It ensures the + // memory arbitration request won't exceed its per-query memory capacity + // limit. + DEFINE_METRIC( + kMetricArbitratorLocalArbitrationCount, facebook::velox::StatType::COUNT); + + // The number of arbitration which ensures the total allocated query capacity + // won't exceed the arbitrator capacity limit. It may or may not reclaim + // memory from the query which initiate the memory arbitration request. This + // indicates the velox runtime doesn't have enough memory to run all the + // queries at their peak memory usage. We have to trigger spilling to let them + // run through completion. + DEFINE_METRIC( + kMetricArbitratorGlobalArbitrationCount, + facebook::velox::StatType::COUNT); + // The number of times a query level memory pool is aborted as a result of a // memory arbitration process. The memory pool aborted will eventually result // in a cancelling the original query. 
diff --git a/velox/common/base/Counters.h b/velox/common/base/Counters.h index 4625c250d11c..41be72ae9bfb 100644 --- a/velox/common/base/Counters.h +++ b/velox/common/base/Counters.h @@ -64,6 +64,12 @@ constexpr folly::StringPiece kMetricMemoryPoolReservationLeakBytes{ constexpr folly::StringPiece kMetricArbitratorRequestsCount{ "velox.arbitrator_requests_count"}; +constexpr folly::StringPiece kMetricArbitratorLocalArbitrationCount{ + "velox.arbitrator_local_arbitration_count"}; + +constexpr folly::StringPiece kMetricArbitratorGlobalArbitrationCount{ + "velox.arbitrator_global_arbitration_count"}; + constexpr folly::StringPiece kMetricArbitratorAbortedCount{ "velox.arbitrator_aborted_count"}; diff --git a/velox/common/memory/SharedArbitrator.cpp b/velox/common/memory/SharedArbitrator.cpp index 4e05c2b06cad..d96784127ddd 100644 --- a/velox/common/memory/SharedArbitrator.cpp +++ b/velox/common/memory/SharedArbitrator.cpp @@ -432,6 +432,7 @@ bool SharedArbitrator::arbitrateMemory( } VELOX_CHECK_LT(freedBytes, growTarget); + RECORD_METRIC_VALUE(kMetricArbitratorGlobalArbitrationCount); freedBytes += reclaimUsedMemoryFromCandidatesBySpill( requestor, candidates, growTarget - freedBytes); if (requestor->aborted()) { @@ -547,6 +548,7 @@ uint64_t SharedArbitrator::reclaim( try { freedBytes = pool->shrink(targetBytes); if (freedBytes < targetBytes) { + RECORD_METRIC_VALUE(kMetricArbitratorLocalArbitrationCount); pool->reclaim( targetBytes - freedBytes, memoryReclaimWaitMs_, reclaimerStats); } diff --git a/velox/docs/monitoring/metrics.rst b/velox/docs/monitoring/metrics.rst index d206d725d4e8..f66b3e0be673 100644 --- a/velox/docs/monitoring/metrics.rst +++ b/velox/docs/monitoring/metrics.rst @@ -117,6 +117,18 @@ Memory Management - Count - The number of times a memory arbitration request was initiated by a memory pool attempting to grow its capacity. 
+ * - arbitrator_local_arbitration_count + - Count + - The number of arbitrations that reclaim the used memory from the query which initiates + the memory arbitration request itself. It ensures the memory arbitration request won't + exceed its per-query memory capacity limit. + * - arbitrator_global_arbitration_count + - Count + - The number of arbitrations which ensure the total allocated query capacity won't exceed + the arbitrator capacity limit. It may or may not reclaim memory from the query which + initiates the memory arbitration request. This indicates the Velox runtime doesn't have + enough memory to run all the queries at their peak memory usage. We have to trigger + spilling to let them run to completion. * - arbitrator_aborted_count - Count - The number of times a query level memory pool is aborted as a result of