Skip to content

Commit

Permalink
Add metrics for spilling (facebookincubator#8025)
Browse files Browse the repository at this point in the history
Summary:
Add the following global metrics:
velox.spill_input_bytes
velox.spill_bytes
velox.spill__rows_count
velox.spill_files_count
velox.spill_fill_time_ms
velox.spill_sort_time_ms
velox.spill_serialization_time_ms
velox.spill_disk_writes_count
velox.spill_flush_time_ms
velox.spill_write_time_ms

Pull Request resolved: facebookincubator#8025

Reviewed By: xiaoxmeng

Differential Revision: D52172296

Pulled By: bikramSingh91

fbshipit-source-id: bfbb67f6a0ab05c51ba9f47db2f0b5410e65a9cc
  • Loading branch information
bikramSingh91 authored and facebook-github-bot committed Dec 15, 2023
1 parent 2106478 commit 4466c87
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 7 deletions.
61 changes: 55 additions & 6 deletions velox/common/base/Counters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,6 @@ void registerVeloxMetrics() {
DEFINE_HISTOGRAM_METRIC(
kMetricCacheShrinkTimeMs, 10, 0, 100'000, 50, 90, 99, 100);

// Tracks the number of times that we hit the max spill level limit.
DEFINE_METRIC(
kMetricMaxSpillLevelExceededCount, facebook::velox::StatType::COUNT);

/// ================== Memory Arbitration Counters =================

// Tracks memory reclaim exec time in range of [0, 600s] with 20 buckets and
Expand Down Expand Up @@ -95,9 +91,62 @@ void registerVeloxMetrics() {
DEFINE_HISTOGRAM_METRIC(
kMetricArbitratorArbitrationTimeMs, 20, 0, 600'000, 50, 90, 99, 100);

/// Tracks the average of free memory capacity managed by the arbitrator in
/// bytes.
// Tracks the average of free memory capacity managed by the arbitrator in
// bytes.
DEFINE_METRIC(
kMetricArbitratorFreeCapacityBytes, facebook::velox::StatType::AVG);

/// ================== Spill related Counters =================

// The number of bytes in memory to spill.
DEFINE_METRIC(kMetricSpilledInputBytes, facebook::velox::StatType::SUM);

// The number of bytes spilled to disk which can be number of compressed
// bytes if compression is enabled.
DEFINE_METRIC(kMetricSpilledBytes, facebook::velox::StatType::SUM);

// The number of spilled rows.
DEFINE_METRIC(kMetricSpilledRowsCount, facebook::velox::StatType::COUNT);

// The number of spilled files.
DEFINE_METRIC(kMetricSpilledFilesCount, facebook::velox::StatType::COUNT);

// The distribution of the amount of time spent on filling rows for spilling.
// in range of [0, 600s] with 20 buckets. It is configured to report the
// latency at P50, P90, P99, and P100 percentiles.
DEFINE_HISTOGRAM_METRIC(
kMetricSpillFillTimeMs, 20, 0, 600'000, 50, 90, 99, 100);

// The distribution of the amount of time spent on sorting rows for spilling
// in range of [0, 600s] with 20 buckets. It is configured to report the
// latency at P50, P90, P99, and P100 percentiles.
DEFINE_HISTOGRAM_METRIC(
kMetricSpillSortTimeMs, 20, 0, 600'000, 50, 90, 99, 100);

// The distribution of the amount of time spent on serializing rows for
// spilling in range of [0, 600s] with 20 buckets. It is configured to report
// the latency at P50, P90, P99, and P100 percentiles.
DEFINE_HISTOGRAM_METRIC(
kMetricSpillSerializationTimeMs, 20, 0, 600'000, 50, 90, 99, 100);

// The number of disk writes to spill rows.
DEFINE_METRIC(kMetricSpillDiskWritesCount, facebook::velox::StatType::COUNT);

// The distribution of the amount of time spent on copy out serialized
// rows for disk write in range of [0, 600s] with 20 buckets. It is configured
// to report the latency at P50, P90, P99, and P100 percentiles. Note: If
// compression is enabled, this includes the compression time.
DEFINE_HISTOGRAM_METRIC(
kMetricSpillFlushTimeMs, 20, 0, 600'000, 50, 90, 99, 100);

// The distribution of the amount of time spent on writing spilled rows to
// disk in range of [0, 600s] with 20 buckets. It is configured to report the
// latency at P50, P90, P99, and P100 percentiles.
DEFINE_HISTOGRAM_METRIC(
kMetricSpillWriteTimeMs, 20, 0, 600'000, 50, 90, 99, 100);

// Tracks the number of times that we hit the max spill level limit.
DEFINE_METRIC(
kMetricMaxSpillLevelExceededCount, facebook::velox::StatType::COUNT);
}
} // namespace facebook::velox
26 changes: 26 additions & 0 deletions velox/common/base/Counters.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,30 @@ constexpr folly::StringPiece kMetricArbitratorArbitrationTimeMs{

constexpr folly::StringPiece kMetricArbitratorFreeCapacityBytes{
"velox.arbitrator_free_capacity_bytes"};

constexpr folly::StringPiece kMetricSpilledInputBytes{
"velox.spill_input_bytes"};

constexpr folly::StringPiece kMetricSpilledBytes{"velox.spill_bytes"};

constexpr folly::StringPiece kMetricSpilledRowsCount{"velox.spill_rows_count"};

constexpr folly::StringPiece kMetricSpilledFilesCount{
"velox.spill_files_count"};

constexpr folly::StringPiece kMetricSpillFillTimeMs{"velox.spill_fill_time_ms"};

constexpr folly::StringPiece kMetricSpillSortTimeMs{"velox.spill_sort_time_ms"};

constexpr folly::StringPiece kMetricSpillSerializationTimeMs{
"velox.spill_serialization_time_ms"};

constexpr folly::StringPiece kMetricSpillDiskWritesCount{
"velox.spill_disk_writes_count"};

constexpr folly::StringPiece kMetricSpillFlushTimeMs{
"velox.spill_flush_time_ms"};

constexpr folly::StringPiece kMetricSpillWriteTimeMs{
"velox.spill_write_time_ms"};
} // namespace facebook::velox
13 changes: 13 additions & 0 deletions velox/common/base/SpillStats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
*/

#include "velox/common/base/SpillStats.h"
#include "velox/common/base/Counters.h"
#include "velox/common/base/Exceptions.h"
#include "velox/common/base/StatsReporter.h"
#include "velox/common/base/SuccinctPrinter.h"

namespace facebook::velox::common {
Expand Down Expand Up @@ -220,6 +222,9 @@ void updateGlobalSpillRunStats(uint64_t numRuns) {
void updateGlobalSpillAppendStats(
uint64_t numRows,
uint64_t serializationTimeUs) {
RECORD_METRIC_VALUE(kMetricSpilledRowsCount, numRows);
RECORD_HISTOGRAM_METRIC_VALUE(
kMetricSpillSerializationTimeMs, serializationTimeUs / 1'000);
auto statsLocked = localSpillStats().wlock();
statsLocked->spilledRows += numRows;
statsLocked->spillSerializationTimeUs += serializationTimeUs;
Expand All @@ -230,17 +235,23 @@ void incrementGlobalSpilledPartitionStats() {
}

void updateGlobalSpillFillTime(uint64_t timeUs) {
RECORD_HISTOGRAM_METRIC_VALUE(kMetricSpillFillTimeMs, timeUs / 1'000);
localSpillStats().wlock()->spillFillTimeUs += timeUs;
}

void updateGlobalSpillSortTime(uint64_t timeUs) {
RECORD_HISTOGRAM_METRIC_VALUE(kMetricSpillSortTimeMs, timeUs / 1'000);
localSpillStats().wlock()->spillSortTimeUs += timeUs;
}

void updateGlobalSpillWriteStats(
uint64_t spilledBytes,
uint64_t flushTimeUs,
uint64_t writeTimeUs) {
RECORD_METRIC_VALUE(kMetricSpillDiskWritesCount);
RECORD_METRIC_VALUE(kMetricSpilledBytes, spilledBytes);
RECORD_HISTOGRAM_METRIC_VALUE(kMetricSpillFlushTimeMs, flushTimeUs / 1'000);
RECORD_HISTOGRAM_METRIC_VALUE(kMetricSpillWriteTimeMs, writeTimeUs / 1'000);
auto statsLocked = localSpillStats().wlock();
++statsLocked->spillDiskWrites;
statsLocked->spilledBytes += spilledBytes;
Expand All @@ -249,11 +260,13 @@ void updateGlobalSpillWriteStats(
}

void updateGlobalSpillMemoryBytes(uint64_t spilledInputBytes) {
RECORD_METRIC_VALUE(kMetricSpilledInputBytes, spilledInputBytes);
auto statsLocked = localSpillStats().wlock();
statsLocked->spilledInputBytes += spilledInputBytes;
}

void incrementGlobalSpilledFiles() {
RECORD_METRIC_VALUE(kMetricSpilledFilesCount);
++localSpillStats().wlock()->spilledFiles;
}

Expand Down
44 changes: 43 additions & 1 deletion velox/docs/metrics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,50 @@ Spilling
- Description
* - spill_max_level_exceeded_count
- Count
- The number of times that a spillable operator hits the max spill level
- The number of times that a spill-able operator hits the max spill level
limit.
* - spill_input_bytes
- Sum
- The number of bytes in memory to spill.
* - spill_bytes
- Sum
- The number of bytes spilled to disk which can be the number of compressed
bytes if compression is enabled.
* - spill_rows_count
- Count
- The number of spilled rows.
* - spill_files_count
- Count
- The number of spilled files.
* - spill_fill_time_ms
- Histogram
- The distribution of the amount of time spent on filling rows for spilling
in range of [0, 600s] with 20 buckets. It is configured to report the
latency at P50, P90, P99, and P100 percentiles.
* - spill_sort_time_ms
- Histogram
- The distribution of the amount of time spent on sorting rows for spilling
in range of [0, 600s] with 20 buckets. It is configured to report the
latency at P50, P90, P99, and P100 percentiles.
* - spill_serialization_time_ms
- Histogram
- The distribution of the amount of time spent on serializing rows for
spilling in range of [0, 600s] with 20 buckets. It is configured to report
the latency at P50, P90, P99, and P100 percentiles.
* - spill_disk_writes_count
- Count
- The number of disk writes to spill rows.
* - spill_flush_time_ms
- Histogram
- The distribution of the amount of time spent on copy out serialized
rows for disk write in range of [0, 600s] with 20 buckets. It is configured
to report the latency at P50, P90, P99, and P100 percentiles. Note: If
compression is enabled, this includes the compression time.
* - spill_write_time_ms
- Histogram
- The distribution of the amount of time spent on writing spilled rows to
disk in range of [0, 600s] with 20 buckets. It is configured to report the
latency at P50, P90, P99, and P100 percentiles.

Hive Connector
--------------
Expand Down

0 comments on commit 4466c87

Please sign in to comment.