Stabilize criterion benchmark results

DataDog · Aug 6, 2024 · 78578fc · 78578fc
1 parent 737c1f2
commit 78578fc
Show file tree

Hide file tree

Showing 7 changed files with 74 additions and 34 deletions.
diff --git a/benchmark/run_benchmarks_ci.sh b/benchmark/run_benchmarks_ci.sh
@@ -22,7 +22,7 @@ pushd "${PROJECT_DIR}" > /dev/null
 
 # Run benchmarks
 message "Running benchmarks"
-cargo bench --workspace -- --sample-size=200
+cargo bench --workspace -- --warm-up-time 1 --measurement-time 5 --sample-size=250
 message "Finished running benchmarks"
 
 # Copy the benchmark results to the output directory

diff --git a/trace-normalization/benches/normalization_utils.rs b/trace-normalization/benches/normalization_utils.rs
@@ -1,12 +1,18 @@
 // Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/
 // SPDX-License-Identifier: Apache-2.0
 
+use criterion::Throughput::Elements;
 use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion};
 use datadog_trace_protobuf::pb;
-use std::collections::HashMap;
+use std::{collections::HashMap, time::Duration};
 
 fn normalize_service_bench(c: &mut Criterion) {
     let mut group = c.benchmark_group("normalization/normalize_service");
+    group.throughput(Elements(1));
+    // We only need to measure for a short time since the function is very fast
+    group.warm_up_time(Duration::from_secs(1));
+    group.measurement_time(Duration::from_secs(1));
+    group.sample_size(250);
     let cases = &[
             "",
             "test_ASCII",
@@ -30,7 +36,7 @@ fn normalize_service_bench(c: &mut Criterion) {
                 b.iter_batched_ref(
                     || case.to_owned(),
                     datadog_trace_normalization::normalize_utils::normalize_service,
-                    BatchSize::NumBatches(100000),
+                    BatchSize::SmallInput,
                 )
             },
         );
@@ -40,6 +46,11 @@ fn normalize_service_bench(c: &mut Criterion) {
 
 fn normalize_name_bench(c: &mut Criterion) {
     let mut group = c.benchmark_group("normalization/normalize_name");
+    group.throughput(Elements(1));
+    // We only need to measure for a short time since the function is very fast
+    group.warm_up_time(Duration::from_secs(1));
+    group.measurement_time(Duration::from_secs(1));
+    group.sample_size(250);
     let cases = &[
         "good",
         "bad-name",
@@ -53,7 +64,7 @@ fn normalize_name_bench(c: &mut Criterion) {
                 b.iter_batched_ref(
                     || case.to_owned(),
                     datadog_trace_normalization::normalize_utils::normalize_name,
-                    BatchSize::NumIterations(100000),
+                    BatchSize::SmallInput,
                 )
             },
         );
@@ -109,7 +120,7 @@ fn normalize_span_bench(c: &mut Criterion) {
         |b, case| {
             b.iter_batched_ref(
                 || case.to_owned(),
-                |s| datadog_trace_normalization::normalizer::normalize_trace(s),
+                |t| datadog_trace_normalization::normalizer::normalize_trace(t),
                 BatchSize::SmallInput,
             )
         },

diff --git a/trace-obfuscation/benches/benchmarks/credit_cards_bench.rs b/trace-obfuscation/benches/benchmarks/credit_cards_bench.rs
@@ -1,12 +1,19 @@
 // Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/
 // SPDX-License-Identifier: Apache-2.0
 
+use std::time::Duration;
+
 use criterion::Throughput::Elements;
 use criterion::{criterion_group, BenchmarkId, Criterion};
 use datadog_trace_obfuscation::credit_cards::is_card_number;
 
 pub fn is_card_number_bench(c: &mut Criterion) {
     let mut group = c.benchmark_group("credit_card");
+    group.throughput(Elements(1));
+    // We only need to measure for a short time since the function is very fast
+    group.warm_up_time(Duration::from_secs(1));
+    group.measurement_time(Duration::from_secs(1));
+    group.sample_size(250);
     let ccs = [
         "378282246310005",
         "  378282246310005",
@@ -17,7 +24,6 @@ pub fn is_card_number_bench(c: &mut Criterion) {
         "",
     ];
     for c in ccs.iter() {
-        group.throughput(Elements(1));
         group.bench_with_input(BenchmarkId::new("is_card_number", c), c, |b, i| {
             b.iter(|| is_card_number(i, true))
         });
@@ -26,6 +32,11 @@ pub fn is_card_number_bench(c: &mut Criterion) {
 
 fn is_card_number_no_luhn_bench(c: &mut Criterion) {
     let mut group = c.benchmark_group("credit_card");
+    group.throughput(Elements(1));
+    // We only need to measure for a short time since the function is very fast
+    group.warm_up_time(Duration::from_secs(1));
+    group.measurement_time(Duration::from_secs(1));
+    group.sample_size(250);
     let ccs = [
         "378282246310005",
         "  378282246310005",
@@ -36,7 +47,6 @@ fn is_card_number_no_luhn_bench(c: &mut Criterion) {
         "",
     ];
     for c in ccs.iter() {
-        group.throughput(Elements(1));
         group.bench_with_input(BenchmarkId::new("is_card_number_no_luhn", c), c, |b, i| {
             b.iter(|| is_card_number(i, false))
         });

diff --git a/trace-obfuscation/benches/benchmarks/redis_obfuscation_bench.rs b/trace-obfuscation/benches/benchmarks/redis_obfuscation_bench.rs
@@ -96,11 +96,16 @@ SET k v
     ];
 
     group.bench_function("obfuscate_redis_string", |b| {
-        b.iter(|| {
-            for c in cases {
-                black_box(redis::obfuscate_redis_string(c));
-            }
-        })
+        b.iter_batched_ref(
+            // Keep the String instances around to avoid measuring the deallocation cost
+            || Vec::with_capacity(cases.len()) as Vec<String>,
+            |res: &mut Vec<String>| {
+                for c in cases {
+                    res.push(black_box(redis::obfuscate_redis_string(c)));
+                }
+            },
+            criterion::BatchSize::LargeInput,
+        )
     });
 }
 

diff --git a/trace-obfuscation/benches/benchmarks/replace_trace_tags_bench.rs b/trace-obfuscation/benches/benchmarks/replace_trace_tags_bench.rs
@@ -48,11 +48,13 @@ fn criterion_benchmark(c: &mut Criterion) {
         span_links: vec![],
     };
 
-    let mut trace = [span_1];
+    let trace = [span_1];
     group.bench_function("replace_trace_tags", |b| {
-        b.iter(|| {
-            replacer::replace_trace_tags(black_box(&mut trace), black_box(rules));
-        })
+        b.iter_batched_ref(
+            || trace.to_owned(),
+            |t| replacer::replace_trace_tags(black_box(t), black_box(rules)),
+            criterion::BatchSize::LargeInput,
+        )
     });
 }
 

diff --git a/trace-obfuscation/benches/benchmarks/sql_obfuscation_bench.rs b/trace-obfuscation/benches/benchmarks/sql_obfuscation_bench.rs
@@ -7,11 +7,16 @@ use datadog_trace_obfuscation::sql::obfuscate_sql_string;
 fn sql_obfuscation(c: &mut Criterion) {
     let mut group = c.benchmark_group("sql");
     group.bench_function("obfuscate_sql_string", |b| {
-        b.iter(|| {
-            for (input, _) in CASES {
-                black_box(obfuscate_sql_string(input));
-            }
-        })
+        b.iter_batched_ref(
+            // Keep the String instances around to avoid measuring the deallocation cost
+            || Vec::with_capacity(CASES.len()) as Vec<String>,
+            |res: &mut Vec<String>| {
+                for (input, _) in CASES {
+                    res.push(black_box(obfuscate_sql_string(input)));
+                }
+            },
+            criterion::BatchSize::LargeInput,
+        )
     });
 }
 

diff --git a/trace-utils/benches/deserialization.rs b/trace-utils/benches/deserialization.rs
@@ -1,7 +1,7 @@
 // Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/
 // SPDX-License-Identifier: Apache-2.0
 
-use criterion::{criterion_group, Criterion};
+use criterion::{black_box, criterion_group, Criterion};
 use datadog_trace_utils::tracer_header_tags::TracerHeaderTags;
 use datadog_trace_utils::tracer_payload::{
     DefaultTraceChunkProcessor, TraceEncoding, TracerPayloadCollection, TracerPayloadParams,
@@ -44,18 +44,25 @@ pub fn deserialize_msgpack_to_internal(c: &mut Criterion) {
     c.bench_function(
         "benching deserializing traces from msgpack to their internal representation ",
         |b| {
-            b.iter(|| {
-                let result: anyhow::Result<TracerPayloadCollection> = TracerPayloadParams::new(
-                    &data,
-                    tracer_header_tags,
-                    &mut DefaultTraceChunkProcessor,
-                    false,
-                    TraceEncoding::V04,
-                )
-                .try_into();
-
-                assert!(result.is_ok())
-            })
+            b.iter_batched(
+                || &data,
+                |data| {
+                    let result: anyhow::Result<TracerPayloadCollection> = black_box(
+                        TracerPayloadParams::new(
+                            data,
+                            tracer_header_tags,
+                            &mut DefaultTraceChunkProcessor,
+                            false,
+                            TraceEncoding::V04,
+                        )
+                        .try_into(),
+                    );
+                    assert!(result.is_ok());
+                    // Return the result to avoid measuring the deallocation time
+                    result
+                },
+                criterion::BatchSize::LargeInput,
+            );
         },
     );
 }