Skip to content

Commit

Permalink
Use inplace normalization and add benchmark (#506)
Browse files Browse the repository at this point in the history
* Use inplace normalization and add benchmark

* Fix early exit in ASCII case

* Make unicode handling faster by checking lowercase class before computing the lowercasing

* Add benchmark to ci

* Add macro benchmark

---------

Co-authored-by: paullegranddc <82819397+paullegranddc@users.noreply.github.com>
  • Loading branch information
VianneyRuhlmann and paullegranddc authored Jul 5, 2024
1 parent 3142b2d commit 6f91123
Show file tree
Hide file tree
Showing 9 changed files with 545 additions and 373 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tools/docker/Dockerfile.build
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ COPY "data-pipeline/Cargo.toml" "data-pipeline/"
COPY "data-pipeline-ffi/Cargo.toml" "data-pipeline-ffi/"
COPY "bin_tests/Cargo.toml" "bin_tests/"
RUN find -name "Cargo.toml" | sed -e s#Cargo.toml#src/lib.rs#g | xargs -n 1 sh -c 'mkdir -p $(dirname $1); touch $1; echo $1' create_stubs
RUN echo profiling/benches/main.rs profiling/benches/interning_strings.rs trace-obfuscation/benches/trace_obfuscation.rs tools/src/bin/dedup_headers.rs tools/sidecar_mockgen/src/bin/sidecar_mockgen.rs ddtelemetry/examples/tm-worker-test.rs ipc/tarpc/tarpc/examples/compression.rs ipc/tarpc/tarpc/examples/custom_transport.rs ipc/tarpc/tarpc/examples/pubsub.rs ipc/tarpc/tarpc/examples/readme.rs ipc/tarpc/tarpc/examples/tracing.rs ipc/tarpc/tarpc/tests/compile_fail.rs ipc/tarpc/tarpc/tests/dataservice.rs ipc/tarpc/tarpc/tests/service_functional.rs bin_tests/src/bin/crashtracker_bin_test.rs bin_tests/src/bin/test_the_tests.rs | xargs -n 1 sh -c 'mkdir -p $(dirname $1); touch $1; echo $1' create_stubs
RUN echo trace-normalization/benches/normalization_utils.rs profiling/benches/main.rs profiling/benches/interning_strings.rs trace-obfuscation/benches/trace_obfuscation.rs tools/src/bin/dedup_headers.rs tools/sidecar_mockgen/src/bin/sidecar_mockgen.rs ddtelemetry/examples/tm-worker-test.rs ipc/benches/ipc.rs ipc/tarpc/tarpc/examples/compression.rs ipc/tarpc/tarpc/examples/custom_transport.rs ipc/tarpc/tarpc/examples/pubsub.rs ipc/tarpc/tarpc/examples/readme.rs ipc/tarpc/tarpc/examples/tracing.rs ipc/tarpc/tarpc/tests/compile_fail.rs ipc/tarpc/tarpc/tests/dataservice.rs ipc/tarpc/tarpc/tests/service_functional.rs bin_tests/src/bin/crashtracker_bin_test.rs bin_tests/src/bin/test_the_tests.rs | xargs -n 1 sh -c 'mkdir -p $(dirname $1); touch $1; echo $1' create_stubs

# cache dependencies
RUN cargo fetch --locked
Expand Down
5 changes: 5 additions & 0 deletions trace-normalization/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,8 @@ datadog-trace-protobuf = { path = "../trace-protobuf" }
[dev-dependencies]
rand = "0.8.5"
duplicate = "0.4.1"
criterion = "0.5"

[[bench]]
name = "normalization_utils"
harness = false
125 changes: 125 additions & 0 deletions trace-normalization/benches/normalization_utils.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
// Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/
// SPDX-License-Identifier: Apache-2.0

use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion};
use datadog_trace_protobuf::pb;
use std::collections::HashMap;

/// Benchmarks `normalize_utils::normalize_service` over a fixed set of service names.
///
/// Every measured call operates on a fresh owned `String` produced by the setup
/// closure of `iter_batched_ref`, so one call never sees the result of a previous one.
fn normalize_service_bench(c: &mut Criterion) {
    // Inputs, in order: empty string, short ASCII, ASCII with punctuation and
    // spaces, non-ASCII (emoji and CJK) text, and one long name.
    const SERVICE_NAMES: [&str; 5] = [
        "",
        "test_ASCII",
        "Test Conversion 0f Weird !@#$%^&**() Characters",
        "Data🐨dog🐶 繋がっ⛰てて",
        "A00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 000000000000",
    ];

    let mut group = c.benchmark_group("normalization/normalize_service");

    for name in SERVICE_NAMES {
        // An empty parameter would give an unreadable benchmark id, so the
        // empty input is reported under a placeholder label.
        let label = if name.is_empty() {
            "[empty string]"
        } else {
            name
        };

        group.bench_with_input(
            BenchmarkId::new("normalize_service", label),
            name,
            |bencher, input| {
                bencher.iter_batched_ref(
                    || input.to_owned(),
                    datadog_trace_normalization::normalize_utils::normalize_service,
                    // NOTE(review): this uses `NumBatches` while
                    // `normalize_name_bench` uses `NumIterations` with the same
                    // count -- confirm the difference is intentional.
                    BatchSize::NumBatches(100000),
                )
            },
        );
    }

    group.finish()
}

/// Benchmarks `normalize_utils::normalize_name` over a fixed set of span names.
///
/// Every measured call operates on a fresh owned `String` produced by the setup
/// closure of `iter_batched_ref`.
fn normalize_name_bench(c: &mut Criterion) {
    // Inputs are labelled by their own text: a short name, a name containing a
    // dash, and a long dotted name.
    const SPAN_NAMES: [&str; 3] = [
        "good",
        "bad-name",
        "Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.",
    ];

    let mut group = c.benchmark_group("normalization/normalize_name");

    for name in SPAN_NAMES {
        // The input string doubles as the benchmark parameter label.
        let id = BenchmarkId::new("normalize_name", name);

        group.bench_with_input(id, name, |bencher, input| {
            bencher.iter_batched_ref(
                || input.to_owned(),
                datadog_trace_normalization::normalize_utils::normalize_name,
                BatchSize::NumIterations(100000),
            )
        });
    }

    group.finish()
}

/// Benchmarks `normalizer::normalize_trace` on a fixed two-span trace.
///
/// Despite the function name, the measured routine is the trace-level
/// normalizer. The trace is built once; each measured call receives its own
/// clone from the setup closure of `iter_batched_ref`.
fn normalize_span_bench(c: &mut Criterion) {
    // First span: every string field is populated.
    let raclette_span = pb::Span {
        service: "django".to_owned(),
        name: "django.controller".to_owned(),
        resource: "GET /some/raclette".to_owned(),
        r#type: "http".to_owned(),
        trace_id: 424242,
        span_id: 1388,
        parent_id: 1111,
        start: 1448466874000000000,
        duration: 10000000,
        error: 0,
        meta: HashMap::from([
            ("user".to_owned(), "leo".to_owned()),
            ("pool".to_owned(), "fondue".to_owned()),
        ]),
        metrics: HashMap::from([("cheese_weight".to_owned(), 100000.0)]),
        meta_struct: HashMap::new(),
        span_links: Vec::new(),
    };

    // Second span: same trace id, but with an empty service name and the error
    // field set.
    let reblochon_span = pb::Span {
        service: String::new(),
        name: "django.controller".to_owned(),
        resource: "GET /some/reblochon".to_owned(),
        r#type: "http".to_owned(),
        trace_id: 424242,
        span_id: 1456,
        parent_id: 1123,
        start: 1448466849000000000,
        duration: 12000000,
        error: 1,
        meta: HashMap::from([
            ("user".to_owned(), "leo".to_owned()),
            ("pool".to_owned(), "tartiflette".to_owned()),
        ]),
        metrics: HashMap::from([("cheese_weight".to_owned(), 100000.0)]),
        meta_struct: HashMap::new(),
        span_links: Vec::new(),
    };

    let trace = [raclette_span, reblochon_span];
    let id = BenchmarkId::new("normalization/normalize_trace", "test_trace");

    c.bench_with_input(id, &trace, |bencher, spans| {
        bencher.iter_batched_ref(
            || spans.to_owned(),
            // Kept as an explicit closure, as in the original call shape.
            |batch| datadog_trace_normalization::normalizer::normalize_trace(batch),
            BatchSize::SmallInput,
        )
    });
}

// Benchmark group `benches`: runs the three benchmark functions of this file
// with criterion's default configuration.
criterion_group! {
    name = benches;
    config = Criterion::default();
    targets = normalize_service_bench, normalize_name_bench, normalize_span_bench
}

// Generates the `main` entry point that runs the `benches` group.
criterion_main!(benches);
4 changes: 2 additions & 2 deletions trace-normalization/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@

#![deny(clippy::all)]

pub mod normalizer;

pub mod normalize_utils;
pub mod normalizer;
pub(crate) mod utf8_helpers;
Loading

0 comments on commit 6f91123

Please sign in to comment.