diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 6b384de98..54a6fd350 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -11,3 +11,5 @@ NOTICE @Datadog/libdatadog rustfmt.toml @Datadog/libdatadog README.md @Datadog/libdatadog trace-normalization @Datadog/serverless +trace-obfuscation @Datadog/serverless +trace-protobuf @Datadog/serverless diff --git a/Cargo.lock b/Cargo.lock index e9bffa2c8..e58ae9cd7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -20,6 +20,12 @@ dependencies = [ "libc", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anyhow" version = "1.0.68" @@ -134,6 +140,33 @@ dependencies = [ "winapi", ] +[[package]] +name = "ciborium" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c137568cc60b904a7724001b35ce2630fd00d5d84805fbb608ab89509d788f" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "346de753af073cc87b52b2083a506b38ac176a44cfb05497b622e27be899b369" + +[[package]] +name = "ciborium-ll" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "213030a2b5a4e0c0892b6652260cf6ccac84827b83a85a534e178e3906c4cf1b" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "clap" version = "2.34.0" @@ -141,10 +174,31 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" dependencies = [ "bitflags", - "textwrap", + "textwrap 0.11.0", "unicode-width", ] +[[package]] +name = "clap" +version = "3.2.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5" +dependencies = [ + "bitflags", + "clap_lex", + "indexmap", + "textwrap 0.16.0", +] + +[[package]] +name = "clap_lex" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" +dependencies = [ + "os_str_bytes", +] + [[package]] name = "codespan-reporting" version = "0.11.1" @@ -195,8 +249,8 @@ checksum = "b01d6de93b2b6c65e17c634a26653a29d107b3c98c607c765bf38d041531cd8f" dependencies = [ "atty", "cast", - "clap", - "criterion-plot", + "clap 2.34.0", + "criterion-plot 0.4.5", "csv", "itertools", "lazy_static", @@ -213,6 +267,32 @@ dependencies = [ "walkdir", ] +[[package]] +name = "criterion" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb" +dependencies = [ + "anes", + "atty", + "cast", + "ciborium", + "clap 3.2.23", + "criterion-plot 0.5.0", + "itertools", + "lazy_static", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + [[package]] name = "criterion-plot" version = "0.4.5" @@ -223,6 +303,16 @@ dependencies = [ "itertools", ] +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + [[package]] name = "crossbeam-channel" version = "0.5.6" @@ -391,11 +481,29 @@ name = "datadog-trace-normalization" version = "2.0.0" dependencies = [ "anyhow", + "datadog-trace-protobuf", "duplicate", - "prost", "rand", ] +[[package]] +name = "datadog-trace-obfuscation" +version = "2.0.0" +dependencies = [ + "anyhow", + "criterion 0.4.0", + "datadog-trace-protobuf", + "duplicate", + "regex", +] + +[[package]] +name = "datadog-trace-protobuf" +version = "2.0.0" +dependencies = [ + "prost", +] + [[package]] name = "ddcommon" version = "2.0.0" @@ -434,7 +542,7 @@ version = "2.0.0" dependencies = [ "anyhow", "bytes", - "criterion", + "criterion 0.3.6", "ddcommon", "futures", "http", @@ -1117,6 +1225,12 @@ dependencies = [ "thiserror", ] +[[package]] +name = "os_str_bytes" +version = "6.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee" + [[package]] name = "output_vt100" version = "0.1.3" @@ -1771,6 +1885,12 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "textwrap" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" + [[package]] name = "thiserror" version = "1.0.38" diff --git a/Cargo.toml b/Cargo.toml index eb73497c1..d67c9e2b1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ members = [ "tools", "tools/cc_utils", "trace-normalization", + "trace-obfuscation", "spawn_worker", "tests/spawn_from_lib" ] diff --git a/LICENSE-3rdparty.yml b/LICENSE-3rdparty.yml index ee22dc06e..c8de9c8f6 100644 --- a/LICENSE-3rdparty.yml +++ b/LICENSE-3rdparty.yml @@ -1,5 +1,5 @@ --- -root_name: "datadog-profiling, ddcommon, datadog-profiling-ffi, ddcommon-ffi, ddtelemetry, ddtelemetry-ffi, tools, cc_utils, datadog-trace-normalization, spawn_worker" +root_name: "datadog-profiling, ddcommon, datadog-profiling-ffi, ddcommon-ffi, ddtelemetry, ddtelemetry-ffi, tools, cc_utils, datadog-trace-normalization, datadog-trace-protobuf, datadog-trace-obfuscation, spawn_worker" third_party_libraries: - package_name: aho-corasick package_version: 0.7.20 diff --git a/tools/docker/Dockerfile.build b/tools/docker/Dockerfile.build index 9fb396e89..693d9ce43 100644 --- a/tools/docker/Dockerfile.build +++ b/tools/docker/Dockerfile.build @@ -78,12 +78,14 @@ COPY "ddtelemetry-ffi/Cargo.toml" "ddtelemetry-ffi/" COPY "profiling/Cargo.toml" "profiling/" COPY "profiling-ffi/Cargo.toml" "profiling-ffi/" COPY "trace-normalization/Cargo.toml" "trace-normalization/" +COPY "trace-obfuscation/Cargo.toml" "trace-obfuscation/" +COPY "trace-protobuf/Cargo.toml" "trace-protobuf/" COPY "tools/Cargo.toml" "tools/" COPY "tools/cc_utils/Cargo.toml" "tools/cc_utils/" COPY "spawn_worker/Cargo.toml" "spawn_worker/" COPY "tests/spawn_from_lib/Cargo.toml" "tests/spawn_from_lib/" RUN find -name "Cargo.toml" | sed -e s#Cargo.toml#src/lib.rs#g | xargs -n 1 sh -c 'mkdir -p $(dirname $1); touch $1; echo $1' create_stubs -RUN echo ddtelemetry/benches/ipc.rs tools/src/bin/dedup_headers.rs ddtelemetry/examples/tm-worker-test.rs | xargs -n 1 sh -c 'mkdir -p $(dirname $1); touch $1; echo $1' create_stubs +RUN echo trace-obfuscation/benches/replace_trace_tags_bench.rs ddtelemetry/benches/ipc.rs tools/src/bin/dedup_headers.rs ddtelemetry/examples/tm-worker-test.rs | xargs -n 1 sh -c 'mkdir -p $(dirname $1); touch $1; echo $1' create_stubs # cache dependencies RUN cargo fetch --locked diff --git a/trace-normalization/Cargo.toml b/trace-normalization/Cargo.toml index a0468c533..e5a7abdba 100644 --- a/trace-normalization/Cargo.toml +++ b/trace-normalization/Cargo.toml @@ -5,8 +5,8 @@ authors = ["David Lee "] edition = "2021" [dependencies] -prost = "0.11.6" anyhow = "1.0" +datadog-trace-protobuf = { path = "../trace-protobuf" } [dev-dependencies] rand = "0.8.5" diff --git a/trace-normalization/src/lib.rs b/trace-normalization/src/lib.rs index 93e5a8825..cda2a44ed 100644 --- a/trace-normalization/src/lib.rs +++ b/trace-normalization/src/lib.rs @@ -5,10 +5,6 @@ #![deny(clippy::all)] -pub mod pb { - include!("./pb/pb.rs"); -} - pub mod normalizer; pub mod normalize_utils; diff --git a/trace-normalization/src/normalizer.rs b/trace-normalization/src/normalizer.rs index 492f211cd..3152a36ea 100644 --- a/trace-normalization/src/normalizer.rs +++ b/trace-normalization/src/normalizer.rs @@ -4,7 +4,7 @@ // Datadog, Inc. use crate::normalize_utils; -use crate::pb; +use datadog_trace_protobuf::pb; use std::time::SystemTime; const MAX_TYPE_LEN: usize = 100; @@ -177,7 +177,7 @@ mod tests { use crate::normalize_utils; use crate::normalizer; use crate::normalizer::DEFAULT_SPAN_NAME; - use crate::pb; + use datadog_trace_protobuf::pb; use rand::Rng; use std::collections::HashMap; use std::time::SystemTime; diff --git a/trace-obfuscation/Cargo.toml b/trace-obfuscation/Cargo.toml new file mode 100644 index 000000000..86a072f5b --- /dev/null +++ b/trace-obfuscation/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "datadog-trace-obfuscation" +version = "2.0.0" +authors = ["David Lee "] +edition = "2021" + +[dependencies] +anyhow = "1.0" +regex = "1" +datadog-trace-protobuf = { path = "../trace-protobuf" } + +[dev-dependencies] +duplicate = "0.4.1" +criterion = "0.4" + +[[bench]] +name = "replace_trace_tags_bench" +harness = false \ No newline at end of file diff --git a/trace-obfuscation/benches/replace_trace_tags_bench.rs b/trace-obfuscation/benches/replace_trace_tags_bench.rs new file mode 100644 index 000000000..a6c3d8266 --- /dev/null +++ b/trace-obfuscation/benches/replace_trace_tags_bench.rs @@ -0,0 +1,59 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. This product includes software +// developed at Datadog (https://www.datadoghq.com/). Copyright 2023-Present +// Datadog, Inc. + +use std::collections::HashMap; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datadog_trace_obfuscation::replacer; +use datadog_trace_protobuf::pb; + +fn criterion_benchmark(c: &mut Criterion) { + let rules: &[replacer::ReplaceRule] = &replacer::parse_rules_from_string(&[ + ["http.url", "(token/)([^/]*)", "${1}?"], + ["http.url", "guid", "[REDACTED]"], + ["*", "(token/)([^/]*)", "${1}?"], + ["*", "this", "that"], + ["custom.tag", "(/foo/bar/).*", "${1}extra"], + ["resource.name", "prod", "stage"], + ]) + .unwrap(); + + let span_1 = pb::Span { + duration: 10000000, + error: 0, + resource: "GET /some/raclette".to_string(), + service: "django".to_string(), + name: "django.controller".to_string(), + span_id: 123, + start: 1448466874000000000, + trace_id: 424242, + meta: HashMap::from([ + ("resource.name".to_string(), "this is prod".to_string()), + ( + "http.url".to_string(), + "some/[REDACTED]/token/abcdef/abc".to_string(), + ), + ( + "other.url".to_string(), + "some/guid/token/abcdef/abc".to_string(), + ), + ("custom.tag".to_string(), "/foo/bar/foo".to_string()), + ]), + metrics: HashMap::from([("cheese_weight".to_string(), 100000.0)]), + parent_id: 1111, + r#type: "http".to_string(), + meta_struct: HashMap::new(), + }; + + let mut trace = [span_1]; + c.bench_function("replace_trace_tags_bench", |b| { + b.iter(|| { + replacer::replace_trace_tags(black_box(&mut trace), black_box(rules)); + }) + }); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/trace-obfuscation/src/lib.rs b/trace-obfuscation/src/lib.rs new file mode 100644 index 000000000..b07fd9888 --- /dev/null +++ b/trace-obfuscation/src/lib.rs @@ -0,0 +1,8 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. This product includes software +// developed at Datadog (https://www.datadoghq.com/). Copyright 2023-Present +// Datadog, Inc. + +#![deny(clippy::all)] + +pub mod replacer; diff --git a/trace-obfuscation/src/replacer.rs b/trace-obfuscation/src/replacer.rs new file mode 100644 index 000000000..f7b713580 --- /dev/null +++ b/trace-obfuscation/src/replacer.rs @@ -0,0 +1,185 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. This product includes software +// developed at Datadog (https://www.datadoghq.com/). Copyright 2023-Present +// Datadog, Inc. + +use datadog_trace_protobuf::pb; +use regex::Regex; + +#[derive(Debug)] +pub struct ReplaceRule<'a> { + // name specifies the name of the tag that the replace rule addresses. However, + // some exceptions apply such as: + // * "resource.name" will target the resource + // * "*" will target all tags and the resource + name: &'a str, + + // re holds the regex pattern for matching. + re: regex::Regex, + + // repl specifies the replacement string to be used when Pattern matches. + repl: &'a str, +} + +/// replace_trace_tags replaces the tag values of all spans within a trace with a given set of rules. +pub fn replace_trace_tags(trace: &mut [pb::Span], rules: &[ReplaceRule]) { + for rule in rules { + for span in trace.iter_mut() { + match rule.name { + "*" => { + for (_, val) in span.meta.iter_mut() { + *val = rule.re.replace_all(val, rule.repl).to_string(); + } + } + "resource.name" => { + span.resource = rule.re.replace_all(&span.resource, rule.repl).to_string(); + } + _ => { + if let Some(val) = span.meta.get_mut(rule.name) { + let replaced_tag = rule.re.replace_all(val, rule.repl).to_string(); + *val = replaced_tag; + } + } + } + } + } +} + +/// parse_rules_from_string takes an array of rules, represented as an array of length 3 arrays +/// holding the tag name, regex pattern, and replacement string as strings. +/// * returns a vec of ReplaceRules +pub fn parse_rules_from_string<'a>( + rules: &'a [[&'a str; 3]], +) -> anyhow::Result>> { + let mut vec: Vec = Vec::with_capacity(rules.len()); + + for [name, pattern, repl] in rules { + let compiled_regex = match Regex::new(pattern) { + Ok(res) => res, + Err(err) => { + anyhow::bail!("Obfuscator Error: Error while parsing rule: {}", err) + } + }; + vec.push(ReplaceRule { + name, + re: compiled_regex, + repl, + }); + } + Ok(vec) +} + +#[cfg(test)] +mod tests { + + use crate::replacer; + use datadog_trace_protobuf::pb; + use duplicate::duplicate_item; + use std::collections::HashMap; + + fn new_test_span_with_tags(tags: HashMap<&str, &str>) -> pb::Span { + let mut span = pb::Span { + duration: 10000000, + error: 0, + resource: "GET /some/raclette".to_string(), + service: "django".to_string(), + name: "django.controller".to_string(), + span_id: 123, + start: 1448466874000000000, + trace_id: 424242, + meta: HashMap::new(), + metrics: HashMap::from([("cheese_weight".to_string(), 100000.0)]), + parent_id: 1111, + r#type: "http".to_string(), + meta_struct: HashMap::new(), + }; + for (key, val) in tags { + match key { + "resource.name" => { + span.resource = val.to_string(); + } + _ => { + span.meta.insert(key.to_string(), val.to_string()); + } + } + } + span + } + + #[duplicate_item( + [ + test_name [test_replace_tags] + rules [&[ + ["http.url", "(token/)([^/]*)", "${1}?"], + ["http.url", "guid", "[REDACTED]"], + ["custom.tag", "(/foo/bar/).*", "${1}extra"], + ]] + input [ + HashMap::from([ + ("http.url", "some/guid/token/abcdef/abc"), + ("custom.tag", "/foo/bar/foo"), + ]) + ] + expected [ + HashMap::from([ + ("http.url", "some/[REDACTED]/token/?/abc"), + ("custom.tag", "/foo/bar/extra"), + ]) + ]; + ] + [ + test_name [test_replace_tags_with_exceptions] + rules [&[ + ["*", "(token/)([^/]*)", "${1}?"], + ["*", "this", "that"], + ["http.url", "guid", "[REDACTED]"], + ["custom.tag", "(/foo/bar/).*", "${1}extra"], + ["resource.name", "prod", "stage"], + ]] + input [ + HashMap::from([ + ("resource.name", "this is prod"), + ("http.url", "some/[REDACTED]/token/abcdef/abc"), + ("other.url", "some/guid/token/abcdef/abc"), + ("custom.tag", "/foo/bar/foo"), + ]) + ] + expected [ + HashMap::from([ + ("resource.name", "this is stage"), + ("http.url", "some/[REDACTED]/token/?/abc"), + ("other.url", "some/guid/token/?/abc"), + ("custom.tag", "/foo/bar/extra"), + ]) + ]; + ] + )] + #[test] + fn test_name() { + let parsed_rules = replacer::parse_rules_from_string(rules); + let root_span = new_test_span_with_tags(input); + let child_span = new_test_span_with_tags(input); + let mut trace = [root_span, child_span]; + + replacer::replace_trace_tags(&mut trace, &parsed_rules.unwrap()); + + for (key, val) in expected { + match key { + "resource.name" => { + assert_eq!(val, trace[0].resource); + assert_eq!(val, trace[1].resource); + } + _ => { + assert_eq!(val, trace[0].meta.get(key).unwrap()); + assert_eq!(val, trace[1].meta.get(key).unwrap()); + } + } + } + } + + #[test] + fn test_parse_rules_invalid_regex() { + let result = replacer::parse_rules_from_string(&[["http.url", ")", "${1}?"]]); + assert!(result.is_err()); + } +} diff --git a/trace-protobuf/Cargo.toml b/trace-protobuf/Cargo.toml new file mode 100644 index 000000000..da06e0076 --- /dev/null +++ b/trace-protobuf/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "datadog-trace-protobuf" +version = "2.0.0" +authors = ["David Lee "] +edition = "2021" + +[dependencies] +prost = "0.11.6" diff --git a/trace-protobuf/src/lib.rs b/trace-protobuf/src/lib.rs new file mode 100644 index 000000000..b07eceea6 --- /dev/null +++ b/trace-protobuf/src/lib.rs @@ -0,0 +1,10 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. This product includes software +// developed at Datadog (https://www.datadoghq.com/). Copyright 2023-Present +// Datadog, Inc. + +#![deny(clippy::all)] + +pub mod pb { + include!("./pb/pb.rs"); +} diff --git a/trace-normalization/src/pb/agent_payload.proto b/trace-protobuf/src/pb/agent_payload.proto similarity index 100% rename from trace-normalization/src/pb/agent_payload.proto rename to trace-protobuf/src/pb/agent_payload.proto diff --git a/trace-normalization/src/pb/descriptor.proto b/trace-protobuf/src/pb/descriptor.proto similarity index 100% rename from trace-normalization/src/pb/descriptor.proto rename to trace-protobuf/src/pb/descriptor.proto diff --git a/trace-normalization/src/pb/gogo.proto b/trace-protobuf/src/pb/gogo.proto similarity index 100% rename from trace-normalization/src/pb/gogo.proto rename to trace-protobuf/src/pb/gogo.proto diff --git a/trace-normalization/src/pb/pb.rs b/trace-protobuf/src/pb/pb.rs similarity index 100% rename from trace-normalization/src/pb/pb.rs rename to trace-protobuf/src/pb/pb.rs diff --git a/trace-normalization/src/pb/span.proto b/trace-protobuf/src/pb/span.proto similarity index 100% rename from trace-normalization/src/pb/span.proto rename to trace-protobuf/src/pb/span.proto diff --git a/trace-normalization/src/pb/tracer_payload.proto b/trace-protobuf/src/pb/tracer_payload.proto similarity index 100% rename from trace-normalization/src/pb/tracer_payload.proto rename to trace-protobuf/src/pb/tracer_payload.proto