-
Notifications
You must be signed in to change notification settings - Fork 9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Trace Tag Replacer: functionality to scrub sensitive data from spans #111
Changes from 11 commits
14eb5b2
9d8c7e5
1070666
62a1e8c
b9b3e75
32107cf
ca6daa9
530f3cf
b8bb46c
4495bfa
6f4cf14
7d00515
89c1874
e39da91
334410a
6bba281
fc84f24
e577cc9
e6e165c
4dcae5c
8627022
9ceccb0
c05cccf
0a28c71
baf1c55
a4de7ad
2166645
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
[package] | ||
name = "datadog-trace-obfuscation" | ||
version = "2.0.0" | ||
authors = ["David Lee <david.lee@datadoghq.com>"] | ||
edition = "2021" | ||
|
||
[dependencies] | ||
prost = "0.11.6" | ||
anyhow = "1.0" | ||
regex = "1" | ||
|
||
[dev-dependencies] | ||
duplicate = "0.4.1" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
// Unless explicitly stated otherwise all files in this repository are licensed | ||
// under the Apache License Version 2.0. This product includes software | ||
// developed at Datadog (https://www.datadoghq.com/). Copyright 2023-Present | ||
// Datadog, Inc. | ||
|
||
#![deny(clippy::all)] | ||
|
||
pub mod pb { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think with this setup, the protocol buffer definitions will be included in the binary multiple times. It also might mean the normalization and obfuscation crates are using different types. It might be best to either bundle them all into a single crate, or move these protocol buffer definitions into a single dedicated crate and reference that instead. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I've moved all the protobuf files + definitions into their own dir/crate in the libdatadog root. |
||
include!("../../trace-normalization/src/pb/pb.rs"); | ||
pawelchcki marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
pub mod replacer; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,198 @@ | ||
// Unless explicitly stated otherwise all files in this repository are licensed | ||
// under the Apache License Version 2.0. This product includes software | ||
// developed at Datadog (https://www.datadoghq.com/). Copyright 2023-Present | ||
// Datadog, Inc. | ||
|
||
use crate::pb; | ||
use regex::Regex; | ||
|
||
/// Applies a set of [`ReplaceRule`]s to the spans of a trace.
pub trait TraceTagReplacer { | ||
/// Implementations are expected to apply `rules` to the spans in `trace`,
/// mutating the spans in place.
fn replace_trace_tags(trace: &mut [pb::Span], rules: &[ReplaceRule]); | ||
} | ||
|
||
#[derive(Debug)] | ||
pub struct ReplaceRule<'a> { | ||
// name specifies the name of the tag that the replace rule addresses. However, | ||
// some exceptions apply such as: | ||
// • "resource.name" will target the resource | ||
// • "*" will target all tags and the resource | ||
name: &'a str, | ||
|
||
// re holds the regex pattern for matching. | ||
re: regex::Regex, | ||
|
||
// repl specifies the replacement string to be used when Pattern matches. | ||
repl: &'a str, | ||
} | ||
|
||
struct DefaultTraceTagReplacer {} | ||
|
||
impl TraceTagReplacer for DefaultTraceTagReplacer { | ||
/// replace_trace_tags replaces the tag values of all spans within a trace with a given set of rules. | ||
fn replace_trace_tags(trace: &mut [pb::Span], rules: &[ReplaceRule]) { | ||
for rule in rules { | ||
for span in &mut *trace { | ||
match rule.name { | ||
"*" => { | ||
for (_, val) in span.meta.iter_mut() { | ||
*val = rule.re.replace_all(val, rule.repl).to_string(); | ||
} | ||
} | ||
"resource.name" => { | ||
span.resource = rule.re.replace_all(&span.resource, rule.repl).to_string(); | ||
} | ||
_ => { | ||
if let Some(val) = span.meta.get_mut(rule.name) { | ||
let replaced_tag = rule.re.replace_all(val, rule.repl).to_string(); | ||
*val = replaced_tag; | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
/// parse_rules_from_string takes an array of rules, represented as an array of length 3 arrays | ||
/// holding the tag name, regex pattern, and replacement string as strings. | ||
/// * returns a vec of ReplaceRules | ||
pub fn parse_rules_from_string<'a>( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this may be changed in the future depending on how the "agentless" code consumes config variables, and how we want to parse the "replace_tags" config variables. |
||
rules: &'a [[&'a str; 3]], | ||
) -> anyhow::Result<Vec<ReplaceRule<'a>>> { | ||
let mut vec: Vec<ReplaceRule> = Vec::with_capacity(rules.len()); | ||
|
||
for [name, pattern, repl] in rules { | ||
let compiled_regex = match Regex::new(pattern) { | ||
Ok(res) => res, | ||
Err(err) => { | ||
anyhow::bail!(format!( | ||
"Obfuscator Error: Error while parsing rule: {}", | ||
thedavl marked this conversation as resolved.
Show resolved
Hide resolved
|
||
err | ||
)) | ||
} | ||
}; | ||
vec.push(ReplaceRule { | ||
name, | ||
re: compiled_regex, | ||
repl, | ||
}); | ||
} | ||
Ok(vec) | ||
} | ||
|
||
#[cfg(test)]
mod tests {

    use super::TraceTagReplacer;
    use crate::pb;
    use crate::replacer;
    use duplicate::duplicate_item;
    use std::collections::HashMap;

    /// Builds a fixture span and layers `tags` on top of it: the
    /// `"resource.name"` key overrides the span's resource, every other key is
    /// stored as a `meta` tag.
    fn new_test_span_with_tags(tags: HashMap<&str, &str>) -> pb::Span {
        let mut resource = "GET /some/raclette".to_string();
        let mut meta = HashMap::new();
        for (key, val) in tags {
            if key == "resource.name" {
                resource = val.to_string();
            } else {
                meta.insert(key.to_string(), val.to_string());
            }
        }
        pb::Span {
            duration: 10000000,
            error: 0,
            resource,
            service: "django".to_string(),
            name: "django.controller".to_string(),
            span_id: 123,
            start: 1448466874000000000,
            trace_id: 424242,
            meta,
            metrics: HashMap::from([("cheese_weight".to_string(), 100000.0)]),
            parent_id: 1111,
            r#type: "http".to_string(),
            meta_struct: HashMap::new(),
        }
    }

    // One test is generated per substitution group below: each group supplies
    // the test name, the raw rules, the tags put on the spans, and the values
    // those tags must hold after replacement.
    #[duplicate_item(
        [
            test_name [test_replace_tags]
            rules [&[
                ["http.url", "(token/)([^/]*)", "${1}?"],
                ["http.url", "guid", "[REDACTED]"],
                ["custom.tag", "(/foo/bar/).*", "${1}extra"],
            ]]
            input [
                HashMap::from([
                    ("http.url", "some/guid/token/abcdef/abc"),
                    ("custom.tag", "/foo/bar/foo"),
                ])
            ]
            expected [
                HashMap::from([
                    ("http.url", "some/[REDACTED]/token/?/abc"),
                    ("custom.tag", "/foo/bar/extra"),
                ])
            ];
        ]
        [
            test_name [test_replace_tags_with_exceptions]
            rules [&[
                ["*", "(token/)([^/]*)", "${1}?"],
                ["*", "this", "that"],
                ["http.url", "guid", "[REDACTED]"],
                ["custom.tag", "(/foo/bar/).*", "${1}extra"],
                ["resource.name", "prod", "stage"],
            ]]
            input [
                HashMap::from([
                    ("resource.name", "this is prod"),
                    ("http.url", "some/[REDACTED]/token/abcdef/abc"),
                    ("other.url", "some/guid/token/abcdef/abc"),
                    ("custom.tag", "/foo/bar/foo"),
                ])
            ]
            expected [
                HashMap::from([
                    ("resource.name", "this is stage"),
                    ("http.url", "some/[REDACTED]/token/?/abc"),
                    ("other.url", "some/guid/token/?/abc"),
                    ("custom.tag", "/foo/bar/extra"),
                ])
            ];
        ]
    )]
    #[test]
    fn test_name() {
        let parsed_rules = replacer::parse_rules_from_string(rules).unwrap();
        // Root and child span carry the same tags, so both must end up identical.
        let mut trace = [
            new_test_span_with_tags(input),
            new_test_span_with_tags(input),
        ];

        replacer::DefaultTraceTagReplacer::replace_trace_tags(&mut trace, &parsed_rules);

        for (key, val) in expected {
            for span in &trace {
                if key == "resource.name" {
                    assert_eq!(val, span.resource);
                } else {
                    assert_eq!(val, span.meta.get(key).unwrap());
                }
            }
        }
    }

    /// An unbalanced `)` is not a valid pattern, so parsing must fail.
    #[test]
    fn test_parse_rules_invalid_regex() {
        let outcome = replacer::parse_rules_from_string(&[["http.url", ")", "${1}?"]]);
        assert!(outcome.is_err());
    }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is this clippy setting still required?