From ab26e7be6c300f53b00284c92afac2972e592817 Mon Sep 17 00:00:00 2001 From: David Lee Date: Thu, 2 Feb 2023 15:09:38 -0800 Subject: [PATCH 01/16] add service tag + env tag normalization + unit tests --- trace-normalization/src/normalize_utils.rs | 223 ++++++++++++------ trace-normalization/src/normalizer.rs | 250 ++++++++++++++++++++- 2 files changed, 393 insertions(+), 80 deletions(-) diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs index 9a0d18e23..85ea9dfaf 100644 --- a/trace-normalization/src/normalize_utils.rs +++ b/trace-normalization/src/normalize_utils.rs @@ -3,8 +3,15 @@ // developed at Datadog (https://www.datadoghq.com/). Copyright 2023-Present // Datadog, Inc. +// DEFAULT_SERVICE_NAME is the default name we assign a service if it's missing and we have no reasonable fallback +pub(crate) const DEFAULT_SERVICE_NAME: &str = "unnamed-service"; + // MAX_NAME_LEN the maximum length a name can have pub(crate) const MAX_NAME_LEN: usize = 100; +// MAX_SERVICE_LEN the maximum length a service can have +pub(crate) const MAX_SERVICE_LEN: usize = 100; +// MAX_SERVICE_LEN the maximum length a tag can have +pub(crate) const MAX_TAG_LEN: usize = 200; // TruncateUTF8 truncates the given string to make sure it uses less than limit bytes. // If the last character is a utf8 character that would be split, it removes it @@ -25,23 +32,133 @@ pub(crate) fn truncate_utf8(s: &str, limit: usize) -> &str { s } -// NormalizeService returns a span service or an error describing why normalization failed. -// TODO: Implement this in a future PR -// pub fn normalize_service(svc: String, lang: String) -> (String, Option) { -// if svc == "" { -// return (fallback_service(lang), errors::NormalizeErrors::ErrorEmpty); -// } -// if svc.len() > MAX_SERVICE_LEN { -// return (truncate_utf8(svc, MAX_SERVICE_LEN), errors::NormalizeErrors::ErrorTooLong.into()); -// } -// TODO: implement tag normalization -// let s: String = normalize_tag(svc); -// if s == "" { -// return (fallbackService(lang), errors::NormalizeErrors::ErrorInvalid) -// } -// return (s, err) -// (svc, None) -// } +// fallbackService returns the fallback service name for a service +// belonging to language lang. +pub(crate) fn fallback_service(lang: String) -> String { + if lang.is_empty() { + return DEFAULT_SERVICE_NAME.to_string(); + } + let mut service_name = String::new(); + service_name.push_str("unnamed-"); + service_name.push_str(&lang); + service_name.push_str("-service"); + // TODO: the original golang implementation uses a map to cache previously created + // service names. Implement that here. + service_name +} + +// NormalizeService normalizes a span service and returns an error describing the reason +// (if any) why the name was modified. +pub(crate) fn normalize_service(svc: &str) -> anyhow::Result { + anyhow::ensure!(!svc.is_empty(), "Normalizer Error: Empty service name."); + + let truncated_service = if svc.len() > MAX_SERVICE_LEN as usize { + truncate_utf8(svc, MAX_SERVICE_LEN) + } else { + svc + }; + + normalize_tag(truncated_service) +} + +// NormalizeTag applies some normalization to ensure the tags match the backend requirements. +pub(crate) fn normalize_tag(tag: &str) -> anyhow::Result { + // Fast path: Check if the tag is valid and only contains ASCII characters, + // if yes return it as-is right away. For most use-cases this reduces CPU usage. + if is_normalized_ascii_tag(tag) { + return Ok(tag.to_string()); + } + + anyhow::ensure!(!tag.is_empty(), "Normalizer Error: Empty tag name."); + + // given a dummy value + let mut last_char: char = 'a'; + + let mut result = String::with_capacity(tag.len()); + + let char_vec: Vec = tag.chars().collect(); + + for cur_char in char_vec { + if result.len() == MAX_TAG_LEN as usize { + break; + } + if cur_char.is_lowercase() { + result.push(cur_char); + last_char = cur_char; + continue; + } + if cur_char.is_uppercase() { + let mut iter = cur_char.to_lowercase(); + if iter.len() == 1 { + let c: char = iter.next().unwrap(); + result.push(c); + last_char = c; + } + continue; + } + if cur_char.is_alphabetic() { + result.push(cur_char); + last_char = cur_char; + continue; + } + if cur_char == ':' { + result.push(cur_char); + last_char = cur_char; + continue; + } + if !result.is_empty() && (cur_char.is_ascii_digit() || cur_char == '.' || cur_char == '/' || cur_char == '-') { + result.push(cur_char); + last_char = cur_char; + continue; + } + if !result.is_empty() && last_char != '_' { + result.push('_'); + last_char = '_'; + } + } + + if last_char == '_' { + result.remove(result.len() - 1); + } + + Ok(result.to_string()) +} + +pub(crate) fn is_normalized_ascii_tag(tag: &str) -> bool { + if tag.is_empty() { + return true; + } + if tag.len() > MAX_TAG_LEN as usize { + return false; + } + if !is_valid_ascii_start_char(tag.chars().next().unwrap()) { + return false; + } + for mut i in 0..tag.len() { + let b: char = tag.chars().nth(i).unwrap(); + if is_valid_ascii_tag_char(b) { + continue; + } + if b == '_' { + // an underscore is only okay if followed by a valid non-underscore character + i+=1; + if i == tag.len() || !is_valid_ascii_tag_char(tag.chars().nth(i).unwrap()) { + return false; + } + } else { + return false; + } + } + true +} + +pub(crate) fn is_valid_ascii_start_char(c: char) -> bool { + ('a'..='z').contains(&c) || c == ':' +} + +pub(crate) fn is_valid_ascii_tag_char(c: char) -> bool { + is_valid_ascii_start_char(c) || ('0'..='9').contains(&c) || c == '.' || c == '/' || c == '-' +} // normalize_name normalizes a span name or an error describing why normalization failed. pub(crate) fn normalize_name(name: &str) -> anyhow::Result { @@ -56,58 +173,6 @@ pub(crate) fn normalize_name(name: &str) -> anyhow::Result { normalize_metric_names(truncated_name) } -// TODO: Implement this in a future PR -// NormalizeTag applies some normalization to ensure the tags match the backend requirements. -// pub fn normalize_tag(v: String) -> String { -// Fast path: Check if the tag is valid and only contains ASCII characters, -// if yes return it as-is right away. For most use-cases this reduces CPU usage. -// if is_normalized_ascii_tag(v.clone()) { -// return v; -// } - -// if v.is_empty() { -// return "".to_string(); -// } - -// "".to_string() -// } - -// pub fn is_normalized_ascii_tag(tag: String) -> bool { -// if tag.is_empty() { -// return true; -// } -// if tag.len() > MAX_TAG_LEN { -// return false; -// } -// if !is_valid_ascii_start_char(tag.chars().next().unwrap()) { -// return false; -// } -// for mut i in 0..tag.len() { -// let b: char = tag.chars().nth(i).unwrap(); -// if is_valid_ascii_tag_char(b) { -// continue; -// } -// if b == '_' { -// // an underscore is only okay if followed by a valid non-underscore character -// i+=1; -// if i == tag.len() || !is_valid_ascii_tag_char(tag.chars().nth(i).unwrap()) { -// return false; -// } -// } else { -// return false; -// } -// } -// true -// } - -// pub fn is_valid_ascii_start_char(c: char) -> bool { -// ('a'..='z').contains(&c) || c == ':' -// } - -// pub fn is_valid_ascii_tag_char(c: char) -> bool { -// is_valid_ascii_start_char(c) || ('0'..='9').contains(&c) || c == '.' || c == '/' || c == '-' -// } - pub(crate) fn normalize_metric_names(name: &str) -> anyhow::Result { let mut result = String::with_capacity(name.len()); @@ -190,4 +255,24 @@ mod tests { } } } + + #[duplicate_item( + test_name input expected expected_err; + [test_normalize_empty_service] [""] [normalize_utils::DEFAULT_SERVICE_NAME] ["Normalizer Error: Empty service name."]; + [test_normalize_valid_service] ["good"] ["good"] [""]; + [test_normalize_long_service] ["Too$Long$.".repeat(20).as_str()] ["too_long_.".repeat(10)] [""]; + [test_normalize_dash_service] ["bad&service"] ["bad_service"] [""]; + )] + #[test] + fn test_name() { + match normalize_utils::normalize_service(input) { + Ok(val) => { + assert_eq!(expected_err, ""); + assert_eq!(val, expected) + }, + Err(err) => { + assert_eq!(format!("{err}"), expected_err); + } + } + } } diff --git a/trace-normalization/src/normalizer.rs b/trace-normalization/src/normalizer.rs index 099904d9d..990d3a741 100644 --- a/trace-normalization/src/normalizer.rs +++ b/trace-normalization/src/normalizer.rs @@ -21,9 +21,12 @@ pub fn normalize(s: &mut pb::Span) -> anyhow::Result<()> { anyhow::ensure!(s.trace_id != 0, "TraceID is zero (reason:trace_id_zero)"); anyhow::ensure!(s.span_id != 0, "SpanID is zero (reason:span_id_zero)"); - // TODO: Implement service name normalizer in future PR - // let (svc, _) = normalize_utils::normalize_service(s.service.clone(), "".to_string()); - // s.service = svc; + let normalized_service = match normalize_utils::normalize_service(&s.service) { + Ok(service) => service, + Err(_) => normalize_utils::fallback_service("".to_string()) + }; + + s.service = normalized_service; // TODO: check for a feature flag to determine the component tag to become the span name // https://github.com/DataDog/datadog-agent/blob/dc88d14851354cada1d15265220a39dce8840dcc/pkg/trace/agent/normalizer.go#L64 @@ -76,14 +79,18 @@ pub fn normalize(s: &mut pb::Span) -> anyhow::Result<()> { s.r#type = normalize_utils::truncate_utf8(&s.r#type, MAX_TYPE_LEN).to_string(); } - // TODO: Implement tag normalization in future PR - // if s.meta.contains_key("env") { - // let env_tag: String = s.meta.get("env").unwrap().to_string(); - // s.meta.insert("env".to_string(), normalize_utils::normalize_tag(env_tag)); - // } + if s.meta.contains_key("env") { + let env_tag: &str = s.meta.get("env").unwrap(); + match normalize_utils::normalize_tag(env_tag) { + Ok(normalized_tag) => { + s.meta.insert("env".to_string(), normalized_tag); + }, + Err(_) => {} + } + }; if let Some(code) = s.meta.get("http.status_code") { - if !is_valid_status_code(code.to_string()) { + if !is_valid_status_code(code) { s.meta.remove("http.status_code"); } }; @@ -91,7 +98,7 @@ pub fn normalize(s: &mut pb::Span) -> anyhow::Result<()> { Ok(()) } -pub(crate) fn is_valid_status_code(sc: String) -> bool { +pub(crate) fn is_valid_status_code(sc: &str) -> bool { if let Ok(code) = sc.parse::() { return (100..600).contains(&code); } @@ -100,12 +107,12 @@ pub(crate) fn is_valid_status_code(sc: String) -> bool { #[cfg(test)] mod tests { - use crate::normalize_utils; use crate::normalizer; use crate::pb; use rand::Rng; use std::collections::HashMap; + use std::time::SystemTime; pub fn new_test_span() -> pb::Span { let mut rng = rand::thread_rng(); @@ -198,4 +205,225 @@ mod tests { assert!(normalizer::normalize(&mut test_span).is_ok()); assert_eq!(test_span.resource, test_span.name); } + + #[test] + pub fn test_normalize_trace_id_passes() { + let mut test_span = new_test_span(); + let before_trace_id = test_span.trace_id; + assert!(normalizer::normalize(&mut test_span).is_ok()); + assert_eq!(before_trace_id, test_span.trace_id); + } + + #[test] + pub fn test_normalize_no_trace_id() { + let mut test_span = new_test_span(); + test_span.trace_id = 0; + assert!(normalizer::normalize(&mut test_span).is_err()); + } + + #[test] + pub fn test_normalize_component_to_name() { + let mut test_span = new_test_span(); + let before_trace_id = test_span.trace_id; + assert!(normalizer::normalize(&mut test_span).is_ok()); + assert_eq!(before_trace_id, test_span.trace_id); + } + + // TODO: Add a unit test for testing Component2Name, one that is + // implemented within the normalize function. + + #[test] + pub fn test_normalize_span_id_passes() { + let mut test_span = new_test_span(); + let before_span_id = test_span.span_id; + assert!(normalizer::normalize(&mut test_span).is_ok()); + assert_eq!(before_span_id, test_span.span_id); + } + + #[test] + pub fn test_normalize_no_span_id() { + let mut test_span = new_test_span(); + test_span.span_id = 0; + assert!(normalizer::normalize(&mut test_span).is_err()); + } + + #[test] + pub fn test_normalize_start_passes() { + let mut test_span = new_test_span(); + let before_start = test_span.start; + assert!(normalizer::normalize(&mut test_span).is_ok()); + assert_eq!(before_start, test_span.start); + } + + fn get_current_time() -> i64 { + SystemTime::now().duration_since(SystemTime::UNIX_EPOCH).unwrap().as_nanos() as i64 + } + + #[test] + pub fn test_normalize_start_too_small() { + let mut test_span = new_test_span(); + + test_span.start = 42; + let min_start = get_current_time() - test_span.duration; + + assert!(normalizer::normalize(&mut test_span).is_ok()); + assert!(test_span.start >= min_start); + assert!(test_span.start <= get_current_time()); + } + + #[test] + pub fn test_normalize_start_too_small_with_large_duration() { + let mut test_span = new_test_span(); + + test_span.start = 42; + test_span.duration = get_current_time() * 2; + let min_start = get_current_time(); + + assert!(normalizer::normalize(&mut test_span).is_ok()); + assert!(test_span.start >= min_start); // start should have been reset to current time + assert!(test_span.start <= get_current_time()); //start should have been reset to current time + } + + #[test] + pub fn test_normalize_duration_passes() { + let mut test_span = new_test_span(); + let before_duration = test_span.duration; + + assert!(normalizer::normalize(&mut test_span).is_ok()); + assert_eq!(before_duration, test_span.duration); + } + + #[test] + pub fn test_normalize_empty_duration() { + let mut test_span = new_test_span(); + test_span.duration = 0; + + assert!(normalizer::normalize(&mut test_span).is_ok()); + assert_eq!(test_span.duration, 0); + } + + #[test] + pub fn test_normalize_negative_duration() { + let mut test_span = new_test_span(); + test_span.duration = -50; + + assert!(normalizer::normalize(&mut test_span).is_ok()); + assert_eq!(test_span.duration, 0); + } + + #[test] + pub fn test_normalize_large_duration() { + let mut test_span = new_test_span(); + test_span.duration = std::i64::MAX; + + assert!(normalizer::normalize(&mut test_span).is_ok()); + assert_eq!(test_span.duration, 0); + } + + #[test] + pub fn test_normalize_error_passes() { + let mut test_span = new_test_span(); + let before_error = test_span.error; + + assert!(normalizer::normalize(&mut test_span).is_ok()); + assert_eq!(before_error, test_span.error); + } + + #[test] + pub fn test_normalize_metrics_passes() { + let mut test_span = new_test_span(); + let before_metrics = test_span.metrics.clone(); + + assert!(normalizer::normalize(&mut test_span).is_ok()); + assert_eq!(before_metrics, test_span.metrics); + } + + #[test] + pub fn test_normalize_meta_passes() { + let mut test_span = new_test_span(); + let before_meta = test_span.meta.clone(); + + assert!(normalizer::normalize(&mut test_span).is_ok()); + assert_eq!(before_meta, test_span.meta); + } + + #[test] + pub fn test_normalize_parent_id_passes() { + let mut test_span = new_test_span(); + let before_parent_id = test_span.parent_id; + + assert!(normalizer::normalize(&mut test_span).is_ok()); + assert_eq!(before_parent_id, test_span.parent_id); + } + + #[test] + pub fn test_normalize_type_passes() { + let mut test_span = new_test_span(); + let before_type = test_span.r#type.clone(); + + assert!(normalizer::normalize(&mut test_span).is_ok()); + assert_eq!(before_type, test_span.r#type); + } + + #[test] + pub fn test_normalize_type_too_long() { + let mut test_span = new_test_span(); + test_span.r#type = "sql".repeat(1000); + + assert!(normalizer::normalize(&mut test_span).is_ok()); + assert_eq!(test_span.r#type.len(), normalizer::MAX_TYPE_LEN as usize); + } + + #[test] + pub fn test_normalize_service_tag() { + let mut test_span = new_test_span(); + test_span.service = "retargeting(api-Staging ".to_string(); + + assert!(normalizer::normalize(&mut test_span).is_ok()); + assert_eq!(test_span.service, "retargeting_api-staging"); + } + + #[test] + pub fn test_normalize_env() { + let mut test_span = new_test_span(); + test_span.meta.insert("env".to_string(), "DEVELOPMENT".to_string()); + + assert!(normalizer::normalize(&mut test_span).is_ok()); + assert_eq!("development", test_span.meta.get("env").unwrap()); + } + + #[test] + pub fn test_special_zipkin_root_span() { + let mut test_span = new_test_span(); + test_span.parent_id = 42; + test_span.trace_id = 42; + test_span.span_id = 42; + + let before_trace_id = test_span.trace_id; + let before_span_id = test_span.span_id; + + assert!(normalizer::normalize(&mut test_span).is_ok()); + assert_eq!(test_span.parent_id, 0); + assert_eq!(test_span.trace_id, before_trace_id); + assert_eq!(test_span.span_id, before_span_id); + + } + + #[test] + pub fn test_normalize_trace_empty() { + let mut test_span = new_test_span(); + test_span.meta.insert("env".to_string(), "DEVELOPMENT".to_string()); + + assert!(normalizer::normalize(&mut test_span).is_ok()); + assert_eq!("development", test_span.meta.get("env").unwrap()); + } + + #[test] + pub fn test_is_valid_status_code() { + assert!(normalizer::is_valid_status_code("100")); + assert!(normalizer::is_valid_status_code("599")); + assert!(!normalizer::is_valid_status_code("99")); + assert!(!normalizer::is_valid_status_code("600")); + assert!(!normalizer::is_valid_status_code("Invalid status code")); + } } From 38d0c621e67ee503f73ee26c06e60d159fee104e Mon Sep 17 00:00:00 2001 From: David Lee Date: Thu, 2 Feb 2023 15:20:29 -0800 Subject: [PATCH 02/16] Update normalizer.rs --- trace-normalization/src/normalizer.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/trace-normalization/src/normalizer.rs b/trace-normalization/src/normalizer.rs index 990d3a741..b799ae1fd 100644 --- a/trace-normalization/src/normalizer.rs +++ b/trace-normalization/src/normalizer.rs @@ -28,7 +28,7 @@ pub fn normalize(s: &mut pb::Span) -> anyhow::Result<()> { s.service = normalized_service; - // TODO: check for a feature flag to determine the component tag to become the span name + // TODO: component2name: check for a feature flag to determine the component tag to become the span name // https://github.com/DataDog/datadog-agent/blob/dc88d14851354cada1d15265220a39dce8840dcc/pkg/trace/agent/normalizer.go#L64 let normalized_name = match normalize_utils::normalize_name(&s.name) { @@ -107,6 +107,7 @@ pub(crate) fn is_valid_status_code(sc: &str) -> bool { #[cfg(test)] mod tests { + use crate::normalize_utils; use crate::normalizer; use crate::pb; From 4058d5c340bab1dc789ef88619a8263f2334972b Mon Sep 17 00:00:00 2001 From: David Lee Date: Thu, 2 Feb 2023 15:23:10 -0800 Subject: [PATCH 03/16] lint --- trace-normalization/src/normalize_utils.rs | 32 ++++++++++++---------- trace-normalization/src/normalizer.rs | 31 +++++++++++---------- 2 files changed, 34 insertions(+), 29 deletions(-) diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs index 85ea9dfaf..3ebd58169 100644 --- a/trace-normalization/src/normalize_utils.rs +++ b/trace-normalization/src/normalize_utils.rs @@ -36,8 +36,8 @@ pub(crate) fn truncate_utf8(s: &str, limit: usize) -> &str { // belonging to language lang. pub(crate) fn fallback_service(lang: String) -> String { if lang.is_empty() { - return DEFAULT_SERVICE_NAME.to_string(); - } + return DEFAULT_SERVICE_NAME.to_string(); + } let mut service_name = String::new(); service_name.push_str("unnamed-"); service_name.push_str(&lang); @@ -52,7 +52,7 @@ pub(crate) fn fallback_service(lang: String) -> String { pub(crate) fn normalize_service(svc: &str) -> anyhow::Result { anyhow::ensure!(!svc.is_empty(), "Normalizer Error: Empty service name."); - let truncated_service = if svc.len() > MAX_SERVICE_LEN as usize { + let truncated_service = if svc.len() > MAX_SERVICE_LEN { truncate_utf8(svc, MAX_SERVICE_LEN) } else { svc @@ -64,10 +64,10 @@ pub(crate) fn normalize_service(svc: &str) -> anyhow::Result { // NormalizeTag applies some normalization to ensure the tags match the backend requirements. pub(crate) fn normalize_tag(tag: &str) -> anyhow::Result { // Fast path: Check if the tag is valid and only contains ASCII characters, - // if yes return it as-is right away. For most use-cases this reduces CPU usage. - if is_normalized_ascii_tag(tag) { - return Ok(tag.to_string()); - } + // if yes return it as-is right away. For most use-cases this reduces CPU usage. + if is_normalized_ascii_tag(tag) { + return Ok(tag.to_string()); + } anyhow::ensure!(!tag.is_empty(), "Normalizer Error: Empty tag name."); @@ -79,7 +79,7 @@ pub(crate) fn normalize_tag(tag: &str) -> anyhow::Result { let char_vec: Vec = tag.chars().collect(); for cur_char in char_vec { - if result.len() == MAX_TAG_LEN as usize { + if result.len() == MAX_TAG_LEN { break; } if cur_char.is_lowercase() { @@ -106,7 +106,9 @@ pub(crate) fn normalize_tag(tag: &str) -> anyhow::Result { last_char = cur_char; continue; } - if !result.is_empty() && (cur_char.is_ascii_digit() || cur_char == '.' || cur_char == '/' || cur_char == '-') { + if !result.is_empty() + && (cur_char.is_ascii_digit() || cur_char == '.' || cur_char == '/' || cur_char == '-') + { result.push(cur_char); last_char = cur_char; continue; @@ -128,7 +130,7 @@ pub(crate) fn is_normalized_ascii_tag(tag: &str) -> bool { if tag.is_empty() { return true; } - if tag.len() > MAX_TAG_LEN as usize { + if tag.len() > MAX_TAG_LEN { return false; } if !is_valid_ascii_start_char(tag.chars().next().unwrap()) { @@ -141,10 +143,10 @@ pub(crate) fn is_normalized_ascii_tag(tag: &str) -> bool { } if b == '_' { // an underscore is only okay if followed by a valid non-underscore character - i+=1; - if i == tag.len() || !is_valid_ascii_tag_char(tag.chars().nth(i).unwrap()) { - return false; - } + i += 1; + if i == tag.len() || !is_valid_ascii_tag_char(tag.chars().nth(i).unwrap()) { + return false; + } } else { return false; } @@ -269,7 +271,7 @@ mod tests { Ok(val) => { assert_eq!(expected_err, ""); assert_eq!(val, expected) - }, + } Err(err) => { assert_eq!(format!("{err}"), expected_err); } diff --git a/trace-normalization/src/normalizer.rs b/trace-normalization/src/normalizer.rs index b799ae1fd..1b8f04084 100644 --- a/trace-normalization/src/normalizer.rs +++ b/trace-normalization/src/normalizer.rs @@ -23,7 +23,7 @@ pub fn normalize(s: &mut pb::Span) -> anyhow::Result<()> { let normalized_service = match normalize_utils::normalize_service(&s.service) { Ok(service) => service, - Err(_) => normalize_utils::fallback_service("".to_string()) + Err(_) => normalize_utils::fallback_service("".to_string()), }; s.service = normalized_service; @@ -81,11 +81,8 @@ pub fn normalize(s: &mut pb::Span) -> anyhow::Result<()> { if s.meta.contains_key("env") { let env_tag: &str = s.meta.get("env").unwrap(); - match normalize_utils::normalize_tag(env_tag) { - Ok(normalized_tag) => { - s.meta.insert("env".to_string(), normalized_tag); - }, - Err(_) => {} + if let Ok(normalized_tag) = normalize_utils::normalize_tag(env_tag) { + s.meta.insert("env".to_string(), normalized_tag); } }; @@ -230,7 +227,7 @@ mod tests { assert_eq!(before_trace_id, test_span.trace_id); } - // TODO: Add a unit test for testing Component2Name, one that is + // TODO: Add a unit test for testing Component2Name, one that is // implemented within the normalize function. #[test] @@ -257,7 +254,10 @@ mod tests { } fn get_current_time() -> i64 { - SystemTime::now().duration_since(SystemTime::UNIX_EPOCH).unwrap().as_nanos() as i64 + SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_nanos() as i64 } #[test] @@ -275,11 +275,11 @@ mod tests { #[test] pub fn test_normalize_start_too_small_with_large_duration() { let mut test_span = new_test_span(); - + test_span.start = 42; test_span.duration = get_current_time() * 2; let min_start = get_current_time(); - + assert!(normalizer::normalize(&mut test_span).is_ok()); assert!(test_span.start >= min_start); // start should have been reset to current time assert!(test_span.start <= get_current_time()); //start should have been reset to current time @@ -372,7 +372,7 @@ mod tests { test_span.r#type = "sql".repeat(1000); assert!(normalizer::normalize(&mut test_span).is_ok()); - assert_eq!(test_span.r#type.len(), normalizer::MAX_TYPE_LEN as usize); + assert_eq!(test_span.r#type.len(), normalizer::MAX_TYPE_LEN); } #[test] @@ -387,7 +387,9 @@ mod tests { #[test] pub fn test_normalize_env() { let mut test_span = new_test_span(); - test_span.meta.insert("env".to_string(), "DEVELOPMENT".to_string()); + test_span + .meta + .insert("env".to_string(), "DEVELOPMENT".to_string()); assert!(normalizer::normalize(&mut test_span).is_ok()); assert_eq!("development", test_span.meta.get("env").unwrap()); @@ -407,13 +409,14 @@ mod tests { assert_eq!(test_span.parent_id, 0); assert_eq!(test_span.trace_id, before_trace_id); assert_eq!(test_span.span_id, before_span_id); - } #[test] pub fn test_normalize_trace_empty() { let mut test_span = new_test_span(); - test_span.meta.insert("env".to_string(), "DEVELOPMENT".to_string()); + test_span + .meta + .insert("env".to_string(), "DEVELOPMENT".to_string()); assert!(normalizer::normalize(&mut test_span).is_ok()); assert_eq!("development", test_span.meta.get("env").unwrap()); From 2eb29407b79c3ba077f486f519777a6c3bde6fde Mon Sep 17 00:00:00 2001 From: David Lee Date: Thu, 2 Feb 2023 15:29:31 -0800 Subject: [PATCH 04/16] remove .unwrap() outside of unit tests --- trace-normalization/src/normalizer.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/trace-normalization/src/normalizer.rs b/trace-normalization/src/normalizer.rs index 1b8f04084..5b8484e8d 100644 --- a/trace-normalization/src/normalizer.rs +++ b/trace-normalization/src/normalizer.rs @@ -80,9 +80,10 @@ pub fn normalize(s: &mut pb::Span) -> anyhow::Result<()> { } if s.meta.contains_key("env") { - let env_tag: &str = s.meta.get("env").unwrap(); - if let Ok(normalized_tag) = normalize_utils::normalize_tag(env_tag) { - s.meta.insert("env".to_string(), normalized_tag); + if let Some(env_tag) = s.meta.get("env") { + if let Ok(normalized_tag) = normalize_utils::normalize_tag(env_tag) { + s.meta.insert("env".to_string(), normalized_tag); + } } }; From 4236bd53ba46b9c7484d0e96d5170e0f23007155 Mon Sep 17 00:00:00 2001 From: David Lee Date: Mon, 6 Feb 2023 08:19:43 -0800 Subject: [PATCH 05/16] change fallback_service to take an &str --- trace-normalization/src/normalize_utils.rs | 4 ++-- trace-normalization/src/normalizer.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs index 3ebd58169..7676a41c0 100644 --- a/trace-normalization/src/normalize_utils.rs +++ b/trace-normalization/src/normalize_utils.rs @@ -34,13 +34,13 @@ pub(crate) fn truncate_utf8(s: &str, limit: usize) -> &str { // fallbackService returns the fallback service name for a service // belonging to language lang. -pub(crate) fn fallback_service(lang: String) -> String { +pub(crate) fn fallback_service(lang: &str) -> String { if lang.is_empty() { return DEFAULT_SERVICE_NAME.to_string(); } let mut service_name = String::new(); service_name.push_str("unnamed-"); - service_name.push_str(&lang); + service_name.push_str(lang); service_name.push_str("-service"); // TODO: the original golang implementation uses a map to cache previously created // service names. Implement that here. diff --git a/trace-normalization/src/normalizer.rs b/trace-normalization/src/normalizer.rs index 5b8484e8d..9eee4d5d4 100644 --- a/trace-normalization/src/normalizer.rs +++ b/trace-normalization/src/normalizer.rs @@ -23,7 +23,7 @@ pub fn normalize(s: &mut pb::Span) -> anyhow::Result<()> { let normalized_service = match normalize_utils::normalize_service(&s.service) { Ok(service) => service, - Err(_) => normalize_utils::fallback_service("".to_string()), + Err(_) => normalize_utils::fallback_service(""), }; s.service = normalized_service; From aca86becad04388d61b971db3ebe073080648b3d Mon Sep 17 00:00:00 2001 From: David Lee Date: Mon, 6 Feb 2023 08:26:26 -0800 Subject: [PATCH 06/16] modify fallback service --- trace-normalization/src/normalize_utils.rs | 11 +++-------- trace-normalization/src/normalizer.rs | 2 +- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs index 7676a41c0..6d977cfec 100644 --- a/trace-normalization/src/normalize_utils.rs +++ b/trace-normalization/src/normalize_utils.rs @@ -34,17 +34,12 @@ pub(crate) fn truncate_utf8(s: &str, limit: usize) -> &str { // fallbackService returns the fallback service name for a service // belonging to language lang. -pub(crate) fn fallback_service(lang: &str) -> String { +pub(crate) fn fallback_service() -> String { if lang.is_empty() { return DEFAULT_SERVICE_NAME.to_string(); } - let mut service_name = String::new(); - service_name.push_str("unnamed-"); - service_name.push_str(lang); - service_name.push_str("-service"); - // TODO: the original golang implementation uses a map to cache previously created - // service names. Implement that here. - service_name + // In the go agent implementation, if a lang was specified in TagStats + // (extracted from the payload header) the fallback_service name would be "unnamed-{lang}-service". } // NormalizeService normalizes a span service and returns an error describing the reason diff --git a/trace-normalization/src/normalizer.rs b/trace-normalization/src/normalizer.rs index 9eee4d5d4..e309c2cdd 100644 --- a/trace-normalization/src/normalizer.rs +++ b/trace-normalization/src/normalizer.rs @@ -23,7 +23,7 @@ pub fn normalize(s: &mut pb::Span) -> anyhow::Result<()> { let normalized_service = match normalize_utils::normalize_service(&s.service) { Ok(service) => service, - Err(_) => normalize_utils::fallback_service(""), + Err(_) => normalize_utils::fallback_service(), }; s.service = normalized_service; From 263e1065423ea72b1d1f3ffcf5ebfb9a641d6839 Mon Sep 17 00:00:00 2001 From: David Lee Date: Mon, 6 Feb 2023 08:27:10 -0800 Subject: [PATCH 07/16] lint --- trace-normalization/src/normalize_utils.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs index 6d977cfec..6f6f6e6c2 100644 --- a/trace-normalization/src/normalize_utils.rs +++ b/trace-normalization/src/normalize_utils.rs @@ -35,11 +35,9 @@ pub(crate) fn truncate_utf8(s: &str, limit: usize) -> &str { // fallbackService returns the fallback service name for a service // belonging to language lang. pub(crate) fn fallback_service() -> String { - if lang.is_empty() { - return DEFAULT_SERVICE_NAME.to_string(); - } // In the go agent implementation, if a lang was specified in TagStats - // (extracted from the payload header) the fallback_service name would be "unnamed-{lang}-service". + // (extracted from the payload header) the fallback_service name would be "unnamed-{lang}-service". + DEFAULT_SERVICE_NAME.to_string() } // NormalizeService normalizes a span service and returns an error describing the reason From 659c309396eded9469a0bf1e8d33a374088a3ce4 Mon Sep 17 00:00:00 2001 From: David Lee Date: Mon, 6 Feb 2023 09:00:44 -0800 Subject: [PATCH 08/16] remove more .unwrap() --- trace-normalization/src/normalize_utils.rs | 33 +++++++++++++++------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs index 6f6f6e6c2..df63e7f78 100644 --- a/trace-normalization/src/normalize_utils.rs +++ b/trace-normalization/src/normalize_utils.rs @@ -82,8 +82,7 @@ pub(crate) fn normalize_tag(tag: &str) -> anyhow::Result { } if cur_char.is_uppercase() { let mut iter = cur_char.to_lowercase(); - if iter.len() == 1 { - let c: char = iter.next().unwrap(); + if let Some(c) = iter.next() { result.push(c); last_char = c; } @@ -126,20 +125,34 @@ pub(crate) fn is_normalized_ascii_tag(tag: &str) -> bool { if tag.len() > MAX_TAG_LEN { return false; } - if !is_valid_ascii_start_char(tag.chars().next().unwrap()) { - return false; + match tag.chars().next() { + Some(c) => { + if !is_valid_ascii_start_char(c) { + return false; + } + } + None => return false, } + for mut i in 0..tag.len() { - let b: char = tag.chars().nth(i).unwrap(); - if is_valid_ascii_tag_char(b) { + let cur_char: char = match tag.chars().nth(i) { + Some(c) => c, + None => return false, + }; + if is_valid_ascii_tag_char(cur_char) { continue; } - if b == '_' { + if cur_char == '_' { // an underscore is only okay if followed by a valid non-underscore character i += 1; - if i == tag.len() || !is_valid_ascii_tag_char(tag.chars().nth(i).unwrap()) { - return false; - } + match tag.chars().nth(i) { + Some(c) => { + if !is_valid_ascii_tag_char(c) { + return false; + } + } + None => return false, + }; } else { return false; } From 0e35bbd9b8effc30276df96bda600db720b2d8a7 Mon Sep 17 00:00:00 2001 From: David Lee Date: Mon, 6 Feb 2023 09:49:40 -0800 Subject: [PATCH 09/16] add mountain of tag normalization unit tests --- trace-normalization/src/normalize_utils.rs | 74 +++++++++++++++++++--- 1 file changed, 65 insertions(+), 9 deletions(-) diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs index df63e7f78..567d8661f 100644 --- a/trace-normalization/src/normalize_utils.rs +++ b/trace-normalization/src/normalize_utils.rs @@ -13,7 +13,7 @@ pub(crate) const MAX_SERVICE_LEN: usize = 100; // MAX_SERVICE_LEN the maximum length a tag can have pub(crate) const MAX_TAG_LEN: usize = 200; -// TruncateUTF8 truncates the given string to make sure it uses less than limit bytes. +// truncate_utf8 truncates the given string to make sure it uses less than limit bytes. // If the last character is a utf8 character that would be split, it removes it // entirely to make sure the resulting string is not broken. pub(crate) fn truncate_utf8(s: &str, limit: usize) -> &str { @@ -32,7 +32,7 @@ pub(crate) fn truncate_utf8(s: &str, limit: usize) -> &str { s } -// fallbackService returns the fallback service name for a service +// fallback_service returns the fallback service name for a service // belonging to language lang. pub(crate) fn fallback_service() -> String { // In the go agent implementation, if a lang was specified in TagStats @@ -40,7 +40,7 @@ pub(crate) fn fallback_service() -> String { DEFAULT_SERVICE_NAME.to_string() } -// NormalizeService normalizes a span service and returns an error describing the reason +// normalize_service normalizes a span service and returns an error describing the reason // (if any) why the name was modified. pub(crate) fn normalize_service(svc: &str) -> anyhow::Result { anyhow::ensure!(!svc.is_empty(), "Normalizer Error: Empty service name."); @@ -54,7 +54,7 @@ pub(crate) fn normalize_service(svc: &str) -> anyhow::Result { normalize_tag(truncated_service) } -// NormalizeTag applies some normalization to ensure the tags match the backend requirements. +// normalize_tag applies some normalization to ensure the tags match the backend requirements. pub(crate) fn normalize_tag(tag: &str) -> anyhow::Result { // Fast path: Check if the tag is valid and only contains ASCII characters, // if yes return it as-is right away. For most use-cases this reduces CPU usage. @@ -265,11 +265,11 @@ mod tests { } #[duplicate_item( - test_name input expected expected_err; - [test_normalize_empty_service] [""] [normalize_utils::DEFAULT_SERVICE_NAME] ["Normalizer Error: Empty service name."]; - [test_normalize_valid_service] ["good"] ["good"] [""]; - [test_normalize_long_service] ["Too$Long$.".repeat(20).as_str()] ["too_long_.".repeat(10)] [""]; - [test_normalize_dash_service] ["bad&service"] ["bad_service"] [""]; + test_name input expected expected_err; + [test_normalize_empty_service] [""] [normalize_utils::DEFAULT_SERVICE_NAME] ["Normalizer Error: Empty service name."]; + [test_normalize_valid_service] ["good"] ["good"] [""]; + [test_normalize_long_service] ["Too$Long$.".repeat(20).as_str()] ["too_long_.".repeat(10)] [""]; + [test_normalize_dash_service] ["bad&service"] ["bad_service"] [""]; )] #[test] fn test_name() { @@ -283,4 +283,60 @@ mod tests { } } } + #[duplicate_item( + test_name input expected expected_err; + [test_normalize_tag_1] ["#test_starting_hash"] ["test_starting_hash"] [""]; + [test_normalize_tag_2] ["TestCAPSandSuch"] ["testcapsandsuch"] [""]; + [test_normalize_tag_3] ["Test Conversion Of Weird !@#$%^&**() Characters"] ["test_conversion_of_weird_characters"] [""]; + [test_normalize_tag_4] ["$#weird_starting"] ["weird_starting"] [""]; + [test_normalize_tag_5] ["allowed:c0l0ns"] ["allowed:c0l0ns"] [""]; + [test_normalize_tag_6] ["1love"] ["love"] [""]; + [test_normalize_tag_7] ["ünicöde"] ["ünicöde"] [""]; + [test_normalize_tag_8] ["ünicöde:metäl"] ["ünicöde:metäl"] [""]; + [test_normalize_tag_9] ["Data🐨dog🐶 繋がっ⛰てて"] ["data_dog_繋がっ_てて"] [""]; + [test_normalize_tag_10] [" spaces "] ["spaces"] [""]; + [test_normalize_tag_11] [" #hashtag!@#spaces #__<># "] ["hashtag_spaces"] [""]; + [test_normalize_tag_12] [":testing"] [":testing"] [""]; + [test_normalize_tag_13] ["_foo"] ["foo"] [""]; + [test_normalize_tag_14] [":::test"] [":::test"] [""]; + [test_normalize_tag_15] ["contiguous_____underscores"] ["contiguous_underscores"] [""]; + [test_normalize_tag_16] ["foo_"] ["foo"] [""]; + [test_normalize_tag_17] ["\u{017F}odd_\u{017F}case\u{017F}"] ["\u{017F}odd_\u{017F}case\u{017F}"] [""]; // edge-case + [test_normalize_tag_18] [""] [""] [""]; + [test_normalize_tag_19] [" "] [""] [""]; + [test_normalize_tag_20] ["ok"] ["ok"] [""]; + [test_normalize_tag_21] ["™Ö™Ö™™Ö™"] ["ö_ö_ö"] [""]; + [test_normalize_tag_22] ["AlsO:ök"] ["also:ök"] [""]; + [test_normalize_tag_23] [":still_ok"] [":still_ok"] [""]; + [test_normalize_tag_24] ["___trim"] ["trim"] [""]; + [test_normalize_tag_25] ["12.:trim@"] [":trim"] [""]; + [test_normalize_tag_26] ["12.:trim@@"] [":trim"] [""]; + [test_normalize_tag_27] ["fun:ky__tag/1"] ["fun:ky_tag/1"] [""]; + [test_normalize_tag_28] ["fun:ky@tag/2"] ["fun:ky_tag/2"] [""]; + [test_normalize_tag_29] ["fun:ky@@@tag/3"] ["fun:ky_tag/3"] [""]; + [test_normalize_tag_30] ["tag:1/2.3"] ["tag:1/2.3"] [""]; + [test_normalize_tag_31] ["---fun:k####y_ta@#g/1_@@#"]["fun:k_y_ta_g/1"] [""]; + [test_normalize_tag_32] ["AlsO:œ#@ö))œk"] ["also:œ_ö_œk"] [""]; + [test_normalize_tag_33] ["a".repeat(888).as_str()] ["a".repeat(200)] [""]; + [test_normalize_tag_34] [("a".to_owned() + &"🐶".repeat(799)).as_str()] ["a"] [""]; + [test_normalize_tag_35] [("a".to_string() + &char::REPLACEMENT_CHARACTER.to_string()).as_str()] ["a"] [""]; + [test_normalize_tag_36] [("a".to_string() + &char::REPLACEMENT_CHARACTER.to_string() + &char::REPLACEMENT_CHARACTER.to_string()).as_str()] ["a"] [""]; + [test_normalize_tag_37] [("a".to_string() + &char::REPLACEMENT_CHARACTER.to_string() + &char::REPLACEMENT_CHARACTER.to_string() + "b").as_str()] ["a_b"] [""]; + [test_normalize_tag_38] + ["A00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 000000000000"] + ["a00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000_0"] + [""]; + )] + #[test] + fn test_name() { + match normalize_utils::normalize_tag(input) { + Ok(normalized_tag) => { + assert_eq!(expected_err, ""); + assert_eq!(normalized_tag, expected) + } + Err(err) => { + assert_eq!(format!("{err}"), expected_err); + } + } + } } From 12c80c2450c3087752bc7c0834015b6a4380e432 Mon Sep 17 00:00:00 2001 From: David Lee Date: Mon, 6 Feb 2023 15:58:37 -0800 Subject: [PATCH 10/16] address PR comments --- trace-normalization/src/normalize_utils.rs | 15 ++---- trace-normalization/src/normalizer.rs | 62 +++++++++++----------- 2 files changed, 36 insertions(+), 41 deletions(-) diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs index 567d8661f..e052381ac 100644 --- a/trace-normalization/src/normalize_utils.rs +++ b/trace-normalization/src/normalize_utils.rs @@ -4,14 +4,14 @@ // Datadog, Inc. // DEFAULT_SERVICE_NAME is the default name we assign a service if it's missing and we have no reasonable fallback -pub(crate) const DEFAULT_SERVICE_NAME: &str = "unnamed-service"; +const DEFAULT_SERVICE_NAME: &str = "unnamed-service"; // MAX_NAME_LEN the maximum length a name can have pub(crate) const MAX_NAME_LEN: usize = 100; // MAX_SERVICE_LEN the maximum length a service can have -pub(crate) const MAX_SERVICE_LEN: usize = 100; +const MAX_SERVICE_LEN: usize = 100; // MAX_SERVICE_LEN the maximum length a tag can have -pub(crate) const MAX_TAG_LEN: usize = 200; +const MAX_TAG_LEN: usize = 200; // truncate_utf8 truncates the given string to make sure it uses less than limit bytes. // If the last character is a utf8 character that would be split, it removes it @@ -69,9 +69,7 @@ pub(crate) fn normalize_tag(tag: &str) -> anyhow::Result { let mut result = String::with_capacity(tag.len()); - let char_vec: Vec = tag.chars().collect(); - - for cur_char in char_vec { + for cur_char in tag.chars() { if result.len() == MAX_TAG_LEN { break; } @@ -135,10 +133,7 @@ pub(crate) fn is_normalized_ascii_tag(tag: &str) -> bool { } for mut i in 0..tag.len() { - let cur_char: char = match tag.chars().nth(i) { - Some(c) => c, - None => return false, - }; + let Some(cur_char) = tag.chars().nth(i) else { return false; }; if is_valid_ascii_tag_char(cur_char) { continue; } diff --git a/trace-normalization/src/normalizer.rs b/trace-normalization/src/normalizer.rs index e309c2cdd..345ad80a3 100644 --- a/trace-normalization/src/normalizer.rs +++ b/trace-normalization/src/normalizer.rs @@ -113,7 +113,7 @@ mod tests { use std::collections::HashMap; use std::time::SystemTime; - pub fn new_test_span() -> pb::Span { + fn new_test_span() -> pb::Span { let mut rng = rand::thread_rng(); pb::Span { @@ -137,7 +137,7 @@ mod tests { } #[test] - pub fn test_normalize_name_passes() { + fn test_normalize_name_passes() { let mut test_span = new_test_span(); let before_name = test_span.name.clone(); assert!(normalizer::normalize(&mut test_span).is_ok()); @@ -145,7 +145,7 @@ mod tests { } #[test] - pub fn test_normalize_empty_name() { + fn test_normalize_empty_name() { let mut test_span = new_test_span(); test_span.name = "".to_string(); assert!(normalizer::normalize(&mut test_span).is_ok()); @@ -153,7 +153,7 @@ mod tests { } #[test] - pub fn test_normalize_long_name() { + fn test_normalize_long_name() { let mut test_span = new_test_span(); test_span.name = "CAMEMBERT".repeat(100); assert!(normalizer::normalize(&mut test_span).is_ok()); @@ -161,7 +161,7 @@ mod tests { } #[test] - pub fn test_normalize_name_no_alphanumeric() { + fn test_normalize_name_no_alphanumeric() { let mut test_span = new_test_span(); test_span.name = "/".to_string(); assert!(normalizer::normalize(&mut test_span).is_ok()); @@ -169,7 +169,7 @@ mod tests { } #[test] - pub fn test_normalize_name_for_metrics() { + fn test_normalize_name_for_metrics() { let expected_names = HashMap::from([ ( "pylons.controller".to_string(), @@ -190,7 +190,7 @@ mod tests { } #[test] - pub fn test_normalize_resource_passes() { + fn test_normalize_resource_passes() { let mut test_span = new_test_span(); let before_resource = test_span.resource.clone(); assert!(normalizer::normalize(&mut test_span).is_ok()); @@ -198,7 +198,7 @@ mod tests { } #[test] - pub fn test_normalize_empty_resource() { + fn test_normalize_empty_resource() { let mut test_span = new_test_span(); test_span.resource = "".to_string(); assert!(normalizer::normalize(&mut test_span).is_ok()); @@ -206,7 +206,7 @@ mod tests { } #[test] - pub fn test_normalize_trace_id_passes() { + fn test_normalize_trace_id_passes() { let mut test_span = new_test_span(); let before_trace_id = test_span.trace_id; assert!(normalizer::normalize(&mut test_span).is_ok()); @@ -214,14 +214,14 @@ mod tests { } #[test] - pub fn test_normalize_no_trace_id() { + fn test_normalize_no_trace_id() { let mut test_span = new_test_span(); test_span.trace_id = 0; assert!(normalizer::normalize(&mut test_span).is_err()); } #[test] - pub fn test_normalize_component_to_name() { + fn test_normalize_component_to_name() { let mut test_span = new_test_span(); let before_trace_id = test_span.trace_id; assert!(normalizer::normalize(&mut test_span).is_ok()); @@ -232,7 +232,7 @@ mod tests { // implemented within the normalize function. #[test] - pub fn test_normalize_span_id_passes() { + fn test_normalize_span_id_passes() { let mut test_span = new_test_span(); let before_span_id = test_span.span_id; assert!(normalizer::normalize(&mut test_span).is_ok()); @@ -240,14 +240,14 @@ mod tests { } #[test] - pub fn test_normalize_no_span_id() { + fn test_normalize_no_span_id() { let mut test_span = new_test_span(); test_span.span_id = 0; assert!(normalizer::normalize(&mut test_span).is_err()); } #[test] - pub fn test_normalize_start_passes() { + fn test_normalize_start_passes() { let mut test_span = new_test_span(); let before_start = test_span.start; assert!(normalizer::normalize(&mut test_span).is_ok()); @@ -262,7 +262,7 @@ mod tests { } #[test] - pub fn test_normalize_start_too_small() { + fn test_normalize_start_too_small() { let mut test_span = new_test_span(); test_span.start = 42; @@ -274,7 +274,7 @@ mod tests { } #[test] - pub fn test_normalize_start_too_small_with_large_duration() { + fn test_normalize_start_too_small_with_large_duration() { let mut test_span = new_test_span(); test_span.start = 42; @@ -287,7 +287,7 @@ mod tests { } #[test] - pub fn test_normalize_duration_passes() { + fn test_normalize_duration_passes() { let mut test_span = new_test_span(); let before_duration = test_span.duration; @@ -296,7 +296,7 @@ mod tests { } #[test] - pub fn test_normalize_empty_duration() { + fn test_normalize_empty_duration() { let mut test_span = new_test_span(); test_span.duration = 0; @@ -305,7 +305,7 @@ mod tests { } #[test] - pub fn test_normalize_negative_duration() { + fn test_normalize_negative_duration() { let mut test_span = new_test_span(); test_span.duration = -50; @@ -314,7 +314,7 @@ mod tests { } #[test] - pub fn test_normalize_large_duration() { + fn test_normalize_large_duration() { let mut test_span = new_test_span(); test_span.duration = std::i64::MAX; @@ -323,7 +323,7 @@ mod tests { } #[test] - pub fn test_normalize_error_passes() { + fn test_normalize_error_passes() { let mut test_span = new_test_span(); let before_error = test_span.error; @@ -332,7 +332,7 @@ mod tests { } #[test] - pub fn test_normalize_metrics_passes() { + fn test_normalize_metrics_passes() { let mut test_span = new_test_span(); let before_metrics = test_span.metrics.clone(); @@ -341,7 +341,7 @@ mod tests { } #[test] - pub fn test_normalize_meta_passes() { + fn test_normalize_meta_passes() { let mut test_span = new_test_span(); let before_meta = test_span.meta.clone(); @@ -350,7 +350,7 @@ mod tests { } #[test] - pub fn test_normalize_parent_id_passes() { + fn test_normalize_parent_id_passes() { let mut test_span = new_test_span(); let before_parent_id = test_span.parent_id; @@ -359,7 +359,7 @@ mod tests { } #[test] - pub fn test_normalize_type_passes() { + fn test_normalize_type_passes() { let mut test_span = new_test_span(); let before_type = test_span.r#type.clone(); @@ -368,7 +368,7 @@ mod tests { } #[test] - pub fn test_normalize_type_too_long() { + fn test_normalize_type_too_long() { let mut test_span = new_test_span(); test_span.r#type = "sql".repeat(1000); @@ -377,7 +377,7 @@ mod tests { } #[test] - pub fn test_normalize_service_tag() { + fn test_normalize_service_tag() { let mut test_span = new_test_span(); test_span.service = "retargeting(api-Staging ".to_string(); @@ -386,7 +386,7 @@ mod tests { } #[test] - pub fn test_normalize_env() { + fn test_normalize_env() { let mut test_span = new_test_span(); test_span .meta @@ -397,7 +397,7 @@ mod tests { } #[test] - pub fn test_special_zipkin_root_span() { + fn test_special_zipkin_root_span() { let mut test_span = new_test_span(); test_span.parent_id = 42; test_span.trace_id = 42; @@ -413,7 +413,7 @@ mod tests { } #[test] - pub fn test_normalize_trace_empty() { + fn test_normalize_trace_empty() { let mut test_span = new_test_span(); test_span .meta @@ -424,7 +424,7 @@ mod tests { } #[test] - pub fn test_is_valid_status_code() { + fn test_is_valid_status_code() { assert!(normalizer::is_valid_status_code("100")); assert!(normalizer::is_valid_status_code("599")); assert!(!normalizer::is_valid_status_code("99")); From 7c3a31d039d11eacd0db52d721039ddbcb4e1118 Mon Sep 17 00:00:00 2001 From: David Lee Date: Mon, 6 Feb 2023 15:58:49 -0800 Subject: [PATCH 11/16] modify comment wording --- trace-normalization/src/normalize_utils.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs index e052381ac..72e79004b 100644 --- a/trace-normalization/src/normalize_utils.rs +++ b/trace-normalization/src/normalize_utils.rs @@ -34,14 +34,13 @@ pub(crate) fn truncate_utf8(s: &str, limit: usize) -> &str { // fallback_service returns the fallback service name for a service // belonging to language lang. +// In the go agent implementation, if a lang was specified in TagStats +// (extracted from the payload header) the fallback_service name would be "unnamed-{lang}-service". pub(crate) fn fallback_service() -> String { - // In the go agent implementation, if a lang was specified in TagStats - // (extracted from the payload header) the fallback_service name would be "unnamed-{lang}-service". DEFAULT_SERVICE_NAME.to_string() } -// normalize_service normalizes a span service and returns an error describing the reason -// (if any) why the name was modified. +// normalize_service normalizes a span service pub(crate) fn normalize_service(svc: &str) -> anyhow::Result { anyhow::ensure!(!svc.is_empty(), "Normalizer Error: Empty service name."); From 924c6d3292b5f7fd6da612a3f0fb13c7991f79b9 Mon Sep 17 00:00:00 2001 From: David Lee Date: Tue, 7 Feb 2023 08:07:34 -0800 Subject: [PATCH 12/16] remove let ... else --- trace-normalization/src/normalize_utils.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs index 72e79004b..b7244ca34 100644 --- a/trace-normalization/src/normalize_utils.rs +++ b/trace-normalization/src/normalize_utils.rs @@ -132,7 +132,10 @@ pub(crate) fn is_normalized_ascii_tag(tag: &str) -> bool { } for mut i in 0..tag.len() { - let Some(cur_char) = tag.chars().nth(i) else { return false; }; + let cur_char = match tag.chars().nth(i) { + Some(c) => c, + None => return false, + }; if is_valid_ascii_tag_char(cur_char) { continue; } From 8b1f91adfc012af88e3d16af1a5bf4e99364f573 Mon Sep 17 00:00:00 2001 From: David Lee Date: Wed, 8 Feb 2023 14:37:07 -0800 Subject: [PATCH 13/16] Update trace-normalization/src/normalize_utils.rs Co-authored-by: Levi Morrison --- trace-normalization/src/normalize_utils.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs index b7244ca34..5e1f55cab 100644 --- a/trace-normalization/src/normalize_utils.rs +++ b/trace-normalization/src/normalize_utils.rs @@ -44,11 +44,7 @@ pub(crate) fn fallback_service() -> String { pub(crate) fn normalize_service(svc: &str) -> anyhow::Result { anyhow::ensure!(!svc.is_empty(), "Normalizer Error: Empty service name."); - let truncated_service = if svc.len() > MAX_SERVICE_LEN { - truncate_utf8(svc, MAX_SERVICE_LEN) - } else { - svc - }; + let truncated_service = truncate_utf8(svc, MAX_SERVICE_LEN); normalize_tag(truncated_service) } From 8be8691e8623b5017c1d0545477808f8c8192c88 Mon Sep 17 00:00:00 2001 From: David Lee Date: Wed, 8 Feb 2023 14:43:44 -0800 Subject: [PATCH 14/16] address PR comments --- trace-normalization/src/normalize_utils.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs index 5e1f55cab..a0dc6467f 100644 --- a/trace-normalization/src/normalize_utils.rs +++ b/trace-normalization/src/normalize_utils.rs @@ -127,8 +127,10 @@ pub(crate) fn is_normalized_ascii_tag(tag: &str) -> bool { None => return false, } - for mut i in 0..tag.len() { - let cur_char = match tag.chars().nth(i) { + let mut tag_iter = tag.chars().peekable(); + + while tag_iter.peek().is_some() { + let cur_char = match tag_iter.next() { Some(c) => c, None => return false, }; @@ -137,8 +139,7 @@ pub(crate) fn is_normalized_ascii_tag(tag: &str) -> bool { } if cur_char == '_' { // an underscore is only okay if followed by a valid non-underscore character - i += 1; - match tag.chars().nth(i) { + match tag_iter.next() { Some(c) => { if !is_valid_ascii_tag_char(c) { return false; From 912f026c246d273550b055253e8d8a8000513ba9 Mon Sep 17 00:00:00 2001 From: David Lee Date: Wed, 8 Feb 2023 14:55:25 -0800 Subject: [PATCH 15/16] remove .peekable() in is_normalized_ascii_tag func --- trace-normalization/src/normalize_utils.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs index a0dc6467f..e0899550a 100644 --- a/trace-normalization/src/normalize_utils.rs +++ b/trace-normalization/src/normalize_utils.rs @@ -127,13 +127,9 @@ pub(crate) fn is_normalized_ascii_tag(tag: &str) -> bool { None => return false, } - let mut tag_iter = tag.chars().peekable(); + let mut tag_iter = tag.chars(); - while tag_iter.peek().is_some() { - let cur_char = match tag_iter.next() { - Some(c) => c, - None => return false, - }; + while let Some(cur_char) = tag_iter.next() { if is_valid_ascii_tag_char(cur_char) { continue; } From 0c8de519221684e30c9385b320ecae58730b2229 Mon Sep 17 00:00:00 2001 From: David Lee Date: Wed, 8 Feb 2023 15:01:42 -0800 Subject: [PATCH 16/16] Update normalize_utils.rs --- trace-normalization/src/normalize_utils.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs index e0899550a..3a7e28b7b 100644 --- a/trace-normalization/src/normalize_utils.rs +++ b/trace-normalization/src/normalize_utils.rs @@ -118,7 +118,10 @@ pub(crate) fn is_normalized_ascii_tag(tag: &str) -> bool { if tag.len() > MAX_TAG_LEN { return false; } - match tag.chars().next() { + + let mut tag_iter = tag.chars(); + + match tag_iter.next() { Some(c) => { if !is_valid_ascii_start_char(c) { return false; @@ -127,8 +130,6 @@ pub(crate) fn is_normalized_ascii_tag(tag: &str) -> bool { None => return false, } - let mut tag_iter = tag.chars(); - while let Some(cur_char) = tag_iter.next() { if is_valid_ascii_tag_char(cur_char) { continue;