From ab26e7be6c300f53b00284c92afac2972e592817 Mon Sep 17 00:00:00 2001
From: David Lee <david.lee@datadoghq.com>
Date: Thu, 2 Feb 2023 15:09:38 -0800
Subject: [PATCH 01/16] add service tag + env tag normalization + unit tests

---
 trace-normalization/src/normalize_utils.rs | 223 ++++++++++++------
 trace-normalization/src/normalizer.rs      | 250 ++++++++++++++++++++-
 2 files changed, 393 insertions(+), 80 deletions(-)
diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs
index 9a0d18e23..85ea9dfaf 100644
--- a/trace-normalization/src/normalize_utils.rs
+++ b/trace-normalization/src/normalize_utils.rs
@@ -3,8 +3,15 @@
 // developed at Datadog (https://www.datadoghq.com/). Copyright 2023-Present
 // Datadog, Inc.
 
+// DEFAULT_SERVICE_NAME is the default name we assign a service if it's missing and we have no reasonable fallback
+pub(crate) const DEFAULT_SERVICE_NAME: &str = "unnamed-service";
+
 // MAX_NAME_LEN the maximum length a name can have
 pub(crate) const MAX_NAME_LEN: usize = 100;
+// MAX_SERVICE_LEN the maximum length a service can have
+pub(crate) const MAX_SERVICE_LEN: usize = 100;
+// MAX_SERVICE_LEN the maximum length a tag can have
+pub(crate) const MAX_TAG_LEN: usize = 200;
 
 // TruncateUTF8 truncates the given string to make sure it uses less than limit bytes.
 // If the last character is a utf8 character that would be split, it removes it
@@ -25,23 +32,133 @@ pub(crate) fn truncate_utf8(s: &str, limit: usize) -> &str {
     s
 }
 
-// NormalizeService returns a span service or an error describing why normalization failed.
-// TODO: Implement this in a future PR
-// pub fn normalize_service(svc: String, lang: String) -> (String, Option<errors::NormalizeErrors>) {
-// if svc == "" {
-//     return (fallback_service(lang), errors::NormalizeErrors::ErrorEmpty);
-// }
-// if svc.len() > MAX_SERVICE_LEN {
-//     return (truncate_utf8(svc, MAX_SERVICE_LEN), errors::NormalizeErrors::ErrorTooLong.into());
-// }
-// TODO: implement tag normalization
-// let s: String = normalize_tag(svc);
-// if s == "" {
-//     return (fallbackService(lang), errors::NormalizeErrors::ErrorInvalid)
-// }
-// return (s, err)
-// (svc, None)
-// }
+// fallbackService returns the fallback service name for a service
+// belonging to language lang.
+pub(crate) fn fallback_service(lang: String) -> String {
+    if lang.is_empty() {
+		return DEFAULT_SERVICE_NAME.to_string();
+	}
+    let mut service_name = String::new();
+    service_name.push_str("unnamed-");
+    service_name.push_str(&lang);
+    service_name.push_str("-service");
+    // TODO: the original golang implementation uses a map to cache previously created
+    // service names. Implement that here.
+    service_name
+}
+
+// NormalizeService normalizes a span service and returns an error describing the reason
+// (if any) why the name was modified.
+pub(crate) fn normalize_service(svc: &str) -> anyhow::Result<String> {
+    anyhow::ensure!(!svc.is_empty(), "Normalizer Error: Empty service name.");
+
+    let truncated_service = if svc.len() > MAX_SERVICE_LEN as usize {
+        truncate_utf8(svc, MAX_SERVICE_LEN)
+    } else {
+        svc
+    };
+
+    normalize_tag(truncated_service)
+}
+
+// NormalizeTag applies some normalization to ensure the tags match the backend requirements.
+pub(crate) fn normalize_tag(tag: &str) -> anyhow::Result<String> {
+    // Fast path: Check if the tag is valid and only contains ASCII characters,
+	// if yes return it as-is right away. For most use-cases this reduces CPU usage.
+	if is_normalized_ascii_tag(tag) {
+		return Ok(tag.to_string());
+	}
+
+    anyhow::ensure!(!tag.is_empty(), "Normalizer Error: Empty tag name.");
+
+    // given a dummy value
+    let mut last_char: char = 'a';
+
+    let mut result = String::with_capacity(tag.len());
+
+    let char_vec: Vec<char> = tag.chars().collect();
+
+    for cur_char in char_vec {
+        if result.len() == MAX_TAG_LEN as usize {
+            break;
+        }
+        if cur_char.is_lowercase() {
+            result.push(cur_char);
+            last_char = cur_char;
+            continue;
+        }
+        if cur_char.is_uppercase() {
+            let mut iter = cur_char.to_lowercase();
+            if iter.len() == 1 {
+                let c: char = iter.next().unwrap();
+                result.push(c);
+                last_char = c;
+            }
+            continue;
+        }
+        if cur_char.is_alphabetic() {
+            result.push(cur_char);
+            last_char = cur_char;
+            continue;
+        }
+        if cur_char == ':' {
+            result.push(cur_char);
+            last_char = cur_char;
+            continue;
+        }
+        if !result.is_empty() && (cur_char.is_ascii_digit() || cur_char == '.' || cur_char == '/' || cur_char == '-') {
+            result.push(cur_char);
+            last_char = cur_char;
+            continue;
+        }
+        if !result.is_empty() && last_char != '_' {
+            result.push('_');
+            last_char = '_';
+        }
+    }
+
+    if last_char == '_' {
+        result.remove(result.len() - 1);
+    }
+
+    Ok(result.to_string())
+}
+
+pub(crate) fn is_normalized_ascii_tag(tag: &str) -> bool {
+    if tag.is_empty() {
+        return true;
+    }
+    if tag.len() > MAX_TAG_LEN as usize {
+        return false;
+    }
+    if !is_valid_ascii_start_char(tag.chars().next().unwrap()) {
+        return false;
+    }
+    for mut i in 0..tag.len() {
+        let b: char = tag.chars().nth(i).unwrap();
+        if is_valid_ascii_tag_char(b) {
+            continue;
+        }
+        if b == '_' {
+            // an underscore is only okay if followed by a valid non-underscore character
+			i+=1;
+			if i == tag.len() || !is_valid_ascii_tag_char(tag.chars().nth(i).unwrap()) {
+				return false;
+			}
+        } else {
+            return false;
+        }
+    }
+    true
+}
+
+pub(crate) fn is_valid_ascii_start_char(c: char) -> bool {
+    ('a'..='z').contains(&c) || c == ':'
+}
+
+pub(crate) fn is_valid_ascii_tag_char(c: char) -> bool {
+    is_valid_ascii_start_char(c) || ('0'..='9').contains(&c) || c == '.' || c == '/' || c == '-'
+}
 
 // normalize_name normalizes a span name or an error describing why normalization failed.
 pub(crate) fn normalize_name(name: &str) -> anyhow::Result<String> {
@@ -56,58 +173,6 @@ pub(crate) fn normalize_name(name: &str) -> anyhow::Result<String> {
     normalize_metric_names(truncated_name)
 }
 
-// TODO: Implement this in a future PR
-// NormalizeTag applies some normalization to ensure the tags match the backend requirements.
-// pub fn normalize_tag(v: String) -> String {
-// Fast path: Check if the tag is valid and only contains ASCII characters,
-// if yes return it as-is right away. For most use-cases this reduces CPU usage.
-// 	if is_normalized_ascii_tag(v.clone()) {
-// 		return v;
-// 	}
-
-//     if v.is_empty() {
-//         return "".to_string();
-//     }
-
-//     "".to_string()
-// }
-
-// pub fn is_normalized_ascii_tag(tag: String) -> bool {
-//     if tag.is_empty() {
-//         return true;
-//     }
-//     if tag.len() > MAX_TAG_LEN {
-//         return false;
-//     }
-//     if !is_valid_ascii_start_char(tag.chars().next().unwrap()) {
-//         return false;
-//     }
-//     for mut i in 0..tag.len() {
-//         let b: char = tag.chars().nth(i).unwrap();
-//         if is_valid_ascii_tag_char(b) {
-//             continue;
-//         }
-//         if b == '_' {
-//             // an underscore is only okay if followed by a valid non-underscore character
-// 			i+=1;
-// 			if i == tag.len() || !is_valid_ascii_tag_char(tag.chars().nth(i).unwrap()) {
-// 				return false;
-// 			}
-//         } else {
-//             return false;
-//         }
-//     }
-//     true
-// }
-
-// pub fn is_valid_ascii_start_char(c: char) -> bool {
-//     ('a'..='z').contains(&c) || c == ':'
-// }
-
-// pub fn is_valid_ascii_tag_char(c: char) -> bool {
-//     is_valid_ascii_start_char(c) || ('0'..='9').contains(&c) || c == '.' || c == '/' || c == '-'
-// }
-
 pub(crate) fn normalize_metric_names(name: &str) -> anyhow::Result<String> {
     let mut result = String::with_capacity(name.len());
 
@@ -190,4 +255,24 @@ mod tests {
             }
         }
     }
+
+    #[duplicate_item(
+        test_name                       input                               expected                    expected_err;
+        [test_normalize_empty_service]   [""]                                [normalize_utils::DEFAULT_SERVICE_NAME]      ["Normalizer Error: Empty service name."];
+        [test_normalize_valid_service]   ["good"]                            ["good"]                    [""];
+        [test_normalize_long_service]    ["Too$Long$.".repeat(20).as_str()]  ["too_long_.".repeat(10)]    [""];
+        [test_normalize_dash_service]    ["bad&service"]                        ["bad_service"]                [""];
+    )]
+    #[test]
+    fn test_name() {
+        match normalize_utils::normalize_service(input) {
+            Ok(val) => {
+                assert_eq!(expected_err, "");
+                assert_eq!(val, expected)
+            },
+            Err(err) => {
+                assert_eq!(format!("{err}"), expected_err);
+            }
+        }
+    }
 }
diff --git a/trace-normalization/src/normalizer.rs b/trace-normalization/src/normalizer.rs
index 099904d9d..990d3a741 100644
--- a/trace-normalization/src/normalizer.rs
+++ b/trace-normalization/src/normalizer.rs
@@ -21,9 +21,12 @@ pub fn normalize(s: &mut pb::Span) -> anyhow::Result<()> {
     anyhow::ensure!(s.trace_id != 0, "TraceID is zero (reason:trace_id_zero)");
     anyhow::ensure!(s.span_id != 0, "SpanID is zero (reason:span_id_zero)");
 
-    // TODO: Implement service name normalizer in future PR
-    // let (svc, _) = normalize_utils::normalize_service(s.service.clone(), "".to_string());
-    // s.service = svc;
+    let normalized_service = match normalize_utils::normalize_service(&s.service) {
+        Ok(service) => service,
+        Err(_) => normalize_utils::fallback_service("".to_string())
+    };
+
+    s.service = normalized_service;
 
     // TODO: check for a feature flag to determine the component tag to become the span name
     // https://github.com/DataDog/datadog-agent/blob/dc88d14851354cada1d15265220a39dce8840dcc/pkg/trace/agent/normalizer.go#L64
@@ -76,14 +79,18 @@ pub fn normalize(s: &mut pb::Span) -> anyhow::Result<()> {
         s.r#type = normalize_utils::truncate_utf8(&s.r#type, MAX_TYPE_LEN).to_string();
     }
 
-    // TODO: Implement tag normalization in future PR
-    // if s.meta.contains_key("env") {
-    //     let env_tag: String = s.meta.get("env").unwrap().to_string();
-    //     s.meta.insert("env".to_string(), normalize_utils::normalize_tag(env_tag));
-    // }
+    if s.meta.contains_key("env") {
+        let env_tag: &str = s.meta.get("env").unwrap();
+        match normalize_utils::normalize_tag(env_tag) {
+            Ok(normalized_tag) => {
+                s.meta.insert("env".to_string(), normalized_tag);
+            },
+            Err(_) => {}
+        }
+    };
 
     if let Some(code) = s.meta.get("http.status_code") {
-        if !is_valid_status_code(code.to_string()) {
+        if !is_valid_status_code(code) {
             s.meta.remove("http.status_code");
         }
     };
@@ -91,7 +98,7 @@ pub fn normalize(s: &mut pb::Span) -> anyhow::Result<()> {
     Ok(())
 }
 
-pub(crate) fn is_valid_status_code(sc: String) -> bool {
+pub(crate) fn is_valid_status_code(sc: &str) -> bool {
     if let Ok(code) = sc.parse::<i64>() {
         return (100..600).contains(&code);
     }
@@ -100,12 +107,12 @@ pub(crate) fn is_valid_status_code(sc: String) -> bool {
 
 #[cfg(test)]
 mod tests {
-
     use crate::normalize_utils;
     use crate::normalizer;
     use crate::pb;
     use rand::Rng;
     use std::collections::HashMap;
+    use std::time::SystemTime;
 
     pub fn new_test_span() -> pb::Span {
         let mut rng = rand::thread_rng();
@@ -198,4 +205,225 @@ mod tests {
         assert!(normalizer::normalize(&mut test_span).is_ok());
         assert_eq!(test_span.resource, test_span.name);
     }
+
+    #[test]
+    pub fn test_normalize_trace_id_passes() {
+        let mut test_span = new_test_span();
+        let before_trace_id = test_span.trace_id;
+        assert!(normalizer::normalize(&mut test_span).is_ok());
+        assert_eq!(before_trace_id, test_span.trace_id);
+    }
+
+    #[test]
+    pub fn test_normalize_no_trace_id() {
+        let mut test_span = new_test_span();
+        test_span.trace_id = 0;
+        assert!(normalizer::normalize(&mut test_span).is_err());
+    }
+
+    #[test]
+    pub fn test_normalize_component_to_name() {
+        let mut test_span = new_test_span();
+        let before_trace_id = test_span.trace_id;
+        assert!(normalizer::normalize(&mut test_span).is_ok());
+        assert_eq!(before_trace_id, test_span.trace_id);
+    }
+
+    // TODO: Add a unit test for testing Component2Name, one that is 
+    //       implemented within the normalize function.
+
+    #[test]
+    pub fn test_normalize_span_id_passes() {
+        let mut test_span = new_test_span();
+        let before_span_id = test_span.span_id;
+        assert!(normalizer::normalize(&mut test_span).is_ok());
+        assert_eq!(before_span_id, test_span.span_id);
+    }
+
+    #[test]
+    pub fn test_normalize_no_span_id() {
+        let mut test_span = new_test_span();
+        test_span.span_id = 0;
+        assert!(normalizer::normalize(&mut test_span).is_err());
+    }
+
+    #[test]
+    pub fn test_normalize_start_passes() {
+        let mut test_span = new_test_span();
+        let before_start = test_span.start;
+        assert!(normalizer::normalize(&mut test_span).is_ok());
+        assert_eq!(before_start, test_span.start);
+    }
+
+    fn get_current_time() -> i64 {
+        SystemTime::now().duration_since(SystemTime::UNIX_EPOCH).unwrap().as_nanos() as i64
+    }
+
+    #[test]
+    pub fn test_normalize_start_too_small() {
+        let mut test_span = new_test_span();
+
+        test_span.start = 42;
+        let min_start = get_current_time() - test_span.duration;
+
+        assert!(normalizer::normalize(&mut test_span).is_ok());
+        assert!(test_span.start >= min_start);
+        assert!(test_span.start <= get_current_time());
+    }
+
+    #[test]
+    pub fn test_normalize_start_too_small_with_large_duration() {
+        let mut test_span = new_test_span();
+        
+        test_span.start = 42;
+        test_span.duration = get_current_time() * 2;
+        let min_start = get_current_time();
+        
+        assert!(normalizer::normalize(&mut test_span).is_ok());
+        assert!(test_span.start >= min_start); // start should have been reset to current time
+        assert!(test_span.start <= get_current_time()); //start should have been reset to current time
+    }
+
+    #[test]
+    pub fn test_normalize_duration_passes() {
+        let mut test_span = new_test_span();
+        let before_duration = test_span.duration;
+
+        assert!(normalizer::normalize(&mut test_span).is_ok());
+        assert_eq!(before_duration, test_span.duration);
+    }
+
+    #[test]
+    pub fn test_normalize_empty_duration() {
+        let mut test_span = new_test_span();
+        test_span.duration = 0;
+
+        assert!(normalizer::normalize(&mut test_span).is_ok());
+        assert_eq!(test_span.duration, 0);
+    }
+
+    #[test]
+    pub fn test_normalize_negative_duration() {
+        let mut test_span = new_test_span();
+        test_span.duration = -50;
+
+        assert!(normalizer::normalize(&mut test_span).is_ok());
+        assert_eq!(test_span.duration, 0);
+    }
+
+    #[test]
+    pub fn test_normalize_large_duration() {
+        let mut test_span = new_test_span();
+        test_span.duration = std::i64::MAX;
+
+        assert!(normalizer::normalize(&mut test_span).is_ok());
+        assert_eq!(test_span.duration, 0);
+    }
+
+    #[test]
+    pub fn test_normalize_error_passes() {
+        let mut test_span = new_test_span();
+        let before_error = test_span.error;
+
+        assert!(normalizer::normalize(&mut test_span).is_ok());
+        assert_eq!(before_error, test_span.error);
+    }
+
+    #[test]
+    pub fn test_normalize_metrics_passes() {
+        let mut test_span = new_test_span();
+        let before_metrics = test_span.metrics.clone();
+
+        assert!(normalizer::normalize(&mut test_span).is_ok());
+        assert_eq!(before_metrics, test_span.metrics);
+    }
+
+    #[test]
+    pub fn test_normalize_meta_passes() {
+        let mut test_span = new_test_span();
+        let before_meta = test_span.meta.clone();
+
+        assert!(normalizer::normalize(&mut test_span).is_ok());
+        assert_eq!(before_meta, test_span.meta);
+    }
+
+    #[test]
+    pub fn test_normalize_parent_id_passes() {
+        let mut test_span = new_test_span();
+        let before_parent_id = test_span.parent_id;
+
+        assert!(normalizer::normalize(&mut test_span).is_ok());
+        assert_eq!(before_parent_id, test_span.parent_id);
+    }
+
+    #[test]
+    pub fn test_normalize_type_passes() {
+        let mut test_span = new_test_span();
+        let before_type = test_span.r#type.clone();
+
+        assert!(normalizer::normalize(&mut test_span).is_ok());
+        assert_eq!(before_type, test_span.r#type);
+    }
+
+    #[test]
+    pub fn test_normalize_type_too_long() {
+        let mut test_span = new_test_span();
+        test_span.r#type = "sql".repeat(1000);
+
+        assert!(normalizer::normalize(&mut test_span).is_ok());
+        assert_eq!(test_span.r#type.len(), normalizer::MAX_TYPE_LEN as usize);
+    }
+
+    #[test]
+    pub fn test_normalize_service_tag() {
+        let mut test_span = new_test_span();
+        test_span.service = "retargeting(api-Staging ".to_string();
+
+        assert!(normalizer::normalize(&mut test_span).is_ok());
+        assert_eq!(test_span.service, "retargeting_api-staging");
+    }
+
+    #[test]
+    pub fn test_normalize_env() {
+        let mut test_span = new_test_span();
+        test_span.meta.insert("env".to_string(), "DEVELOPMENT".to_string());
+
+        assert!(normalizer::normalize(&mut test_span).is_ok());
+        assert_eq!("development", test_span.meta.get("env").unwrap());
+    }
+
+    #[test]
+    pub fn test_special_zipkin_root_span() {
+        let mut test_span = new_test_span();
+        test_span.parent_id = 42;
+        test_span.trace_id = 42;
+        test_span.span_id = 42;
+
+        let before_trace_id = test_span.trace_id;
+        let before_span_id = test_span.span_id;
+
+        assert!(normalizer::normalize(&mut test_span).is_ok());
+        assert_eq!(test_span.parent_id, 0);
+        assert_eq!(test_span.trace_id, before_trace_id);
+        assert_eq!(test_span.span_id, before_span_id);
+
+    }
+
+    #[test]
+    pub fn test_normalize_trace_empty() {
+        let mut test_span = new_test_span();
+        test_span.meta.insert("env".to_string(), "DEVELOPMENT".to_string());
+
+        assert!(normalizer::normalize(&mut test_span).is_ok());
+        assert_eq!("development", test_span.meta.get("env").unwrap());
+    }
+
+    #[test]
+    pub fn test_is_valid_status_code() {
+        assert!(normalizer::is_valid_status_code("100"));
+        assert!(normalizer::is_valid_status_code("599"));
+        assert!(!normalizer::is_valid_status_code("99"));
+        assert!(!normalizer::is_valid_status_code("600"));
+        assert!(!normalizer::is_valid_status_code("Invalid status code"));
+    }
 }

From 38d0c621e67ee503f73ee26c06e60d159fee104e Mon Sep 17 00:00:00 2001
From: David Lee <david.lee@datadoghq.com>
Date: Thu, 2 Feb 2023 15:20:29 -0800
Subject: [PATCH 02/16] Update normalizer.rs

---
 trace-normalization/src/normalizer.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/trace-normalization/src/normalizer.rs b/trace-normalization/src/normalizer.rs
index 990d3a741..b799ae1fd 100644
--- a/trace-normalization/src/normalizer.rs
+++ b/trace-normalization/src/normalizer.rs
@@ -28,7 +28,7 @@ pub fn normalize(s: &mut pb::Span) -> anyhow::Result<()> {
 
     s.service = normalized_service;
 
-    // TODO: check for a feature flag to determine the component tag to become the span name
+    // TODO: component2name: check for a feature flag to determine the component tag to become the span name
     // https://github.com/DataDog/datadog-agent/blob/dc88d14851354cada1d15265220a39dce8840dcc/pkg/trace/agent/normalizer.go#L64
 
     let normalized_name = match normalize_utils::normalize_name(&s.name) {
@@ -107,6 +107,7 @@ pub(crate) fn is_valid_status_code(sc: &str) -> bool {
 
 #[cfg(test)]
 mod tests {
+
     use crate::normalize_utils;
     use crate::normalizer;
     use crate::pb;

From 4058d5c340bab1dc789ef88619a8263f2334972b Mon Sep 17 00:00:00 2001
From: David Lee <david.lee@datadoghq.com>
Date: Thu, 2 Feb 2023 15:23:10 -0800
Subject: [PATCH 03/16] lint

---
 trace-normalization/src/normalize_utils.rs | 32 ++++++++++++----------
 trace-normalization/src/normalizer.rs      | 31 +++++++++++----------
 2 files changed, 34 insertions(+), 29 deletions(-)

diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs
index 85ea9dfaf..3ebd58169 100644
--- a/trace-normalization/src/normalize_utils.rs
+++ b/trace-normalization/src/normalize_utils.rs
@@ -36,8 +36,8 @@ pub(crate) fn truncate_utf8(s: &str, limit: usize) -> &str {
 // belonging to language lang.
 pub(crate) fn fallback_service(lang: String) -> String {
     if lang.is_empty() {
-		return DEFAULT_SERVICE_NAME.to_string();
-	}
+        return DEFAULT_SERVICE_NAME.to_string();
+    }
     let mut service_name = String::new();
     service_name.push_str("unnamed-");
     service_name.push_str(&lang);
@@ -52,7 +52,7 @@ pub(crate) fn fallback_service(lang: String) -> String {
 pub(crate) fn normalize_service(svc: &str) -> anyhow::Result<String> {
     anyhow::ensure!(!svc.is_empty(), "Normalizer Error: Empty service name.");
 
-    let truncated_service = if svc.len() > MAX_SERVICE_LEN as usize {
+    let truncated_service = if svc.len() > MAX_SERVICE_LEN {
         truncate_utf8(svc, MAX_SERVICE_LEN)
     } else {
         svc
@@ -64,10 +64,10 @@ pub(crate) fn normalize_service(svc: &str) -> anyhow::Result<String> {
 // NormalizeTag applies some normalization to ensure the tags match the backend requirements.
 pub(crate) fn normalize_tag(tag: &str) -> anyhow::Result<String> {
     // Fast path: Check if the tag is valid and only contains ASCII characters,
-	// if yes return it as-is right away. For most use-cases this reduces CPU usage.
-	if is_normalized_ascii_tag(tag) {
-		return Ok(tag.to_string());
-	}
+    // if yes return it as-is right away. For most use-cases this reduces CPU usage.
+    if is_normalized_ascii_tag(tag) {
+        return Ok(tag.to_string());
+    }
 
     anyhow::ensure!(!tag.is_empty(), "Normalizer Error: Empty tag name.");
 
@@ -79,7 +79,7 @@ pub(crate) fn normalize_tag(tag: &str) -> anyhow::Result<String> {
     let char_vec: Vec<char> = tag.chars().collect();
 
     for cur_char in char_vec {
-        if result.len() == MAX_TAG_LEN as usize {
+        if result.len() == MAX_TAG_LEN {
             break;
         }
         if cur_char.is_lowercase() {
@@ -106,7 +106,9 @@ pub(crate) fn normalize_tag(tag: &str) -> anyhow::Result<String> {
             last_char = cur_char;
             continue;
         }
-        if !result.is_empty() && (cur_char.is_ascii_digit() || cur_char == '.' || cur_char == '/' || cur_char == '-') {
+        if !result.is_empty()
+            && (cur_char.is_ascii_digit() || cur_char == '.' || cur_char == '/' || cur_char == '-')
+        {
             result.push(cur_char);
             last_char = cur_char;
             continue;
@@ -128,7 +130,7 @@ pub(crate) fn is_normalized_ascii_tag(tag: &str) -> bool {
     if tag.is_empty() {
         return true;
     }
-    if tag.len() > MAX_TAG_LEN as usize {
+    if tag.len() > MAX_TAG_LEN {
         return false;
     }
     if !is_valid_ascii_start_char(tag.chars().next().unwrap()) {
@@ -141,10 +143,10 @@ pub(crate) fn is_normalized_ascii_tag(tag: &str) -> bool {
         }
         if b == '_' {
             // an underscore is only okay if followed by a valid non-underscore character
-			i+=1;
-			if i == tag.len() || !is_valid_ascii_tag_char(tag.chars().nth(i).unwrap()) {
-				return false;
-			}
+            i += 1;
+            if i == tag.len() || !is_valid_ascii_tag_char(tag.chars().nth(i).unwrap()) {
+                return false;
+            }
         } else {
             return false;
         }
@@ -269,7 +271,7 @@ mod tests {
             Ok(val) => {
                 assert_eq!(expected_err, "");
                 assert_eq!(val, expected)
-            },
+            }
             Err(err) => {
                 assert_eq!(format!("{err}"), expected_err);
             }
diff --git a/trace-normalization/src/normalizer.rs b/trace-normalization/src/normalizer.rs
index b799ae1fd..1b8f04084 100644
--- a/trace-normalization/src/normalizer.rs
+++ b/trace-normalization/src/normalizer.rs
@@ -23,7 +23,7 @@ pub fn normalize(s: &mut pb::Span) -> anyhow::Result<()> {
 
     let normalized_service = match normalize_utils::normalize_service(&s.service) {
         Ok(service) => service,
-        Err(_) => normalize_utils::fallback_service("".to_string())
+        Err(_) => normalize_utils::fallback_service("".to_string()),
     };
 
     s.service = normalized_service;
@@ -81,11 +81,8 @@ pub fn normalize(s: &mut pb::Span) -> anyhow::Result<()> {
 
     if s.meta.contains_key("env") {
         let env_tag: &str = s.meta.get("env").unwrap();
-        match normalize_utils::normalize_tag(env_tag) {
-            Ok(normalized_tag) => {
-                s.meta.insert("env".to_string(), normalized_tag);
-            },
-            Err(_) => {}
+        if let Ok(normalized_tag) = normalize_utils::normalize_tag(env_tag) {
+            s.meta.insert("env".to_string(), normalized_tag);
         }
     };
 
@@ -230,7 +227,7 @@ mod tests {
         assert_eq!(before_trace_id, test_span.trace_id);
     }
 
-    // TODO: Add a unit test for testing Component2Name, one that is 
+    // TODO: Add a unit test for testing Component2Name, one that is
     //       implemented within the normalize function.
 
     #[test]
@@ -257,7 +254,10 @@ mod tests {
     }
 
     fn get_current_time() -> i64 {
-        SystemTime::now().duration_since(SystemTime::UNIX_EPOCH).unwrap().as_nanos() as i64
+        SystemTime::now()
+            .duration_since(SystemTime::UNIX_EPOCH)
+            .unwrap()
+            .as_nanos() as i64
     }
 
     #[test]
@@ -275,11 +275,11 @@ mod tests {
     #[test]
     pub fn test_normalize_start_too_small_with_large_duration() {
         let mut test_span = new_test_span();
-        
+
         test_span.start = 42;
         test_span.duration = get_current_time() * 2;
         let min_start = get_current_time();
-        
+
         assert!(normalizer::normalize(&mut test_span).is_ok());
         assert!(test_span.start >= min_start); // start should have been reset to current time
         assert!(test_span.start <= get_current_time()); //start should have been reset to current time
@@ -372,7 +372,7 @@ mod tests {
         test_span.r#type = "sql".repeat(1000);
 
         assert!(normalizer::normalize(&mut test_span).is_ok());
-        assert_eq!(test_span.r#type.len(), normalizer::MAX_TYPE_LEN as usize);
+        assert_eq!(test_span.r#type.len(), normalizer::MAX_TYPE_LEN);
     }
 
     #[test]
@@ -387,7 +387,9 @@ mod tests {
     #[test]
     pub fn test_normalize_env() {
         let mut test_span = new_test_span();
-        test_span.meta.insert("env".to_string(), "DEVELOPMENT".to_string());
+        test_span
+            .meta
+            .insert("env".to_string(), "DEVELOPMENT".to_string());
 
         assert!(normalizer::normalize(&mut test_span).is_ok());
         assert_eq!("development", test_span.meta.get("env").unwrap());
@@ -407,13 +409,14 @@ mod tests {
         assert_eq!(test_span.parent_id, 0);
         assert_eq!(test_span.trace_id, before_trace_id);
         assert_eq!(test_span.span_id, before_span_id);
-
     }
 
     #[test]
     pub fn test_normalize_trace_empty() {
         let mut test_span = new_test_span();
-        test_span.meta.insert("env".to_string(), "DEVELOPMENT".to_string());
+        test_span
+            .meta
+            .insert("env".to_string(), "DEVELOPMENT".to_string());
 
         assert!(normalizer::normalize(&mut test_span).is_ok());
         assert_eq!("development", test_span.meta.get("env").unwrap());

From 2eb29407b79c3ba077f486f519777a6c3bde6fde Mon Sep 17 00:00:00 2001
From: David Lee <david.lee@datadoghq.com>
Date: Thu, 2 Feb 2023 15:29:31 -0800
Subject: [PATCH 04/16] remove .unwrap() outside of unit tests

---
 trace-normalization/src/normalizer.rs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/trace-normalization/src/normalizer.rs b/trace-normalization/src/normalizer.rs
index 1b8f04084..5b8484e8d 100644
--- a/trace-normalization/src/normalizer.rs
+++ b/trace-normalization/src/normalizer.rs
@@ -80,9 +80,10 @@ pub fn normalize(s: &mut pb::Span) -> anyhow::Result<()> {
     }
 
     if s.meta.contains_key("env") {
-        let env_tag: &str = s.meta.get("env").unwrap();
-        if let Ok(normalized_tag) = normalize_utils::normalize_tag(env_tag) {
-            s.meta.insert("env".to_string(), normalized_tag);
+        if let Some(env_tag) = s.meta.get("env") {
+            if let Ok(normalized_tag) = normalize_utils::normalize_tag(env_tag) {
+                s.meta.insert("env".to_string(), normalized_tag);
+            }
         }
     };
 

From 4236bd53ba46b9c7484d0e96d5170e0f23007155 Mon Sep 17 00:00:00 2001
From: David Lee <david.lee@datadoghq.com>
Date: Mon, 6 Feb 2023 08:19:43 -0800
Subject: [PATCH 05/16] change fallback_service to take an &str

---
 trace-normalization/src/normalize_utils.rs | 4 ++--
 trace-normalization/src/normalizer.rs      | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs
index 3ebd58169..7676a41c0 100644
--- a/trace-normalization/src/normalize_utils.rs
+++ b/trace-normalization/src/normalize_utils.rs
@@ -34,13 +34,13 @@ pub(crate) fn truncate_utf8(s: &str, limit: usize) -> &str {
 
 // fallbackService returns the fallback service name for a service
 // belonging to language lang.
-pub(crate) fn fallback_service(lang: String) -> String {
+pub(crate) fn fallback_service(lang: &str) -> String {
     if lang.is_empty() {
         return DEFAULT_SERVICE_NAME.to_string();
     }
     let mut service_name = String::new();
     service_name.push_str("unnamed-");
-    service_name.push_str(&lang);
+    service_name.push_str(lang);
     service_name.push_str("-service");
     // TODO: the original golang implementation uses a map to cache previously created
     // service names. Implement that here.
diff --git a/trace-normalization/src/normalizer.rs b/trace-normalization/src/normalizer.rs
index 5b8484e8d..9eee4d5d4 100644
--- a/trace-normalization/src/normalizer.rs
+++ b/trace-normalization/src/normalizer.rs
@@ -23,7 +23,7 @@ pub fn normalize(s: &mut pb::Span) -> anyhow::Result<()> {
 
     let normalized_service = match normalize_utils::normalize_service(&s.service) {
         Ok(service) => service,
-        Err(_) => normalize_utils::fallback_service("".to_string()),
+        Err(_) => normalize_utils::fallback_service(""),
     };
 
     s.service = normalized_service;

From aca86becad04388d61b971db3ebe073080648b3d Mon Sep 17 00:00:00 2001
From: David Lee <david.lee@datadoghq.com>
Date: Mon, 6 Feb 2023 08:26:26 -0800
Subject: [PATCH 06/16] modify fallback service

---
 trace-normalization/src/normalize_utils.rs | 11 +++--------
 trace-normalization/src/normalizer.rs      |  2 +-
 2 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs
index 7676a41c0..6d977cfec 100644
--- a/trace-normalization/src/normalize_utils.rs
+++ b/trace-normalization/src/normalize_utils.rs
@@ -34,17 +34,12 @@ pub(crate) fn truncate_utf8(s: &str, limit: usize) -> &str {
 
 // fallbackService returns the fallback service name for a service
 // belonging to language lang.
-pub(crate) fn fallback_service(lang: &str) -> String {
+pub(crate) fn fallback_service() -> String {
     if lang.is_empty() {
         return DEFAULT_SERVICE_NAME.to_string();
     }
-    let mut service_name = String::new();
-    service_name.push_str("unnamed-");
-    service_name.push_str(lang);
-    service_name.push_str("-service");
-    // TODO: the original golang implementation uses a map to cache previously created
-    // service names. Implement that here.
-    service_name
+    // In the go agent implementation, if a lang was specified in TagStats
+    // (extracted from the payload header) the fallback_service name would be "unnamed-{lang}-service". 
 }
 
 // NormalizeService normalizes a span service and returns an error describing the reason
diff --git a/trace-normalization/src/normalizer.rs b/trace-normalization/src/normalizer.rs
index 9eee4d5d4..e309c2cdd 100644
--- a/trace-normalization/src/normalizer.rs
+++ b/trace-normalization/src/normalizer.rs
@@ -23,7 +23,7 @@ pub fn normalize(s: &mut pb::Span) -> anyhow::Result<()> {
 
     let normalized_service = match normalize_utils::normalize_service(&s.service) {
         Ok(service) => service,
-        Err(_) => normalize_utils::fallback_service(""),
+        Err(_) => normalize_utils::fallback_service(),
     };
 
     s.service = normalized_service;

From 263e1065423ea72b1d1f3ffcf5ebfb9a641d6839 Mon Sep 17 00:00:00 2001
From: David Lee <david.lee@datadoghq.com>
Date: Mon, 6 Feb 2023 08:27:10 -0800
Subject: [PATCH 07/16] lint

---
 trace-normalization/src/normalize_utils.rs | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs
index 6d977cfec..6f6f6e6c2 100644
--- a/trace-normalization/src/normalize_utils.rs
+++ b/trace-normalization/src/normalize_utils.rs
@@ -35,11 +35,9 @@ pub(crate) fn truncate_utf8(s: &str, limit: usize) -> &str {
 // fallbackService returns the fallback service name for a service
 // belonging to language lang.
 pub(crate) fn fallback_service() -> String {
-    if lang.is_empty() {
-        return DEFAULT_SERVICE_NAME.to_string();
-    }
     // In the go agent implementation, if a lang was specified in TagStats
-    // (extracted from the payload header) the fallback_service name would be "unnamed-{lang}-service". 
+    // (extracted from the payload header) the fallback_service name would be "unnamed-{lang}-service".
+    DEFAULT_SERVICE_NAME.to_string()
 }
 
 // NormalizeService normalizes a span service and returns an error describing the reason

From 659c309396eded9469a0bf1e8d33a374088a3ce4 Mon Sep 17 00:00:00 2001
From: David Lee <david.lee@datadoghq.com>
Date: Mon, 6 Feb 2023 09:00:44 -0800
Subject: [PATCH 08/16] remove more .unwrap()

---
 trace-normalization/src/normalize_utils.rs | 33 +++++++++++++++-------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs
index 6f6f6e6c2..df63e7f78 100644
--- a/trace-normalization/src/normalize_utils.rs
+++ b/trace-normalization/src/normalize_utils.rs
@@ -82,8 +82,7 @@ pub(crate) fn normalize_tag(tag: &str) -> anyhow::Result<String> {
         }
         if cur_char.is_uppercase() {
             let mut iter = cur_char.to_lowercase();
-            if iter.len() == 1 {
-                let c: char = iter.next().unwrap();
+            if let Some(c) = iter.next() {
                 result.push(c);
                 last_char = c;
             }
@@ -126,20 +125,34 @@ pub(crate) fn is_normalized_ascii_tag(tag: &str) -> bool {
     if tag.len() > MAX_TAG_LEN {
         return false;
     }
-    if !is_valid_ascii_start_char(tag.chars().next().unwrap()) {
-        return false;
+    match tag.chars().next() {
+        Some(c) => {
+            if !is_valid_ascii_start_char(c) {
+                return false;
+            }
+        }
+        None => return false,
     }
+
     for mut i in 0..tag.len() {
-        let b: char = tag.chars().nth(i).unwrap();
-        if is_valid_ascii_tag_char(b) {
+        let cur_char: char = match tag.chars().nth(i) {
+            Some(c) => c,
+            None => return false,
+        };
+        if is_valid_ascii_tag_char(cur_char) {
             continue;
         }
-        if b == '_' {
+        if cur_char == '_' {
             // an underscore is only okay if followed by a valid non-underscore character
             i += 1;
-            if i == tag.len() || !is_valid_ascii_tag_char(tag.chars().nth(i).unwrap()) {
-                return false;
-            }
+            match tag.chars().nth(i) {
+                Some(c) => {
+                    if !is_valid_ascii_tag_char(c) {
+                        return false;
+                    }
+                }
+                None => return false,
+            };
         } else {
             return false;
         }

From 0e35bbd9b8effc30276df96bda600db720b2d8a7 Mon Sep 17 00:00:00 2001
From: David Lee <david.lee@datadoghq.com>
Date: Mon, 6 Feb 2023 09:49:40 -0800
Subject: [PATCH 09/16] add mountain of tag normalization unit tests

---
 trace-normalization/src/normalize_utils.rs | 74 +++++++++++++++++++---
 1 file changed, 65 insertions(+), 9 deletions(-)

diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs
index df63e7f78..567d8661f 100644
--- a/trace-normalization/src/normalize_utils.rs
+++ b/trace-normalization/src/normalize_utils.rs
@@ -13,7 +13,7 @@ pub(crate) const MAX_SERVICE_LEN: usize = 100;
 // MAX_SERVICE_LEN the maximum length a tag can have
 pub(crate) const MAX_TAG_LEN: usize = 200;
 
-// TruncateUTF8 truncates the given string to make sure it uses less than limit bytes.
+// truncate_utf8 truncates the given string to make sure it uses less than limit bytes.
 // If the last character is a utf8 character that would be split, it removes it
 // entirely to make sure the resulting string is not broken.
 pub(crate) fn truncate_utf8(s: &str, limit: usize) -> &str {
@@ -32,7 +32,7 @@ pub(crate) fn truncate_utf8(s: &str, limit: usize) -> &str {
     s
 }
 
-// fallbackService returns the fallback service name for a service
+// fallback_service returns the fallback service name for a service
 // belonging to language lang.
 pub(crate) fn fallback_service() -> String {
     // In the go agent implementation, if a lang was specified in TagStats
@@ -40,7 +40,7 @@ pub(crate) fn fallback_service() -> String {
     DEFAULT_SERVICE_NAME.to_string()
 }
 
-// NormalizeService normalizes a span service and returns an error describing the reason
+// normalize_service normalizes a span service and returns an error describing the reason
 // (if any) why the name was modified.
 pub(crate) fn normalize_service(svc: &str) -> anyhow::Result<String> {
     anyhow::ensure!(!svc.is_empty(), "Normalizer Error: Empty service name.");
@@ -54,7 +54,7 @@ pub(crate) fn normalize_service(svc: &str) -> anyhow::Result<String> {
     normalize_tag(truncated_service)
 }
 
-// NormalizeTag applies some normalization to ensure the tags match the backend requirements.
+// normalize_tag applies some normalization to ensure the tags match the backend requirements.
 pub(crate) fn normalize_tag(tag: &str) -> anyhow::Result<String> {
     // Fast path: Check if the tag is valid and only contains ASCII characters,
     // if yes return it as-is right away. For most use-cases this reduces CPU usage.
@@ -265,11 +265,11 @@ mod tests {
     }
 
     #[duplicate_item(
-        test_name                       input                               expected                    expected_err;
-        [test_normalize_empty_service]   [""]                                [normalize_utils::DEFAULT_SERVICE_NAME]      ["Normalizer Error: Empty service name."];
-        [test_normalize_valid_service]   ["good"]                            ["good"]                    [""];
-        [test_normalize_long_service]    ["Too$Long$.".repeat(20).as_str()]  ["too_long_.".repeat(10)]    [""];
-        [test_normalize_dash_service]    ["bad&service"]                        ["bad_service"]                [""];
+        test_name                       input                               expected                                expected_err;
+        [test_normalize_empty_service]  [""]                                [normalize_utils::DEFAULT_SERVICE_NAME] ["Normalizer Error: Empty service name."];
+        [test_normalize_valid_service]  ["good"]                            ["good"]                                [""];
+        [test_normalize_long_service]   ["Too$Long$.".repeat(20).as_str()]  ["too_long_.".repeat(10)]               [""];
+        [test_normalize_dash_service]   ["bad&service"]                     ["bad_service"]                         [""];
     )]
     #[test]
     fn test_name() {
@@ -283,4 +283,60 @@ mod tests {
             }
         }
     }
+    #[duplicate_item(
+        test_name               input   expected    expected_err;
+        [test_normalize_tag_1]  ["#test_starting_hash"] ["test_starting_hash"] [""];
+        [test_normalize_tag_2]  ["TestCAPSandSuch"] ["testcapsandsuch"] [""];
+        [test_normalize_tag_3]  ["Test Conversion Of Weird !@#$%^&**() Characters"] ["test_conversion_of_weird_characters"] [""];
+        [test_normalize_tag_4]  ["$#weird_starting"] ["weird_starting"] [""];
+        [test_normalize_tag_5]  ["allowed:c0l0ns"] ["allowed:c0l0ns"] [""];
+        [test_normalize_tag_6]  ["1love"] ["love"] [""];
+        [test_normalize_tag_7]  ["ünicöde"] ["ünicöde"] [""];
+        [test_normalize_tag_8]  ["ünicöde:metäl"] ["ünicöde:metäl"] [""];
+        [test_normalize_tag_9]  ["Data🐨dog🐶 繋がっ⛰てて"] ["data_dog_繋がっ_てて"] [""];
+        [test_normalize_tag_10] [" spaces   "] ["spaces"] [""];
+        [test_normalize_tag_11] [" #hashtag!@#spaces #__<>#  "] ["hashtag_spaces"] [""];
+        [test_normalize_tag_12] [":testing"] [":testing"] [""];
+        [test_normalize_tag_13] ["_foo"] ["foo"] [""];
+        [test_normalize_tag_14] [":::test"] [":::test"] [""];
+        [test_normalize_tag_15] ["contiguous_____underscores"] ["contiguous_underscores"] [""];
+        [test_normalize_tag_16] ["foo_"] ["foo"] [""];
+        [test_normalize_tag_17] ["\u{017F}odd_\u{017F}case\u{017F}"] ["\u{017F}odd_\u{017F}case\u{017F}"]  [""]; // edge-case
+        [test_normalize_tag_18] [""] [""] [""];
+        [test_normalize_tag_19] [" "] [""] [""];
+        [test_normalize_tag_20] ["ok"] ["ok"] [""];
+        [test_normalize_tag_21] ["™Ö™Ö™™Ö™"] ["ö_ö_ö"] [""];
+        [test_normalize_tag_22] ["AlsO:ök"] ["also:ök"] [""];
+        [test_normalize_tag_23] [":still_ok"] [":still_ok"] [""];
+        [test_normalize_tag_24] ["___trim"] ["trim"] [""];
+        [test_normalize_tag_25] ["12.:trim@"] [":trim"] [""];
+        [test_normalize_tag_26] ["12.:trim@@"] [":trim"] [""];
+        [test_normalize_tag_27] ["fun:ky__tag/1"] ["fun:ky_tag/1"] [""];
+        [test_normalize_tag_28] ["fun:ky@tag/2"] ["fun:ky_tag/2"] [""];
+        [test_normalize_tag_29] ["fun:ky@@@tag/3"] ["fun:ky_tag/3"] [""];
+        [test_normalize_tag_30] ["tag:1/2.3"] ["tag:1/2.3"] [""];
+        [test_normalize_tag_31] ["---fun:k####y_ta@#g/1_@@#"]["fun:k_y_ta_g/1"] [""];
+        [test_normalize_tag_32] ["AlsO:œ#@ö))œk"] ["also:œ_ö_œk"] [""];
+        [test_normalize_tag_33] ["a".repeat(888).as_str()] ["a".repeat(200)] [""];
+        [test_normalize_tag_34] [("a".to_owned() + &"🐶".repeat(799)).as_str()] ["a"] [""];
+        [test_normalize_tag_35] [("a".to_string() + &char::REPLACEMENT_CHARACTER.to_string()).as_str()] ["a"] [""];
+        [test_normalize_tag_36] [("a".to_string() + &char::REPLACEMENT_CHARACTER.to_string() + &char::REPLACEMENT_CHARACTER.to_string()).as_str()] ["a"] [""];
+        [test_normalize_tag_37] [("a".to_string() + &char::REPLACEMENT_CHARACTER.to_string() + &char::REPLACEMENT_CHARACTER.to_string() + "b").as_str()] ["a_b"] [""];
+        [test_normalize_tag_38]
+            ["A00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 000000000000"]
+            ["a00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000_0"]
+            [""];
+    )]
+    #[test]
+    fn test_name() {
+        match normalize_utils::normalize_tag(input) {
+            Ok(normalized_tag) => {
+                assert_eq!(expected_err, "");
+                assert_eq!(normalized_tag, expected)
+            }
+            Err(err) => {
+                assert_eq!(format!("{err}"), expected_err);
+            }
+        }
+    }
 }

From 12c80c2450c3087752bc7c0834015b6a4380e432 Mon Sep 17 00:00:00 2001
From: David Lee <david.lee@datadoghq.com>
Date: Mon, 6 Feb 2023 15:58:37 -0800
Subject: [PATCH 10/16] address PR comments

---
 trace-normalization/src/normalize_utils.rs | 15 ++----
 trace-normalization/src/normalizer.rs      | 62 +++++++++++-----------
 2 files changed, 36 insertions(+), 41 deletions(-)

diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs
index 567d8661f..e052381ac 100644
--- a/trace-normalization/src/normalize_utils.rs
+++ b/trace-normalization/src/normalize_utils.rs
@@ -4,14 +4,14 @@
 // Datadog, Inc.
 
 // DEFAULT_SERVICE_NAME is the default name we assign a service if it's missing and we have no reasonable fallback
-pub(crate) const DEFAULT_SERVICE_NAME: &str = "unnamed-service";
+const DEFAULT_SERVICE_NAME: &str = "unnamed-service";
 
 // MAX_NAME_LEN the maximum length a name can have
 pub(crate) const MAX_NAME_LEN: usize = 100;
 // MAX_SERVICE_LEN the maximum length a service can have
-pub(crate) const MAX_SERVICE_LEN: usize = 100;
+const MAX_SERVICE_LEN: usize = 100;
 // MAX_SERVICE_LEN the maximum length a tag can have
-pub(crate) const MAX_TAG_LEN: usize = 200;
+const MAX_TAG_LEN: usize = 200;
 
 // truncate_utf8 truncates the given string to make sure it uses less than limit bytes.
 // If the last character is a utf8 character that would be split, it removes it
@@ -69,9 +69,7 @@ pub(crate) fn normalize_tag(tag: &str) -> anyhow::Result<String> {
 
     let mut result = String::with_capacity(tag.len());
 
-    let char_vec: Vec<char> = tag.chars().collect();
-
-    for cur_char in char_vec {
+    for cur_char in tag.chars() {
         if result.len() == MAX_TAG_LEN {
             break;
         }
@@ -135,10 +133,7 @@ pub(crate) fn is_normalized_ascii_tag(tag: &str) -> bool {
     }
 
     for mut i in 0..tag.len() {
-        let cur_char: char = match tag.chars().nth(i) {
-            Some(c) => c,
-            None => return false,
-        };
+        let Some(cur_char) = tag.chars().nth(i) else { return false; };
         if is_valid_ascii_tag_char(cur_char) {
             continue;
         }
diff --git a/trace-normalization/src/normalizer.rs b/trace-normalization/src/normalizer.rs
index e309c2cdd..345ad80a3 100644
--- a/trace-normalization/src/normalizer.rs
+++ b/trace-normalization/src/normalizer.rs
@@ -113,7 +113,7 @@ mod tests {
     use std::collections::HashMap;
     use std::time::SystemTime;
 
-    pub fn new_test_span() -> pb::Span {
+    fn new_test_span() -> pb::Span {
         let mut rng = rand::thread_rng();
 
         pb::Span {
@@ -137,7 +137,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_name_passes() {
+    fn test_normalize_name_passes() {
         let mut test_span = new_test_span();
         let before_name = test_span.name.clone();
         assert!(normalizer::normalize(&mut test_span).is_ok());
@@ -145,7 +145,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_empty_name() {
+    fn test_normalize_empty_name() {
         let mut test_span = new_test_span();
         test_span.name = "".to_string();
         assert!(normalizer::normalize(&mut test_span).is_ok());
@@ -153,7 +153,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_long_name() {
+    fn test_normalize_long_name() {
         let mut test_span = new_test_span();
         test_span.name = "CAMEMBERT".repeat(100);
         assert!(normalizer::normalize(&mut test_span).is_ok());
@@ -161,7 +161,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_name_no_alphanumeric() {
+    fn test_normalize_name_no_alphanumeric() {
         let mut test_span = new_test_span();
         test_span.name = "/".to_string();
         assert!(normalizer::normalize(&mut test_span).is_ok());
@@ -169,7 +169,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_name_for_metrics() {
+    fn test_normalize_name_for_metrics() {
         let expected_names = HashMap::from([
             (
                 "pylons.controller".to_string(),
@@ -190,7 +190,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_resource_passes() {
+    fn test_normalize_resource_passes() {
         let mut test_span = new_test_span();
         let before_resource = test_span.resource.clone();
         assert!(normalizer::normalize(&mut test_span).is_ok());
@@ -198,7 +198,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_empty_resource() {
+    fn test_normalize_empty_resource() {
         let mut test_span = new_test_span();
         test_span.resource = "".to_string();
         assert!(normalizer::normalize(&mut test_span).is_ok());
@@ -206,7 +206,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_trace_id_passes() {
+    fn test_normalize_trace_id_passes() {
         let mut test_span = new_test_span();
         let before_trace_id = test_span.trace_id;
         assert!(normalizer::normalize(&mut test_span).is_ok());
@@ -214,14 +214,14 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_no_trace_id() {
+    fn test_normalize_no_trace_id() {
         let mut test_span = new_test_span();
         test_span.trace_id = 0;
         assert!(normalizer::normalize(&mut test_span).is_err());
     }
 
     #[test]
-    pub fn test_normalize_component_to_name() {
+    fn test_normalize_component_to_name() {
         let mut test_span = new_test_span();
         let before_trace_id = test_span.trace_id;
         assert!(normalizer::normalize(&mut test_span).is_ok());
@@ -232,7 +232,7 @@ mod tests {
     //       implemented within the normalize function.
 
     #[test]
-    pub fn test_normalize_span_id_passes() {
+    fn test_normalize_span_id_passes() {
         let mut test_span = new_test_span();
         let before_span_id = test_span.span_id;
         assert!(normalizer::normalize(&mut test_span).is_ok());
@@ -240,14 +240,14 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_no_span_id() {
+    fn test_normalize_no_span_id() {
         let mut test_span = new_test_span();
         test_span.span_id = 0;
         assert!(normalizer::normalize(&mut test_span).is_err());
     }
 
     #[test]
-    pub fn test_normalize_start_passes() {
+    fn test_normalize_start_passes() {
         let mut test_span = new_test_span();
         let before_start = test_span.start;
         assert!(normalizer::normalize(&mut test_span).is_ok());
@@ -262,7 +262,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_start_too_small() {
+    fn test_normalize_start_too_small() {
         let mut test_span = new_test_span();
 
         test_span.start = 42;
@@ -274,7 +274,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_start_too_small_with_large_duration() {
+    fn test_normalize_start_too_small_with_large_duration() {
         let mut test_span = new_test_span();
 
         test_span.start = 42;
@@ -287,7 +287,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_duration_passes() {
+    fn test_normalize_duration_passes() {
         let mut test_span = new_test_span();
         let before_duration = test_span.duration;
 
@@ -296,7 +296,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_empty_duration() {
+    fn test_normalize_empty_duration() {
         let mut test_span = new_test_span();
         test_span.duration = 0;
 
@@ -305,7 +305,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_negative_duration() {
+    fn test_normalize_negative_duration() {
         let mut test_span = new_test_span();
         test_span.duration = -50;
 
@@ -314,7 +314,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_large_duration() {
+    fn test_normalize_large_duration() {
         let mut test_span = new_test_span();
         test_span.duration = std::i64::MAX;
 
@@ -323,7 +323,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_error_passes() {
+    fn test_normalize_error_passes() {
         let mut test_span = new_test_span();
         let before_error = test_span.error;
 
@@ -332,7 +332,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_metrics_passes() {
+    fn test_normalize_metrics_passes() {
         let mut test_span = new_test_span();
         let before_metrics = test_span.metrics.clone();
 
@@ -341,7 +341,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_meta_passes() {
+    fn test_normalize_meta_passes() {
         let mut test_span = new_test_span();
         let before_meta = test_span.meta.clone();
 
@@ -350,7 +350,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_parent_id_passes() {
+    fn test_normalize_parent_id_passes() {
         let mut test_span = new_test_span();
         let before_parent_id = test_span.parent_id;
 
@@ -359,7 +359,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_type_passes() {
+    fn test_normalize_type_passes() {
         let mut test_span = new_test_span();
         let before_type = test_span.r#type.clone();
 
@@ -368,7 +368,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_type_too_long() {
+    fn test_normalize_type_too_long() {
         let mut test_span = new_test_span();
         test_span.r#type = "sql".repeat(1000);
 
@@ -377,7 +377,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_service_tag() {
+    fn test_normalize_service_tag() {
         let mut test_span = new_test_span();
         test_span.service = "retargeting(api-Staging ".to_string();
 
@@ -386,7 +386,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_env() {
+    fn test_normalize_env() {
         let mut test_span = new_test_span();
         test_span
             .meta
@@ -397,7 +397,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_special_zipkin_root_span() {
+    fn test_special_zipkin_root_span() {
         let mut test_span = new_test_span();
         test_span.parent_id = 42;
         test_span.trace_id = 42;
@@ -413,7 +413,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_normalize_trace_empty() {
+    fn test_normalize_trace_empty() {
         let mut test_span = new_test_span();
         test_span
             .meta
@@ -424,7 +424,7 @@ mod tests {
     }
 
     #[test]
-    pub fn test_is_valid_status_code() {
+    fn test_is_valid_status_code() {
         assert!(normalizer::is_valid_status_code("100"));
         assert!(normalizer::is_valid_status_code("599"));
         assert!(!normalizer::is_valid_status_code("99"));

From 7c3a31d039d11eacd0db52d721039ddbcb4e1118 Mon Sep 17 00:00:00 2001
From: David Lee <david.lee@datadoghq.com>
Date: Mon, 6 Feb 2023 15:58:49 -0800
Subject: [PATCH 11/16] modify comment wording

---
 trace-normalization/src/normalize_utils.rs | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs
index e052381ac..72e79004b 100644
--- a/trace-normalization/src/normalize_utils.rs
+++ b/trace-normalization/src/normalize_utils.rs
@@ -34,14 +34,13 @@ pub(crate) fn truncate_utf8(s: &str, limit: usize) -> &str {
 
 // fallback_service returns the fallback service name for a service
 // belonging to language lang.
+// In the go agent implementation, if a lang was specified in TagStats
+// (extracted from the payload header) the fallback_service name would be "unnamed-{lang}-service".
 pub(crate) fn fallback_service() -> String {
-    // In the go agent implementation, if a lang was specified in TagStats
-    // (extracted from the payload header) the fallback_service name would be "unnamed-{lang}-service".
     DEFAULT_SERVICE_NAME.to_string()
 }
 
-// normalize_service normalizes a span service and returns an error describing the reason
-// (if any) why the name was modified.
+// normalize_service normalizes a span service
 pub(crate) fn normalize_service(svc: &str) -> anyhow::Result<String> {
     anyhow::ensure!(!svc.is_empty(), "Normalizer Error: Empty service name.");
 

From 924c6d3292b5f7fd6da612a3f0fb13c7991f79b9 Mon Sep 17 00:00:00 2001
From: David Lee <david.lee@datadoghq.com>
Date: Tue, 7 Feb 2023 08:07:34 -0800
Subject: [PATCH 12/16] remove let ... else

---
 trace-normalization/src/normalize_utils.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs
index 72e79004b..b7244ca34 100644
--- a/trace-normalization/src/normalize_utils.rs
+++ b/trace-normalization/src/normalize_utils.rs
@@ -132,7 +132,10 @@ pub(crate) fn is_normalized_ascii_tag(tag: &str) -> bool {
     }
 
     for mut i in 0..tag.len() {
-        let Some(cur_char) = tag.chars().nth(i) else { return false; };
+        let cur_char = match tag.chars().nth(i) {
+            Some(c) => c,
+            None => return false,
+        };
         if is_valid_ascii_tag_char(cur_char) {
             continue;
         }

From 8b1f91adfc012af88e3d16af1a5bf4e99364f573 Mon Sep 17 00:00:00 2001
From: David Lee <thedavl2001@gmail.com>
Date: Wed, 8 Feb 2023 14:37:07 -0800
Subject: [PATCH 13/16] Update trace-normalization/src/normalize_utils.rs

Co-authored-by: Levi Morrison <levi.morrison@datadoghq.com>
---
 trace-normalization/src/normalize_utils.rs | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs
index b7244ca34..5e1f55cab 100644
--- a/trace-normalization/src/normalize_utils.rs
+++ b/trace-normalization/src/normalize_utils.rs
@@ -44,11 +44,7 @@ pub(crate) fn fallback_service() -> String {
 pub(crate) fn normalize_service(svc: &str) -> anyhow::Result<String> {
     anyhow::ensure!(!svc.is_empty(), "Normalizer Error: Empty service name.");
 
-    let truncated_service = if svc.len() > MAX_SERVICE_LEN {
-        truncate_utf8(svc, MAX_SERVICE_LEN)
-    } else {
-        svc
-    };
+    let truncated_service = truncate_utf8(svc, MAX_SERVICE_LEN);
 
     normalize_tag(truncated_service)
 }

From 8be8691e8623b5017c1d0545477808f8c8192c88 Mon Sep 17 00:00:00 2001
From: David Lee <david.lee@datadoghq.com>
Date: Wed, 8 Feb 2023 14:43:44 -0800
Subject: [PATCH 14/16] address PR comments

---
 trace-normalization/src/normalize_utils.rs | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs
index 5e1f55cab..a0dc6467f 100644
--- a/trace-normalization/src/normalize_utils.rs
+++ b/trace-normalization/src/normalize_utils.rs
@@ -127,8 +127,10 @@ pub(crate) fn is_normalized_ascii_tag(tag: &str) -> bool {
         None => return false,
     }
 
-    for mut i in 0..tag.len() {
-        let cur_char = match tag.chars().nth(i) {
+    let mut tag_iter = tag.chars().peekable();
+
+    while tag_iter.peek().is_some() {
+        let cur_char = match tag_iter.next() {
             Some(c) => c,
             None => return false,
         };
@@ -137,8 +139,7 @@ pub(crate) fn is_normalized_ascii_tag(tag: &str) -> bool {
         }
         if cur_char == '_' {
             // an underscore is only okay if followed by a valid non-underscore character
-            i += 1;
-            match tag.chars().nth(i) {
+            match tag_iter.next() {
                 Some(c) => {
                     if !is_valid_ascii_tag_char(c) {
                         return false;

From 912f026c246d273550b055253e8d8a8000513ba9 Mon Sep 17 00:00:00 2001
From: David Lee <david.lee@datadoghq.com>
Date: Wed, 8 Feb 2023 14:55:25 -0800
Subject: [PATCH 15/16] remove .peekable() in is_normalized_ascii_tag func

---
 trace-normalization/src/normalize_utils.rs | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs
index a0dc6467f..e0899550a 100644
--- a/trace-normalization/src/normalize_utils.rs
+++ b/trace-normalization/src/normalize_utils.rs
@@ -127,13 +127,9 @@ pub(crate) fn is_normalized_ascii_tag(tag: &str) -> bool {
         None => return false,
     }
 
-    let mut tag_iter = tag.chars().peekable();
+    let mut tag_iter = tag.chars();
 
-    while tag_iter.peek().is_some() {
-        let cur_char = match tag_iter.next() {
-            Some(c) => c,
-            None => return false,
-        };
+    while let Some(cur_char) = tag_iter.next() {
         if is_valid_ascii_tag_char(cur_char) {
             continue;
         }

From 0c8de519221684e30c9385b320ecae58730b2229 Mon Sep 17 00:00:00 2001
From: David Lee <david.lee@datadoghq.com>
Date: Wed, 8 Feb 2023 15:01:42 -0800
Subject: [PATCH 16/16] Update normalize_utils.rs

---
 trace-normalization/src/normalize_utils.rs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/trace-normalization/src/normalize_utils.rs b/trace-normalization/src/normalize_utils.rs
index e0899550a..3a7e28b7b 100644
--- a/trace-normalization/src/normalize_utils.rs
+++ b/trace-normalization/src/normalize_utils.rs
@@ -118,7 +118,10 @@ pub(crate) fn is_normalized_ascii_tag(tag: &str) -> bool {
     if tag.len() > MAX_TAG_LEN {
         return false;
     }
-    match tag.chars().next() {
+
+    let mut tag_iter = tag.chars();
+
+    match tag_iter.next() {
         Some(c) => {
             if !is_valid_ascii_start_char(c) {
                 return false;
@@ -127,8 +130,6 @@ pub(crate) fn is_normalized_ascii_tag(tag: &str) -> bool {
         None => return false,
     }
 
-    let mut tag_iter = tag.chars();
-
     while let Some(cur_char) = tag_iter.next() {
         if is_valid_ascii_tag_char(cur_char) {
             continue;