From 99890363c282f5527afdb4e28ce760d7a47e1707 Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Tue, 12 Apr 2022 10:29:36 -0600 Subject: [PATCH] Cleanup slices, strings, tags, and vecs (#44) This started with enhancing tags, but when I was done I discovered a sigsev which could be triggered even from Rust code. I knew I had screwed up the lifetimes and unsafe code. So, I started working and I kept unravelling more and more. Slice and vec have been split into their own files as lib.rs was getting large. Many From traits were changed to work with &T instead of T because of lifetime issues. In this PR, some C FFI APIs for tags have been removed. A subsequent PR will add them back and enhance them. I wanted to keep the PR size to be somewhat manageable. Some places using some form of string have changed to use `Cow<'static, str>`. This allows you to borrow static strings, and own all others. When calling from C, the difference is very little because they _should_ have been copied (probably, was unsafe if not). I believe these are the changes which actually fixed the crash. --- ddprof-exporter/src/lib.rs | 41 ++--- ddprof-exporter/src/tag.rs | 120 ++++++++++++++ ddprof-exporter/tests/form.rs | 108 ++++++------- ddprof-ffi/src/exporter.rs | 286 ++++++---------------------------- ddprof-ffi/src/lib.rs | 229 +-------------------------- ddprof-ffi/src/profiles.rs | 144 +++++++++-------- ddprof-ffi/src/slice.rs | 254 ++++++++++++++++++++++++++++++ ddprof-ffi/src/vec.rs | 25 ++- 8 files changed, 593 insertions(+), 614 deletions(-) create mode 100644 ddprof-exporter/src/tag.rs create mode 100644 ddprof-ffi/src/slice.rs diff --git a/ddprof-exporter/src/lib.rs b/ddprof-exporter/src/lib.rs index 870ac23..7f204b7 100644 --- a/ddprof-exporter/src/lib.rs +++ b/ddprof-exporter/src/lib.rs @@ -16,6 +16,9 @@ use tokio::runtime::Runtime; mod connector; mod container_id; mod errors; +pub mod tag; + +pub use tag::*; #[cfg(unix)] pub use connector::uds::socket_path_to_uri; @@ -30,11 +33,6 @@ pub struct Exporter { runtime: Runtime, } -pub struct Tag { - pub name: Cow<'static, str>, - pub value: Cow<'static, str>, -} - pub struct FieldsV3 { pub start: DateTime, pub end: DateTime, @@ -42,14 +40,14 @@ pub struct FieldsV3 { pub struct Endpoint { url: Uri, - api_key: Option, + api_key: Option>, } pub struct ProfileExporterV3 { exporter: Exporter, endpoint: Endpoint, - family: String, - tags: Vec, + family: Cow<'static, str>, + tags: Option>, } pub struct Request { @@ -139,20 +137,23 @@ impl Endpoint { /// # Arguments /// * `site` - e.g. "datadoghq.com". /// * `api_key` - pub fn agentless>(site: S, api_key: S) -> Result> { - let intake_url = format!("https://intake.profile.{}/v1/input", site.as_ref()); + pub fn agentless, IntoCow: Into>>( + site: AsStrRef, + api_key: IntoCow, + ) -> Result> { + let intake_url: String = format!("https://intake.profile.{}/v1/input", site.as_ref()); Ok(Endpoint { url: Uri::from_str(intake_url.as_str())?, - api_key: Some(String::from(api_key.as_ref())), + api_key: Some(api_key.into()), }) } } impl ProfileExporterV3 { - pub fn new>( - family: S, - tags: Vec, + pub fn new>>( + family: IntoCow, + tags: Option>, endpoint: Endpoint, ) -> Result> { Ok(Self { @@ -169,7 +170,7 @@ impl ProfileExporterV3 { start: chrono::DateTime, end: chrono::DateTime, files: &[File], - additional_tags: &[Tag], + additional_tags: Option<&Vec>, timeout: std::time::Duration, ) -> Result> { let mut form = multipart::Form::default(); @@ -177,10 +178,12 @@ impl ProfileExporterV3 { form.add_text("version", "3"); form.add_text("start", start.format("%Y-%m-%dT%H:%M:%S%.9fZ").to_string()); form.add_text("end", end.format("%Y-%m-%dT%H:%M:%S%.9fZ").to_string()); - form.add_text("family", String::from(&self.family)); + form.add_text("family", self.family.to_owned()); - for tag in self.tags.iter().chain(additional_tags.iter()) { - form.add_text("tags[]", format!("{}:{}", tag.name, tag.value)); + for tags in self.tags.as_ref().iter().chain(additional_tags.iter()) { + for tag in tags.iter() { + form.add_text("tags[]", tag.to_string()); + } } for file in files { @@ -200,7 +203,7 @@ impl ProfileExporterV3 { if let Some(api_key) = &self.endpoint.api_key { builder = builder.header( "DD-API-KEY", - HeaderValue::from_str(api_key.as_str()).expect("TODO"), + HeaderValue::from_str(api_key).expect("Error setting api_key"), ); } diff --git a/ddprof-exporter/src/tag.rs b/ddprof-exporter/src/tag.rs new file mode 100644 index 0000000..0bec080 --- /dev/null +++ b/ddprof-exporter/src/tag.rs @@ -0,0 +1,120 @@ +use std::borrow::Cow; +use std::fmt::{Debug, Display, Formatter}; + +#[derive(Clone, Eq, PartialEq)] +pub struct Tag { + key: Cow<'static, str>, + value: Cow<'static, str>, +} + +impl Debug for Tag { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Tag") + .field("key", &self.key) + .field("value", &self.value) + .finish() + } +} + +// Any type which implements Display automatically has to_string. +impl Display for Tag { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + // A tag isn't supposed to end with a colon, so if there isn't a value + // then don't follow the tag with a colon. + if self.value.is_empty() { + write!(f, "{}", self.key) + } else { + write!(f, "{}:{}", self.key, self.value) + } + } +} + +impl Tag { + pub fn new>>( + key: IntoCow, + value: IntoCow, + ) -> Result> { + let key = key.into(); + let value = value.into(); + if key.is_empty() { + return Err("tag key was empty".into()); + } + + let first_valid_char = key + .chars() + .find(|char| *char != std::char::REPLACEMENT_CHARACTER && !char.is_whitespace()); + + if first_valid_char.is_none() { + return Err("tag contained only whitespace or invalid unicode characters".into()); + } + + Ok(Self { key, value }) + } + + pub fn key(&self) -> &Cow { + &self.key + } + pub fn value(&self) -> &Cow { + &self.value + } + + pub fn into_owned(mut self) -> Self { + self.key = self.key.to_owned(); + self.value = self.value.to_owned(); + self + } +} + +#[cfg(test)] +mod tests { + use crate::Tag; + + #[test] + fn test_empty_key() { + let _ = Tag::new("", "woof").expect_err("empty key is not allowed"); + } + + #[test] + fn test_empty_value() { + let tag = Tag::new("key1", "").expect("empty value is okay"); + assert_eq!("key1", tag.to_string()); // notice no trailing colon! + } + + #[test] + fn test_bad_utf8() { + // 0b1111_0xxx is the start of a 4-byte sequence, but there aren't any + // more chars, so it will get converted into the utf8 replacement + // character. This results in a string with a space (32) and a + // replacement char, so it should be an error (no valid chars). + let bytes = &[32, 0b1111_0111]; + let key = String::from_utf8_lossy(bytes); + let _ = Tag::new(key, "value".into()).expect_err("invalid tag is rejected"); + } + + #[test] + fn test_value_has_colon() { + let result = Tag::new("env", "staging:east").expect("values can have colons"); + assert_eq!("env:staging:east", result.to_string()); + } + + #[test] + fn test_suspicious_tags() { + // Based on tag rules, these should all fail. However, there is a risk + // that profile tags will then differ or cause failures compared to + // trace tags. These require cross-team, cross-language collaboration. + let cases = [ + (":key-starts-with-colon".to_string(), "value".to_owned()), + ("key".to_string(), "value-ends-with-colon:".to_owned()), + ( + "the-tag-length-is-over-200-characters".repeat(6), + "value".to_owned(), + ), + ]; + + for case in cases { + let result = Tag::new(case.0, case.1); + // Again, these should fail, but it's not implemented yet + assert!(result.is_ok()) + } + } +} diff --git a/ddprof-exporter/tests/form.rs b/ddprof-exporter/tests/form.rs index b2351c0..22084f0 100644 --- a/ddprof-exporter/tests/form.rs +++ b/ddprof-exporter/tests/form.rs @@ -1,8 +1,7 @@ // Unless explicitly stated otherwise all files in this repository are licensed under the Apache License Version 2.0. // This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present Datadog, Inc. -use ddprof_exporter::{Endpoint, File, ProfileExporterV3, Request, Tag}; -use std::borrow::Cow; +use ddprof_exporter::{File, ProfileExporterV3, Request}; use std::error::Error; use std::io::Read; use std::ops::Sub; @@ -33,7 +32,7 @@ fn multipart(exporter: &ProfileExporterV3) -> Request { let timeout = std::time::Duration::from_secs(10); let request = exporter - .build(start, end, files, &[], timeout) + .build(start, end, files, None, timeout) .expect("request to be built"); let actual_timeout = request.timeout().expect("timeout to exist"); @@ -41,55 +40,56 @@ fn multipart(exporter: &ProfileExporterV3) -> Request { request } -fn default_tags() -> Vec { - vec![ - Tag { - name: Cow::Borrowed("service"), - value: Cow::Borrowed("php"), - }, - Tag { - name: Cow::Borrowed("host"), - value: Cow::Borrowed("bits"), - }, - ] -} - -#[test] -fn multipart_agent() { - let base_url = "http://localhost:8126".parse().expect("url to parse"); - let endpoint = Endpoint::agent(base_url).expect("endpoint to construct"); - let exporter = - ProfileExporterV3::new("php", default_tags(), endpoint).expect("exporter to construct"); - - let request = multipart(&exporter); - - assert_eq!( - request.uri().to_string(), - "http://localhost:8126/profiling/v1/input" - ); - - let actual_headers = request.headers(); - assert!(!actual_headers.contains_key("DD-API-KEY")); -} - -#[test] -fn multipart_agentless() { - let api_key = "1234567890123456789012"; - let endpoint = Endpoint::agentless("datadoghq.com", api_key).expect("endpoint to construct"); - let exporter = - ProfileExporterV3::new("php", default_tags(), endpoint).expect("exporter to construct"); - - let request = multipart(&exporter); - - assert_eq!( - request.uri().to_string(), - "https://intake.profile.datadoghq.com/v1/input" - ); - - let actual_headers = request.headers(); - - assert_eq!( - actual_headers.get("DD-API-KEY").expect("api key to exist"), - api_key - ); +#[cfg(test)] +mod tests { + use crate::multipart; + use ddprof_exporter::*; + + fn default_tags() -> Vec { + vec![ + Tag::new("service", "php").expect("static tags to be valid"), + Tag::new("host", "bits").expect("static tags to be valid"), + ] + } + + #[test] + fn multipart_agent() { + let base_url = "http://localhost:8126".parse().expect("url to parse"); + let endpoint = Endpoint::agent(base_url).expect("endpoint to construct"); + let exporter = ProfileExporterV3::new("php", Some(default_tags()), endpoint) + .expect("exporter to construct"); + + let request = multipart(&exporter); + + assert_eq!( + request.uri().to_string(), + "http://localhost:8126/profiling/v1/input" + ); + + let actual_headers = request.headers(); + assert!(!actual_headers.contains_key("DD-API-KEY")); + } + + #[test] + fn multipart_agentless() { + let api_key = "1234567890123456789012"; + let endpoint = + Endpoint::agentless("datadoghq.com", api_key).expect("endpoint to construct"); + let exporter = ProfileExporterV3::new("php", Some(default_tags()), endpoint) + .expect("exporter to construct"); + + let request = multipart(&exporter); + + assert_eq!( + request.uri().to_string(), + "https://intake.profile.datadoghq.com/v1/input" + ); + + let actual_headers = request.headers(); + + assert_eq!( + actual_headers.get("DD-API-KEY").expect("api key to exist"), + api_key + ); + } } diff --git a/ddprof-ffi/src/exporter.rs b/ddprof-ffi/src/exporter.rs index 902825b..a3b8aa9 100644 --- a/ddprof-ffi/src/exporter.rs +++ b/ddprof-ffi/src/exporter.rs @@ -4,13 +4,12 @@ #![allow(renamed_and_removed_lints)] #![allow(clippy::box_vec)] -use crate::{ByteSlice, CharSlice, Slice, Timespec}; +use crate::{AsBytes, ByteSlice, CharSlice, Slice, Timespec}; use ddprof_exporter as exporter; +use ddprof_exporter::Tag; use exporter::ProfileExporterV3; use std::borrow::Cow; -use std::convert::TryInto; use std::error::Error; -use std::fmt::{Debug, Display, Formatter}; use std::ptr::NonNull; use std::str::FromStr; @@ -39,22 +38,6 @@ pub unsafe extern "C" fn new_profile_exporter_v3_result_drop(result: NewProfileE } } -#[repr(C)] -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub struct Tag<'a> { - name: CharSlice<'a>, - value: CharSlice<'a>, -} - -impl<'a> Tag<'a> { - fn new(key: &'a str, value: &'a str) -> Tag<'a> { - Tag { - name: CharSlice::from(key), - value: CharSlice::from(value), - } - } -} - #[repr(C)] pub enum EndpointV3<'a> { Agent(CharSlice<'a>), @@ -95,53 +78,8 @@ pub extern "C" fn endpoint_agentless<'a>( EndpointV3::Agentless(site, api_key) } -struct TagsError { - message: String, -} - -impl Debug for TagsError { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "Tag Error: {:?}.", self.message) - } -} - -impl Display for TagsError { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "Tag Error: {}.", self.message) - } -} - -impl Error for TagsError {} - -fn try_to_tags(tags: Slice) -> Result, Box> { - let mut converted_tags = Vec::with_capacity(tags.len); - for tag in tags.into_slice().iter() { - let name: &str = tag.name.try_into()?; - let value: &str = tag.value.try_into()?; - - // If a tag name is empty, that's an error - if name.is_empty() { - return Err(Box::new(TagsError { - message: "tag name must not be empty".to_string(), - })); - } - - /* However, empty tag values are treated as if the tag was not sent; - * this makes it easier for the calling code to send a statically sized - * tags slice. - */ - if !value.is_empty() { - converted_tags.push(ddprof_exporter::Tag { - name: Cow::Owned(String::from(name)), - value: Cow::Owned(String::from(value)), - }); - } - } - Ok(converted_tags) -} - -fn try_to_url(slice: CharSlice) -> Result> { - let str: &str = slice.try_into()?; +unsafe fn try_to_url(slice: CharSlice) -> Result> { + let str: &str = slice.try_to_utf8()?; #[cfg(unix)] if let Some(path) = str.strip_prefix("unix://") { return ddprof_exporter::socket_path_to_uri(path.as_ref()); @@ -152,18 +90,23 @@ fn try_to_url(slice: CharSlice) -> Result } } -fn try_to_endpoint( +unsafe fn try_to_endpoint( endpoint: EndpointV3, ) -> Result> { + // convert to utf8 losslessly -- URLs and API keys should all be ASCII, so + // a failed result is likely to be an error. match endpoint { EndpointV3::Agent(url) => { let base_url = try_to_url(url)?; ddprof_exporter::Endpoint::agent(base_url) } EndpointV3::Agentless(site, api_key) => { - let site_str: &str = site.try_into()?; - let api_key_str: &str = api_key.try_into()?; - ddprof_exporter::Endpoint::agentless(site_str, api_key_str) + let site_str = site.try_to_utf8()?; + let api_key_str = api_key.try_to_utf8()?; + ddprof_exporter::Endpoint::agentless( + Cow::Owned(site_str.to_owned()), + Cow::Owned(api_key_str.to_owned()), + ) } } } @@ -172,14 +115,14 @@ fn try_to_endpoint( #[export_name = "ddprof_ffi_ProfileExporterV3_new"] pub extern "C" fn profile_exporter_new( family: CharSlice, - tags: Slice, + tags: Option<&crate::Vec>, endpoint: EndpointV3, ) -> NewProfileExporterV3Result { - match || -> Result> { - let converted_family: &str = family.try_into()?; - let converted_tags = try_to_tags(tags)?; - let converted_endpoint = try_to_endpoint(endpoint)?; - ProfileExporterV3::new(converted_family, converted_tags, converted_endpoint) + match || -> Result> { + let family = unsafe { family.to_utf8_lossy() }.into_owned(); + let converted_endpoint = unsafe { try_to_endpoint(endpoint)? }; + let tags = tags.map(|tags| tags.iter().map(|tag| tag.clone().into_owned()).collect()); + ProfileExporterV3::new(family, tags, converted_endpoint) }() { Ok(exporter) => NewProfileExporterV3Result::Ok(Box::into_raw(Box::new(exporter))), Err(err) => NewProfileExporterV3Result::Err(err.into()), @@ -191,15 +134,16 @@ pub extern "C" fn profile_exporter_delete(exporter: Option(slice: Slice<'a, File>) -> Option>> { - let mut vec = Vec::with_capacity(slice.len); - - for file in slice.into_slice().iter() { - let name = file.name.try_into().ok()?; - let bytes: &[u8] = file.file.into_slice(); - vec.push(ddprof_exporter::File { name, bytes }); - } - Some(vec) +unsafe fn into_vec_files<'a>(slice: Slice<'a, File>) -> Vec> { + slice + .into_slice() + .iter() + .map(|file| { + let name = file.name.try_to_utf8().unwrap_or("{invalid utf-8}"); + let bytes = file.file.as_slice(); + ddprof_exporter::File { name, bytes } + }) + .collect() } /// Builds a Request object based on the profile data supplied. @@ -213,20 +157,20 @@ pub unsafe extern "C" fn profile_exporter_build( start: Timespec, end: Timespec, files: Slice, - additional_tags: Slice, + additional_tags: Option<&crate::Vec>, timeout_ms: u64, ) -> Option> { match exporter { None => None, Some(exporter) => { let timeout = std::time::Duration::from_millis(timeout_ms); - let converted_files = try_into_vec_files(files)?; - let converted_tags = try_to_tags(additional_tags).ok()?; + let converted_files = into_vec_files(files); + let tags = additional_tags.map(|tags| tags.iter().map(Tag::clone).collect()); match exporter.as_ref().build( start.into(), end.into(), converted_files.as_slice(), - converted_tags.as_slice(), + tags.as_ref(), timeout, ) { Ok(request) => Some(Box::new(Request(request))), @@ -282,106 +226,13 @@ pub unsafe extern "C" fn send_result_drop(result: SendResult) { std::mem::drop(result) } -#[must_use] -#[export_name = "ddprof_ffi_Vec_tag_new"] -pub extern "C" fn vec_tag_new<'a>() -> crate::Vec> { - crate::Vec::default() -} - -/// Pushes the tag into the vec. -#[export_name = "ddprof_ffi_Vec_tag_push"] -pub unsafe extern "C" fn vec_tag_push<'a>(vec: &mut crate::Vec>, tag: Tag<'a>) { - vec.push(tag) -} - -#[allow(clippy::ptr_arg)] -#[export_name = "ddprof_ffi_Vec_tag_as_slice"] -pub extern "C" fn vec_tag_as_slice<'a>(vec: &'a crate::Vec>) -> Slice<'a, Tag<'a>> { - vec.as_slice() -} - -#[export_name = "ddprof_ffi_Vec_tag_drop"] -pub extern "C" fn vec_tag_drop(vec: crate::Vec) { - std::mem::drop(vec) -} - -fn parse_tag_chunk(chunk: &str) -> Result { - if let Some(first_colon_position) = chunk.find(':') { - if first_colon_position == 0 { - return Err(TagsError { - message: format!("tag cannot start with a colon: \"{}\"", chunk), - }); - } - - if chunk.ends_with(':') { - return Err(TagsError { - message: format!("tag cannot end with a colon: \"{}\"", chunk), - }); - } - let name = &chunk[..first_colon_position]; - let value = &chunk[(first_colon_position + 1)..]; - Ok(Tag::new(name, value)) - } else { - Ok(Tag::new(chunk, "")) - } -} - -/// Parse a string of tags typically provided by environment variables -/// The tags are expected to be either space or comma separated: -/// "key1:value1,key2:value2" -/// "key1:value1 key2:value2" -/// Tag names and values are required and may not be empty. -fn parse_tags(str: &str) -> Result, TagsError> { - let vec: Vec<_> = str - .split(&[',', ' '][..]) - .flat_map(parse_tag_chunk) - .collect(); - Ok(vec.into()) -} - -#[repr(C)] -pub enum VecTagResult<'a> { - Ok(crate::Vec>), - Err(crate::Vec), -} - -#[export_name = "ddprof_ffi_VecTagResult_drop"] -pub extern "C" fn vec_tag_result_drop(result: VecTagResult) { - std::mem::drop(result) -} - -#[must_use] -#[export_name = "ddprof_ffi_Vec_tag_parse"] -pub extern "C" fn vec_tag_parse(string: CharSlice) -> VecTagResult { - match string.try_into() { - Ok(str) => match parse_tags(str) { - Ok(vec) => VecTagResult::Ok(vec), - Err(err) => VecTagResult::Err(crate::Vec::from(&err as &dyn Error)), - }, - - Err(err) => VecTagResult::Err(crate::Vec::from(&err as &dyn Error)), - } -} - -#[must_use] -#[allow(clippy::ptr_arg)] -#[export_name = "ddprof_ffi_Vec_tag_clone"] -pub extern "C" fn vec_tag_clone<'a>(vec: &'a crate::Vec>) -> VecTagResult { - let mut clone = Vec::new(); - for tag in vec.into_iter() { - clone.push(tag.to_owned()) - } - VecTagResult::Ok(crate::Vec::from(clone)) -} - #[cfg(test)] mod test { use crate::exporter::*; use crate::Slice; - use std::os::raw::c_char; fn family() -> CharSlice<'static> { - CharSlice::new("native".as_ptr() as *const c_char, "native".len()) + CharSlice::from("native") } fn base_url() -> &'static str { @@ -389,32 +240,16 @@ mod test { } fn endpoint() -> CharSlice<'static> { - CharSlice::new(base_url().as_ptr() as *const c_char, base_url().len()) - } - - #[test] - fn empty_tag_name() { - let tag = Tag { - name: Slice::new("".as_ptr() as *const c_char, 0), - value: Slice::new("1".as_ptr() as *const c_char, 1), - }; - let tags = Slice::new((&tag) as *const Tag, 1); - let result = try_to_tags(tags); - assert!(result.is_err()); + CharSlice::from(base_url()) } #[test] fn profile_exporter_v3_new_and_delete() { - let tags = [Tag { - name: CharSlice::new("host".as_ptr() as *const c_char, "host".len()), - value: CharSlice::new("localhost".as_ptr() as *const c_char, "localhost".len()), - }]; + let mut tags = crate::Vec::default(); + let host = Tag::new("host", "localhost").expect("static tags to be valid"); + tags.push(host); - let result = profile_exporter_new( - family(), - Slice::new(tags.as_ptr(), tags.len()), - endpoint_agent(endpoint()), - ); + let result = profile_exporter_new(family(), Some(&tags), endpoint_agent(endpoint())); match result { NewProfileExporterV3Result::Ok(exporter) => unsafe { @@ -429,8 +264,7 @@ mod test { #[test] fn profile_exporter_v3_build() { - let exporter_result = - profile_exporter_new(family(), Slice::default(), endpoint_agent(endpoint())); + let exporter_result = profile_exporter_new(family(), None, endpoint_agent(endpoint())); let exporter = match exporter_result { NewProfileExporterV3Result::Ok(exporter) => unsafe { @@ -442,9 +276,9 @@ mod test { } }; - let files = [File { - name: CharSlice::new("foo.pprof".as_ptr() as *const c_char, "foo.pprof".len()), - file: ByteSlice::new("dummy contents".as_ptr(), "dummy contents".len()), + let files: &[File] = &[File { + name: CharSlice::from("foo.pprof"), + file: ByteSlice::from(b"dummy contents" as &[u8]), }]; let start = Timespec { @@ -462,8 +296,8 @@ mod test { exporter, start, finish, - Slice::new(files.as_ptr(), files.len()), - Slice::default(), + Slice::from(files), + None, timeout_milliseconds, ) }; @@ -475,34 +309,4 @@ mod test { // It'd be nice to actually perform the request, capture its contents, and assert that // they are as expected. } - - #[test] - fn test_parse_tags() { - // See the docs for what we convey to users about tags: - // https://docs.datadoghq.com/getting_started/tagging/ - - let cases = [ - ("env:staging:east", vec![Tag::new("env", "staging:east")]), - ("value", vec![Tag::new("value", "")]), - ( - "state:utah,state:idaho", - vec![Tag::new("state", "utah"), Tag::new("state", "idaho")], - ), - ( - "key1:value1 key2:value2 key3:value3", - vec![ - Tag::new("key1", "value1"), - Tag::new("key2", "value2"), - Tag::new("key3", "value3"), - ], - ), - ("key1:", vec![]), - ]; - - for case in cases { - let expected = case.1; - let actual = parse_tags(case.0).unwrap(); - assert_eq!(expected, std::vec::Vec::from(actual)); - } - } } diff --git a/ddprof-ffi/src/lib.rs b/ddprof-ffi/src/lib.rs index d84d21a..548b065 100644 --- a/ddprof-ffi/src/lib.rs +++ b/ddprof-ffi/src/lib.rs @@ -2,21 +2,19 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present Datadog, Inc. use std::convert::{TryFrom, TryInto}; -use std::fmt::{Debug, Formatter}; -use std::marker::PhantomData; +use std::fmt::Debug; use std::ops::Sub; -use std::os::raw::c_char; -use std::str::Utf8Error; use std::time::{Duration, SystemTime, UNIX_EPOCH}; use chrono::{DateTime, TimeZone, Utc}; -use libc::size_t; mod exporter; mod profiles; +mod slice; mod vec; -pub use vec::*; +pub use slice::{AsBytes, ByteSlice, CharSlice, Slice}; +pub use vec::Vec; /// Represents time since the Unix Epoch in seconds plus nanoseconds. #[repr(C)] @@ -46,222 +44,3 @@ impl TryFrom for Timespec { }) } } - -#[repr(C)] -#[derive(Copy, Clone)] -pub struct Slice<'a, T> { - pub ptr: *const T, - pub len: size_t, - phantom: PhantomData<&'a [T]>, -} - -impl<'a, T> IntoIterator for Slice<'a, T> { - type Item = &'a T; - type IntoIter = std::slice::Iter<'a, T>; - - fn into_iter(self) -> Self::IntoIter { - self.into_slice().iter() - } -} - -impl<'a, T: Debug> Debug for Slice<'a, T> { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_list().entries(self.as_slice().iter()).finish() - } -} - -impl<'a, T: Eq> PartialEq for Slice<'a, T> { - fn eq(&self, other: &Self) -> bool { - return self.as_slice() == other.as_slice(); - } -} - -impl<'a, T: Eq> Eq for Slice<'a, T> {} - -// Use to represent strings -- should be valid UTF-8. -type CharSlice<'a> = crate::Slice<'a, c_char>; - -/// Use to represent bytes -- does not need to be valid UTF-8. -type ByteSlice<'a> = crate::Slice<'a, u8>; - -/// This exists as an intrinsic, but it is private. -pub fn is_aligned_and_not_null(ptr: *const T) -> bool { - !ptr.is_null() && ptr as usize % std::mem::align_of::() == 0 -} - -impl<'a, T> Slice<'a, T> { - pub fn new(ptr: *const T, len: size_t) -> Self { - Self { - ptr, - len, - phantom: Default::default(), - } - } - - pub fn as_slice(&'a self) -> &'a [T] { - if self.is_empty() { - return &[]; - } - unsafe { std::slice::from_raw_parts(self.ptr, self.len) } - } - - /// # Safety - /// The Slice's ptr must point to contiguous storage of at least `self.len` - /// elements. If it is not suitably aligned, then it will return an empty slice. - pub fn into_slice(self) -> &'a [T] { - if self.is_empty() { - return &[]; - } - unsafe { std::slice::from_raw_parts(self.ptr, self.len) } - } - - pub fn len(&self) -> usize { - if is_aligned_and_not_null(self.ptr) { - self.len - } else { - 0 - } - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } -} - -impl<'a, T> Default for Slice<'a, T> { - fn default() -> Self { - /* The docs on std::slice::from_raw_parts indicate the pointer should be - * non-null and suitably aligned for T even for zero-length slices. - * Using a few tests, I wasn't actually able to create any harm with a - * null pointer; after all it shouldn't get de-referenced and such, but - * nonetheless we follow the documentation and use NonNull::dangling(), - * which it suggests. - * Since Slice's can be made from C, check for null and unaligned - * pointers in associated functions to defend against this. - */ - Self { - ptr: std::ptr::NonNull::dangling().as_ptr(), - len: 0, - phantom: Default::default(), - } - } -} - -impl<'a, T> From<&'a [T]> for Slice<'a, T> { - fn from(s: &'a [T]) -> Self { - Slice::new(s.as_ptr() as *const T, s.len()) - } -} - -impl<'a, T> From<&std::vec::Vec> for Slice<'a, T> { - fn from(value: &std::vec::Vec) -> Self { - let ptr = value.as_ptr(); - let len = value.len(); - Slice::new(ptr, len) - } -} - -impl<'a> From<&'a str> for Slice<'a, c_char> { - fn from(s: &'a str) -> Self { - Slice::new(s.as_ptr() as *const c_char, s.len()) - } -} - -impl<'a, T> From> for &'a [T] { - fn from(value: Slice<'a, T>) -> &'a [T] { - unsafe { std::slice::from_raw_parts(value.ptr, value.len) } - } -} - -impl<'a> TryFrom> for &'a str { - type Error = Utf8Error; - - fn try_from(value: Slice<'a, u8>) -> Result { - let slice = value.into_slice(); - std::str::from_utf8(slice) - } -} - -impl<'a> TryFrom> for &'a str { - type Error = Utf8Error; - - fn try_from(slice: Slice<'a, i8>) -> Result { - // delegate to Slice implementation - let bytes = Slice::new(slice.ptr as *const u8, slice.len); - bytes.try_into() - } -} - -impl<'a> From> for Option<&'a str> { - fn from(value: Slice<'a, c_char>) -> Self { - match value.try_into() { - Ok(str) => Some(str), - Err(_) => None, - } - } -} - -#[cfg(test)] -mod test { - use std::convert::TryInto; - use std::os::raw::c_char; - use std::str::Utf8Error; - - use crate::Slice; - - #[test] - fn slice_from_u8() { - let raw = b"_ZN9wikipedia7article6formatE"; - let slice = Slice::new(raw.as_ptr(), raw.len()); - - let converted: &[u8] = slice.into(); - assert_eq!(converted, raw); - } - - #[test] - fn string_try_from_u8() { - let raw = b"_ZN9wikipedia7article6formatE"; - let slice = Slice::new(raw.as_ptr(), raw.len()); - - let result: Result<&str, Utf8Error> = slice.try_into(); - assert!(result.is_ok()); - - let expected = "_ZN9wikipedia7article6formatE"; - assert_eq!(expected, result.unwrap()) - } - - #[test] - fn string_try_from_c_char() { - let raw = b"_ZN9wikipedia7article6formatE"; - let slice = Slice::new(raw.as_ptr() as *const c_char, raw.len()); - - let result: Result<&str, Utf8Error> = slice.try_into(); - assert!(result.is_ok()); - - let expected = "_ZN9wikipedia7article6formatE"; - assert_eq!(expected, result.unwrap()) - } - - #[derive(Debug, Eq, PartialEq)] - struct Foo(i64); - - #[test] - fn slice_from_foo() { - let raw = Foo(42); - let ptr = &raw as *const Foo; - let slice = Slice::new(ptr, 1); - - let expected = &[raw]; - let actual: &[Foo] = slice.into(); - - assert_eq!(expected, actual) - } - - #[test] - fn slice_from_null() { - let ptr: *const usize = std::ptr::null(); - let expected: &[usize] = &[]; - let actual: &[usize] = Slice::new(ptr, 0).into(); - assert_eq!(expected, actual); - } -} diff --git a/ddprof-ffi/src/profiles.rs b/ddprof-ffi/src/profiles.rs index 1b8609f..de05525 100644 --- a/ddprof-ffi/src/profiles.rs +++ b/ddprof-ffi/src/profiles.rs @@ -1,7 +1,7 @@ // Unless explicitly stated otherwise all files in this repository are licensed under the Apache License Version 2.0. // This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present Datadog, Inc. -use crate::{CharSlice, Slice, Timespec}; +use crate::{AsBytes, CharSlice, Slice, Timespec}; use ddprof_profiles as profiles; use std::convert::{TryFrom, TryInto}; use std::error::Error; @@ -147,12 +147,12 @@ pub struct Sample<'a> { pub labels: Slice<'a, Label<'a>>, } -impl<'a> TryFrom> for profiles::api::Mapping<'a> { +impl<'a> TryFrom<&'a Mapping<'a>> for profiles::api::Mapping<'a> { type Error = Utf8Error; - fn try_from(mapping: Mapping<'a>) -> Result { - let filename: &str = mapping.filename.try_into()?; - let build_id: &str = mapping.build_id.try_into()?; + fn try_from(mapping: &'a Mapping<'a>) -> Result { + let filename = unsafe { mapping.filename.try_to_utf8() }?; + let build_id = unsafe { mapping.build_id.try_to_utf8() }?; Ok(Self { memory_start: mapping.memory_start, memory_limit: mapping.memory_limit, @@ -163,68 +163,65 @@ impl<'a> TryFrom> for profiles::api::Mapping<'a> { } } -impl<'a> From> for profiles::api::ValueType<'a> { - fn from(vt: ValueType<'a>) -> Self { - Self { - r#type: vt.type_.try_into().unwrap_or(""), - unit: vt.unit.try_into().unwrap_or(""), - } - } -} - -impl<'a> From<&ValueType<'a>> for profiles::api::ValueType<'a> { - fn from(vt: &ValueType<'a>) -> Self { - Self { - r#type: vt.type_.try_into().unwrap_or(""), - unit: vt.unit.try_into().unwrap_or(""), +impl<'a> From<&'a ValueType<'a>> for profiles::api::ValueType<'a> { + fn from(vt: &'a ValueType<'a>) -> Self { + unsafe { + Self { + r#type: vt.type_.try_to_utf8().unwrap_or(""), + unit: vt.unit.try_to_utf8().unwrap_or(""), + } } } } -impl<'a> From<&Period<'a>> for profiles::api::Period<'a> { - fn from(period: &Period<'a>) -> Self { +impl<'a> From<&'a Period<'a>> for profiles::api::Period<'a> { + fn from(period: &'a Period<'a>) -> Self { Self { - r#type: profiles::api::ValueType::from(period.type_), + r#type: profiles::api::ValueType::from(&period.type_), value: period.value, } } } -impl<'a> TryFrom> for profiles::api::Function<'a> { +impl<'a> TryFrom<&'a Function<'a>> for profiles::api::Function<'a> { type Error = Utf8Error; - fn try_from(function: Function<'a>) -> Result { - let name = function.name.try_into()?; - let system_name = function.system_name.try_into()?; - let filename = function.filename.try_into()?; - Ok(Self { - name, - system_name, - filename, - start_line: function.start_line, - }) + fn try_from(function: &'a Function<'a>) -> Result { + unsafe { + let name = function.name.try_to_utf8()?; + let system_name = function.system_name.try_to_utf8()?; + let filename = function.filename.try_to_utf8()?; + Ok(Self { + name, + system_name, + filename, + start_line: function.start_line, + }) + } } } -impl<'a> TryFrom> for profiles::api::Line<'a> { +impl<'a> TryFrom<&'a Line<'a>> for profiles::api::Line<'a> { type Error = Utf8Error; - fn try_from(line: Line<'a>) -> Result { + fn try_from(line: &'a Line<'a>) -> Result { Ok(Self { - function: line.function.try_into()?, + function: profiles::api::Function::try_from(&line.function)?, line: line.line, }) } } -impl<'a> TryFrom> for profiles::api::Location<'a> { +impl<'a> TryFrom<&'a Location<'a>> for profiles::api::Location<'a> { type Error = Utf8Error; - fn try_from(location: Location<'a>) -> Result { - let mapping: profiles::api::Mapping = location.mapping.try_into()?; + fn try_from(location: &'a Location<'a>) -> Result { + let mapping = profiles::api::Mapping::try_from(&location.mapping)?; let mut lines: Vec = Vec::new(); - for &line in location.lines.into_iter() { - lines.push(line.try_into()?); + unsafe { + for line in location.lines.as_slice().iter() { + lines.push(line.try_into()?); + } } Ok(Self { mapping, @@ -235,20 +232,28 @@ impl<'a> TryFrom> for profiles::api::Location<'a> { } } -impl<'a> TryFrom> for profiles::api::Label<'a> { +impl<'a> TryFrom<&'a Label<'a>> for profiles::api::Label<'a> { type Error = Utf8Error; - fn try_from(label: Label<'a>) -> Result { - let key: &str = label.key.try_into()?; - let str: Option<&str> = label.str.into(); - let num_unit: Option<&str> = label.num_unit.into(); + fn try_from(label: &'a Label<'a>) -> Result { + unsafe { + let key = label.key.try_to_utf8()?; + let str = label.str.try_to_utf8()?; + let str = if str.is_empty() { None } else { Some(str) }; + let num_unit = label.num_unit.try_to_utf8()?; + let num_unit = if num_unit.is_empty() { + None + } else { + Some(num_unit) + }; - Ok(Self { - key, - str, - num: label.num, - num_unit, - }) + Ok(Self { + key, + str, + num: label.num, + num_unit, + }) + } } } @@ -256,23 +261,26 @@ impl<'a> TryFrom> for profiles::api::Sample<'a> { type Error = Utf8Error; fn try_from(sample: Sample<'a>) -> Result { - let mut locations: Vec = Vec::with_capacity(sample.locations.len); - for &location in sample.locations.into_iter() { - locations.push(location.try_into()?) - } - - let values: Vec = sample.values.into_slice().to_vec(); - - let mut labels: Vec = Vec::with_capacity(sample.labels.len); - for &label in sample.labels.into_iter() { - labels.push(label.try_into()?); + let mut locations: Vec = + Vec::with_capacity(sample.locations.len()); + unsafe { + for location in sample.locations.as_slice().iter() { + locations.push(location.try_into()?) + } + + let values: Vec = sample.values.into_slice().to_vec(); + + let mut labels: Vec = Vec::with_capacity(sample.labels.len()); + for label in sample.labels.as_slice().iter() { + labels.push(label.try_into()?); + } + + Ok(Self { + locations, + values, + labels, + }) } - - Ok(Self { - locations, - values, - labels, - }) } } diff --git a/ddprof-ffi/src/slice.rs b/ddprof-ffi/src/slice.rs new file mode 100644 index 0000000..7349960 --- /dev/null +++ b/ddprof-ffi/src/slice.rs @@ -0,0 +1,254 @@ +use std::borrow::Cow; +use std::fmt::{Debug, Formatter}; +use std::marker::PhantomData; +use std::os::raw::c_char; +use std::str::Utf8Error; + +/// Remember, the data inside of each member is potentially coming from FFI, +/// so every operation on it is unsafe! +#[repr(C)] +#[derive(Copy, Clone)] +pub struct Slice<'a, T: 'a> { + ptr: *const T, + len: usize, + marker: PhantomData<&'a [T]>, +} + +impl<'a, T: Debug> Debug for Slice<'a, T> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + unsafe { f.debug_list().entries(self.as_slice().iter()).finish() } + } +} + +impl<'a, T: Eq> PartialEq for Slice<'a, T> { + fn eq(&self, other: &Self) -> bool { + unsafe { + return self.as_slice() == other.as_slice(); + } + } +} + +impl<'a, T: Eq> Eq for Slice<'a, T> {} + +// Use to represent strings -- should be valid UTF-8. +pub type CharSlice<'a> = crate::Slice<'a, c_char>; + +/// Use to represent bytes -- does not need to be valid UTF-8. +pub type ByteSlice<'a> = crate::Slice<'a, u8>; + +/// This exists as an intrinsic, but it is private. +pub fn is_aligned_and_not_null(ptr: *const T) -> bool { + !ptr.is_null() && ptr as usize % std::mem::align_of::() == 0 +} + +pub trait AsBytes<'a> { + /// # Safety + /// Each implementor must document their safety requirements, but this is expected to be + /// unsafe as this is for FFI types. + unsafe fn as_bytes(&'a self) -> &'a [u8]; + + /// # Safety + /// This function has the same safety requirements as `as_bytes`. + unsafe fn try_to_utf8(&'a self) -> Result<&'a str, Utf8Error> { + std::str::from_utf8(self.as_bytes()) + } + + /// # Safety + /// This function has the same safety requirements as `as_bytes` + unsafe fn to_utf8_lossy(&'a self) -> Cow<'a, str> { + String::from_utf8_lossy(self.as_bytes()) + } +} + +impl<'a> AsBytes<'a> for Slice<'a, u8> { + /// # Safety + /// Slice needs to satisfy most of the requirements of std::slice::from_raw_parts except the + /// aligned and non-null requirements, as this function will detect these conditions and + /// return an empty slice instead. + unsafe fn as_bytes(&'a self) -> &'a [u8] { + if is_aligned_and_not_null(self.ptr) { + std::slice::from_raw_parts(self.ptr, self.len) + } else { + &[] + } + } +} + +impl<'a> AsBytes<'a> for Slice<'a, i8> { + /// # Safety + /// Slice needs to satisfy most of the requirements of std::slice::from_raw_parts except the + /// aligned and non-null requirements, as this function will detect these conditions and + /// return an empty slice instead. + unsafe fn as_bytes(&'a self) -> &'a [u8] { + if is_aligned_and_not_null(self.ptr) { + std::slice::from_raw_parts(self.ptr as *const u8, self.len) + } else { + &[] + } + } +} + +impl<'a, T: 'a> Slice<'a, T> { + /// # Safety + /// This function mostly has the same safety requirements as `std::str::from_raw_parts`, but + /// it can tolerate mis-aligned and null pointers. + pub unsafe fn new(ptr: *const T, len: usize) -> Self { + if is_aligned_and_not_null(ptr) { + Self { + ptr, + len, + ..Default::default() + } + } else { + Slice::default() + } + } + + /// # Safety + /// This function mostly has the same safety requirements as `std::str::from_raw_parts`, but + /// it can tolerate mis-aligned and null pointers. + pub unsafe fn as_slice(&self) -> &'a [T] { + if is_aligned_and_not_null(self.ptr) { + std::slice::from_raw_parts(self.ptr, self.len) + } else { + &[] + } + } + + /// # Safety + /// This function mostly has the same safety requirements as `std::str::from_raw_parts`, but + /// it can tolerate mis-aligned and null pointers. + pub unsafe fn into_slice(self) -> &'a [T] { + if is_aligned_and_not_null(self.ptr) { + std::slice::from_raw_parts(self.ptr, self.len) + } else { + &[] + } + } + + pub fn len(&self) -> usize { + if is_aligned_and_not_null(self.ptr) { + self.len + } else { + 0 + } + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +impl<'a, T> Default for Slice<'a, T> { + fn default() -> Self { + /* The docs on std::slice::from_raw_parts indicate the pointer should be + * non-null and suitably aligned for T even for zero-length slices. + * Using a few tests, I wasn't actually able to create any harm with a + * null pointer; after all it shouldn't get de-referenced and such, but + * nonetheless we follow the documentation and use NonNull::dangling(), + * which it suggests. + * Since Slice's can be made from C, check for null and unaligned + * pointers in associated functions to defend against this. + */ + Self { + ptr: std::ptr::NonNull::dangling().as_ptr(), + len: 0, + marker: Default::default(), + } + } +} + +impl<'a, T: 'a> From<&'a [T]> for Slice<'a, T> { + fn from(s: &'a [T]) -> Self { + // SAFETY: Rust slices meet all the invariants required for Slice::new. + unsafe { Slice::new(s.as_ptr() as *const T, s.len()) } + } +} + +impl<'a, T> From<&'a Vec> for Slice<'a, T> { + fn from(value: &'a Vec) -> Self { + Slice::from(value.as_slice()) + } +} + +impl<'a> From<&'a str> for Slice<'a, c_char> { + fn from(s: &'a str) -> Self { + // SAFETY: Rust strings meet all the invariants required for Slice::new. + unsafe { Slice::new(s.as_ptr() as *const c_char, s.len()) } + } +} + +#[cfg(test)] +mod test { + use std::os::raw::c_char; + + use crate::*; + + #[test] + fn slice_from_into_slice() { + let raw: &[u8] = b"_ZN9wikipedia7article6formatE"; + let slice = Slice::from(raw); + + let converted: &[u8] = unsafe { slice.into_slice() }; + assert_eq!(converted, raw); + } + + #[test] + fn string_try_to_utf8() { + let raw: &[u8] = b"_ZN9wikipedia7article6formatE"; + let slice = Slice::from(raw); + + let result = unsafe { slice.try_to_utf8() }; + assert!(result.is_ok()); + + let expected = "_ZN9wikipedia7article6formatE"; + assert_eq!(expected, result.unwrap()) + } + + #[test] + fn string_from_c_char() { + let raw: &[u8] = b"_ZN9wikipedia7article6formatE"; + let slice = unsafe { Slice::new(raw.as_ptr() as *const c_char, raw.len()) }; + + let result = unsafe { slice.try_to_utf8() }; + assert!(result.is_ok()); + + let expected = "_ZN9wikipedia7article6formatE"; + assert_eq!(expected, result.unwrap()) + } + + #[derive(Debug, Eq, PartialEq)] + struct Foo(i64); + + #[test] + fn slice_from_foo() { + let raw = Foo(42); + let ptr = &raw as *const Foo; + let slice = unsafe { Slice::new(ptr, 1) }; + + let expected: &[Foo] = &[raw]; + let actual: &[Foo] = unsafe { slice.as_slice() }; + + assert_eq!(expected, actual) + } + + #[test] + fn slice_from_null() { + let ptr: *const usize = std::ptr::null(); + let expected: &[usize] = &[]; + let actual: &[usize] = unsafe { Slice::new(ptr, 0).as_slice() }; + assert_eq!(expected, actual); + } + + #[test] + fn test_iterator() { + let slice: &[i32] = &[1, 2, 3]; + let slice = Slice::from(slice); + + let mut iter = unsafe { slice.into_slice() }.iter(); + + assert_eq!(Some(&1), iter.next()); + assert_eq!(Some(&2), iter.next()); + assert_eq!(Some(&3), iter.next()); + } +} diff --git a/ddprof-ffi/src/vec.rs b/ddprof-ffi/src/vec.rs index e2b3feb..b51fd6d 100644 --- a/ddprof-ffi/src/vec.rs +++ b/ddprof-ffi/src/vec.rs @@ -66,7 +66,7 @@ impl<'a, T> IntoIterator for &'a Vec { type IntoIter = core::slice::Iter<'a, T>; fn into_iter(self) -> Self::IntoIter { - self.as_slice().into_slice().iter() + unsafe { self.as_slice().into_slice() }.iter() } } @@ -95,7 +95,18 @@ impl Vec { } pub fn as_slice(&self) -> Slice { - Slice::new(self.ptr, self.len) + unsafe { Slice::new(self.ptr, self.len) } + } + + pub fn iter(&self) -> std::slice::Iter { + unsafe { self.as_slice().into_slice() }.iter() + } + + pub fn last(&self) -> Option<&T> { + if self.len == 0 { + return None; + } + unsafe { self.ptr.add(self.len - 1).as_ref() } } } @@ -134,11 +145,11 @@ mod test { assert_eq!(ffi_vec.len(), 2); assert!(ffi_vec.capacity >= 2); - let slice = ffi_vec.as_slice(); - let first = unsafe { *(slice.ptr) }; - let second = unsafe { *(slice.ptr).add(1) }; - assert_eq!(first, 1); - assert_eq!(second, 2); + let slice = unsafe { ffi_vec.as_slice().as_slice() }; + let first = slice.get(0).unwrap(); + let second = slice.get(1).unwrap(); + assert_eq!(first, &1); + assert_eq!(second, &2); } #[test]