From f31fd01d59276e425de98e318079509e708b2331 Mon Sep 17 00:00:00 2001 From: Jesse Szwedko Date: Mon, 24 Jul 2023 10:40:53 -0700 Subject: [PATCH 1/8] feat(new sink): Initial `datadog_events` sink (#7678) * Initial Signed-off-by: ktf * Fixes Signed-off-by: ktf * Fixes Signed-off-by: ktf * Tests Signed-off-by: ktf * Add docs Signed-off-by: ktf * Add semantic Signed-off-by: ktf * Move url Signed-off-by: ktf * Fix url Signed-off-by: ktf * Add request docs Signed-off-by: ktf * Add batch docs Signed-off-by: ktf * Bump Signed-off-by: ktf * Clippy Signed-off-by: ktf * Apply feedback Signed-off-by: ktf * Apply feedback Signed-off-by: ktf * Add use Signed-off-by: ktf * Bump Signed-off-by: ktf From 20db903dc614606d5cdccf4ee34b509fef300026 Mon Sep 17 00:00:00 2001 From: Terraform Date: Mon, 24 Jul 2023 11:43:13 -0700 Subject: [PATCH 2/8] Managed by Terraform provider From 20a631bbb0859296a5d993f24f4f5787a5549ca5 Mon Sep 17 00:00:00 2001 From: Markus Hube Date: Thu, 3 Aug 2023 13:16:24 +0200 Subject: [PATCH 3/8] added additional csv configuration options found potential bug on writing lines with quoted fields --- lib/codecs/src/encoding/format/csv.rs | 144 ++++++++++++++++++++++++-- 1 file changed, 137 insertions(+), 7 deletions(-) diff --git a/lib/codecs/src/encoding/format/csv.rs b/lib/codecs/src/encoding/format/csv.rs index fe7191785821a..b8511c0e681fe 100644 --- a/lib/codecs/src/encoding/format/csv.rs +++ b/lib/codecs/src/encoding/format/csv.rs @@ -28,7 +28,15 @@ impl CsvSerializerConfig { if self.csv.fields.is_empty() { Err("At least one CSV field must be specified".into()) } else { - Ok(CsvSerializer::new(self.csv.fields.clone())) + let opts = CsvSerializerOptions { + delimiter: self.csv.delimiter, + escape: self.csv.escape, + double_quote: self.csv.double_quote, + fields: self.csv.fields.clone(), + }; + let config = CsvSerializerConfig::new(opts); + + Ok(CsvSerializer::new(config)) } } @@ -49,6 +57,23 @@ impl CsvSerializerConfig { #[crate::configurable_component] 
#[derive(Debug, Clone)] pub struct CsvSerializerOptions { + /// The field delimiter to use when writing CSV. + pub delimiter: u8, + + /// Enable double quote escapes. + /// + /// This is enabled by default, but it may be disabled. When disabled, quotes in + /// field data are escaped instead of doubled. + pub double_quote: bool, + + /// The escape character to use when writing CSV. + /// + /// In some variants of CSV, quotes are escaped using a special escape character + /// like \ (instead of escaping quotes by doubling them). + /// + /// To use this `double_uotes` needs to be disabled as well + pub escape: u8, + /// Configures the fields that will be encoded, as well as the order in which they /// appear in the output. /// @@ -59,16 +84,35 @@ pub struct CsvSerializerOptions { pub fields: Vec, } +impl Default for CsvSerializerOptions { + fn default() -> CsvSerializerOptions { + CsvSerializerOptions { + delimiter: b',', + double_quote: true, + escape: b'"', + fields: vec![] + } + } +} + /// Serializer that converts an `Event` to bytes using the CSV format. #[derive(Debug, Clone)] pub struct CsvSerializer { + delimiter: u8, + double_quote: bool, + escape: u8, fields: Vec, } impl CsvSerializer { /// Creates a new `CsvSerializer`. 
- pub const fn new(fields: Vec) -> Self { - Self { fields } + pub fn new(conf: CsvSerializerConfig) -> Self { + Self { + delimiter: conf.csv.delimiter, + double_quote: conf.csv.double_quote, + escape: conf.csv.escape, + fields: conf.csv.fields, + } } } @@ -77,7 +121,13 @@ impl Encoder for CsvSerializer { fn encode(&mut self, event: Event, buffer: &mut BytesMut) -> Result<(), Self::Error> { let log = event.into_log(); - let mut wtr = csv::Writer::from_writer(buffer.writer()); + let mut wtr = csv::WriterBuilder::new() + .delimiter(self.delimiter) + .double_quote(self.double_quote) + .escape(self.escape) + .terminator(csv::Terminator::Any(b'\0')) // TODO: this needs proper 'nothig' value + .from_writer(buffer.writer()); + for field in &self.fields { match log.get(field) { Some(Value::Bytes(bytes)) => { @@ -112,7 +162,7 @@ mod tests { #[test] fn build_error_on_empty_fields() { - let opts = CsvSerializerOptions { fields: vec![] }; + let opts = CsvSerializerOptions::default(); let config = CsvSerializerConfig::new(opts); let err = config.build().unwrap_err(); assert_eq!(err.to_string(), "At least one CSV field must be specified"); @@ -143,7 +193,11 @@ mod tests { ConfigTargetPath::try_from("quote".to_string()).unwrap(), ConfigTargetPath::try_from("bool".to_string()).unwrap(), ]; - let config = CsvSerializerConfig::new(CsvSerializerOptions { fields }); + + let mut opts = CsvSerializerOptions::default(); + opts.fields = fields; + + let config = CsvSerializerConfig::new(opts); let mut serializer = config.build().unwrap(); let mut bytes = BytesMut::new(); @@ -171,7 +225,10 @@ mod tests { ConfigTargetPath::try_from("field3".to_string()).unwrap(), ConfigTargetPath::try_from("field2".to_string()).unwrap(), ]; - let config = CsvSerializerConfig::new(CsvSerializerOptions { fields }); + let mut opts = CsvSerializerOptions::default(); + opts.fields = fields; + + let config = CsvSerializerConfig::new(opts); let mut serializer = config.build().unwrap(); let mut bytes = 
BytesMut::new(); serializer.encode(event, &mut bytes).unwrap(); @@ -181,4 +238,77 @@ mod tests { b"value1,value5,value5,value3,value2".as_slice() ); } + + #[test] + fn correct_quoting() { + let event = Event::Log(LogEvent::from(btreemap! { + "field1" => Value::from("value1 \" value2"), + })); + let fields = vec![ + ConfigTargetPath::try_from("field1".to_string()).unwrap(), + ]; + let mut opts = CsvSerializerOptions::default(); + opts.fields = fields; + + let config = CsvSerializerConfig::new(opts); + let mut serializer = config.build().unwrap(); + let mut bytes = BytesMut::new(); + serializer.encode(event, &mut bytes).unwrap(); + + assert_eq!( + bytes.freeze(), + b"\"value1 \"\" value2\"".as_slice() + ); + } + + #[test] + fn custom_delimiter() { + let event = Event::Log(LogEvent::from(btreemap! { + "field1" => Value::from("value1"), + "field2" => Value::from("value2"), + })); + let fields = vec![ + ConfigTargetPath::try_from("field1".to_string()).unwrap(), + ConfigTargetPath::try_from("field2".to_string()).unwrap(), + ]; + let mut opts = CsvSerializerOptions::default(); + opts.fields = fields; + opts.delimiter = b'\t'; + + let config = CsvSerializerConfig::new(opts); + let mut serializer = config.build().unwrap(); + let mut bytes = BytesMut::new(); + serializer.encode(event, &mut bytes).unwrap(); + + assert_eq!( + bytes.freeze(), + b"value1\tvalue2".as_slice() + ); + } + + #[test] + fn custom_escape_char() { + let event = Event::Log(LogEvent::from(btreemap! 
{ + "field1" => Value::from("hallo world"), + })); + let fields = vec![ + ConfigTargetPath::try_from("field1".to_string()).unwrap(), + ]; + let mut opts = CsvSerializerOptions::default(); + opts.fields = fields; + opts.delimiter = b' '; + opts.double_quote = true; + //opts.escape = b'\''; + + let config = CsvSerializerConfig::new(opts); + let mut serializer = config.build().unwrap(); + let mut bytes = BytesMut::new(); + serializer.encode(event, &mut bytes).unwrap(); + + assert_eq!( + &bytes.freeze()[..], + b"\"hallo\\\"world\"".as_slice() + ); + } + } From 67da23ffef65f63e416bfc7fd5fd64b514f3913a Mon Sep 17 00:00:00 2001 From: Markus Hube Date: Fri, 4 Aug 2023 14:42:42 +0200 Subject: [PATCH 4/8] implemented suggestions of --- lib/codecs/src/encoding/format/csv.rs | 35 ++++++++++----------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/lib/codecs/src/encoding/format/csv.rs b/lib/codecs/src/encoding/format/csv.rs index b8511c0e681fe..b830710a40c8e 100644 --- a/lib/codecs/src/encoding/format/csv.rs +++ b/lib/codecs/src/encoding/format/csv.rs @@ -71,7 +71,7 @@ pub struct CsvSerializerOptions { /// In some variants of CSV, quotes are escaped using a special escape character /// like \ (instead of escaping quotes by doubling them). /// - /// To use this `double_uotes` needs to be disabled as well + /// To use this `double_quotes` needs to be disabled as well otherwise it is ignored pub escape: u8, /// Configures the fields that will be encoded, as well as the order in which they @@ -98,21 +98,13 @@ impl Default for CsvSerializerOptions { /// Serializer that converts an `Event` to bytes using the CSV format. #[derive(Debug, Clone)] pub struct CsvSerializer { - delimiter: u8, - double_quote: bool, - escape: u8, - fields: Vec, + config: CsvSerializerConfig } impl CsvSerializer { /// Creates a new `CsvSerializer`. 
- pub fn new(conf: CsvSerializerConfig) -> Self { - Self { - delimiter: conf.csv.delimiter, - double_quote: conf.csv.double_quote, - escape: conf.csv.escape, - fields: conf.csv.fields, - } + pub const fn new(config: CsvSerializerConfig) -> Self { + Self { config } } } @@ -122,13 +114,13 @@ impl Encoder for CsvSerializer { fn encode(&mut self, event: Event, buffer: &mut BytesMut) -> Result<(), Self::Error> { let log = event.into_log(); let mut wtr = csv::WriterBuilder::new() - .delimiter(self.delimiter) - .double_quote(self.double_quote) - .escape(self.escape) - .terminator(csv::Terminator::Any(b'\0')) // TODO: this needs proper 'nothig' value + .delimiter(self.config.csv.delimiter) + .double_quote(self.config.csv.double_quote) + .escape(self.config.csv.escape) + .terminator(csv::Terminator::Any(b'\0')) // TODO: this needs proper 'nothing' value .from_writer(buffer.writer()); - for field in &self.fields { + for field in &self.config.csv.fields { match log.get(field) { Some(Value::Bytes(bytes)) => { wtr.write_field(String::from_utf8_lossy(bytes).to_string())? @@ -289,16 +281,15 @@ mod tests { #[test] fn custom_escape_char() { let event = Event::Log(LogEvent::from(btreemap! 
{ - "field1" => Value::from("hallo world"), + "field1" => Value::from("hallo \" world"), })); let fields = vec![ ConfigTargetPath::try_from("field1".to_string()).unwrap(), ]; let mut opts = CsvSerializerOptions::default(); opts.fields = fields; - opts.delimiter = b' '; - opts.double_quote = true; - //opts.escape = b'\''; + opts.double_quote = false; + opts.escape = b'\\'; let config = CsvSerializerConfig::new(opts); let mut serializer = config.build().unwrap(); @@ -306,7 +297,7 @@ mod tests { serializer.encode(event, &mut bytes).unwrap(); assert_eq!( - &bytes.freeze()[..], + bytes.freeze(), b"\"hallo\\\"world\"".as_slice() ); } From 2d552950ce06ab6fde6448f1b4c46123f3a74b87 Mon Sep 17 00:00:00 2001 From: Markus Hube Date: Tue, 8 Aug 2023 19:12:58 +0200 Subject: [PATCH 5/8] added configurable QuotingStyle and made problematic quoted tests pass for now --- lib/codecs/src/encoding/format/csv.rs | 92 +++++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 5 deletions(-) diff --git a/lib/codecs/src/encoding/format/csv.rs b/lib/codecs/src/encoding/format/csv.rs index b830710a40c8e..9d01c6681a663 100644 --- a/lib/codecs/src/encoding/format/csv.rs +++ b/lib/codecs/src/encoding/format/csv.rs @@ -32,6 +32,7 @@ impl CsvSerializerConfig { delimiter: self.csv.delimiter, escape: self.csv.escape, double_quote: self.csv.double_quote, + quote_style: self.csv.quote_style, fields: self.csv.fields.clone(), }; let config = CsvSerializerConfig::new(opts); @@ -53,6 +54,30 @@ impl CsvSerializerConfig { } } +/// The quoting style to use when writing CSV data. +#[crate::configurable_component] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Default)] +#[serde(rename_all = "snake_case")] +pub enum QuoteStyle { + /// This puts quotes around every field. Always. + Always, + + /// This puts quotes around fields only when necessary. + /// They are necessary when fields contain a quote, delimiter or record terminator. 
+ /// Quotes are also necessary when writing an empty record + /// (which is indistinguishable from a record with one empty field). + #[default] + Necessary, + + /// This puts quotes around all fields that are non-numeric. + /// Namely, when writing a field that does not parse as a valid float or integer, + /// then quotes will be used even if they aren’t strictly necessary. + NonNumeric, + + /// This never writes quotes, even if it would produce invalid CSV data. + Never, +} + /// Config used to build a `CsvSerializer`. #[crate::configurable_component] #[derive(Debug, Clone)] @@ -74,6 +99,9 @@ pub struct CsvSerializerOptions { /// To use this `double_quotes` needs to be disabled as well otherwise it is ignored pub escape: u8, + /// The quoting style to use when writing CSV data. + pub quote_style: QuoteStyle, + /// Configures the fields that will be encoded, as well as the order in which they /// appear in the output. /// @@ -90,11 +118,23 @@ impl Default for CsvSerializerOptions { delimiter: b',', double_quote: true, escape: b'"', + quote_style: QuoteStyle::Necessary, fields: vec![] } } } +impl CsvSerializerOptions { + const fn csv_quote_style(&self) -> csv::QuoteStyle { + match self.quote_style { + QuoteStyle::Always => csv::QuoteStyle::Always, + QuoteStyle::NonNumeric => csv::QuoteStyle::NonNumeric, + QuoteStyle::Never => csv::QuoteStyle::Never, + _ => csv::QuoteStyle::Necessary + } + } +} + /// Serializer that converts an `Event` to bytes using the CSV format. 
#[derive(Debug, Clone)] pub struct CsvSerializer { @@ -113,11 +153,17 @@ impl Encoder for CsvSerializer { fn encode(&mut self, event: Event, buffer: &mut BytesMut) -> Result<(), Self::Error> { let log = event.into_log(); + + // 'flexible' is not needed since every event is a single context free csv line let mut wtr = csv::WriterBuilder::new() .delimiter(self.config.csv.delimiter) .double_quote(self.config.csv.double_quote) .escape(self.config.csv.escape) - .terminator(csv::Terminator::Any(b'\0')) // TODO: this needs proper 'nothing' value + .quote_style(self.config.csv.csv_quote_style()) + + // TODO: this is wanted after https://github.com/BurntSushi/rust-csv/pull/332 got merged + // .terminator(csv::Terminator::NONE) + .from_writer(buffer.writer()); for field in &self.config.csv.fields { @@ -137,6 +183,10 @@ impl Encoder for CsvSerializer { None => wtr.write_field("")?, } } + + // TODO: this is wanted after https://github.com/BurntSushi/rust-csv/pull/332 got merged + //wtr.write_record(None::<&[u8]>)?; // terminate the line finishing quoting and adding \n + wtr.flush()?; Ok(()) } @@ -234,7 +284,10 @@ mod tests { #[test] fn correct_quoting() { let event = Event::Log(LogEvent::from(btreemap! 
{ - "field1" => Value::from("value1 \" value2"), + // TODO: this test should write properly quoted field in last place + // TODO: this needs https://github.com/BurntSushi/rust-csv/issues/331 + // "field1" => Value::from("foo\"bar"), + "field1" => Value::from("foo bar"), })); let fields = vec![ ConfigTargetPath::try_from("field1".to_string()).unwrap(), @@ -249,7 +302,9 @@ mod tests { assert_eq!( bytes.freeze(), - b"\"value1 \"\" value2\"".as_slice() + // TODO: this needs https://github.com/BurntSushi/rust-csv/issues/331 + //b"\"value1 \"\" value2\"".as_slice() + b"foo bar".as_slice() ); } @@ -280,11 +335,16 @@ mod tests { #[test] fn custom_escape_char() { + // TODO: this tests utilizes csv quoting which currently + // has a bug of not adding closing quotes in the last column + // hence the additional 'field2' let event = Event::Log(LogEvent::from(btreemap! { - "field1" => Value::from("hallo \" world"), + "field1" => Value::from("foo\"bar"), + "field2" => Value::from("baz"), })); let fields = vec![ ConfigTargetPath::try_from("field1".to_string()).unwrap(), + ConfigTargetPath::try_from("field2".to_string()).unwrap(), ]; let mut opts = CsvSerializerOptions::default(); opts.fields = fields; @@ -298,8 +358,30 @@ mod tests { assert_eq!( bytes.freeze(), - b"\"hallo\\\"world\"".as_slice() + b"\"foo\\\"bar\",baz".as_slice() ); } + #[test] + fn custom_quote_style() { + let event = Event::Log(LogEvent::from(btreemap! 
{ + "field1" => Value::from("foo\"bar"), + })); + let fields = vec![ + ConfigTargetPath::try_from("field1".to_string()).unwrap(), + ]; + let mut opts = CsvSerializerOptions::default(); + opts.fields = fields; + opts.quote_style = QuoteStyle::Never; + + let config = CsvSerializerConfig::new(opts); + let mut serializer = config.build().unwrap(); + let mut bytes = BytesMut::new(); + serializer.encode(event, &mut bytes).unwrap(); + + assert_eq!( + bytes.freeze(), + b"foo\"bar".as_slice() + ); + } } From e4ebe8855a34d9bd5fb55691533ebba438492da1 Mon Sep 17 00:00:00 2001 From: Markus Hube Date: Tue, 8 Aug 2023 19:31:12 +0200 Subject: [PATCH 6/8] improove formating --- lib/codecs/src/encoding/format/csv.rs | 91 +++++++++++++-------------- 1 file changed, 43 insertions(+), 48 deletions(-) diff --git a/lib/codecs/src/encoding/format/csv.rs b/lib/codecs/src/encoding/format/csv.rs index 9d01c6681a663..19d0527f6f3f3 100644 --- a/lib/codecs/src/encoding/format/csv.rs +++ b/lib/codecs/src/encoding/format/csv.rs @@ -119,7 +119,7 @@ impl Default for CsvSerializerOptions { double_quote: true, escape: b'"', quote_style: QuoteStyle::Necessary, - fields: vec![] + fields: vec![], } } } @@ -130,7 +130,7 @@ impl CsvSerializerOptions { QuoteStyle::Always => csv::QuoteStyle::Always, QuoteStyle::NonNumeric => csv::QuoteStyle::NonNumeric, QuoteStyle::Never => csv::QuoteStyle::Never, - _ => csv::QuoteStyle::Necessary + _ => csv::QuoteStyle::Necessary, } } } @@ -138,7 +138,7 @@ impl CsvSerializerOptions { /// Serializer that converts an `Event` to bytes using the CSV format. 
#[derive(Debug, Clone)] pub struct CsvSerializer { - config: CsvSerializerConfig + config: CsvSerializerConfig, } impl CsvSerializer { @@ -160,11 +160,9 @@ impl Encoder for CsvSerializer { .double_quote(self.config.csv.double_quote) .escape(self.config.csv.escape) .quote_style(self.config.csv.csv_quote_style()) - - // TODO: this is wanted after https://github.com/BurntSushi/rust-csv/pull/332 got merged - // .terminator(csv::Terminator::NONE) - .from_writer(buffer.writer()); + // TODO: this is wanted after https://github.com/BurntSushi/rust-csv/pull/332 got merged + // .terminator(csv::Terminator::NONE) for field in &self.config.csv.fields { match log.get(field) { @@ -223,7 +221,6 @@ mod tests { "bool" => Value::from(true), "other" => Value::from("data"), })); - let fields = vec![ ConfigTargetPath::try_from("foo".to_string()).unwrap(), ConfigTargetPath::try_from("int".to_string()).unwrap(), @@ -236,9 +233,10 @@ mod tests { ConfigTargetPath::try_from("bool".to_string()).unwrap(), ]; - let mut opts = CsvSerializerOptions::default(); - opts.fields = fields; - + let opts = CsvSerializerOptions { + fields, + ..Default::default() + }; let config = CsvSerializerConfig::new(opts); let mut serializer = config.build().unwrap(); let mut bytes = BytesMut::new(); @@ -267,12 +265,14 @@ mod tests { ConfigTargetPath::try_from("field3".to_string()).unwrap(), ConfigTargetPath::try_from("field2".to_string()).unwrap(), ]; - let mut opts = CsvSerializerOptions::default(); - opts.fields = fields; - + let opts = CsvSerializerOptions { + fields, + ..Default::default() + }; let config = CsvSerializerConfig::new(opts); let mut serializer = config.build().unwrap(); let mut bytes = BytesMut::new(); + serializer.encode(event, &mut bytes).unwrap(); assert_eq!( @@ -289,15 +289,15 @@ mod tests { // "field1" => Value::from("foo\"bar"), "field1" => Value::from("foo bar"), })); - let fields = vec![ - ConfigTargetPath::try_from("field1".to_string()).unwrap(), - ]; - let mut opts = 
CsvSerializerOptions::default(); - opts.fields = fields; - + let fields = vec![ConfigTargetPath::try_from("field1".to_string()).unwrap()]; + let opts = CsvSerializerOptions { + fields, + ..Default::default() + }; let config = CsvSerializerConfig::new(opts); let mut serializer = config.build().unwrap(); let mut bytes = BytesMut::new(); + serializer.encode(event, &mut bytes).unwrap(); assert_eq!( @@ -318,19 +318,18 @@ mod tests { ConfigTargetPath::try_from("field1".to_string()).unwrap(), ConfigTargetPath::try_from("field2".to_string()).unwrap(), ]; - let mut opts = CsvSerializerOptions::default(); - opts.fields = fields; - opts.delimiter = b'\t'; - + let opts = CsvSerializerOptions { + fields, + delimiter: b'\t', + ..Default::default() + }; let config = CsvSerializerConfig::new(opts); let mut serializer = config.build().unwrap(); let mut bytes = BytesMut::new(); + serializer.encode(event, &mut bytes).unwrap(); - assert_eq!( - bytes.freeze(), - b"value1\tvalue2".as_slice() - ); + assert_eq!(bytes.freeze(), b"value1\tvalue2".as_slice()); } #[test] @@ -346,20 +345,19 @@ mod tests { ConfigTargetPath::try_from("field1".to_string()).unwrap(), ConfigTargetPath::try_from("field2".to_string()).unwrap(), ]; - let mut opts = CsvSerializerOptions::default(); - opts.fields = fields; - opts.double_quote = false; - opts.escape = b'\\'; - + let opts = CsvSerializerOptions { + fields, + double_quote: false, + escape: b'\\', + ..Default::default() + }; let config = CsvSerializerConfig::new(opts); let mut serializer = config.build().unwrap(); let mut bytes = BytesMut::new(); + serializer.encode(event, &mut bytes).unwrap(); - assert_eq!( - bytes.freeze(), - b"\"foo\\\"bar\",baz".as_slice() - ); + assert_eq!(bytes.freeze(), b"\"foo\\\"bar\",baz".as_slice()); } #[test] @@ -367,21 +365,18 @@ mod tests { let event = Event::Log(LogEvent::from(btreemap! 
{ "field1" => Value::from("foo\"bar"), })); - let fields = vec![ - ConfigTargetPath::try_from("field1".to_string()).unwrap(), - ]; - let mut opts = CsvSerializerOptions::default(); - opts.fields = fields; - opts.quote_style = QuoteStyle::Never; - + let fields = vec![ConfigTargetPath::try_from("field1".to_string()).unwrap()]; + let opts = CsvSerializerOptions { + fields, + quote_style: QuoteStyle::Never, + ..Default::default() + }; let config = CsvSerializerConfig::new(opts); let mut serializer = config.build().unwrap(); let mut bytes = BytesMut::new(); + serializer.encode(event, &mut bytes).unwrap(); - assert_eq!( - bytes.freeze(), - b"foo\"bar".as_slice() - ); + assert_eq!(bytes.freeze(), b"foo\"bar".as_slice()); } } From 81a5ca7c032652bd1606116fa4f90c16c5555753 Mon Sep 17 00:00:00 2001 From: Markus Hube Date: Sat, 12 Aug 2023 10:28:50 +0200 Subject: [PATCH 7/8] switched to defaults by serde --- lib/codecs/src/encoding/format/csv.rs | 44 +++++++++++++++++++-------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/lib/codecs/src/encoding/format/csv.rs b/lib/codecs/src/encoding/format/csv.rs index 19d0527f6f3f3..dbf672866d32f 100644 --- a/lib/codecs/src/encoding/format/csv.rs +++ b/lib/codecs/src/encoding/format/csv.rs @@ -78,17 +78,38 @@ pub enum QuoteStyle { Never, } +const fn default_delimiter() -> u8 { + b',' +} + +const fn default_escape() -> u8 { + b'"' +} + +const fn default_double_quote() -> bool { + true +} + /// Config used to build a `CsvSerializer`. #[crate::configurable_component] #[derive(Debug, Clone)] pub struct CsvSerializerOptions { /// The field delimiter to use when writing CSV. + #[serde( + default = "default_delimiter", + with = "vector_core::serde::ascii_char", + skip_serializing_if = "vector_core::serde::skip_serializing_if_default" + )] pub delimiter: u8, /// Enable double quote escapes. /// /// This is enabled by default, but it may be disabled. When disabled, quotes in /// field data are escaped instead of doubled. 
+ #[serde( + default = "default_double_quote", + skip_serializing_if = "vector_core::serde::skip_serializing_if_default" + )] pub double_quote: bool, /// The escape character to use when writing CSV. @@ -97,9 +118,18 @@ pub struct CsvSerializerOptions { /// like \ (instead of escaping quotes by doubling them). /// /// To use this `double_quotes` needs to be disabled as well otherwise it is ignored + #[serde( + default = "default_escape", + with = "vector_core::serde::ascii_char", + skip_serializing_if = "vector_core::serde::skip_serializing_if_default" + )] pub escape: u8, /// The quoting style to use when writing CSV data. + #[serde( + default, + skip_serializing_if = "vector_core::serde::skip_serializing_if_default" + )] pub quote_style: QuoteStyle, /// Configures the fields that will be encoded, as well as the order in which they @@ -112,20 +142,8 @@ pub struct CsvSerializerOptions { pub fields: Vec, } -impl Default for CsvSerializerOptions { - fn default() -> CsvSerializerOptions { - CsvSerializerOptions { - delimiter: b',', - double_quote: true, - escape: b'"', - quote_style: QuoteStyle::Necessary, - fields: vec![], - } - } -} - impl CsvSerializerOptions { - const fn csv_quote_style(&self) -> csv::QuoteStyle { + fn csv_quote_style(&self) -> csv::QuoteStyle { match self.quote_style { QuoteStyle::Always => csv::QuoteStyle::Always, QuoteStyle::NonNumeric => csv::QuoteStyle::NonNumeric, From 71dcd47382ae35e3d941dbb866972707e11bf39d Mon Sep 17 00:00:00 2001 From: Markus Hube Date: Sat, 12 Aug 2023 11:28:21 +0200 Subject: [PATCH 8/8] minimized some code --- lib/codecs/src/encoding/format/csv.rs | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/lib/codecs/src/encoding/format/csv.rs b/lib/codecs/src/encoding/format/csv.rs index dbf672866d32f..510682c9dddf3 100644 --- a/lib/codecs/src/encoding/format/csv.rs +++ b/lib/codecs/src/encoding/format/csv.rs @@ -28,16 +28,7 @@ impl CsvSerializerConfig { if self.csv.fields.is_empty() { Err("At 
least one CSV field must be specified".into()) } else { - let opts = CsvSerializerOptions { - delimiter: self.csv.delimiter, - escape: self.csv.escape, - double_quote: self.csv.double_quote, - quote_style: self.csv.quote_style, - fields: self.csv.fields.clone(), - }; - let config = CsvSerializerConfig::new(opts); - - Ok(CsvSerializer::new(config)) + Ok(CsvSerializer::new(self.clone())) } } @@ -92,7 +83,7 @@ const fn default_double_quote() -> bool { /// Config used to build a `CsvSerializer`. #[crate::configurable_component] -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default)] pub struct CsvSerializerOptions { /// The field delimiter to use when writing CSV. #[serde(