diff --git a/Changelog.md b/Changelog.md index d0172282..aa9ecb9b 100644 --- a/Changelog.md +++ b/Changelog.md @@ -39,6 +39,7 @@ MSRV bumped to 1.56! Crate now uses Rust 2021 edition. - [#619]: Allow to raise application errors in `ElementWriter::write_inner_content` (and newly added `ElementWriter::write_inner_content_async` of course). - [#662]: Get rid of some allocations during serde deserialization. +- [#665]: Improve serialization of `xs:list`s when some elements serialized to an empty string. [#545]: https://github.com/tafia/quick-xml/pull/545 [#567]: https://github.com/tafia/quick-xml/issues/567 @@ -51,6 +52,7 @@ MSRV bumped to 1.56! Crate now uses Rust 2021 edition. [#660]: https://github.com/tafia/quick-xml/pull/660 [#661]: https://github.com/tafia/quick-xml/pull/661 [#662]: https://github.com/tafia/quick-xml/pull/662 +[#665]: https://github.com/tafia/quick-xml/pull/665 ## 0.30.0 -- 2023-07-23 diff --git a/src/de/key.rs b/src/de/key.rs index 2a356741..2645d906 100644 --- a/src/de/key.rs +++ b/src/de/key.rs @@ -315,6 +315,9 @@ mod tests { #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Newtype(String); + #[derive(Debug, Deserialize, Serialize, PartialEq)] + struct Tuple((), ()); + #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Struct { key: String, @@ -459,8 +462,8 @@ mod tests { => Custom("invalid type: string \"name\", expected a sequence")); err!(tuple: ((), ()) = "name" => Custom("invalid type: string \"name\", expected a tuple of size 2")); - err!(tuple_struct: ((), ()) = "name" - => Custom("invalid type: string \"name\", expected a tuple of size 2")); + err!(tuple_struct: Tuple = "name" + => Custom("invalid type: string \"name\", expected tuple struct Tuple")); err!(map: HashMap<(), ()> = "name" => Custom("invalid type: string \"name\", expected a map")); diff --git a/src/de/simple_type.rs b/src/de/simple_type.rs index e8968e1d..32d7585d 100644 --- a/src/de/simple_type.rs +++ b/src/de/simple_type.rs @@ -881,6 +881,9 @@ mod tests { #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Newtype(String); + #[derive(Debug, Deserialize, Serialize, PartialEq)] + struct Tuple((), ()); + #[derive(Debug, Deserialize, Serialize, PartialEq)] struct BorrowedNewtype<'a>(&'a str); @@ -950,15 +953,17 @@ mod tests { assert_eq!(data, $result); // Roundtrip to ensure that serializer corresponds to deserializer - assert_eq!( - data.serialize(AtomicSerializer { - writer: String::new(), + let mut buffer = String::new(); + let has_written = data + .serialize(AtomicSerializer { + writer: &mut buffer, target: QuoteTarget::Text, level: QuoteLevel::Full, + indent: Some(Indent::None), }) - .unwrap(), - $input - ); + .unwrap(); + assert_eq!(buffer, $input); + assert_eq!(has_written, !buffer.is_empty()); } }; } @@ -1040,7 +1045,7 @@ mod tests { => Unsupported("sequences are not supported as `xs:list` items")); err!(tuple: ((), ()) = "non-escaped string" => Unsupported("tuples are not supported as `xs:list` items")); - err!(tuple_struct: ((), ()) = "non-escaped string" + err!(tuple_struct: Tuple = "non-escaped string" => Unsupported("tuples are not supported as `xs:list` items")); err!(map: HashMap<(), ()> = "non-escaped string" diff --git a/src/se/content.rs b/src/se/content.rs index 7a1973bb..376f55ac 100644 --- a/src/se/content.rs +++ b/src/se/content.rs @@ -338,12 +338,12 @@ impl<'w, 'i, W: Write> SerializeTuple for ContentSerializer<'w, 'i, W> { where T: ?Sized + Serialize, { - ::serialize_element(self, value) + SerializeSeq::serialize_element(self, value) } #[inline] fn end(self) -> Result { - ::end(self) + SerializeSeq::end(self) } } @@ -356,12 +356,12 @@ impl<'w, 'i, W: Write> SerializeTupleStruct for ContentSerializer<'w, 'i, W> { where T: ?Sized + Serialize, { - ::serialize_element(self, value) + SerializeSeq::serialize_element(self, value) } #[inline] fn end(self) -> Result { - ::end(self) + SerializeSeq::end(self) } } diff --git a/src/se/element.rs b/src/se/element.rs index 75be2da9..f5a3e187 100644 --- a/src/se/element.rs +++ b/src/se/element.rs @@ -265,12 +265,12 @@ impl<'w, 'k, W: Write> SerializeTuple for ElementSerializer<'w, 'k, W> { where T: ?Sized + Serialize, { - ::serialize_element(self, value) + SerializeSeq::serialize_element(self, value) } #[inline] fn end(self) -> Result { - ::end(self) + SerializeSeq::end(self) } } @@ -283,12 +283,12 @@ impl<'w, 'k, W: Write> SerializeTupleStruct for ElementSerializer<'w, 'k, W> { where T: ?Sized + Serialize, { - ::serialize_element(self, value) + SerializeSeq::serialize_element(self, value) } #[inline] fn end(self) -> Result { - ::end(self) + SerializeSeq::end(self) } } @@ -314,16 +314,16 @@ impl<'w, 'k, W: Write> SerializeTupleVariant for Tuple<'w, 'k, W> { T: ?Sized + Serialize, { match self { - Tuple::Element(ser) => SerializeTuple::serialize_element(ser, value), - Tuple::Text(ser) => SerializeTuple::serialize_element(ser, value), + Self::Element(ser) => SerializeTuple::serialize_element(ser, value), + Self::Text(ser) => SerializeTuple::serialize_element(ser, value), } } #[inline] fn end(self) -> Result { match self { - Tuple::Element(ser) => SerializeTuple::end(ser), - Tuple::Text(ser) => SerializeTuple::end(ser).map(|_| ()), + Self::Element(ser) => SerializeTuple::end(ser), + Self::Text(ser) => SerializeTuple::end(ser).map(|_| ()), } } } @@ -461,12 +461,12 @@ impl<'w, 'k, W: Write> SerializeStructVariant for Struct<'w, 'k, W> { where T: ?Sized + Serialize, { - ::serialize_field(self, key, value) + SerializeStruct::serialize_field(self, key, value) } #[inline] fn end(self) -> Result { - ::end(self) + SerializeStruct::end(self) } } diff --git a/src/se/mod.rs b/src/se/mod.rs index 06b80044..bbd3ce2e 100644 --- a/src/se/mod.rs +++ b/src/se/mod.rs @@ -390,8 +390,12 @@ impl<'n> XmlName<'n> { //////////////////////////////////////////////////////////////////////////////////////////////////// pub(crate) enum Indent<'i> { + /// No indent should be written before the element None, + /// The specified indent should be written. The type owns the buffer with indent Owned(Indentation), + /// The specified indent should be written. The type borrows buffer with indent + /// from its owner Borrow(&'i mut Indentation), } diff --git a/src/se/simple_type.rs b/src/se/simple_type.rs index 58a4ca3b..dad26b1b 100644 --- a/src/se/simple_type.rs +++ b/src/se/simple_type.rs @@ -150,6 +150,15 @@ fn escape_list(value: &str, target: QuoteTarget, level: QuoteLevel) -> Cow //////////////////////////////////////////////////////////////////////////////////////////////////// +macro_rules! write_atomic { + ($method:ident ( $ty:ty )) => { + fn $method(mut self, value: $ty) -> Result { + self.write_str(&value.to_string())?; + Ok(true) + } + }; +} + /// A serializer that handles ordinary [simple type definition][item] with /// `{variety} = atomic`, or an ordinary [simple type] definition with /// `{variety} = union` whose basic members are all atomic. @@ -166,25 +175,35 @@ fn escape_list(value: &str, target: QuoteTarget, level: QuoteLevel) -> Cow /// /// Serialization of all other types returns [`Unsupported`][DeError::Unsupported] error. /// +/// This serializer returns `true` if something was written and `false` otherwise. +/// /// [item]: https://www.w3.org/TR/xmlschema11-1/#std-item_type_definition /// [simple type]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition -pub struct AtomicSerializer { +pub struct AtomicSerializer<'i, W: Write> { pub writer: W, pub target: QuoteTarget, /// Defines which XML characters need to be escaped pub level: QuoteLevel, + /// When `Some`, the indent that should be written before the content + /// if content is not an empty string. + /// When `None` an `xs:list` delimiter (a space) should be written + pub(crate) indent: Option>, } -impl AtomicSerializer { +impl<'i, W: Write> AtomicSerializer<'i, W> { fn write_str(&mut self, value: &str) -> Result<(), DeError> { - Ok(self - .writer - .write_str(&escape_item(value, self.target, self.level))?) + if let Some(indent) = self.indent.as_mut() { + indent.write_indent(&mut self.writer)?; + } else { + // TODO: Customization point -- possible non-XML compatible extension to specify delimiter char + self.writer.write_char(' ')?; + } + Ok(self.writer.write_str(value)?) } } -impl Serializer for AtomicSerializer { - type Ok = W; +impl<'i, W: Write> Serializer for AtomicSerializer<'i, W> { + type Ok = bool; type Error = DeError; type SerializeSeq = Impossible; @@ -195,11 +214,53 @@ impl Serializer for AtomicSerializer { type SerializeStruct = Impossible; type SerializeStructVariant = Impossible; - write_primitive!(); + fn serialize_bool(mut self, value: bool) -> Result { + self.write_str(if value { "true" } else { "false" })?; + Ok(true) + } + + write_atomic!(serialize_i8(i8)); + write_atomic!(serialize_i16(i16)); + write_atomic!(serialize_i32(i32)); + write_atomic!(serialize_i64(i64)); + + write_atomic!(serialize_u8(u8)); + write_atomic!(serialize_u16(u16)); + write_atomic!(serialize_u32(u32)); + write_atomic!(serialize_u64(u64)); + + serde_if_integer128! { + write_atomic!(serialize_i128(i128)); + write_atomic!(serialize_u128(u128)); + } + + write_atomic!(serialize_f32(f32)); + write_atomic!(serialize_f64(f64)); + + fn serialize_char(self, value: char) -> Result { + self.serialize_str(&value.to_string()) + } fn serialize_str(mut self, value: &str) -> Result { - self.write_str(value)?; - Ok(self.writer) + if !value.is_empty() { + self.write_str(&escape_item(value, self.target, self.level))?; + } + Ok(!value.is_empty()) + } + + fn serialize_bytes(self, _value: &[u8]) -> Result { + //TODO: Customization point - allow user to decide how to encode bytes + Err(DeError::Unsupported( + "`serialize_bytes` not supported yet".into(), + )) + } + + fn serialize_none(self) -> Result { + Ok(false) + } + + fn serialize_some(self, value: &T) -> Result { + value.serialize(self) } /// We cannot store anything, so the absence of a unit and presence of it @@ -222,6 +283,23 @@ impl Serializer for AtomicSerializer { )) } + fn serialize_unit_variant( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + ) -> Result { + self.serialize_str(variant) + } + + fn serialize_newtype_struct( + self, + _name: &'static str, + value: &T, + ) -> Result { + value.serialize(self) + } + /// We cannot store both a variant discriminant and a variant value, /// so serialization of enum newtype variant returns `Err(Unsupported)` fn serialize_newtype_variant( @@ -337,9 +415,7 @@ pub struct SimpleTypeSerializer<'i, W: Write> { impl<'i, W: Write> SimpleTypeSerializer<'i, W> { fn write_str(&mut self, value: &str) -> Result<(), DeError> { self.indent.write_indent(&mut self.writer)?; - Ok(self - .writer - .write_str(&escape_list(value, self.target, self.level))?) + Ok(self.writer.write_str(value)?) } } @@ -358,10 +434,9 @@ impl<'i, W: Write> Serializer for SimpleTypeSerializer<'i, W> { write_primitive!(); fn serialize_str(mut self, value: &str) -> Result { - if value.is_empty() { - self.indent = Indent::None; + if !value.is_empty() { + self.write_str(&escape_list(value, self.target, self.level))?; } - self.write_str(value)?; Ok(self.writer) } @@ -395,8 +470,8 @@ impl<'i, W: Write> Serializer for SimpleTypeSerializer<'i, W> { writer: self.writer, target: self.target, level: self.level, - first: true, indent: self.indent, + is_empty: true, }) } @@ -464,10 +539,10 @@ pub struct SimpleSeq<'i, W: Write> { writer: W, target: QuoteTarget, level: QuoteLevel, - /// If `true`, nothing was written yet - first: bool, /// Indent that should be written before the content if content is not an empty string indent: Indent<'i>, + /// If `true`, nothing was written yet to the `writer` + is_empty: bool, } impl<'i, W: Write> SerializeSeq for SimpleSeq<'i, W> { @@ -479,18 +554,19 @@ impl<'i, W: Write> SerializeSeq for SimpleSeq<'i, W> { T: ?Sized + Serialize, { // Write indent for the first element and delimiter for others - //FIXME: sequence with only empty strings will be serialized as indent only + delimiters - if self.first { - self.indent.write_indent(&mut self.writer)?; + let indent = if self.is_empty { + Some(self.indent.borrow()) } else { - self.writer.write_char(' ')?; - } - self.first = false; - value.serialize(AtomicSerializer { + None + }; + if value.serialize(AtomicSerializer { writer: &mut self.writer, target: self.target, level: self.level, - })?; + indent, + })? { + self.is_empty = false; + } Ok(()) } @@ -509,12 +585,12 @@ impl<'i, W: Write> SerializeTuple for SimpleSeq<'i, W> { where T: ?Sized + Serialize, { - ::serialize_element(self, value) + SerializeSeq::serialize_element(self, value) } #[inline] fn end(self) -> Result { - ::end(self) + SerializeSeq::end(self) } } @@ -527,12 +603,12 @@ impl<'i, W: Write> SerializeTupleStruct for SimpleSeq<'i, W> { where T: ?Sized + Serialize, { - ::serialize_element(self, value) + SerializeSeq::serialize_element(self, value) } #[inline] fn end(self) -> Result { - ::end(self) + SerializeSeq::end(self) } } @@ -829,14 +905,17 @@ mod tests { ($name:ident: $data:expr => $expected:literal) => { #[test] fn $name() { + let mut buffer = String::new(); let ser = AtomicSerializer { - writer: String::new(), + writer: &mut buffer, target: QuoteTarget::Text, level: QuoteLevel::Full, + indent: Some(Indent::None), }; - let buffer = $data.serialize(ser).unwrap(); + let has_written = $data.serialize(ser).unwrap(); assert_eq!(buffer, $expected); + assert_eq!(has_written, !buffer.is_empty()); } }; } @@ -852,6 +931,7 @@ mod tests { writer: &mut buffer, target: QuoteTarget::Text, level: QuoteLevel::Full, + indent: Some(Indent::None), }; match $data.serialize(ser).unwrap_err() { @@ -1038,7 +1118,7 @@ mod tests { serialize_as!(seq: vec![1, 2, 3] => "1 2 3"); serialize_as!(seq_empty: Vec::::new() => ""); serialize_as!(seq_with_1_empty_str: vec![""] => ""); - serialize_as!(seq_with_2_empty_strs: vec!["", ""] => " "); + serialize_as!(seq_with_2_empty_strs: vec!["", ""] => ""); serialize_as!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'> with spaces 3"); serialize_as!(tuple_struct: Tuple("first", 42) => "first 42"); @@ -1052,4 +1132,107 @@ mod tests { err!(enum_struct: Enum::Struct { key: "answer", val: 42 } => Unsupported("cannot serialize enum struct variant `Enum::Struct` as an attribute or text content value")); } + + mod simple_seq { + use super::*; + use crate::writer::Indentation; + use pretty_assertions::assert_eq; + + #[test] + fn empty_seq() { + let mut buffer = String::new(); + let mut indent = Indentation::new(b'*', 2); + indent.grow(); + let ser = SimpleSeq { + writer: &mut buffer, + target: QuoteTarget::Text, + level: QuoteLevel::Full, + indent: Indent::Owned(indent), + is_empty: true, + }; + + SerializeSeq::end(ser).unwrap(); + assert_eq!(buffer, ""); + } + + #[test] + fn all_items_empty() { + let mut buffer = String::new(); + let mut indent = Indentation::new(b'*', 2); + indent.grow(); + let mut ser = SimpleSeq { + writer: &mut buffer, + target: QuoteTarget::Text, + level: QuoteLevel::Full, + indent: Indent::Owned(indent), + is_empty: true, + }; + + SerializeSeq::serialize_element(&mut ser, "").unwrap(); + SerializeSeq::serialize_element(&mut ser, "").unwrap(); + SerializeSeq::serialize_element(&mut ser, "").unwrap(); + SerializeSeq::end(ser).unwrap(); + assert_eq!(buffer, ""); + } + + #[test] + fn some_items_empty1() { + let mut buffer = String::new(); + let mut indent = Indentation::new(b'*', 2); + indent.grow(); + let mut ser = SimpleSeq { + writer: &mut buffer, + target: QuoteTarget::Text, + level: QuoteLevel::Full, + indent: Indent::Owned(indent), + is_empty: true, + }; + + SerializeSeq::serialize_element(&mut ser, "").unwrap(); + SerializeSeq::serialize_element(&mut ser, &1).unwrap(); + SerializeSeq::serialize_element(&mut ser, "").unwrap(); + SerializeSeq::end(ser).unwrap(); + assert_eq!(buffer, "\n**1"); + } + + #[test] + fn some_items_empty2() { + let mut buffer = String::new(); + let mut indent = Indentation::new(b'*', 2); + indent.grow(); + let mut ser = SimpleSeq { + writer: &mut buffer, + target: QuoteTarget::Text, + level: QuoteLevel::Full, + indent: Indent::Owned(indent), + is_empty: true, + }; + + SerializeSeq::serialize_element(&mut ser, &1).unwrap(); + SerializeSeq::serialize_element(&mut ser, "").unwrap(); + SerializeSeq::serialize_element(&mut ser, &2).unwrap(); + SerializeSeq::end(ser).unwrap(); + assert_eq!(buffer, "\n**1 2"); + } + + #[test] + fn items() { + let mut buffer = String::new(); + let mut indent = Indentation::new(b'*', 2); + indent.grow(); + let mut ser = SimpleSeq { + writer: &mut buffer, + target: QuoteTarget::Text, + level: QuoteLevel::Full, + indent: Indent::Owned(indent), + is_empty: true, + }; + + SerializeSeq::serialize_element(&mut ser, &1).unwrap(); + SerializeSeq::serialize_element(&mut ser, &2).unwrap(); + SerializeSeq::serialize_element(&mut ser, &3).unwrap(); + SerializeSeq::end(ser).unwrap(); + assert_eq!(buffer, "\n**1 2 3"); + } + } } diff --git a/tests/serde-se.rs b/tests/serde-se.rs index c91c241e..0ea01a72 100644 --- a/tests/serde-se.rs +++ b/tests/serde-se.rs @@ -384,7 +384,7 @@ mod without_root { serialize_as!(newtype: ExternallyTagged::Newtype(true) => "true"); - serialize_as!(tuple_struct: + serialize_as!(tuple: ExternallyTagged::Tuple(42.0, "answer") => "42\ answer"); @@ -500,7 +500,7 @@ mod without_root { => "\ true\ "); - serialize_as_only!(tuple_struct: + serialize_as_only!(tuple: Root { field: ExternallyTagged::Tuple(42.0, "answer") } => "\ 42\ @@ -589,7 +589,7 @@ mod without_root { true\ \ "); - serialize_as_only!(tuple_struct: + serialize_as_only!(tuple: Root { field: Inner { inner: ExternallyTagged::Tuple(42.0, "answer") } } => "\ \ @@ -681,7 +681,7 @@ mod without_root { => "\ true\ "); - serialize_as!(tuple_struct: + serialize_as!(tuple: Root { field: ExternallyTagged::Tuple(42.0, "answer") } => "\ 42\ @@ -767,7 +767,7 @@ mod without_root { true\ \ "); - serialize_as!(tuple_struct: + serialize_as!(tuple: Root { field: Inner { inner: ExternallyTagged::Tuple(42.0, "answer") } } => "\ \ @@ -860,7 +860,7 @@ mod without_root { Root { field: ExternallyTagged::Newtype(true) } => Unsupported("cannot serialize enum newtype variant `ExternallyTagged::Newtype` as an attribute or text content value"), " Unsupported("cannot serialize enum tuple variant `ExternallyTagged::Tuple` as an attribute or text content value"), " Unsupported("cannot serialize enum newtype variant `ExternallyTagged::Newtype` as an attribute or text content value"), " Unsupported("cannot serialize enum tuple variant `ExternallyTagged::Tuple` as an attribute or text content value"), "Newtype\ true\ "); - serialize_as!(tuple_struct: + serialize_as!(tuple: AdjacentlyTagged::Tuple(42.0, "answer") => "\ Tuple\ @@ -1126,7 +1126,7 @@ mod without_root { => Unsupported("cannot serialize `()` without defined root tag")); err!(newtype: Untagged::Newtype(true) => Unsupported("cannot serialize `bool` without defined root tag")); - err!(tuple_struct: Untagged::Tuple(42.0, "answer") + err!(tuple: Untagged::Tuple(42.0, "answer") => Unsupported("cannot serialize unnamed tuple without defined root tag")); // NOTE: Cannot be deserialized in roundtrip due to // https://github.com/serde-rs/serde/issues/1183 @@ -1328,7 +1328,7 @@ mod without_root { serialize_as!(newtype: ExternallyTagged::Newtype(true) => "true"); - serialize_as!(tuple_struct: + serialize_as!(tuple: ExternallyTagged::Tuple(42.0, "answer") => "42\n\ answer"); @@ -1496,7 +1496,7 @@ mod without_root { Newtype\n \ true\n\ "); - serialize_as!(tuple_struct: + serialize_as!(tuple: AdjacentlyTagged::Tuple(42.0, "answer") => "\n \ Tuple\n \ @@ -1570,7 +1570,7 @@ mod without_root { => Unsupported("cannot serialize `()` without defined root tag")); err!(newtype: Untagged::Newtype(true) => Unsupported("cannot serialize `bool` without defined root tag")); - err!(tuple_struct: Untagged::Tuple(42.0, "answer") + err!(tuple: Untagged::Tuple(42.0, "answer") => Unsupported("cannot serialize unnamed tuple without defined root tag")); serialize_as!(struct_: Untagged::Struct { @@ -1817,7 +1817,7 @@ mod with_root { serialize_as!(newtype: ExternallyTagged::Newtype(true) => "true"); - serialize_as!(tuple_struct: + serialize_as!(tuple: ExternallyTagged::Tuple(42.0, "answer") => "42\ answer"); @@ -2002,7 +2002,7 @@ mod with_root { Newtype\ true\ "); - serialize_as!(tuple_struct: + serialize_as!(tuple: AdjacentlyTagged::Tuple(42.0, "answer") => "\ Tuple\ @@ -2086,7 +2086,7 @@ mod with_root { => "true"); // NOTE: Cannot be deserialized in roundtrip due to // https://github.com/serde-rs/serde/issues/1183 - serialize_as_only!(tuple_struct: + serialize_as_only!(tuple: Untagged::Tuple(42.0, "answer") => "42\ answer");