diff --git a/Changelog.md b/Changelog.md index 4d712574..45d4100a 100644 --- a/Changelog.md +++ b/Changelog.md @@ -20,6 +20,10 @@ MSRV bumped to 1.56! Crate now uses Rust 2021 edition. ### Bug Fixes +- [#660]: Fixed incorrect deserialization of `xs:list`s from empty tags (`` + or ``). Previously an `DeError::UnexpectedEof")` was returned in that case +- [#580]: Fixed incorrect deserialization of vectors of newtypes from sequences of tags. + ### Misc Changes - [#643]: Bumped MSRV to 1.56. In practice the previous MSRV was incorrect in many cases. @@ -32,11 +36,13 @@ MSRV bumped to 1.56! Crate now uses Rust 2021 edition. (and newly added `ElementWriter::write_inner_content_async` of course). [#545]: https://github.com/tafia/quick-xml/pull/545 +[#580]: https://github.com/tafia/quick-xml/issues/580 [#619]: https://github.com/tafia/quick-xml/issues/619 [#635]: https://github.com/tafia/quick-xml/pull/635 [#643]: https://github.com/tafia/quick-xml/pull/643 [#649]: https://github.com/tafia/quick-xml/pull/646 [#651]: https://github.com/tafia/quick-xml/pull/651 +[#660]: https://github.com/tafia/quick-xml/pull/660 ## 0.30.0 -- 2023-07-23 diff --git a/src/de/map.rs b/src/de/map.rs index c8ac059b..2202194d 100644 --- a/src/de/map.rs +++ b/src/de/map.rs @@ -373,6 +373,52 @@ macro_rules! forward { /// A deserializer for a value of map or struct. That deserializer slightly /// differently processes events for a primitive types and sequences than /// a [`Deserializer`]. +/// +/// This deserializer can see two kind of events at the start: +/// - [`DeEvent::Text`] +/// - [`DeEvent::Start`] +/// +/// which represents two possible variants of items: +/// ```xml +/// A tag item +/// A text item +/// +/// ``` +/// +/// This deserializer are very similar to a [`SeqItemDeserializer`]. The only difference +/// in the `deserialize_seq` method. This deserializer will act as an iterator +/// over tags / text within it's parent tag, whereas the [`SeqItemDeserializer`] +/// will represent sequences as an `xs:list`. +/// +/// This deserializer processes items as following: +/// - primitives (numbers, booleans, strings, characters) are deserialized either +/// from a text content, or unwrapped from a one level of a tag. So, `123` and +/// `123` both can be deserialized into an `u32`; +/// - `Option`: +/// - empty text of [`DeEvent::Text`] is deserialized as `None`; +/// - everything else are deserialized as `Some` using the same deserializer, +/// including `` or ``; +/// - units (`()`) and unit structs consumes the whole text or element subtree; +/// - newtype structs are deserialized by forwarding deserialization of inner type +/// with the same deserializer; +/// - sequences, tuples and tuple structs are deserialized by iterating within the +/// parent tag and deserializing each tag or text content using [`SeqItemDeserializer`]; +/// - structs and maps are deserialized using new instance of [`MapAccess`]; +/// - enums: +/// - in case of [`DeEvent::Text`] event the text content is deserialized as +/// a `$text` variant. Enum content is deserialized from the text using +/// [`SimpleTypeDeserializer`]; +/// - in case of [`DeEvent::Start`] event the tag name is deserialized as +/// an enum tag, and the content inside are deserialized as an enum content. +/// Depending on a variant kind deserialization is performed as: +/// - unit variants: consuming text content or a subtree; +/// - newtype variants: forward deserialization to the inner type using +/// this deserializer; +/// - tuple variants: call [`deserialize_tuple`] of this deserializer; +/// - struct variants: call [`deserialize_struct`] of this deserializer. +/// +/// [`deserialize_tuple`]: #method.deserialize_tuple +/// [`deserialize_struct`]: #method.deserialize_struct struct MapValueDeserializer<'de, 'a, 'm, R, E> where R: XmlRead<'de>, @@ -485,7 +531,6 @@ where forward!(deserialize_unit); - forward!(deserialize_map); forward!(deserialize_struct( name: &'static str, fields: &'static [&'static str] @@ -497,7 +542,6 @@ where )); forward!(deserialize_any); - forward!(deserialize_ignored_any); fn deserialize_option(self, visitor: V) -> Result where @@ -506,6 +550,19 @@ where deserialize_option!(self.map.de, self, visitor) } + /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`] + /// with the same deserializer. + fn deserialize_newtype_struct( + self, + _name: &'static str, + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + visitor.visit_newtype_struct(self) + } + /// Deserializes each `` in /// ```xml /// @@ -716,7 +773,59 @@ where //////////////////////////////////////////////////////////////////////////////////////////////////// -/// A deserializer for a single item of a sequence. +/// A deserializer for a single item of a mixed sequence of tags and text. +/// +/// This deserializer can see two kind of events at the start: +/// - [`DeEvent::Text`] +/// - [`DeEvent::Start`] +/// +/// which represents two possible variants of items: +/// ```xml +/// A tag item +/// A text item +/// +/// ``` +/// +/// This deserializer are very similar to a [`MapValueDeserializer`]. The only difference +/// in the `deserialize_seq` method. This deserializer will perform deserialization +/// from the textual content (the text itself in case of [`DeEvent::Text`] event +/// and the text between tags in case of [`DeEvent::Start`] event), whereas +/// the [`MapValueDeserializer`] will iterate over tags / text within it's parent tag. +/// +/// This deserializer processes items as following: +/// - primitives (numbers, booleans, strings, characters) are deserialized either +/// from a text content, or unwrapped from a one level of a tag. So, `123` and +/// `123` both can be deserialized into an `u32`; +/// - `Option`: +/// - empty text of [`DeEvent::Text`] is deserialized as `None`; +/// - everything else are deserialized as `Some` using the same deserializer, +/// including `` or ``; +/// - units (`()`) and unit structs consumes the whole text or element subtree; +/// - newtype structs are deserialized as tuple structs with one element; +/// - sequences, tuples and tuple structs are deserialized using [`SimpleTypeDeserializer`] +/// (this is the difference): +/// - in case of [`DeEvent::Text`] event text content passed to the deserializer directly; +/// - in case of [`DeEvent::Start`] event the start and end tags are stripped, +/// and text between them is passed to [`SimpleTypeDeserializer`]. If the tag +/// contains something else other than text, an error is returned, but if it +/// contains a text and something else (for example, `text`), +/// then the trail is just ignored; +/// - structs and maps are deserialized using new [`MapAccess`]; +/// - enums: +/// - in case of [`DeEvent::Text`] event the text content is deserialized as +/// a `$text` variant. Enum content is deserialized from the text using +/// [`SimpleTypeDeserializer`]; +/// - in case of [`DeEvent::Start`] event the tag name is deserialized as +/// an enum tag, and the content inside are deserialized as an enum content. +/// Depending on a variant kind deserialization is performed as: +/// - unit variants: consuming text content or a subtree; +/// - newtype variants: forward deserialization to the inner type using +/// this deserializer; +/// - tuple variants: deserialize it as an `xs:list`; +/// - struct variants: call [`deserialize_struct`] of this deserializer. +/// +/// [`deserialize_tuple`]: #method.deserialize_tuple +/// [`deserialize_struct`]: #method.deserialize_struct struct SeqItemDeserializer<'de, 'a, 'm, R, E> where R: XmlRead<'de>, @@ -754,7 +863,6 @@ where forward!(deserialize_unit); - forward!(deserialize_map); forward!(deserialize_struct( name: &'static str, fields: &'static [&'static str] @@ -766,7 +874,6 @@ where )); forward!(deserialize_any); - forward!(deserialize_ignored_any); fn deserialize_option(self, visitor: V) -> Result where @@ -775,6 +882,20 @@ where deserialize_option!(self.map.de, self, visitor) } + /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`] + /// with the [`SimpleTypeDeserializer`]. + fn deserialize_newtype_struct( + mut self, + _name: &'static str, + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + let text = self.read_string()?; + visitor.visit_newtype_struct(SimpleTypeDeserializer::from_text(text)) + } + /// This method deserializes a sequence inside of element that itself is a /// sequence element: /// @@ -787,34 +908,12 @@ where /// ... /// /// ``` - fn deserialize_seq(self, visitor: V) -> Result + fn deserialize_seq(mut self, visitor: V) -> Result where V: Visitor<'de>, { - match self.map.de.next()? { - DeEvent::Text(e) => { - SimpleTypeDeserializer::from_text_content(e).deserialize_seq(visitor) - } - // This is a sequence element. We cannot treat it as another flatten - // sequence if type will require `deserialize_seq` We instead forward - // it to `xs:simpleType` implementation - DeEvent::Start(e) => { - let value = match self.map.de.next()? { - DeEvent::Text(e) => { - SimpleTypeDeserializer::from_text_content(e).deserialize_seq(visitor) - } - e => Err(DeError::Unsupported( - format!("unsupported event {:?}", e).into(), - )), - }; - // TODO: May be assert that here we expect only matching closing tag? - self.map.de.read_to_end(e.name())?; - value - } - // SAFETY: we use that deserializer only when Start(element) or Text - // event was peeked already - _ => unreachable!(), - } + let text = self.read_string()?; + SimpleTypeDeserializer::from_text(text).deserialize_seq(visitor) } #[inline] diff --git a/src/de/mod.rs b/src/de/mod.rs index c3068b9e..eb60972d 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -1904,18 +1904,6 @@ macro_rules! deserialize_primitives { self.deserialize_unit(visitor) } - /// Representation of the newtypes the same as one-element [tuple](#method.deserialize_tuple). - fn deserialize_newtype_struct( - self, - _name: &'static str, - visitor: V, - ) -> Result - where - V: Visitor<'de>, - { - self.deserialize_tuple(1, visitor) - } - /// Representation of tuples the same as [sequences](#method.deserialize_seq). fn deserialize_tuple(self, _len: usize, visitor: V) -> Result where @@ -1937,6 +1925,16 @@ macro_rules! deserialize_primitives { self.deserialize_tuple(len, visitor) } + /// Forwards deserialization to the [`deserialize_struct`](#method.deserialize_struct) + /// with empty name and fields. + #[inline] + fn deserialize_map(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_struct("", &[], visitor) + } + /// Identifiers represented as [strings](#method.deserialize_str). fn deserialize_identifier(self, visitor: V) -> Result where @@ -1944,6 +1942,15 @@ macro_rules! deserialize_primitives { { self.deserialize_str(visitor) } + + /// Forwards deserialization to the [`deserialize_unit`](#method.deserialize_unit). + #[inline] + fn deserialize_ignored_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_unit(visitor) + } }; } @@ -2820,30 +2827,36 @@ where } } - fn deserialize_enum( + /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`] + /// with the same deserializer. + fn deserialize_newtype_struct( self, _name: &'static str, - _variants: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { - visitor.visit_enum(var::EnumAccess::new(self)) + visitor.visit_newtype_struct(self) } - fn deserialize_seq(self, visitor: V) -> Result + fn deserialize_enum( + self, + _name: &'static str, + _variants: &'static [&'static str], + visitor: V, + ) -> Result where V: Visitor<'de>, { - visitor.visit_seq(self) + visitor.visit_enum(var::EnumAccess::new(self)) } - fn deserialize_map(self, visitor: V) -> Result + fn deserialize_seq(self, visitor: V) -> Result where V: Visitor<'de>, { - self.deserialize_struct("", &[], visitor) + visitor.visit_seq(self) } fn deserialize_option(self, visitor: V) -> Result @@ -2853,39 +2866,13 @@ where deserialize_option!(self, self, visitor) } - /// Always call `visitor.visit_unit()` because returned value ignored in any case. - /// - /// This method consumes any single [event][DeEvent] except the [`Start`] - /// event, in which case all events up to and including corresponding [`End`] - /// event will be consumed. - /// - /// This method returns error if current event is [`End`] or [`Eof`]. - /// - /// [`Start`]: DeEvent::Start - /// [`End`]: DeEvent::End - /// [`Eof`]: DeEvent::Eof - fn deserialize_ignored_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - match self.next()? { - DeEvent::Start(e) => self.read_to_end(e.name())?, - DeEvent::End(e) => return Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())), - DeEvent::Eof => return Err(DeError::UnexpectedEof), - _ => (), - } - visitor.visit_unit() - } - fn deserialize_any(self, visitor: V) -> Result where V: Visitor<'de>, { match self.peek()? { - DeEvent::Start(_) => self.deserialize_map(visitor), - // Redirect to deserialize_unit in order to consume an event and return an appropriate error - DeEvent::End(_) | DeEvent::Eof => self.deserialize_unit(visitor), - _ => self.deserialize_string(visitor), + DeEvent::Text(_) => self.deserialize_str(visitor), + _ => self.deserialize_map(visitor), } } } diff --git a/src/de/simple_type.rs b/src/de/simple_type.rs index 0335722a..e8968e1d 100644 --- a/src/de/simple_type.rs +++ b/src/de/simple_type.rs @@ -495,13 +495,17 @@ pub struct SimpleTypeDeserializer<'de, 'a> { impl<'de, 'a> SimpleTypeDeserializer<'de, 'a> { /// Creates a deserializer from a value, that possible borrowed from input - pub fn from_text_content(value: Text<'de>) -> Self { - let content = match value.text { + pub fn from_text(text: Cow<'de, str>) -> Self { + let content = match text { Cow::Borrowed(slice) => CowRef::Input(slice.as_bytes()), Cow::Owned(content) => CowRef::Owned(content.into_bytes()), }; Self::new(content, false, Decoder::utf8()) } + /// Creates a deserializer from a value, that possible borrowed from input + pub fn from_text_content(value: Text<'de>) -> Self { + Self::from_text(value.text) + } /// Creates a deserializer from a part of value at specified range #[allow(clippy::ptr_arg)] diff --git a/tests/serde-de-seq.rs b/tests/serde-de-seq.rs index bddb8069..ca6fa341 100644 --- a/tests/serde-de-seq.rs +++ b/tests/serde-de-seq.rs @@ -802,6 +802,15 @@ mod fixed_name { use super::*; use pretty_assertions::assert_eq; + #[derive(Debug, Deserialize, PartialEq)] + struct List { + /// Outer list mapped to elements, inner -- to `xs:list`. + /// + /// `#[serde(default)]` is not required, because correct + /// XML will always contains at least 1 element. + item: [Vec; 1], + } + /// Special case: zero elements #[test] fn zero() { @@ -832,15 +841,6 @@ mod fixed_name { /// Special case: one element #[test] fn one() { - #[derive(Debug, Deserialize, PartialEq)] - struct List { - /// Outer list mapped to elements, inner -- to `xs:list`. - /// - /// `#[serde(default)]` is not required, because correct - /// XML will always contains at least 1 element. - item: [Vec; 1], - } - let data: List = from_str( r#" @@ -858,6 +858,21 @@ mod fixed_name { ); } + /// Special case: empty `xs:list` + #[test] + fn empty() { + let data: List = from_str( + r#" + + + + "#, + ) + .unwrap(); + + assert_eq!(data, List { item: [vec![]] }); + } + /// Special case: outer list is always mapped to an elements sequence, /// not to an `xs:list` #[test] @@ -1669,6 +1684,21 @@ mod fixed_name { ); } + /// Special case: empty `xs:list` + #[test] + fn empty() { + let data: List = from_str( + r#" + + + + "#, + ) + .unwrap(); + + assert_eq!(data, List { item: vec![vec![]] }); + } + /// Special case: outer list is always mapped to an elements sequence, /// not to an `xs:list` #[test] @@ -2866,6 +2896,16 @@ mod variable_name { use super::*; use pretty_assertions::assert_eq; + #[derive(Debug, Deserialize, PartialEq)] + struct List { + /// Outer list mapped to elements, inner -- to `xs:list`. + /// + /// `#[serde(default)]` is not required, because correct + /// XML will always contains at least 1 element. + #[serde(rename = "$value")] + element: [Vec; 1], + } + /// Special case: zero elements #[test] fn zero() { @@ -2897,16 +2937,6 @@ mod variable_name { /// Special case: one element #[test] fn one() { - #[derive(Debug, Deserialize, PartialEq)] - struct List { - /// Outer list mapped to elements, inner -- to `xs:list`. - /// - /// `#[serde(default)]` is not required, because correct - /// XML will always contains at least 1 element. - #[serde(rename = "$value")] - element: [Vec; 1], - } - let data: List = from_str( r#" @@ -2924,6 +2954,21 @@ mod variable_name { ); } + /// Special case: empty `xs:list` + #[test] + fn empty() { + let data: List = from_str( + r#" + + + + "#, + ) + .unwrap(); + + assert_eq!(data, List { element: [vec![]] }); + } + /// Special case: outer list is always mapped to an elements sequence, /// not to an `xs:list` #[test] @@ -4008,6 +4053,26 @@ mod variable_name { ); } + /// Special case: empty `xs:list` + #[test] + fn empty() { + let data: List = from_str( + r#" + + + + "#, + ) + .unwrap(); + + assert_eq!( + data, + List { + element: vec![vec![]] + } + ); + } + /// Special case: outer list is always mapped to an elements sequence, /// not to an `xs:list` #[test] diff --git a/tests/serde-issues.rs b/tests/serde-issues.rs index 62c38500..188c3fea 100644 --- a/tests/serde-issues.rs +++ b/tests/serde-issues.rs @@ -5,7 +5,7 @@ use pretty_assertions::assert_eq; use quick_xml::de::from_str; use quick_xml::se::{to_string, to_string_with_root}; -use serde::{Deserialize, Serialize}; +use serde::{Deserialize, Deserializer, Serialize}; use std::collections::HashMap; /// Regression tests for https://github.com/tafia/quick-xml/issues/252. @@ -377,3 +377,41 @@ fn issue540() { "" ); } + +/// Regression test for https://github.com/tafia/quick-xml/issues/580. +#[test] +fn issue580() { + #[derive(Debug, Deserialize, PartialEq, Eq)] + struct Seq { + #[serde(rename = "$value")] + items: Vec, + } + + #[derive(Debug, Deserialize, PartialEq, Eq)] + struct Wrapper(#[serde(deserialize_with = "Item::parse")] Item); + + #[derive(Debug, PartialEq, Eq)] + struct Item; + impl Item { + fn parse<'de, D>(_deserializer: D) -> Result + where + D: Deserializer<'de>, + { + Ok(Item) + } + } + + assert_eq!( + from_str::( + r#" + + + + "# + ) + .unwrap(), + Seq { + items: vec![Wrapper(Item), Wrapper(Item)], + } + ); +}