diff --git a/Changelog.md b/Changelog.md
index 4d712574..45d4100a 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -20,6 +20,10 @@ MSRV bumped to 1.56! Crate now uses Rust 2021 edition.
### Bug Fixes
+- [#660]: Fixed incorrect deserialization of `xs:list`s from empty tags (``
+ or ``). Previously an `DeError::UnexpectedEof")` was returned in that case
+- [#580]: Fixed incorrect deserialization of vectors of newtypes from sequences of tags.
+
### Misc Changes
- [#643]: Bumped MSRV to 1.56. In practice the previous MSRV was incorrect in many cases.
@@ -32,11 +36,13 @@ MSRV bumped to 1.56! Crate now uses Rust 2021 edition.
(and newly added `ElementWriter::write_inner_content_async` of course).
[#545]: https://github.com/tafia/quick-xml/pull/545
+[#580]: https://github.com/tafia/quick-xml/issues/580
[#619]: https://github.com/tafia/quick-xml/issues/619
[#635]: https://github.com/tafia/quick-xml/pull/635
[#643]: https://github.com/tafia/quick-xml/pull/643
[#649]: https://github.com/tafia/quick-xml/pull/646
[#651]: https://github.com/tafia/quick-xml/pull/651
+[#660]: https://github.com/tafia/quick-xml/pull/660
## 0.30.0 -- 2023-07-23
diff --git a/src/de/map.rs b/src/de/map.rs
index c8ac059b..2202194d 100644
--- a/src/de/map.rs
+++ b/src/de/map.rs
@@ -373,6 +373,52 @@ macro_rules! forward {
/// A deserializer for a value of map or struct. That deserializer slightly
/// differently processes events for a primitive types and sequences than
/// a [`Deserializer`].
+///
+/// This deserializer can see two kind of events at the start:
+/// - [`DeEvent::Text`]
+/// - [`DeEvent::Start`]
+///
+/// which represents two possible variants of items:
+/// ```xml
+/// - A tag item
+/// A text item
+///
+/// ```
+///
+/// This deserializer are very similar to a [`SeqItemDeserializer`]. The only difference
+/// in the `deserialize_seq` method. This deserializer will act as an iterator
+/// over tags / text within it's parent tag, whereas the [`SeqItemDeserializer`]
+/// will represent sequences as an `xs:list`.
+///
+/// This deserializer processes items as following:
+/// - primitives (numbers, booleans, strings, characters) are deserialized either
+/// from a text content, or unwrapped from a one level of a tag. So, `123` and
+/// `123` both can be deserialized into an `u32`;
+/// - `Option`:
+/// - empty text of [`DeEvent::Text`] is deserialized as `None`;
+/// - everything else are deserialized as `Some` using the same deserializer,
+/// including `` or ``;
+/// - units (`()`) and unit structs consumes the whole text or element subtree;
+/// - newtype structs are deserialized by forwarding deserialization of inner type
+/// with the same deserializer;
+/// - sequences, tuples and tuple structs are deserialized by iterating within the
+/// parent tag and deserializing each tag or text content using [`SeqItemDeserializer`];
+/// - structs and maps are deserialized using new instance of [`MapAccess`];
+/// - enums:
+/// - in case of [`DeEvent::Text`] event the text content is deserialized as
+/// a `$text` variant. Enum content is deserialized from the text using
+/// [`SimpleTypeDeserializer`];
+/// - in case of [`DeEvent::Start`] event the tag name is deserialized as
+/// an enum tag, and the content inside are deserialized as an enum content.
+/// Depending on a variant kind deserialization is performed as:
+/// - unit variants: consuming text content or a subtree;
+/// - newtype variants: forward deserialization to the inner type using
+/// this deserializer;
+/// - tuple variants: call [`deserialize_tuple`] of this deserializer;
+/// - struct variants: call [`deserialize_struct`] of this deserializer.
+///
+/// [`deserialize_tuple`]: #method.deserialize_tuple
+/// [`deserialize_struct`]: #method.deserialize_struct
struct MapValueDeserializer<'de, 'a, 'm, R, E>
where
R: XmlRead<'de>,
@@ -485,7 +531,6 @@ where
forward!(deserialize_unit);
- forward!(deserialize_map);
forward!(deserialize_struct(
name: &'static str,
fields: &'static [&'static str]
@@ -497,7 +542,6 @@ where
));
forward!(deserialize_any);
- forward!(deserialize_ignored_any);
fn deserialize_option(self, visitor: V) -> Result
where
@@ -506,6 +550,19 @@ where
deserialize_option!(self.map.de, self, visitor)
}
+ /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
+ /// with the same deserializer.
+ fn deserialize_newtype_struct(
+ self,
+ _name: &'static str,
+ visitor: V,
+ ) -> Result
+ where
+ V: Visitor<'de>,
+ {
+ visitor.visit_newtype_struct(self)
+ }
+
/// Deserializes each `` in
/// ```xml
///
@@ -716,7 +773,59 @@ where
////////////////////////////////////////////////////////////////////////////////////////////////////
-/// A deserializer for a single item of a sequence.
+/// A deserializer for a single item of a mixed sequence of tags and text.
+///
+/// This deserializer can see two kind of events at the start:
+/// - [`DeEvent::Text`]
+/// - [`DeEvent::Start`]
+///
+/// which represents two possible variants of items:
+/// ```xml
+/// - A tag item
+/// A text item
+///
+/// ```
+///
+/// This deserializer are very similar to a [`MapValueDeserializer`]. The only difference
+/// in the `deserialize_seq` method. This deserializer will perform deserialization
+/// from the textual content (the text itself in case of [`DeEvent::Text`] event
+/// and the text between tags in case of [`DeEvent::Start`] event), whereas
+/// the [`MapValueDeserializer`] will iterate over tags / text within it's parent tag.
+///
+/// This deserializer processes items as following:
+/// - primitives (numbers, booleans, strings, characters) are deserialized either
+/// from a text content, or unwrapped from a one level of a tag. So, `123` and
+/// `123` both can be deserialized into an `u32`;
+/// - `Option`:
+/// - empty text of [`DeEvent::Text`] is deserialized as `None`;
+/// - everything else are deserialized as `Some` using the same deserializer,
+/// including `` or ``;
+/// - units (`()`) and unit structs consumes the whole text or element subtree;
+/// - newtype structs are deserialized as tuple structs with one element;
+/// - sequences, tuples and tuple structs are deserialized using [`SimpleTypeDeserializer`]
+/// (this is the difference):
+/// - in case of [`DeEvent::Text`] event text content passed to the deserializer directly;
+/// - in case of [`DeEvent::Start`] event the start and end tags are stripped,
+/// and text between them is passed to [`SimpleTypeDeserializer`]. If the tag
+/// contains something else other than text, an error is returned, but if it
+/// contains a text and something else (for example, `- text
`),
+/// then the trail is just ignored;
+/// - structs and maps are deserialized using new [`MapAccess`];
+/// - enums:
+/// - in case of [`DeEvent::Text`] event the text content is deserialized as
+/// a `$text` variant. Enum content is deserialized from the text using
+/// [`SimpleTypeDeserializer`];
+/// - in case of [`DeEvent::Start`] event the tag name is deserialized as
+/// an enum tag, and the content inside are deserialized as an enum content.
+/// Depending on a variant kind deserialization is performed as:
+/// - unit variants: consuming text content or a subtree;
+/// - newtype variants: forward deserialization to the inner type using
+/// this deserializer;
+/// - tuple variants: deserialize it as an `xs:list`;
+/// - struct variants: call [`deserialize_struct`] of this deserializer.
+///
+/// [`deserialize_tuple`]: #method.deserialize_tuple
+/// [`deserialize_struct`]: #method.deserialize_struct
struct SeqItemDeserializer<'de, 'a, 'm, R, E>
where
R: XmlRead<'de>,
@@ -754,7 +863,6 @@ where
forward!(deserialize_unit);
- forward!(deserialize_map);
forward!(deserialize_struct(
name: &'static str,
fields: &'static [&'static str]
@@ -766,7 +874,6 @@ where
));
forward!(deserialize_any);
- forward!(deserialize_ignored_any);
fn deserialize_option(self, visitor: V) -> Result
where
@@ -775,6 +882,20 @@ where
deserialize_option!(self.map.de, self, visitor)
}
+ /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
+ /// with the [`SimpleTypeDeserializer`].
+ fn deserialize_newtype_struct(
+ mut self,
+ _name: &'static str,
+ visitor: V,
+ ) -> Result
+ where
+ V: Visitor<'de>,
+ {
+ let text = self.read_string()?;
+ visitor.visit_newtype_struct(SimpleTypeDeserializer::from_text(text))
+ }
+
/// This method deserializes a sequence inside of element that itself is a
/// sequence element:
///
@@ -787,34 +908,12 @@ where
/// ...
/// >
/// ```
- fn deserialize_seq(self, visitor: V) -> Result
+ fn deserialize_seq(mut self, visitor: V) -> Result
where
V: Visitor<'de>,
{
- match self.map.de.next()? {
- DeEvent::Text(e) => {
- SimpleTypeDeserializer::from_text_content(e).deserialize_seq(visitor)
- }
- // This is a sequence element. We cannot treat it as another flatten
- // sequence if type will require `deserialize_seq` We instead forward
- // it to `xs:simpleType` implementation
- DeEvent::Start(e) => {
- let value = match self.map.de.next()? {
- DeEvent::Text(e) => {
- SimpleTypeDeserializer::from_text_content(e).deserialize_seq(visitor)
- }
- e => Err(DeError::Unsupported(
- format!("unsupported event {:?}", e).into(),
- )),
- };
- // TODO: May be assert that here we expect only matching closing tag?
- self.map.de.read_to_end(e.name())?;
- value
- }
- // SAFETY: we use that deserializer only when Start(element) or Text
- // event was peeked already
- _ => unreachable!(),
- }
+ let text = self.read_string()?;
+ SimpleTypeDeserializer::from_text(text).deserialize_seq(visitor)
}
#[inline]
diff --git a/src/de/mod.rs b/src/de/mod.rs
index c3068b9e..eb60972d 100644
--- a/src/de/mod.rs
+++ b/src/de/mod.rs
@@ -1904,18 +1904,6 @@ macro_rules! deserialize_primitives {
self.deserialize_unit(visitor)
}
- /// Representation of the newtypes the same as one-element [tuple](#method.deserialize_tuple).
- fn deserialize_newtype_struct(
- self,
- _name: &'static str,
- visitor: V,
- ) -> Result
- where
- V: Visitor<'de>,
- {
- self.deserialize_tuple(1, visitor)
- }
-
/// Representation of tuples the same as [sequences](#method.deserialize_seq).
fn deserialize_tuple(self, _len: usize, visitor: V) -> Result
where
@@ -1937,6 +1925,16 @@ macro_rules! deserialize_primitives {
self.deserialize_tuple(len, visitor)
}
+ /// Forwards deserialization to the [`deserialize_struct`](#method.deserialize_struct)
+ /// with empty name and fields.
+ #[inline]
+ fn deserialize_map(self, visitor: V) -> Result
+ where
+ V: Visitor<'de>,
+ {
+ self.deserialize_struct("", &[], visitor)
+ }
+
/// Identifiers represented as [strings](#method.deserialize_str).
fn deserialize_identifier(self, visitor: V) -> Result
where
@@ -1944,6 +1942,15 @@ macro_rules! deserialize_primitives {
{
self.deserialize_str(visitor)
}
+
+ /// Forwards deserialization to the [`deserialize_unit`](#method.deserialize_unit).
+ #[inline]
+ fn deserialize_ignored_any(self, visitor: V) -> Result
+ where
+ V: Visitor<'de>,
+ {
+ self.deserialize_unit(visitor)
+ }
};
}
@@ -2820,30 +2827,36 @@ where
}
}
- fn deserialize_enum(
+ /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
+ /// with the same deserializer.
+ fn deserialize_newtype_struct(
self,
_name: &'static str,
- _variants: &'static [&'static str],
visitor: V,
) -> Result
where
V: Visitor<'de>,
{
- visitor.visit_enum(var::EnumAccess::new(self))
+ visitor.visit_newtype_struct(self)
}
- fn deserialize_seq(self, visitor: V) -> Result
+ fn deserialize_enum(
+ self,
+ _name: &'static str,
+ _variants: &'static [&'static str],
+ visitor: V,
+ ) -> Result
where
V: Visitor<'de>,
{
- visitor.visit_seq(self)
+ visitor.visit_enum(var::EnumAccess::new(self))
}
- fn deserialize_map(self, visitor: V) -> Result
+ fn deserialize_seq(self, visitor: V) -> Result
where
V: Visitor<'de>,
{
- self.deserialize_struct("", &[], visitor)
+ visitor.visit_seq(self)
}
fn deserialize_option(self, visitor: V) -> Result
@@ -2853,39 +2866,13 @@ where
deserialize_option!(self, self, visitor)
}
- /// Always call `visitor.visit_unit()` because returned value ignored in any case.
- ///
- /// This method consumes any single [event][DeEvent] except the [`Start`]
- /// event, in which case all events up to and including corresponding [`End`]
- /// event will be consumed.
- ///
- /// This method returns error if current event is [`End`] or [`Eof`].
- ///
- /// [`Start`]: DeEvent::Start
- /// [`End`]: DeEvent::End
- /// [`Eof`]: DeEvent::Eof
- fn deserialize_ignored_any(self, visitor: V) -> Result
- where
- V: Visitor<'de>,
- {
- match self.next()? {
- DeEvent::Start(e) => self.read_to_end(e.name())?,
- DeEvent::End(e) => return Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
- DeEvent::Eof => return Err(DeError::UnexpectedEof),
- _ => (),
- }
- visitor.visit_unit()
- }
-
fn deserialize_any(self, visitor: V) -> Result
where
V: Visitor<'de>,
{
match self.peek()? {
- DeEvent::Start(_) => self.deserialize_map(visitor),
- // Redirect to deserialize_unit in order to consume an event and return an appropriate error
- DeEvent::End(_) | DeEvent::Eof => self.deserialize_unit(visitor),
- _ => self.deserialize_string(visitor),
+ DeEvent::Text(_) => self.deserialize_str(visitor),
+ _ => self.deserialize_map(visitor),
}
}
}
diff --git a/src/de/simple_type.rs b/src/de/simple_type.rs
index 0335722a..e8968e1d 100644
--- a/src/de/simple_type.rs
+++ b/src/de/simple_type.rs
@@ -495,13 +495,17 @@ pub struct SimpleTypeDeserializer<'de, 'a> {
impl<'de, 'a> SimpleTypeDeserializer<'de, 'a> {
/// Creates a deserializer from a value, that possible borrowed from input
- pub fn from_text_content(value: Text<'de>) -> Self {
- let content = match value.text {
+ pub fn from_text(text: Cow<'de, str>) -> Self {
+ let content = match text {
Cow::Borrowed(slice) => CowRef::Input(slice.as_bytes()),
Cow::Owned(content) => CowRef::Owned(content.into_bytes()),
};
Self::new(content, false, Decoder::utf8())
}
+ /// Creates a deserializer from a value, that possible borrowed from input
+ pub fn from_text_content(value: Text<'de>) -> Self {
+ Self::from_text(value.text)
+ }
/// Creates a deserializer from a part of value at specified range
#[allow(clippy::ptr_arg)]
diff --git a/tests/serde-de-seq.rs b/tests/serde-de-seq.rs
index bddb8069..ca6fa341 100644
--- a/tests/serde-de-seq.rs
+++ b/tests/serde-de-seq.rs
@@ -802,6 +802,15 @@ mod fixed_name {
use super::*;
use pretty_assertions::assert_eq;
+ #[derive(Debug, Deserialize, PartialEq)]
+ struct List {
+ /// Outer list mapped to elements, inner -- to `xs:list`.
+ ///
+ /// `#[serde(default)]` is not required, because correct
+ /// XML will always contains at least 1 element.
+ item: [Vec; 1],
+ }
+
/// Special case: zero elements
#[test]
fn zero() {
@@ -832,15 +841,6 @@ mod fixed_name {
/// Special case: one element
#[test]
fn one() {
- #[derive(Debug, Deserialize, PartialEq)]
- struct List {
- /// Outer list mapped to elements, inner -- to `xs:list`.
- ///
- /// `#[serde(default)]` is not required, because correct
- /// XML will always contains at least 1 element.
- item: [Vec; 1],
- }
-
let data: List = from_str(
r#"
@@ -858,6 +858,21 @@ mod fixed_name {
);
}
+ /// Special case: empty `xs:list`
+ #[test]
+ fn empty() {
+ let data: List = from_str(
+ r#"
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(data, List { item: [vec![]] });
+ }
+
/// Special case: outer list is always mapped to an elements sequence,
/// not to an `xs:list`
#[test]
@@ -1669,6 +1684,21 @@ mod fixed_name {
);
}
+ /// Special case: empty `xs:list`
+ #[test]
+ fn empty() {
+ let data: List = from_str(
+ r#"
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(data, List { item: vec![vec![]] });
+ }
+
/// Special case: outer list is always mapped to an elements sequence,
/// not to an `xs:list`
#[test]
@@ -2866,6 +2896,16 @@ mod variable_name {
use super::*;
use pretty_assertions::assert_eq;
+ #[derive(Debug, Deserialize, PartialEq)]
+ struct List {
+ /// Outer list mapped to elements, inner -- to `xs:list`.
+ ///
+ /// `#[serde(default)]` is not required, because correct
+ /// XML will always contains at least 1 element.
+ #[serde(rename = "$value")]
+ element: [Vec; 1],
+ }
+
/// Special case: zero elements
#[test]
fn zero() {
@@ -2897,16 +2937,6 @@ mod variable_name {
/// Special case: one element
#[test]
fn one() {
- #[derive(Debug, Deserialize, PartialEq)]
- struct List {
- /// Outer list mapped to elements, inner -- to `xs:list`.
- ///
- /// `#[serde(default)]` is not required, because correct
- /// XML will always contains at least 1 element.
- #[serde(rename = "$value")]
- element: [Vec; 1],
- }
-
let data: List = from_str(
r#"
@@ -2924,6 +2954,21 @@ mod variable_name {
);
}
+ /// Special case: empty `xs:list`
+ #[test]
+ fn empty() {
+ let data: List = from_str(
+ r#"
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(data, List { element: [vec![]] });
+ }
+
/// Special case: outer list is always mapped to an elements sequence,
/// not to an `xs:list`
#[test]
@@ -4008,6 +4053,26 @@ mod variable_name {
);
}
+ /// Special case: empty `xs:list`
+ #[test]
+ fn empty() {
+ let data: List = from_str(
+ r#"
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ List {
+ element: vec![vec![]]
+ }
+ );
+ }
+
/// Special case: outer list is always mapped to an elements sequence,
/// not to an `xs:list`
#[test]
diff --git a/tests/serde-issues.rs b/tests/serde-issues.rs
index 62c38500..188c3fea 100644
--- a/tests/serde-issues.rs
+++ b/tests/serde-issues.rs
@@ -5,7 +5,7 @@
use pretty_assertions::assert_eq;
use quick_xml::de::from_str;
use quick_xml::se::{to_string, to_string_with_root};
-use serde::{Deserialize, Serialize};
+use serde::{Deserialize, Deserializer, Serialize};
use std::collections::HashMap;
/// Regression tests for https://github.com/tafia/quick-xml/issues/252.
@@ -377,3 +377,41 @@ fn issue540() {
""
);
}
+
+/// Regression test for https://github.com/tafia/quick-xml/issues/580.
+#[test]
+fn issue580() {
+ #[derive(Debug, Deserialize, PartialEq, Eq)]
+ struct Seq {
+ #[serde(rename = "$value")]
+ items: Vec,
+ }
+
+ #[derive(Debug, Deserialize, PartialEq, Eq)]
+ struct Wrapper(#[serde(deserialize_with = "Item::parse")] Item);
+
+ #[derive(Debug, PartialEq, Eq)]
+ struct Item;
+ impl Item {
+ fn parse<'de, D>(_deserializer: D) -> Result
+ where
+ D: Deserializer<'de>,
+ {
+ Ok(Item)
+ }
+ }
+
+ assert_eq!(
+ from_str::(
+ r#"
+
+
+
+ "#
+ )
+ .unwrap(),
+ Seq {
+ items: vec![Wrapper(Item), Wrapper(Item)],
+ }
+ );
+}