From c8332d90c01f6558e1c13fd5ebb96e85818c71d6 Mon Sep 17 00:00:00 2001 From: Dan Griffin Date: Wed, 24 May 2023 22:48:35 +0500 Subject: [PATCH 1/2] Add tests for comments and processing instructions in XML prolog See failures (2): xml_prolog::comments xml_prolog::pi --- tests/serde-de.rs | 74 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/tests/serde-de.rs b/tests/serde-de.rs index 7b12fd02..554abac4 100644 --- a/tests/serde-de.rs +++ b/tests/serde-de.rs @@ -6496,3 +6496,77 @@ mod resolve { ); } } + +/// Tests for https://github.com/tafia/quick-xml/pull/603. +/// +/// According to the XML specification, comments, +/// processing instructions and spaces are possible after XML declaration or DTD. +/// Their existence should not break deserialization. +/// +/// ```text +/// [22] prolog ::= XMLDecl Misc* (doctypedecl Misc*)? +/// [27] Misc ::= Comment | PI | S +/// ``` +mod xml_prolog { + use super::*; + use pretty_assertions::assert_eq; + use std::collections::HashMap; + + #[test] + fn spaces() { + assert_eq!( + from_str::>( + r#" + + + + + + + "# + ) + .unwrap(), + HashMap::new() + ); + } + + #[test] + fn comments() { + assert_eq!( + from_str::>( + r#" + + + + + + + + + "#, + ) + .unwrap(), + HashMap::new() + ); + } + + #[test] + fn pi() { + assert_eq!( + from_str::>( + r#" + + + + + + + + + "#, + ) + .unwrap(), + HashMap::new() + ); + } +} From d49f2d54287542fcafac2943d39fb9be86581259 Mon Sep 17 00:00:00 2001 From: Dan Griffin Date: Sat, 10 Jun 2023 00:14:34 +0500 Subject: [PATCH 2/2] Trim `Text` events after DOCTYPE so spaces do not produce an event. Otherwise consecutive `Text` events (which are possible if they are delimited by Comment or PI events, which are skipped) will be merged but not trimmed. That will lead to returning a `Text` event when trying to call `deserialize_struct` or `deserialize_map`, which will trigger a `DeError::ExpectedStart` error.
The incorrect trim behavior was introduced in #581, when the DocType event began to be processed --- Changelog.md | 6 ++++++ src/de/mod.rs | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Changelog.md b/Changelog.md index e0a01192..f1bf2801 100644 --- a/Changelog.md +++ b/Changelog.md @@ -18,9 +18,15 @@ ### Bug Fixes +- [#603]: Fix a regression from [#581] where an XML comment or a processing + instruction between a `<!DOCTYPE>` and the root element in the file breaks + deserialization of structs by returning `DeError::ExpectedStart` + ### Misc Changes +[#581]: https://github.com/tafia/quick-xml/pull/581 [#601]: https://github.com/tafia/quick-xml/pull/601 +[#603]: https://github.com/tafia/quick-xml/pull/603 [#606]: https://github.com/tafia/quick-xml/pull/606 diff --git a/src/de/mod.rs b/src/de/mod.rs index cc0d59d7..20017d00 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -2880,7 +2880,7 @@ impl StartTrimmer { #[inline(always)] fn trim<'a>(&mut self, event: Event<'a>) -> Option> { let (event, trim_next_event) = match event { - Event::DocType(e) => (PayloadEvent::DocType(e), false), + Event::DocType(e) => (PayloadEvent::DocType(e), true), Event::Start(e) => (PayloadEvent::Start(e), true), Event::End(e) => (PayloadEvent::End(e), true), Event::Eof => (PayloadEvent::Eof, true),