From c8b427f140b0b2b72b6fcc5970a7189defdb79b0 Mon Sep 17 00:00:00 2001 From: "Arellano Ruiz, Eugenio Salvador" Date: Thu, 28 Mar 2024 17:42:15 +0500 Subject: [PATCH] Implement API to add newlines between attributes when writing XML --- Changelog.md | 2 + src/events/mod.rs | 29 +++- src/writer.rs | 432 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 450 insertions(+), 13 deletions(-) diff --git a/Changelog.md b/Changelog.md index 9aecbb9e..fbe531b1 100644 --- a/Changelog.md +++ b/Changelog.md @@ -33,6 +33,7 @@ to get an offset of the error position. For `SyntaxError`s the range - [#629]: Added a default case to `impl_deserialize_for_internally_tagged_enum` macro so that it can handle every attribute that does not match existing cases within an enum variant. - [#722]: Allow to pass owned strings to `Writer::create_element`. This is breaking change! +- [#275]: Added `ElementWriter::new_line()` which enables pretty printing elements with multiple attributes. ### Bug Fixes @@ -67,6 +68,7 @@ to get an offset of the error position. For `SyntaxError`s the range - [#738]: Add an example of how to deserialize XML elements into Rust enums using an intermediate custom deserializer. 
+[#275]: https://github.com/tafia/quick-xml/issues/275 [#362]: https://github.com/tafia/quick-xml/issues/362 [#513]: https://github.com/tafia/quick-xml/issues/513 [#622]: https://github.com/tafia/quick-xml/issues/622 diff --git a/src/events/mod.rs b/src/events/mod.rs index 546ad392..25a0f408 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -242,13 +242,8 @@ impl<'a> BytesStart<'a> { where A: Into>, { - let a = attr.into(); - let bytes = self.buf.to_mut(); - bytes.push(b' '); - bytes.extend_from_slice(a.key.as_ref()); - bytes.extend_from_slice(b"=\""); - bytes.extend_from_slice(a.value.as_ref()); - bytes.push(b'"'); + self.buf.to_mut().push(b' '); + self.push_attr(attr.into()); } /// Remove all attributes from the ByteStart @@ -287,6 +282,26 @@ impl<'a> BytesStart<'a> { } Ok(None) } + + /// Adds an attribute to this element. + pub(crate) fn push_attr<'b>(&mut self, attr: Attribute<'b>) { + let bytes = self.buf.to_mut(); + bytes.extend_from_slice(attr.key.as_ref()); + bytes.extend_from_slice(b"=\""); + // FIXME: need to escape attribute content + bytes.extend_from_slice(attr.value.as_ref()); + bytes.push(b'"'); + } + + /// Adds new line in existing element + pub(crate) fn push_newline(&mut self) { + self.buf.to_mut().push(b'\n'); + } + + /// Adds indentation bytes in existing element + pub(crate) fn push_indent(&mut self, indent: &[u8]) { + self.buf.to_mut().extend_from_slice(indent); + } } impl<'a> Debug for BytesStart<'a> { diff --git a/src/writer.rs b/src/writer.rs index 2d455c46..7e91df61 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -151,6 +151,8 @@ impl Writer { ElementWriter { writer: self, start_tag: BytesStart::new(name), + state: AttributeIndent::NoneAttributesWritten, + spaces: Vec::new(), } } } @@ -336,11 +338,48 @@ impl Writer { } } +/// Track indent inside elements state +/// +/// ```mermaid +/// stateDiagram-v2 +/// [*] --> NoneAttributesWritten +/// NoneAttributesWritten --> Spaces : .with_attribute() +/// NoneAttributesWritten --> 
WriteConfigured : .new_line() +/// +/// Spaces --> Spaces : .with_attribute() +/// Spaces --> WriteSpaces : .new_line() +/// +/// WriteSpaces --> Spaces : .with_attribute() +/// WriteSpaces --> WriteSpaces : .new_line() +/// +/// Configured --> Configured : .with_attribute() +/// Configured --> WriteConfigured : .new_line() +/// +/// WriteConfigured --> Configured : .with_attribute() +/// WriteConfigured --> WriteConfigured : .new_line() +/// ``` +#[derive(Debug)] +enum AttributeIndent { + /// Initial state. `ElementWriter` was just created and no attributes have been written yet + NoneAttributesWritten, + /// `new_line()` was called: write the specified count of spaces as indent before the next attribute in `with_attribute()` + WriteSpaces(usize), + /// Remember the count of spaces that should be used as indent if `new_line()` is called + Spaces(usize), + /// `new_line()` was called: write the current element indent plus the specified count of configured indent characters before the next attribute in `with_attribute()` + WriteConfigured(usize), + /// Remember the count of configured indent characters that should be used as additional indent if `new_line()` is called + Configured(usize), +} + /// A struct to write an element. Contains methods to add attributes and inner /// elements to the element pub struct ElementWriter<'a, W> { writer: &'a mut Writer, start_tag: BytesStart<'a>, + state: AttributeIndent, + /// Reusable buffer of space bytes used to write space indents of attributes + spaces: Vec, } impl<'a, W> ElementWriter<'a, W> { @@ -349,7 +388,7 @@ impl<'a, W> ElementWriter<'a, W> { where I: Into>, { - self.start_tag.push_attribute(attr); + self.write_attr(attr.into()); self } @@ -361,9 +400,133 @@ impl<'a, W> ElementWriter<'a, W> { I: IntoIterator, I::Item: Into>, { - self.start_tag.extend_attributes(attributes); + let mut iter = attributes.into_iter(); + if let Some(attr) = iter.next() { + self.write_attr(attr.into()); + self.start_tag.extend_attributes(iter); + } self } + + /// Push a new line inside an element between attributes. Note that this + /// method does nothing if [`Writer`] was created without indentation support. 
+ /// + /// # Examples + /// + /// The following code + /// + /// ``` + /// # use quick_xml::writer::Writer; + /// let mut buffer = Vec::new(); + /// let mut writer = Writer::new_with_indent(&mut buffer, b' ', 2); + /// writer + /// .create_element("element") + /// //.new_line() (1) + /// .with_attribute(("first", "1")) + /// .with_attribute(("second", "2")) + /// .new_line() + /// .with_attributes([ + /// ("third", "3"), + /// ("fourth", "4"), + /// ]) + /// //.new_line() (2) + /// .write_empty(); + /// ``` + /// will produce the following XMLs: + /// ```xml + /// + /// + /// + /// + /// + /// + /// + /// + /// ``` + pub fn new_line(mut self) -> Self { + if let Some(i) = self.writer.indent.as_mut() { + match self.state { + // .new_line() called just after .create_element(). + // Use element indent to additionally indent attributes + AttributeIndent::NoneAttributesWritten => { + self.state = AttributeIndent::WriteConfigured(i.indent_size) + } + + AttributeIndent::WriteSpaces(_) => {} + // .new_line() called when .with_attribute() was called at least once. + // The spaces should be used to indent + // Plan saved indent + AttributeIndent::Spaces(indent) => { + self.state = AttributeIndent::WriteSpaces(indent) + } + + AttributeIndent::WriteConfigured(_) => {} + // .new_line() called when .with_attribute() was called at least once. 
+ // The configured indent characters should be used to indent + // Schedule the saved indent to be written before the next attribute + AttributeIndent::Configured(indent) => { + self.state = AttributeIndent::WriteConfigured(indent) + } + } + self.start_tag.push_newline(); + }; + self + } + + /// Writes an attribute and maintains the indentation state + fn write_attr<'b>(&mut self, attr: Attribute<'b>) { + if let Some(i) = self.writer.indent.as_mut() { + // Save the indent that we should use the next time .new_line() is called + self.state = match self.state { + // Neither .new_line() nor .with_attribute() has been called yet + // If a newline inside attributes is requested later, we should indent them + // by the length of the tag name, +1 for `<` and +1 for one space + AttributeIndent::NoneAttributesWritten => { + self.start_tag.push_attribute(attr); + AttributeIndent::Spaces(self.start_tag.name().as_ref().len() + 2) + } + + // Indent was requested by a previous call to .new_line(), write it + // The new line itself was already written by .new_line() + AttributeIndent::WriteSpaces(indent) => { + if self.spaces.len() < indent { + self.spaces.resize(indent, b' '); + } + self.start_tag.push_indent(&self.spaces[..indent]); + self.start_tag.push_attr(attr.into()); + AttributeIndent::Spaces(indent) + } + // .new_line() was not called, but .with_attribute() was. + // use the previously calculated indent + AttributeIndent::Spaces(indent) => { + self.start_tag.push_attribute(attr); + AttributeIndent::Spaces(indent) + } + + // Indent was requested by a previous call to .new_line(), write it + // The new line itself was already written by .new_line() + AttributeIndent::WriteConfigured(indent) => { + self.start_tag.push_indent(i.additional(indent)); + self.start_tag.push_attr(attr.into()); + AttributeIndent::Configured(indent) + } + // .new_line() was not called, but .with_attribute() was. 
+ // use the previously calculated indent + AttributeIndent::Configured(indent) => { + self.start_tag.push_attribute(attr); + AttributeIndent::Configured(indent) + } + }; + } else { + self.start_tag.push_attribute(attr); + } + } } impl<'a, W: Write> ElementWriter<'a, W> { @@ -459,10 +622,7 @@ impl Indentation { /// Increase indentation by one level pub fn grow(&mut self) { self.current_indent_len += self.indent_size; - if self.current_indent_len > self.indents.len() { - self.indents - .resize(self.current_indent_len, self.indent_char); - } + self.ensure(self.current_indent_len); } /// Decrease indentation by one level. Do nothing, if level already zero @@ -474,6 +634,19 @@ impl Indentation { pub fn current(&self) -> &[u8] { &self.indents[..self.current_indent_len] } + + /// Returns indent with current indent plus additional indent + pub fn additional(&mut self, additional_indent: usize) -> &[u8] { + let new_len = self.current_indent_len + additional_indent; + self.ensure(new_len); + &self.indents[..new_len] + } + + fn ensure(&mut self, new_len: usize) { + if self.indents.len() < new_len { + self.indents.resize(new_len, self.indent_char); + } + } } #[cfg(test)] @@ -782,4 +955,251 @@ mod indentation { "# ); } + + mod in_attributes { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn newline_first() { + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b'_', 1); + + writer + .create_element("element") + .new_line() + .with_attribute(("first", "1")) + .with_attribute(("second", "2")) + .new_line() + .with_attribute(("third", "3")) + .with_attribute(("fourth", "4")) + .write_empty() + .expect("write tag failed"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + "" + ); + } + + #[test] + fn newline_inside() { + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b'_', 1); + + writer + .create_element("element") + .with_attribute(("first", "1")) + .with_attribute(("second", 
"2")) + .new_line() + .with_attribute(("third", "3")) + .with_attribute(("fourth", "4")) + .write_empty() + .expect("write tag failed"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + "" + ); + } + + #[test] + fn newline_last() { + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b'_', 1); + + writer + .create_element("element") + .new_line() + .with_attribute(("first", "1")) + .with_attribute(("second", "2")) + .new_line() + .with_attribute(("third", "3")) + .with_attribute(("fourth", "4")) + .new_line() + .write_empty() + .expect("write tag failed"); + + writer + .create_element("element") + .with_attribute(("first", "1")) + .with_attribute(("second", "2")) + .new_line() + .with_attribute(("third", "3")) + .with_attribute(("fourth", "4")) + .new_line() + .write_empty() + .expect("write tag failed"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + "\ + \n" + ); + } + + #[test] + fn newline_twice() { + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b'_', 1); + + writer + .create_element("element") + .new_line() + .new_line() + .write_empty() + .expect("write tag failed"); + + writer + .create_element("element") + .with_attribute(("first", "1")) + .new_line() + .new_line() + .with_attribute(("second", "2")) + .write_empty() + .expect("write tag failed"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + r#" +"# + ); + } + + #[test] + fn without_indent() { + let mut buffer = Vec::new(); + let mut writer = Writer::new(&mut buffer); + + writer + .create_element("element") + .new_line() + .new_line() + .write_empty() + .expect("write tag failed"); + + writer + .create_element("element") + .with_attribute(("first", "1")) + .new_line() + .new_line() + .with_attribute(("second", "2")) + .write_empty() + .expect("write tag failed"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + r#""# + ); + } + + #[test] + fn long_element_name() { + let mut buffer = 
Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b't', 1); + + writer + .create_element(String::from("x").repeat(128).as_str()) + .with_attribute(("first", "1")) + .new_line() + .with_attribute(("second", "2")) + .write_empty() + .expect("Problem with indentation reference"); + } + } + + mod in_attributes_multi { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn newline_first() { + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b'_', 1); + + writer + .create_element("element") + .new_line() + .with_attributes([("first", "1"), ("second", "2")]) + .new_line() + .with_attributes([("third", "3"), ("fourth", "4")]) + .write_empty() + .expect("write tag failed"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + "" + ); + } + + #[test] + fn newline_inside() { + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b'_', 1); + + writer + .create_element("element") + .with_attributes([("first", "1"), ("second", "2")]) + .new_line() + .with_attributes([("third", "3"), ("fourth", "4")]) + .write_empty() + .expect("write tag failed"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + r#""# + ); + } + + #[test] + fn newline_last() { + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b'_', 1); + + writer + .create_element("element") + .new_line() + .with_attributes([("first", "1"), ("second", "2")]) + .new_line() + .with_attributes([("third", "3"), ("fourth", "4")]) + .new_line() + .write_empty() + .expect("write tag failed"); + + writer + .create_element("element") + .with_attributes([("first", "1"), ("second", "2")]) + .new_line() + .with_attributes([("third", "3"), ("fourth", "4")]) + .new_line() + .write_empty() + .expect("write tag failed"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + "\ + \n" + ); + } + } }