Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce Config struct that holds parser configuration and implement #513 #677

Merged
merged 11 commits into from
Nov 5, 2023
366 changes: 366 additions & 0 deletions tests/reader-config.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,366 @@
//! Contains tests for config options of a parser.
//!
//! Each module is named after the corresponding option, and the functions inside
//! it test the various values of that option.
//!
//! Please keep tests sorted (exceptions are allowed if options are tightly related).

use quick_xml::errors::{Error, IllFormedError};
use quick_xml::events::{BytesCData, BytesEnd, BytesStart, BytesText, Event};
use quick_xml::reader::Reader;

mod expand_empty_elements {
use super::*;
use pretty_assertions::assert_eq;

/// Self-closed elements should be reported as one `Empty` event
#[test]
fn false_() {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps "enabled" and "disabled" would be better names, to avoid keyword clashes?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I chose those names because, if we later add options that are not booleans, the tests would be named after the config value.

let mut reader = Reader::from_str("<root/>");
reader.expand_empty_elements(false);

assert_eq!(
reader.read_event().unwrap(),
Event::Empty(BytesStart::new("root"))
);
assert_eq!(reader.read_event().unwrap(), Event::Eof);
}

/// With expansion enabled, a self-closed element should be split into a
/// `Start` event followed by a matching `End` event
#[test]
fn true_() {
    let mut xml = Reader::from_str("<root/>");
    xml.expand_empty_elements(true);

    // Events in the exact order the reader is expected to produce them
    let expected = [
        Event::Start(BytesStart::new("root")),
        Event::End(BytesEnd::new("root")),
        Event::Eof,
    ];
    for event in expected {
        assert_eq!(xml.read_event().unwrap(), event);
    }
}
}

mod trim_markup_names_in_closing_tags {
use super::*;
use pretty_assertions::assert_eq;

mod false_ {
use super::*;
use pretty_assertions::assert_eq;

#[test]
fn check_end_names_false() {
let mut reader = Reader::from_str("<root></root \t\r\n>");
reader.trim_markup_names_in_closing_tags(false);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the reason that we only have this for closing tags and not, say, for attributes?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that this is an optimization option. Usually end tags do not contain spaces before the >, so if we assume that the name ends immediately before the >, we can save some time. Such an optimization makes sense only for closing tags -- for opening tags we have to check whether the tag has attributes and find the actual end of the tag name in any case.

// We need to disable the checks, otherwise an error will be returned when the end tag is read
reader.check_end_names(false);

assert_eq!(
reader.read_event().unwrap(),
Event::Start(BytesStart::new("root"))
);
assert_eq!(
reader.read_event().unwrap(),
Event::End(BytesEnd::new("root \t\r\n"))
);
assert_eq!(reader.read_event().unwrap(), Event::Eof);
}

/// The untrimmed name of the closing tag differs from the name of the opening
/// tag, so with the end name check enabled a `MismatchedEnd` error is expected,
/// followed by `Eof`
#[test]
fn check_end_names_true() {
    let mut xml = Reader::from_str("<root></root \t\r\n>");
    xml.trim_markup_names_in_closing_tags(false);
    xml.check_end_names(true);

    let opening = xml.read_event().unwrap();
    assert_eq!(opening, Event::Start(BytesStart::new("root")));

    // The closing tag must be rejected because its name keeps the whitespace
    let closing = xml.read_event();
    match closing {
        Err(Error::IllFormed(cause)) => {
            assert_eq!(
                cause,
                IllFormedError::MismatchedEnd {
                    expected: "root".into(),
                    found: "root \t\r\n".into(),
                }
            );
        }
        other => panic!("Expected `Err(IllFormed(_))`, but got `{:?}`", other),
    }

    let last = xml.read_event().unwrap();
    assert_eq!(last, Event::Eof);
}
}

/// With trimming enabled, the whitespace after the name in the closing tag
/// should not be part of the reported `End` event
#[test]
fn true_() {
    let mut xml = Reader::from_str("<root></root \t\r\n>");
    xml.trim_markup_names_in_closing_tags(true);

    // Events in the exact order the reader is expected to produce them
    let expected = [
        Event::Start(BytesStart::new("root")),
        Event::End(BytesEnd::new("root")),
        Event::Eof,
    ];
    for event in expected {
        assert_eq!(xml.read_event().unwrap(), event);
    }
}
}

/// Document shared by the `trim_text` and `trim_text_end` tests.
///
/// It contains one markup item of each kind (doctype, start tag, empty tag,
/// text, comment, CDATA, processing instruction and end tag), and each item
/// has the whitespace sequence ` \t\r\n` inside and/or after it.
const XML: &str = " \t\r\n\
<!doctype root \t\r\n> \t\r\n\
<root \t\r\n> \t\r\n\
<empty \t\r\n/> \t\r\n\
text \t\r\n\
<!-- comment \t\r\n--> \t\r\n\
<![CDATA[ \t\r\ncdata \t\r\n]]> \t\r\n\
<?pi \t\r\n?> \t\r\n\
</root> \t\r\n";

mod trim_text {
use super::*;
use pretty_assertions::assert_eq;

/// With trimming disabled, every run of whitespace between markup should be
/// reported as a `Text` event, and whitespace around the word `text` should
/// be kept
#[test]
fn false_() {
    let mut xml = Reader::from_str(XML);
    xml.trim_text(false);

    // Whitespace-only text that separates markup items in `XML`
    let blank = || Event::Text(BytesText::new(" \t\r\n"));

    // Events in the exact order the reader is expected to produce them
    let expected = [
        blank(),
        Event::DocType(BytesText::new("root \t\r\n")),
        blank(),
        Event::Start(BytesStart::from_content("root \t\r\n", 4)),
        blank(),
        Event::Empty(BytesStart::from_content("empty \t\r\n", 5)),
        Event::Text(BytesText::new(" \t\r\ntext \t\r\n")),
        Event::Comment(BytesText::new(" comment \t\r\n")),
        blank(),
        Event::CData(BytesCData::new(" \t\r\ncdata \t\r\n")),
        blank(),
        Event::PI(BytesText::new("pi \t\r\n")),
        blank(),
        Event::End(BytesEnd::new("root")),
        blank(),
        Event::Eof,
    ];
    for event in expected {
        assert_eq!(xml.read_event().unwrap(), event);
    }
}

#[test]
fn true_() {
let mut reader = Reader::from_str(XML);
reader.trim_text(true);

assert_eq!(
reader.read_event().unwrap(),
Event::DocType(BytesText::new("root \t\r\n"))
);
assert_eq!(
reader.read_event().unwrap(),
Event::Start(BytesStart::from_content("root \t\r\n", 4))
);
assert_eq!(
reader.read_event().unwrap(),
Event::Empty(BytesStart::from_content("empty \t\r\n", 5))
);
assert_eq!(
reader.read_event().unwrap(),
Event::Text(BytesText::new("text"))
);
assert_eq!(
reader.read_event().unwrap(),
Event::Comment(BytesText::new(" comment \t\r\n"))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have any reason to support trimming the text values of "comments"? I cannot immediately think of a reason to do that, but perhaps one exists.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think not; at least nobody has requested such a feature. If such a request appears, we can add a separate option.

Generally speaking, I would delete the current trim options, as they simply do not work correctly for text alternating with CDATA / comments / processing instructions, but I suppose that would break many users. I was thinking about renaming the current Event to RawEvent and DeEvent to Event, and giving users a stream of Events. The RawEvent would then be a low-level event that most users usually do not need. These are very raw thoughts for now, so I decided not to make revolutionary changes yet.

);
assert_eq!(
reader.read_event().unwrap(),
Event::CData(BytesCData::new(" \t\r\ncdata \t\r\n"))
);
assert_eq!(
reader.read_event().unwrap(),
Event::PI(BytesText::new("pi \t\r\n"))
);
assert_eq!(
reader.read_event().unwrap(),
Event::End(BytesEnd::new("root"))
);
assert_eq!(reader.read_event().unwrap(), Event::Eof);
}
}

mod trim_text_end {
use super::*;
use pretty_assertions::assert_eq;

/// With trimming of the text end disabled, every run of whitespace between
/// markup should be reported as a `Text` event, and whitespace after the word
/// `text` should be kept
#[test]
fn false_() {
    let mut xml = Reader::from_str(XML);
    xml.trim_text_end(false);

    // Whitespace-only text that separates markup items in `XML`
    let blank = || Event::Text(BytesText::new(" \t\r\n"));

    // Events in the exact order the reader is expected to produce them
    let expected = [
        blank(),
        Event::DocType(BytesText::new("root \t\r\n")),
        blank(),
        Event::Start(BytesStart::from_content("root \t\r\n", 4)),
        blank(),
        Event::Empty(BytesStart::from_content("empty \t\r\n", 5)),
        Event::Text(BytesText::new(" \t\r\ntext \t\r\n")),
        Event::Comment(BytesText::new(" comment \t\r\n")),
        blank(),
        Event::CData(BytesCData::new(" \t\r\ncdata \t\r\n")),
        blank(),
        Event::PI(BytesText::new("pi \t\r\n")),
        blank(),
        Event::End(BytesEnd::new("root")),
        blank(),
        Event::Eof,
    ];
    for event in expected {
        assert_eq!(xml.read_event().unwrap(), event);
    }
}

// TODO: Enable test after rewriting parser
/// Expected behavior with trimming of the text end enabled: whitespace-only
/// text is not reported, and the text around the word `text` keeps its leading
/// whitespace but loses the trailing one
#[test]
#[ignore = "currently it is hard to fix incorrect behavior, but this will much easy after parser rewrite"]
fn true_() {
    let mut xml = Reader::from_str(XML);
    xml.trim_text_end(true);

    // Events in the exact order the reader is expected to produce them
    let expected = [
        Event::DocType(BytesText::new("root \t\r\n")),
        Event::Start(BytesStart::from_content("root \t\r\n", 4)),
        Event::Empty(BytesStart::from_content("empty \t\r\n", 5)),
        Event::Text(BytesText::new(" \t\r\ntext")),
        Event::Comment(BytesText::new(" comment \t\r\n")),
        Event::CData(BytesCData::new(" \t\r\ncdata \t\r\n")),
        Event::PI(BytesText::new("pi \t\r\n")),
        Event::End(BytesEnd::new("root")),
        Event::Eof,
    ];
    for event in expected {
        assert_eq!(xml.read_event().unwrap(), event);
    }
}
}
Loading