Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce Config struct that holds parser configuration and implement #513 #677

Merged
merged 11 commits into from
Nov 5, 2023
Merged
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ async-tokio = ["tokio"]
## # }
## let xml = to_utf16le_with_bom(r#"<?xml encoding='UTF-16'><element/>"#);
## let mut reader = Reader::from_reader(xml.as_ref());
## reader.trim_text(true);
## reader.config_mut().trim_text(true);
##
## let mut buf = Vec::new();
## let mut unsupported = false;
Expand Down
11 changes: 11 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,17 @@

## Unreleased

The way to configure parser is changed. Now all configuration is contained in the
`Config` struct and can be applied at once. When `serde-types` feature is enabled,
configuration is serializable.

### New Features

- [#513]: Allow to continue parsing after getting new `Error::IllFormed`.
- [#677]: Added methods `config()` and `config_mut()` to inspect and change the parser
configuration. Previous builder methods on `Reader` / `NsReader` was replaced by
direct access to fields of config using `reader.config_mut().<...>`.

### Bug Fixes

### Misc Changes
Expand All @@ -26,7 +35,9 @@
- `Error::UnexpectedEof` replaced by `IllFormedError` in some cases
- `Error::UnexpectedToken` replaced by `IllFormedError::DoubleHyphenInComment`

[#513]: https://github.com/tafia/quick-xml/issues/513
[#675]: https://github.com/tafia/quick-xml/pull/675
[#677]: https://github.com/tafia/quick-xml/pull/677


## 0.31.0 -- 2023-10-22
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ let xml = r#"<tag1 att1 = "test">
<tag2>Test 2</tag2>
</tag1>"#;
let mut reader = Reader::from_str(xml);
reader.trim_text(true);
reader.config_mut().trim_text(true);

let mut count = 0;
let mut txt = Vec::new();
Expand Down Expand Up @@ -73,7 +73,7 @@ use std::io::Cursor;

let xml = r#"<this_tag k1="v1" k2="v2"><child>text</child></this_tag>"#;
let mut reader = Reader::from_str(xml);
reader.trim_text(true);
reader.config_mut().trim_text(true);
let mut writer = Writer::new(Cursor::new(Vec::new()));
loop {
match reader.read_event() {
Expand Down
30 changes: 20 additions & 10 deletions benches/microbenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ fn read_event(c: &mut Criterion) {
group.bench_function("trim_text = false", |b| {
b.iter(|| {
let mut r = Reader::from_str(SAMPLE);
r.check_end_names(false);
r.config_mut().check_end_names = false;
let mut count = criterion::black_box(0);
loop {
match r.read_event() {
Expand All @@ -49,7 +49,9 @@ fn read_event(c: &mut Criterion) {
group.bench_function("trim_text = true", |b| {
b.iter(|| {
let mut r = Reader::from_str(SAMPLE);
r.trim_text(true).check_end_names(false);
let config = r.config_mut();
config.trim_text(true);
config.check_end_names = false;
let mut count = criterion::black_box(0);
loop {
match r.read_event() {
Expand All @@ -74,7 +76,7 @@ fn read_resolved_event_into(c: &mut Criterion) {
group.bench_function("trim_text = false", |b| {
b.iter(|| {
let mut r = NsReader::from_str(SAMPLE);
r.check_end_names(false);
r.config_mut().check_end_names = false;
let mut count = criterion::black_box(0);
loop {
match r.read_resolved_event() {
Expand All @@ -93,7 +95,9 @@ fn read_resolved_event_into(c: &mut Criterion) {
group.bench_function("trim_text = true", |b| {
b.iter(|| {
let mut r = NsReader::from_str(SAMPLE);
r.trim_text(true).check_end_names(false);
let config = r.config_mut();
config.trim_text(true);
config.check_end_names = false;
let mut count = criterion::black_box(0);
loop {
match r.read_resolved_event() {
Expand All @@ -120,7 +124,9 @@ fn one_event(c: &mut Criterion) {
b.iter(|| {
let mut r = Reader::from_str(&src);
let mut nbtxt = criterion::black_box(0);
r.trim_text(true).check_end_names(false);
let config = r.config_mut();
config.trim_text(true);
config.check_end_names = false;
match r.read_event() {
Ok(Event::Start(ref e)) => nbtxt += e.len(),
something_else => panic!("Did not expect {:?}", something_else),
Expand All @@ -135,7 +141,9 @@ fn one_event(c: &mut Criterion) {
b.iter(|| {
let mut r = Reader::from_str(&src);
let mut nbtxt = criterion::black_box(0);
r.trim_text(true).check_end_names(false);
let config = r.config_mut();
config.trim_text(true);
config.check_end_names = false;
match r.read_event() {
Ok(Event::Comment(e)) => nbtxt += e.unescape().unwrap().len(),
something_else => panic!("Did not expect {:?}", something_else),
Expand All @@ -150,7 +158,9 @@ fn one_event(c: &mut Criterion) {
b.iter(|| {
let mut r = Reader::from_str(&src);
let mut nbtxt = criterion::black_box(0);
r.trim_text(true).check_end_names(false);
let config = r.config_mut();
config.trim_text(true);
config.check_end_names = false;
match r.read_event() {
Ok(Event::CData(ref e)) => nbtxt += e.len(),
something_else => panic!("Did not expect {:?}", something_else),
Expand All @@ -168,7 +178,7 @@ fn attributes(c: &mut Criterion) {
group.bench_function("with_checks = true", |b| {
b.iter(|| {
let mut r = Reader::from_str(PLAYERS);
r.check_end_names(false);
r.config_mut().check_end_names = false;
let mut count = criterion::black_box(0);
loop {
match r.read_event() {
Expand All @@ -189,7 +199,7 @@ fn attributes(c: &mut Criterion) {
group.bench_function("with_checks = false", |b| {
b.iter(|| {
let mut r = Reader::from_str(PLAYERS);
r.check_end_names(false);
r.config_mut().check_end_names = false;
let mut count = criterion::black_box(0);
loop {
match r.read_event() {
Expand All @@ -210,7 +220,7 @@ fn attributes(c: &mut Criterion) {
group.bench_function("try_get_attribute", |b| {
b.iter(|| {
let mut r = Reader::from_str(PLAYERS);
r.check_end_names(false);
r.config_mut().check_end_names = false;
let mut count = criterion::black_box(0);
loop {
match r.read_event() {
Expand Down
2 changes: 1 addition & 1 deletion compare/benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ fn low_level_comparison(c: &mut Criterion) {
|b, input| {
b.iter(|| {
let mut r = Reader::from_reader(input.as_bytes());
r.check_end_names(false);
r.config_mut().check_end_names = false;
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
Expand Down
2 changes: 1 addition & 1 deletion examples/custom_entities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ const DATA: &str = r#"

fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut reader = Reader::from_str(DATA);
reader.trim_text(true);
reader.config_mut().trim_text(true);

let mut custom_entities: HashMap<String, String> = HashMap::new();
let entity_re = Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?;
Expand Down
2 changes: 1 addition & 1 deletion examples/read_buffered.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ fn main() -> Result<(), quick_xml::Error> {
use quick_xml::reader::Reader;

let mut reader = Reader::from_file("tests/documents/document.xml")?;
reader.trim_text(true);
reader.config_mut().trim_text(true);

let mut buf = Vec::new();

Expand Down
6 changes: 4 additions & 2 deletions examples/read_nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,14 +105,16 @@ fn main() -> Result<(), AppError> {
let mut translations: Vec<Translation> = Vec::new();

let mut reader = Reader::from_str(XML);
reader.trim_text(true);
let config = reader.config_mut();

config.trim_text(true);
// == Handling empty elements ==
// To simply our processing code
// we want the same events for empty elements, like:
// <DefaultSettings Language="es" Greeting="HELLO"/>
// <Text/>
reader.expand_empty_elements(true);
config.expand_empty_elements = true;

let mut buf = Vec::new();

loop {
Expand Down
2 changes: 1 addition & 1 deletion examples/read_texts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ fn main() {
<tag1>text3</tag1><tag1><tag2>text4</tag2></tag1>";

let mut reader = Reader::from_str(xml);
reader.trim_text(true);
reader.config_mut().trim_text(true);

loop {
match reader.read_event() {
Expand Down
7 changes: 3 additions & 4 deletions fuzz/fuzz_targets/fuzz_target_1.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,9 @@ where
{
let mut writer = Writer::new(Cursor::new(Vec::new()));
let mut buf = vec![];
let reader = reader
.expand_empty_elements(true)
.trim_text(true)
.trim_text_end(true);
let config = reader.config_mut();
config.expand_empty_elements = true;
config.trim_text(true);
loop {
let event_result = reader.read_event_into(&mut buf);
if let Ok(ref event) = event_result {
Expand Down
19 changes: 4 additions & 15 deletions fuzz/fuzz_targets/structured_roundtrip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
use arbitrary::{Arbitrary, Unstructured};
use libfuzzer_sys::fuzz_target;
use quick_xml::events::{BytesCData, BytesText, Event};
use quick_xml::reader::{NsReader, Reader};
use quick_xml::reader::{Config, NsReader, Reader};
use quick_xml::writer::Writer;
use std::{hint::black_box, io::Cursor};

Expand Down Expand Up @@ -41,7 +41,7 @@ enum WriterFunc<'a> {
#[derive(Debug, arbitrary::Arbitrary)]
struct Driver<'a> {
writer_funcs: Vec<WriterFunc<'a>>,
reader_config: Vec<bool>,
reader_config: Config,
}

fn fuzz_round_trip(driver: Driver) -> quick_xml::Result<()> {
Expand Down Expand Up @@ -83,13 +83,7 @@ fn fuzz_round_trip(driver: Driver) -> quick_xml::Result<()> {
let xml = writer.into_inner().into_inner();
// The str should be valid as we just generated it, unwrapping **should** be safe.
let mut reader = Reader::from_str(std::str::from_utf8(&xml).unwrap());
let mut config_iter = driver.reader_config.iter();
reader.check_comments(*config_iter.next().unwrap_or(&false));
reader.check_end_names(*config_iter.next().unwrap_or(&false));
reader.expand_empty_elements(*config_iter.next().unwrap_or(&false));
reader.trim_markup_names_in_closing_tags(*config_iter.next().unwrap_or(&false));
reader.trim_text(*config_iter.next().unwrap_or(&false));
reader.trim_text_end(*config_iter.next().unwrap_or(&false));
*reader.config_mut() = driver.reader_config.clone();

loop {
let event = black_box(reader.read_event()?);
Expand All @@ -99,12 +93,7 @@ fn fuzz_round_trip(driver: Driver) -> quick_xml::Result<()> {
}

let mut reader = NsReader::from_reader(&xml[..]);
reader.check_comments(*config_iter.next().unwrap_or(&false));
reader.check_end_names(*config_iter.next().unwrap_or(&false));
reader.expand_empty_elements(*config_iter.next().unwrap_or(&false));
reader.trim_markup_names_in_closing_tags(*config_iter.next().unwrap_or(&false));
reader.trim_text(*config_iter.next().unwrap_or(&false));
reader.trim_text_end(*config_iter.next().unwrap_or(&false));
*reader.config_mut() = driver.reader_config;

loop {
let event = black_box(reader.read_event()?);
Expand Down
11 changes: 7 additions & 4 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2476,7 +2476,7 @@ where
///
/// [`deserialize_seq`]: serde::Deserializer::deserialize_seq
/// [DoS]: https://en.wikipedia.org/wiki/Denial-of-service_attack
/// [auto-expanding feature]: Reader::expand_empty_elements
/// [auto-expanding feature]: crate::reader::Config::expand_empty_elements
#[cfg(feature = "overlapped-lists")]
pub fn event_buffer_size(&mut self, limit: Option<NonZeroUsize>) -> &mut Self {
self.limit = limit;
Expand Down Expand Up @@ -2761,7 +2761,8 @@ where
/// and use specified entity resolver.
pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self {
let mut reader = Reader::from_str(source);
reader.expand_empty_elements(true);
let config = reader.config_mut();
config.expand_empty_elements = true;

Self::new(
SliceReader {
Expand Down Expand Up @@ -2803,7 +2804,8 @@ where
/// UTF-8, you can decode it first before using [`from_str`].
pub fn with_resolver(reader: R, entity_resolver: E) -> Self {
let mut reader = Reader::from_reader(reader);
reader.expand_empty_elements(true);
let config = reader.config_mut();
config.expand_empty_elements = true;

Self::new(
IoReader {
Expand Down Expand Up @@ -3709,7 +3711,8 @@ mod tests {
start_trimmer: StartTrimmer::default(),
};

reader.reader.expand_empty_elements(true);
let config = reader.reader.config_mut();
config.expand_empty_elements = true;

let mut events = Vec::new();

Expand Down
5 changes: 4 additions & 1 deletion src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ impl std::error::Error for SyntaxError {}
/// An error returned if parsed document is not [well-formed], for example,
/// an opened tag is not closed before end of input.
///
/// Those errors are not fatal: after encountering an error you can continue
/// parsing the document.
///
/// [well-formed]: https://www.w3.org/TR/xml11/#dt-wellformed
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum IllFormedError {
Expand Down Expand Up @@ -93,7 +96,7 @@ pub enum IllFormedError {
/// mostly artificial, but you can enable it in the [configuration].
///
/// [specification]: https://www.w3.org/TR/xml11/#sec-comments
/// [configuration]: crate::reader::Reader::check_comments
/// [configuration]: crate::reader::Config::check_comments
DoubleHyphenInComment,
}

Expand Down
12 changes: 6 additions & 6 deletions src/reader/async_tokio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ impl<R: AsyncBufRead + Unpin> Reader<R> {
/// <tag2>Test 2</tag2>
/// </tag1>
/// "#.as_bytes());
/// reader.trim_text(true);
/// reader.config_mut().trim_text(true);
///
/// let mut count = 0;
/// let mut buf = Vec::new();
Expand Down Expand Up @@ -111,7 +111,7 @@ impl<R: AsyncBufRead + Unpin> Reader<R> {
/// </inner>
/// </outer>
/// "#.as_bytes());
/// reader.trim_text(true);
/// reader.config_mut().trim_text(true);
/// let mut buf = Vec::new();
///
/// let start = BytesStart::new("outer");
Expand Down Expand Up @@ -186,7 +186,7 @@ impl<R: AsyncBufRead + Unpin> NsReader<R> {
/// <y:tag2>Test 2</y:tag2>
/// </x:tag1>
/// "#.as_bytes());
/// reader.trim_text(true);
/// reader.config_mut().trim_text(true);
///
/// let mut count = 0;
/// let mut buf = Vec::new();
Expand Down Expand Up @@ -257,7 +257,7 @@ impl<R: AsyncBufRead + Unpin> NsReader<R> {
/// </inner>
/// </outer>
/// "#.as_bytes());
/// reader.trim_text(true);
/// reader.config_mut().trim_text(true);
/// let mut buf = Vec::new();
///
/// let ns = Namespace(b"namespace 1");
Expand Down Expand Up @@ -292,7 +292,7 @@ impl<R: AsyncBufRead + Unpin> NsReader<R> {
buf: &mut Vec<u8>,
) -> Result<Span> {
// According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
// match literally the start name. See `Reader::check_end_names` documentation
// match literally the start name. See `Config::check_end_names` documentation
self.reader.read_to_end_into_async(end, buf).await
}

Expand Down Expand Up @@ -321,7 +321,7 @@ impl<R: AsyncBufRead + Unpin> NsReader<R> {
/// <y:tag2>Test 2</y:tag2>
/// </x:tag1>
/// "#.as_bytes());
/// reader.trim_text(true);
/// reader.config_mut().trim_text(true);
///
/// let mut count = 0;
/// let mut buf = Vec::new();
Expand Down
Loading
Loading