From e693b679aa192d6cffafc17d861263c577d4598e Mon Sep 17 00:00:00 2001 From: Phillip Davis Date: Sat, 20 Apr 2024 12:41:05 -0400 Subject: [PATCH] change: allow `unescape_with` to override standard replacements This involves swapping the order in which the main loop of that function checks for `named_entities` and `entity_resolver` closures, namely we check the user-provided `entity_resolver` first. This shouldn't be a breaking change unless some existing client code already had overridden `named_entity` replacements, but those replacements would have been noops under the previous implementation so the fix should be trivial. --- Changelog.md | 3 +++ src/escape.rs | 35 +++++++++++++++++++++++++++++++++-- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/Changelog.md b/Changelog.md index fbe531b1..3736947c 100644 --- a/Changelog.md +++ b/Changelog.md @@ -34,6 +34,8 @@ to get an offset of the error position. For `SyntaxError`s the range it can handle every attribute that does not match existing cases within an enum variant. - [#722]: Allow to pass owned strings to `Writer::create_element`. This is breaking change! - [#275]: Added `ElementWriter::new_line()` which enables pretty printing elements with multiple attributes. +- [#734]: Allow to override resolution of predefined entities (`lt`, `gt`, `apos`, `quot`, `amp`) + in `unescape_with` family of methods. ### Bug Fixes @@ -81,6 +83,7 @@ to get an offset of the error position. 
For `SyntaxError`s the range [#704]: https://github.com/tafia/quick-xml/pull/704 [#705]: https://github.com/tafia/quick-xml/pull/705 [#722]: https://github.com/tafia/quick-xml/pull/722 +[#734]: https://github.com/tafia/quick-xml/pull/734 [#738]: https://github.com/tafia/quick-xml/pull/738 diff --git a/src/escape.rs b/src/escape.rs index ee0b178b..59a826e3 100644 --- a/src/escape.rs +++ b/src/escape.rs @@ -192,8 +192,39 @@ pub fn unescape(raw: &str) -> Result<Cow<str>, EscapeError> { /// /// If feature [`escape-html`] is enabled, then recognizes all [HTML5 escapes]. /// +/// Predefined entities will be resolved _after_ trying to resolve with `resolve_entity`, +/// which allows you to override default behavior, which is required in some XML dialects. +/// +/// Character references (`&#hh;`) cannot be overridden, they are resolved before +/// calling `resolve_entity`. +/// +/// Note that entities will not be resolved recursively. In order to satisfy the +/// XML [requirements] you should unescape nested entities by yourself. 
+/// +/// # Example +/// +/// ``` +/// # use quick_xml::escape::unescape_with; +/// # use pretty_assertions::assert_eq; +/// let override_named_entities = |entity: &str| match entity { +/// // Override standard entities +/// "lt" => Some("FOO"), +/// "gt" => Some("BAR"), +/// // Resolve custom entities +/// "baz" => Some("<"), +/// // All other entities produce an EscapeError::UnrecognizedSymbol error +/// _ => None, +/// }; +/// +/// assert_eq!( +/// unescape_with("&amp;&lt;test&gt;&baz;", override_named_entities).unwrap(), +/// "&FOOtestBAR<" +/// ); +/// ``` +/// /// [`escape-html`]: ../index.html#escape-html /// [HTML5 escapes]: https://dev.w3.org/html5/html-author/charref +/// [requirements]: https://www.w3.org/TR/xml11/#intern-replacement pub fn unescape_with<'input, 'entity, F>( raw: &'input str, mut resolve_entity: F, @@ -221,10 +252,10 @@ where if let Some(entity) = pat.strip_prefix('#') { let codepoint = parse_number(entity, start..end)?; unescaped.push_str(codepoint.encode_utf8(&mut [0u8; 4])); - } else if let Some(value) = named_entity(pat) { - unescaped.push_str(value); } else if let Some(value) = resolve_entity(pat) { unescaped.push_str(value); + } else if let Some(value) = named_entity(pat) { + unescaped.push_str(value); } else { return Err(EscapeError::UnrecognizedSymbol( start + 1..end,