From 5d4244fe20dfcc4b5f2e927ab47d2e3885bbc5e7 Mon Sep 17 00:00:00 2001 From: Wren Turkal Date: Tue, 24 Jan 2023 23:58:32 -0800 Subject: [PATCH] Add reserved namespace bindings. This adds xml and xmlns namespace bindings. These are defined at https://www.w3.org/TR/xml-names11/#xmlReserved. --- Changelog.md | 5 + src/errors.rs | 20 +++- src/name.rs | 197 ++++++++++++++++++++++++++++++++++++---- src/reader/ns_reader.rs | 9 +- 4 files changed, 206 insertions(+), 25 deletions(-) diff --git a/Changelog.md b/Changelog.md index 052697c2..b959b08e 100644 --- a/Changelog.md +++ b/Changelog.md @@ -12,6 +12,11 @@ ### New Features +- [#541]: Deserialize specially named `$text` enum variant in [externally tagged] + enums from textual content +- [#545]: Resolve well-known namespaces (xml and xmlns) to their appropriate URIs. + Also, enforce namespace constraints related to these well-known namespaces. + ### Bug Fixes ### Misc Changes diff --git a/src/errors.rs b/src/errors.rs index 14cd7a5c..95caf7a1 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -9,6 +9,16 @@ use std::str::Utf8Error; use std::string::FromUtf8Error; use std::sync::Arc; +#[derive(Clone, Debug)] +pub struct ReservedNamespacePrefixError { + pub prefix: String, +} + +#[derive(Clone, Debug)] +pub struct ReservedNamespaceNameError { + pub name: String, +} + /// The error type used by this crate. 
#[derive(Clone, Debug)] pub enum Error { @@ -45,6 +55,8 @@ pub enum Error { InvalidAttr(AttrError), /// Escape error EscapeError(EscapeError), + ReservedNamespacePrefixError(ReservedNamespacePrefixError), + ReservedNamespaceNameError(ReservedNamespaceNameError), /// Specified namespace prefix is unknown, cannot resolve namespace for it UnknownPrefix(Vec), } @@ -120,7 +132,13 @@ impl fmt::Display for Error { f.write_str("Unknown namespace prefix '")?; write_byte_string(f, prefix)?; f.write_str("'") - } + }, + Error::ReservedNamespacePrefixError(e) => write!( + f, "The namespace prefix `{}` is invalid.", e.prefix + ), + Error::ReservedNamespaceNameError(e) => write!( + f, "The namespace name `{}` is invalid.", e.name + ), } } } diff --git a/src/name.rs b/src/name.rs index 07d261ab..09ec18e5 100644 --- a/src/name.rs +++ b/src/name.rs @@ -3,7 +3,7 @@ //! //! [spec]: https://www.w3.org/TR/xml-names11 -use crate::errors::{Error, Result}; +use crate::errors::{self, Error, Result}; use crate::events::attributes::Attribute; use crate::events::BytesStart; use crate::utils::write_byte_string; @@ -399,12 +399,61 @@ pub(crate) struct NamespaceResolver { nesting_level: i32, } +/// These constants define the [reserved namespaces] for the xml standard. +/// +/// The prefix `xml` is by definition bound to the namespace name +/// `http://www.w3.org/XML/1998/namespace`. It may, but need not, be declared, and must not be +/// undeclared or bound to any other namespace name. Other prefixes must not be bound to this +/// namespace name, and it must not be declared as the default namespace. +/// +/// The prefix `xmlns` is used only to declare namespace bindings and is by definition bound +/// to the namespace name `http://www.w3.org/2000/xmlns/`. It must not be declared or +/// undeclared. Other prefixes must not be bound to this namespace name, and it must not be +/// declared as the default namespace. Element names must not have the prefix `xmlns`. 
+/// +/// [reserved namespaces]: https://www.w3.org/TR/xml-names11/#xmlReserved +const RESERVED_NAMESPACE_XML: (Prefix, Namespace) = ( + Prefix(b"xml"), + Namespace(b"http://www.w3.org/XML/1998/namespace"), +); +const RESERVED_NAMESPACE_XMLNS: (Prefix, Namespace) = ( + Prefix(b"xmlns"), + Namespace(b"http://www.w3.org/2000/xmlns/"), +); + +const RESERVED_NAMESPACES: [(Prefix, Namespace); 2] = + [RESERVED_NAMESPACE_XML, RESERVED_NAMESPACE_XMLNS]; + impl NamespaceResolver { + pub fn new_root_resolver_and_buf() -> (Self, Vec) { + let mut bindings = Vec::new(); + let mut buffer = Vec::new(); + for ent in RESERVED_NAMESPACES { + let prefix = ent.0.into_inner(); + let uri = ent.1.into_inner(); + bindings.push(NamespaceEntry { + start: buffer.len(), + prefix_len: prefix.len(), + value_len: uri.len(), + level: 0, + }); + buffer.extend(prefix); + buffer.extend(uri); + } + + ( + Self { + bindings, + ..Self::default() + }, + buffer, + ) + } /// Begins a new scope and add to it all [namespace bindings] that found in /// the specified start element. 
/// /// [namespace binding]: https://www.w3.org/TR/xml-names11/#dt-NSDecl - pub fn push(&mut self, start: &BytesStart, buffer: &mut Vec) { + pub fn push(&mut self, start: &BytesStart, buffer: &mut Vec) -> Result<()> { self.nesting_level += 1; let level = self.nesting_level; // adds new namespaces for attributes starting with 'xmlns:' and for the 'xmlns' @@ -422,8 +471,50 @@ impl NamespaceResolver { level, }); } + Some(PrefixDeclaration::Named(prefix)) + if prefix == RESERVED_NAMESPACE_XMLNS.0.into_inner() => + { + return Err(Error::ReservedNamespacePrefixError( + errors::ReservedNamespacePrefixError { + prefix: String::from_utf8(prefix.to_vec()).unwrap(), + }, + )) + } Some(PrefixDeclaration::Named(prefix)) => { let start = buffer.len(); + + if prefix == RESERVED_NAMESPACE_XML.0.into_inner() + && v != RESERVED_NAMESPACE_XML.1.into_inner() + { + // error xml prefix not set to normal xml uri + return Err(Error::ReservedNamespacePrefixError( + errors::ReservedNamespacePrefixError { + prefix: String::from_utf8(prefix.to_vec()).unwrap(), + }, + )); + } else if v == RESERVED_NAMESPACE_XML.1.into_inner() { + // error, non-`xml` prefix set to xml uri + return Err(Error::ReservedNamespacePrefixError( + errors::ReservedNamespacePrefixError { + prefix: String::from_utf8(prefix.to_vec()).unwrap(), + }, + )); + } else if prefix == RESERVED_NAMESPACE_XMLNS.0.into_inner() { + // error attempt to override `xmlns` prefix + return Err(Error::ReservedNamespacePrefixError( + errors::ReservedNamespacePrefixError { + prefix: String::from_utf8(prefix.to_vec()).unwrap(), + }, + )); + } else if v == RESERVED_NAMESPACE_XMLNS.1.into_inner() { + // error, non-`xmlns` prefix set to xmlns uri + return Err(Error::ReservedNamespaceNameError( + errors::ReservedNamespaceNameError { + name: String::from_utf8(prefix.to_vec()).unwrap(), + }, + )); + } + // test for xmlns uri buffer.extend_from_slice(prefix); buffer.extend_from_slice(&v); self.bindings.push(NamespaceEntry { @@ -439,6 +530,7 @@ impl 
NamespaceResolver { break; } } + Ok(()) } /// Ends a top-most scope by popping all [namespace binding], that was added by @@ -567,7 +659,7 @@ mod namespaces { /// Basic tests that checks that basic resolver functionality is working #[test] - fn basic() { + fn basic() -> Result<()> { let name = QName(b"simple"); let ns = Namespace(b"default"); @@ -577,11 +669,11 @@ mod namespaces { resolver.push( &BytesStart::from_content(" xmlns='default'", 0), &mut buffer, - ); + )?; assert_eq!(buffer, b"default"); // Check that tags without namespaces does not change result - resolver.push(&BytesStart::from_content("", 0), &mut buffer); + resolver.push(&BytesStart::from_content("", 0), &mut buffer)?; assert_eq!(buffer, b"default"); resolver.pop(&mut buffer); @@ -595,11 +687,12 @@ mod namespaces { (Unbound, LocalName(b"simple")) ); assert_eq!(resolver.find(name, &buffer), Bound(ns)); + Ok(()) } /// Test adding a second level of namespaces, which replaces the previous binding #[test] - fn override_namespace() { + fn override_namespace() -> Result<()> { let name = QName(b"simple"); let old_ns = Namespace(b"old"); let new_ns = Namespace(b"new"); @@ -607,8 +700,8 @@ mod namespaces { let mut resolver = NamespaceResolver::default(); let mut buffer = Vec::new(); - resolver.push(&BytesStart::from_content(" xmlns='old'", 0), &mut buffer); - resolver.push(&BytesStart::from_content(" xmlns='new'", 0), &mut buffer); + resolver.push(&BytesStart::from_content(" xmlns='old'", 0), &mut buffer)?; + resolver.push(&BytesStart::from_content(" xmlns='new'", 0), &mut buffer)?; assert_eq!(buffer, b"oldnew"); assert_eq!( @@ -632,6 +725,7 @@ mod namespaces { (Unbound, LocalName(b"simple")) ); assert_eq!(resolver.find(name, &buffer), Bound(old_ns)); + Ok(()) } /// Test adding a second level of namespaces, which reset the previous binding @@ -639,15 +733,15 @@ mod namespaces { /// /// See #[test] - fn reset() { + fn reset() -> Result<()> { let name = QName(b"simple"); let old_ns = Namespace(b"old"); let mut 
resolver = NamespaceResolver::default(); let mut buffer = Vec::new(); - resolver.push(&BytesStart::from_content(" xmlns='old'", 0), &mut buffer); - resolver.push(&BytesStart::from_content(" xmlns=''", 0), &mut buffer); + resolver.push(&BytesStart::from_content(" xmlns='old'", 0), &mut buffer)?; + resolver.push(&BytesStart::from_content(" xmlns=''", 0), &mut buffer)?; assert_eq!(buffer, b"old"); assert_eq!( @@ -671,6 +765,7 @@ mod namespaces { (Unbound, LocalName(b"simple")) ); assert_eq!(resolver.find(name, &buffer), Bound(old_ns)); + Ok(()) } } @@ -680,7 +775,7 @@ mod namespaces { /// Basic tests that checks that basic resolver functionality is working #[test] - fn basic() { + fn basic() -> Result<()> { let name = QName(b"p:with-declared-prefix"); let ns = Namespace(b"default"); @@ -690,11 +785,11 @@ mod namespaces { resolver.push( &BytesStart::from_content(" xmlns:p='default'", 0), &mut buffer, - ); + )?; assert_eq!(buffer, b"pdefault"); // Check that tags without namespaces does not change result - resolver.push(&BytesStart::from_content("", 0), &mut buffer); + resolver.push(&BytesStart::from_content("", 0), &mut buffer)?; assert_eq!(buffer, b"pdefault"); resolver.pop(&mut buffer); @@ -708,11 +803,12 @@ mod namespaces { (Bound(ns), LocalName(b"with-declared-prefix")) ); assert_eq!(resolver.find(name, &buffer), Bound(ns)); + Ok(()) } /// Test adding a second level of namespaces, which replaces the previous binding #[test] - fn override_namespace() { + fn override_namespace() -> Result<()> { let name = QName(b"p:with-declared-prefix"); let old_ns = Namespace(b"old"); let new_ns = Namespace(b"new"); @@ -720,8 +816,8 @@ mod namespaces { let mut resolver = NamespaceResolver::default(); let mut buffer = Vec::new(); - resolver.push(&BytesStart::from_content(" xmlns:p='old'", 0), &mut buffer); - resolver.push(&BytesStart::from_content(" xmlns:p='new'", 0), &mut buffer); + resolver.push(&BytesStart::from_content(" xmlns:p='old'", 0), &mut buffer)?; + 
resolver.push(&BytesStart::from_content(" xmlns:p='new'", 0), &mut buffer)?; assert_eq!(buffer, b"poldpnew"); assert_eq!( @@ -745,6 +841,7 @@ mod namespaces { (Bound(old_ns), LocalName(b"with-declared-prefix")) ); assert_eq!(resolver.find(name, &buffer), Bound(old_ns)); + Ok(()) } /// Test adding a second level of namespaces, which reset the previous binding @@ -752,15 +849,15 @@ mod namespaces { /// /// See #[test] - fn reset() { + fn reset() -> Result<()> { let name = QName(b"p:with-declared-prefix"); let old_ns = Namespace(b"old"); let mut resolver = NamespaceResolver::default(); let mut buffer = Vec::new(); - resolver.push(&BytesStart::from_content(" xmlns:p='old'", 0), &mut buffer); - resolver.push(&BytesStart::from_content(" xmlns:p=''", 0), &mut buffer); + resolver.push(&BytesStart::from_content(" xmlns:p='old'", 0), &mut buffer)?; + resolver.push(&BytesStart::from_content(" xmlns:p=''", 0), &mut buffer)?; assert_eq!(buffer, b"poldp"); assert_eq!( @@ -784,6 +881,66 @@ mod namespaces { (Bound(old_ns), LocalName(b"with-declared-prefix")) ); assert_eq!(resolver.find(name, &buffer), Bound(old_ns)); + Ok(()) + } + } + + mod builtin_prefixes { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn undeclared_reserved_prefix_xml() { + let prefix_name = RESERVED_NAMESPACE_XML.0.into_inner(); + let namespace_uri = RESERVED_NAMESPACE_XML.1.into_inner(); + + let prefix = Prefix(prefix_name); + let namespace = Namespace(namespace_uri); + + let (resolver, buffer) = NamespaceResolver::new_root_resolver_and_buf(); + //let resolver = NamespaceResolver::default(); + let tag = b"random"; + + let name_buf = [prefix.into_inner(), tag].join(&b":"[..]); + let name = QName(&name_buf); + + assert_eq!( + resolver.resolve(name, &buffer, true), + (Bound(namespace), LocalName(tag)) + ); + + assert_eq!( + resolver.resolve(name.clone(), &buffer, false), + (Bound(namespace), LocalName(tag)) + ); + assert_eq!(resolver.find(name.clone(), &buffer), Bound(namespace)); + } + + 
#[test] + fn undeclared_reserved_prefix_xmlns() { + let prefix_name = RESERVED_NAMESPACE_XMLNS.0.into_inner(); + let namespace_uri = RESERVED_NAMESPACE_XMLNS.1.into_inner(); + + let prefix = Prefix(prefix_name); + let namespace = Namespace(namespace_uri); + + let (resolver, buffer) = NamespaceResolver::new_root_resolver_and_buf(); + //let resolver = NamespaceResolver::default(); + let tag = b"random"; + + let name_buf = [prefix.into_inner(), tag].join(&b":"[..]); + let name = QName(&name_buf); + + assert_eq!( + resolver.resolve(name, &buffer, true), + (Bound(namespace), LocalName(tag)) + ); + + assert_eq!( + resolver.resolve(name.clone(), &buffer, false), + (Bound(namespace), LocalName(tag)) + ); + assert_eq!(resolver.find(name.clone(), &buffer), Bound(namespace)); } } diff --git a/src/reader/ns_reader.rs b/src/reader/ns_reader.rs index 09457f28..c904204d 100644 --- a/src/reader/ns_reader.rs +++ b/src/reader/ns_reader.rs @@ -47,10 +47,11 @@ impl NsReader { impl NsReader { #[inline] fn new(reader: Reader) -> Self { + let (ns_resolver, buffer) = NamespaceResolver::new_root_resolver_and_buf(); Self { reader, - buffer: Vec::new(), - ns_resolver: NamespaceResolver::default(), + buffer, + ns_resolver, pending_pop: false, } } @@ -74,11 +75,11 @@ impl NsReader { pub(super) fn process_event<'i>(&mut self, event: Result>) -> Result> { match event { Ok(Event::Start(e)) => { - self.ns_resolver.push(&e, &mut self.buffer); + self.ns_resolver.push(&e, &mut self.buffer)?; Ok(Event::Start(e)) } Ok(Event::Empty(e)) => { - self.ns_resolver.push(&e, &mut self.buffer); + self.ns_resolver.push(&e, &mut self.buffer)?; // notify next `read_event_impl()` invocation that it needs to pop this // namespace scope self.pending_pop = true;