temp

tafia · Jun 23, 2022 · f206a71 · f206a71
1 parent 21687c7
commit f206a71
Showing 1 changed file with 80 additions and 85 deletions.
diff --git a/src/events/attributes.rs b/src/events/attributes.rs
@@ -33,9 +33,88 @@ pub struct Attribute<'a> {
 }
 
 impl<'a> Attribute<'a> {
+    /// Normalizes the attribute value following section 3.3.3 of the XML specification.
     ///
+    /// <https://www.w3.org/TR/xml/#AVNormalize>
+    ///
+    /// * Whitespace-like bytes (`\r`, `\n`, `\t`, `' '`) are trimmed from both ends of the value
+    /// * Each remaining run of whitespace-like bytes is replaced with a single space (`b' '`)
+    /// * The result is then passed to `do_unescape` (presumably resolving references; see TODO below)
     pub fn normalized_value(&'a self) -> Result<Cow<'a, [u8]>, EscapeError> {
-        let normalized = normalize_attribute_value(self.value.as_ref());
+        // TODO: character references, entity references, and the error handling associated with those
+
+        #[derive(PartialEq)]
+        enum ParseState {
+            Space,
+            CDATA,
+        }
+
+        // Strip bytes classified by `is_whitespace` from both ends of the value; yields `b""` if none remain.
+        fn trim_value(attr: &[u8]) -> &[u8] {
+            let first_non_space_char = attr.iter().position(|c| !is_whitespace(*c));
+
+            if first_non_space_char.is_none() {
+                // No non-whitespace byte exists (the value is empty or whitespace-only)
+                return b"";
+            }
+
+            let last_non_space_char = attr.iter().rposition(|c| !is_whitespace(*c));
+
+            // A non-whitespace byte exists, so `rposition` finds one too; the inclusive range keeps that last byte.
+            let begin = first_non_space_char.unwrap();
+            let end = last_non_space_char.unwrap_or(attr.len());
+            &attr[begin..=end]
+        }
+
+        let trimmed_attr = trim_value(self.value.as_ref());
+
+        // Stays `None` (no allocation) until the first whitespace-like byte is met inside the trimmed value.
+        let mut normalized: Option<Vec<u8>> = None;
+        // We start in `CDATA` because leading whitespace-like bytes were already removed by `trim_value`.
+        let mut current_state = ParseState::CDATA;
+
+        // Single pass over the trimmed value. At the first whitespace-like byte, everything processed so far
+        // is copied into a new buffer that is used from then on; before that point nothing is copied.
+        // Character / entity references are not handled in this loop (see the TODO above).
+        for (idx, ch) in trimmed_attr.iter().enumerate() {
+            match ch {
+                b'\n' | b'\r' | b'\t' | b' ' => match current_state {
+                    ParseState::Space => match normalized {
+                        Some(_) => continue,
+                        None => normalized = Some(Vec::from(&trimmed_attr[..idx])),
+                    },
+                    ParseState::CDATA => {
+                        current_state = ParseState::Space;
+                        match normalized.as_mut() {
+                            Some(buf) => buf.push(b' '),
+                            None => {
+                                let mut buf = Vec::from(&trimmed_attr[..idx]);
+                                buf.push(b' ');
+                                normalized = Some(buf);
+                            }
+                        }
+                    }
+                },
+                c @ _ => match current_state {
+                    ParseState::Space => {
+                        current_state = ParseState::CDATA;
+                        if let Some(normalized) = normalized.as_mut() {
+                            normalized.push(*c);
+                        }
+                    }
+                    ParseState::CDATA => {
+                        if let Some(normalized) = normalized.as_mut() {
+                            normalized.push(*c);
+                        }
+                    }
+                },
+            }
+        }
+
+        let normalized = match normalized {
+            Some(normalized) => Cow::Owned(normalized),
+            None => Cow::Borrowed(trimmed_attr),
+        };
         let escaped = do_unescape(&*normalized, None)?;
         Ok(Cow::Owned(escaped.into_owned()))
     }
@@ -190,90 +269,6 @@ impl<'a> From<Attr<&'a [u8]>> for Attribute<'a> {
     }
 }
 
-/// Normalizes the attribute value following section 3.3.3 of the XML specification.
-///
-/// <https://www.w3.org/TR/xml/#AVNormalize>
-///
-/// * Whitespace-like bytes (`\r`, `\n`, `\t`, `' '`) are trimmed from both ends of the value
-/// * Each remaining run of whitespace-like bytes is replaced with a single space (`b' '`)
-/// * Character and entity references are not substituted yet (see the TODO in the body)
-fn normalize_attribute_value(attr: &[u8]) -> Cow<[u8]> {
-    // TODO: character references, entity references, and the error handling associated with those
-
-    #[derive(PartialEq)]
-    enum ParseState {
-        Space,
-        CDATA,
-    }
-
-    // Strip bytes classified by `is_whitespace` from both ends of the value; yields `b""` if none remain.
-    fn trim_value(attr: &[u8]) -> &[u8] {
-        let first_non_space_char = attr.iter().position(|c| !is_whitespace(*c));
-
-        if first_non_space_char.is_none() {
-            // No non-whitespace byte exists (the value is empty or whitespace-only)
-            return b"";
-        }
-
-        let last_non_space_char = attr.iter().rposition(|c| !is_whitespace(*c));
-
-        // A non-whitespace byte exists, so `rposition` finds one too; the inclusive range keeps that last byte.
-        let begin = first_non_space_char.unwrap();
-        let end = last_non_space_char.unwrap_or(attr.len());
-        &attr[begin..=end]
-    }
-
-    let trimmed_attr = trim_value(attr);
-
-    // Stays `None` (no allocation) until the first whitespace-like byte is met inside the trimmed value.
-    let mut normalized: Option<Vec<u8>> = None;
-    // We start in `CDATA` because leading whitespace-like bytes were already removed by `trim_value`.
-    let mut current_state = ParseState::CDATA;
-
-    // Single pass over the trimmed value. At the first whitespace-like byte, everything processed so far
-    // is copied into a new buffer that is used from then on; before that point nothing is copied.
-    // Character / entity references are not handled in this loop (see the TODO above).
-    for (idx, ch) in trimmed_attr.iter().enumerate() {
-        match ch {
-            b'\n' | b'\r' | b'\t' | b' ' => match current_state {
-                ParseState::Space => match normalized {
-                    Some(_) => continue,
-                    None => normalized = Some(Vec::from(&trimmed_attr[..idx])),
-                },
-                ParseState::CDATA => {
-                    current_state = ParseState::Space;
-                    match normalized.as_mut() {
-                        Some(buf) => buf.push(b' '),
-                        None => {
-                            let mut buf = Vec::from(&trimmed_attr[..idx]);
-                            buf.push(b' ');
-                            normalized = Some(buf);
-                        }
-                    }
-                }
-            },
-            c @ _ => match current_state {
-                ParseState::Space => {
-                    current_state = ParseState::CDATA;
-                    if let Some(normalized) = normalized.as_mut() {
-                        normalized.push(*c);
-                    }
-                }
-                ParseState::CDATA => {
-                    if let Some(normalized) = normalized.as_mut() {
-                        normalized.push(*c);
-                    }
-                }
-            },
-        }
-    }
-
-    match normalized {
-        Some(normalized) => Cow::Owned(normalized),
-        None => Cow::Borrowed(trimmed_attr),
-    }
-}
-
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 
 /// Iterator over XML attributes.