From ccf88892adf634d16fda239e6086e666f0d96b13 Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sat, 30 May 2020 17:15:03 -0400 Subject: [PATCH] Add safety comments to header::name module The comments document the invariant, preconditions, and post-conditions that together ensure that the use of unsafe related to UTF-8 assumptions (in calls to ByteStr::from_utf8_unchecked()) are sound. --- src/header/name.rs | 83 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 19 deletions(-) diff --git a/src/header/name.rs b/src/header/name.rs index 533e389f..2e7cc232 100644 --- a/src/header/name.rs +++ b/src/header/name.rs @@ -51,6 +51,7 @@ enum Repr { struct Custom(ByteStr); #[derive(Debug, Clone)] +// Invariant: If lower then buf is valid UTF-8. struct MaybeLower<'a> { buf: &'a [u8], lower: bool, @@ -979,6 +980,8 @@ standard_headers! { /// / DIGIT / ALPHA /// ; any VCHAR, except delimiters /// ``` +// HEADER_CHARS maps every byte that is 128 or larger to 0 so everything that is +// mapped by HEADER_CHARS, maps to a valid single-byte UTF-8 codepoint. const HEADER_CHARS: [u8; 256] = [ // 0 1 2 3 4 5 6 7 8 9 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x @@ -1010,6 +1013,8 @@ const HEADER_CHARS: [u8; 256] = [ ]; /// Valid header name characters for HTTP/2.0 and HTTP/3.0 +// HEADER_CHARS_H2 maps every byte that is 128 or larger to 0 so everything that is +// mapped by HEADER_CHARS_H2, maps to a valid single-byte UTF-8 codepoint. const HEADER_CHARS_H2: [u8; 256] = [ // 0 1 2 3 4 5 6 7 8 9 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x @@ -1044,6 +1049,7 @@ const HEADER_CHARS_H2: [u8; 256] = [ /// This version is best under optimized mode, however in a wasm debug compile, /// the `eq` macro expands to 1 + 1 + 1 + 1... and wasm explodes when this chain gets too long /// See https://github.com/DenisKolodin/yew/issues/478 +// Precondition: table maps all bytes that are not valid single-byte UTF-8 to something that is. fn parse_hdr<'a>( data: &'a [u8], b: &'a mut [MaybeUninit; SCRATCH_BUF_SIZE], @@ -1053,30 +1059,33 @@ fn parse_hdr<'a>( let len = data.len(); - // Precondition: each element of buf must be intitialized + // Precondition: each element of buf must be intitialized and must be + // a valid single-byte UTF-8 codepoint. let validate = |buf: &'a [MaybeUninit]| { // Safety: follows from the precondtion let buf = unsafe {slice_assume_init(buf)}; if buf.iter().any(|&b| b == 0) { Err(InvalidHeaderName::new()) } else { + // Precondition: satified by the precondition of validate. Ok(HdrName::custom(buf, true)) } }; // Called as either eq!(b == b'a' b'b' b'c') or eq!(b[i] == b'a' b'b' b'c') - // Precondition: the first n elements of b (or the first n starting at i) - // must be intitialized, where n is the number of bytes listed after the '==' - // in the invocation. + // Precondition: the first n elements of b (or the first n starting at i) must be + // intitialized, where n is the number of bytes listed after the '==' in the + // invocation. macro_rules! eq { (($($cmp:expr,)*) $v:ident[$n:expr] ==) => { $($cmp) && * }; (($($cmp:expr,)*) $v:ident[$n:expr] == $a:tt $($rest:tt)*) => { - // Safety: this arm is matched once for each byte after the '==' in - // the invocation (starting at 0 or i depending on the form of the call). - // By the precondtion $v[$n] is intitialized for each such match. - eq!(($($cmp,)* unsafe {*($v[$n].as_ptr())} == $a ,) $v[$n+1] == $($rest)*) + // Safety: this arm is matched once for each byte after the '==' in the + // invocation (starting at 0 or i depending on the form of the call). By + // the precondtion $v[$n] is intitialized for each such match. + eq!(($($cmp,)* unsafe {*($v[$n].as_ptr())} == $a ,) $v[$n+1] == + $($rest)*) }; ($v:ident == $($rest:tt)+) => { eq!(() $v[0] == $($rest)+) @@ -1086,11 +1095,12 @@ fn parse_hdr<'a>( }; } - - // Post-condition: the first n elements of $d are intitialized where n is the - // third paramter to the macro. Note that this macro overwrite the first n elements - // of $d without dropping the existing contents (if any) but the elements of $d - // are u8's so no drop is necessary. + // Post-condition: the first n elements of $d are intitialized to a valid + // single-byte UTF-8 codepoint where n is the third paramter to the macro. Note + // that this macro overwrite the first n elements of $d without dropping the + // existing contents (if any) but the elements of $d are u8's so no drop is + // necessary. The UTF-8 part of the post-condition follows from the precondition + // on table that is a part of parse_hdr(). macro_rules! to_lower { ($d:ident, $src:ident, 1) => { $d[0] = MaybeUninit::new(table[$src[0] as usize]); }; ($d:ident, $src:ident, 2) => { to_lower!($d, $src, 1); $d[1] = MaybeUninit::new(table[$src[1] as usize]); }; @@ -1155,8 +1165,9 @@ fn parse_hdr<'a>( Ok(Te.into()) } else { // Precondition: the post-condition on to_lower!() ensures that the - // first 2 elements of b are intitialized. len == 2 so all of - // b[..len] is intitialized. + // first 2 elements of b are intitialized and are valid single-byte + // UTF-8. len == 2 so all of b[..len] is intitialized and is valid + // UTF-8. validate(&b[..len]) } } @@ -1573,11 +1584,13 @@ fn parse_hdr<'a>( _ => { if len < 64 { for i in 0..len { + // The precondition on table for parse_hdr() means that b[i] is + // intitialized to a valid single-byte UTF-8 codepoint. b[i] = MaybeUninit::new(table[data[i] as usize]); } // Precondition: the first len bytes of b are intitialized in the loop above so - // b[..len] is intitialized. + // b[..len] is intitialized and is valid UTF-8. validate(&b[..len]) } else { Ok(HdrName::custom(data, false)) @@ -1588,6 +1601,7 @@ fn parse_hdr<'a>( #[cfg(all(debug_assertions, target_arch = "wasm32"))] /// This version works best in debug mode in wasm +// Precondition: table maps all bytes that are not valid single-byte UTF-8 to something that is. fn parse_hdr<'a>( data: &'a [u8], b: &'a mut [MaybeUninit; SCRATCH_BUF_SIZE], @@ -1597,11 +1611,13 @@ fn parse_hdr<'a>( let len = data.len(); + // Precondition: the first len bytes of buf are valid UTF-8. let validate = |buf: &'a [u8], len: usize| { let buf = &buf[..len]; if buf.iter().any(|&b| b == 0) { Err(InvalidHeaderName::new()) } else { + // Precondition: follows from the precondtion on validate. Ok(HdrName::custom(buf, true)) } }; @@ -1617,9 +1633,13 @@ fn parse_hdr<'a>( len if len > 64 => Ok(HdrName::custom(data, false)), len => { // Read from data into the buffer - transforming using `table` as we go. - // The assignment to *out ensures that each byte is intitialized. Since - // *out is a u8 it doesn't matter that we are not dropping *out before accessing it. - data.iter().zip(b.iter_mut()).for_each(|(index, out)| *out = MaybeUninit::new(table[*index as usize])); + // The assignment to *out ensures that each byte is intitialized. Since + // *out is a u8 it doesn't matter that we are not dropping *out before + // accessing it. The precondition on table for parse_hdr() means that + // each intitialized byte of b is valid UTF-8. + data.iter().zip(b.iter_mut()).for_each(|(index, out)| *out = + MaybeUninit::new(table[*index as + usize])); // Safety: We just intitialized the first len bytes of b in the previous line. let b = unsafe {slice_assume_init(&b[..len])}; match &b[0..len] { @@ -1704,6 +1724,8 @@ fn parse_hdr<'a>( b"content-security-policy-report-only" => { Ok(ContentSecurityPolicyReportOnly.into()) } + // Precondition: other is the first len bytes of b which was + // initialized to valid UTF-8 above. other => validate(other, len), } } @@ -1724,10 +1746,12 @@ impl HeaderName { /// This function normalizes the input. pub fn from_bytes(src: &[u8]) -> Result { let mut buf = uninit_u8_array(); + // Precondition: HEADER_CHARS is a valid table for parse_hdr(). match parse_hdr(src, &mut buf, &HEADER_CHARS)?.inner { Repr::Standard(std) => Ok(std.into()), Repr::Custom(MaybeLower { buf, lower: true }) => { let buf = Bytes::copy_from_slice(buf); + // Safety: the invariant on MaybeLower ensures buf is valid UTF-8. let val = unsafe { ByteStr::from_utf8_unchecked(buf) }; Ok(Custom(val).into()) } @@ -1736,6 +1760,7 @@ impl HeaderName { let mut dst = BytesMut::with_capacity(buf.len()); for b in buf.iter() { + // HEADER_CHARS maps all bytes to valid single-byte UTF-8 let b = HEADER_CHARS[*b as usize]; if b == 0 { @@ -1745,6 +1770,9 @@ impl HeaderName { dst.put_u8(b); } + // Safety: the loop above maps all bytes in buf to valid single byte + // UTF-8 before copying them into dst. This means that dst (and hence + // dst.freeze()) is valid UTF-8. let val = unsafe { ByteStr::from_utf8_unchecked(dst.freeze()) }; Ok(Custom(val).into()) @@ -1772,21 +1800,27 @@ impl HeaderName { /// ``` pub fn from_lowercase(src: &[u8]) -> Result { let mut buf = uninit_u8_array(); + // Precondition: HEADER_CHARS_H2 is a valid table for parse_hdr() match parse_hdr(src, &mut buf, &HEADER_CHARS_H2)?.inner { Repr::Standard(std) => Ok(std.into()), Repr::Custom(MaybeLower { buf, lower: true }) => { let buf = Bytes::copy_from_slice(buf); + // Safety: the invariant on MaybeLower ensures buf is valid UTF-8. let val = unsafe { ByteStr::from_utf8_unchecked(buf) }; Ok(Custom(val).into()) } Repr::Custom(MaybeLower { buf, lower: false }) => { for &b in buf.iter() { + // HEADER_CHARS maps all bytes that are not valid single-byte + // UTF-8 to 0 so this check returns an error for invalid UTF-8. if b != HEADER_CHARS[b as usize] { return Err(InvalidHeaderName::new()); } } let buf = Bytes::copy_from_slice(buf); + // Safety: the loop above checks that each byte of buf (either + // version) is valid UTF-8. let val = unsafe { ByteStr::from_utf8_unchecked(buf) }; Ok(Custom(val).into()) } @@ -1831,6 +1865,7 @@ impl HeaderName { pub fn from_static(src: &'static str) -> HeaderName { let bytes = src.as_bytes(); let mut buf = uninit_u8_array(); + // Precondition: HEADER_CHARS_H2 is a valid table for parse_hdr() match parse_hdr(bytes, &mut buf, &HEADER_CHARS_H2) { Ok(hdr_name) => match hdr_name.inner { Repr::Standard(std) => std.into(), @@ -2073,8 +2108,10 @@ impl Error for InvalidHeaderName {} // ===== HdrName ===== impl<'a> HdrName<'a> { + // Precondition: if lower then buf is valid UTF-8 fn custom(buf: &'a [u8], lower: bool) -> HdrName<'a> { HdrName { + // Invariant (on MaybeLower): follows from the precondition inner: Repr::Custom(MaybeLower { buf: buf, lower: lower, @@ -2086,6 +2123,7 @@ impl<'a> HdrName<'a> { where F: FnOnce(HdrName<'_>) -> U, { let mut buf = uninit_u8_array(); + // Precondition: HEADER_CHARS is a valid table for parse_hdr(). let hdr = parse_hdr(hdr, &mut buf, &HEADER_CHARS)?; Ok(f(hdr)) } @@ -2096,6 +2134,7 @@ impl<'a> HdrName<'a> { { let mut buf = uninit_u8_array(); let hdr = + // Precondition: HEADER_CHARS is a valid table for parse_hdr(). parse_hdr(hdr.as_bytes(), &mut buf, &HEADER_CHARS).expect("static str is invalid name"); f(hdr) } @@ -2111,6 +2150,7 @@ impl<'a> From> for HeaderName { Repr::Custom(maybe_lower) => { if maybe_lower.lower { let buf = Bytes::copy_from_slice(&maybe_lower.buf[..]); + // Safety: the invariant on MaybeLower ensures buf is valid UTF-8. let byte_str = unsafe { ByteStr::from_utf8_unchecked(buf) }; HeaderName { @@ -2121,9 +2161,14 @@ impl<'a> From> for HeaderName { let mut dst = BytesMut::with_capacity(maybe_lower.buf.len()); for b in maybe_lower.buf.iter() { + // HEADER_CHARS maps each byte to a valid single-byte UTF-8 + // codepoint. dst.put_u8(HEADER_CHARS[*b as usize]); } + // Safety: the loop above maps each byte of maybe_lower.buf to a + // valid single-byte UTF-8 codepoint before copying it into dst. + // dst (and hence dst.freeze()) is thus valid UTF-8. let buf = unsafe { ByteStr::from_utf8_unchecked(dst.freeze()) }; HeaderName {