From f1455245bfe62b8f5fe9e03298ef12967bb798e6 Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sat, 9 May 2020 17:38:42 -0400 Subject: [PATCH 1/8] Convert header::name to use MaybeUninit The use of MaybeUninit to replace the depreciated mem::uninitialized() is supported because the minimum supported version of Rust is 1.39. This change moves most of the uninitialized memory related use of unsafe into the parse_hdr() function. --- src/header/name.rs | 186 +++++++++++++++++++++++---------------------- 1 file changed, 94 insertions(+), 92 deletions(-) diff --git a/src/header/name.rs b/src/header/name.rs index 4d70ff30..270c3bc5 100644 --- a/src/header/name.rs +++ b/src/header/name.rs @@ -5,8 +5,9 @@ use std::borrow::Borrow; use std::error::Error; use std::convert::{TryFrom}; use std::hash::{Hash, Hasher}; +use std::mem::MaybeUninit; use std::str::FromStr; -use std::{fmt, mem}; +use std::fmt; /// Represents an HTTP header field name /// @@ -1045,7 +1046,7 @@ macro_rules! eq { $($cmp) && * }; (($($cmp:expr,)*) $v:ident[$n:expr] == $a:tt $($rest:tt)*) => { - eq!(($($cmp,)* $v[$n] == $a,) $v[$n+1] == $($rest)*) + eq!(($($cmp,)* unsafe {*($v[$n].as_ptr())} == $a,) $v[$n+1] == $($rest)*) }; ($v:ident == $($rest:tt)+) => { eq!(() $v[0] == $($rest)+) @@ -1061,15 +1062,15 @@ macro_rules! eq { /// See https://github.com/DenisKolodin/yew/issues/478 fn parse_hdr<'a>( data: &'a [u8], - b: &'a mut [u8; 64], + b: &'a mut [MaybeUninit; SCRATCH_BUF_SIZE], table: &[u8; 256], ) -> Result, InvalidHeaderName> { use self::StandardHeader::*; let len = data.len(); - let validate = |buf: &'a [u8], len: usize| { - let buf = &buf[..len]; + let validate = |buf: &'a [MaybeUninit]| { + let buf = unsafe {slice_assume_init(buf)}; if buf.iter().any(|&b| b == 0) { Err(InvalidHeaderName::new()) } else { @@ -1079,41 +1080,41 @@ fn parse_hdr<'a>( macro_rules! to_lower { - ($d:ident, $src:ident, 1) => { $d[0] = table[$src[0] as usize]; }; - ($d:ident, $src:ident, 2) => { to_lower!($d, $src, 1); $d[1] = table[$src[1] as usize]; }; - ($d:ident, $src:ident, 3) => { to_lower!($d, $src, 2); $d[2] = table[$src[2] as usize]; }; - ($d:ident, $src:ident, 4) => { to_lower!($d, $src, 3); $d[3] = table[$src[3] as usize]; }; - ($d:ident, $src:ident, 5) => { to_lower!($d, $src, 4); $d[4] = table[$src[4] as usize]; }; - ($d:ident, $src:ident, 6) => { to_lower!($d, $src, 5); $d[5] = table[$src[5] as usize]; }; - ($d:ident, $src:ident, 7) => { to_lower!($d, $src, 6); $d[6] = table[$src[6] as usize]; }; - ($d:ident, $src:ident, 8) => { to_lower!($d, $src, 7); $d[7] = table[$src[7] as usize]; }; - ($d:ident, $src:ident, 9) => { to_lower!($d, $src, 8); $d[8] = table[$src[8] as usize]; }; - ($d:ident, $src:ident, 10) => { to_lower!($d, $src, 9); $d[9] = table[$src[9] as usize]; }; - ($d:ident, $src:ident, 11) => { to_lower!($d, $src, 10); $d[10] = table[$src[10] as usize]; }; - ($d:ident, $src:ident, 12) => { to_lower!($d, $src, 11); $d[11] = table[$src[11] as usize]; }; - ($d:ident, $src:ident, 13) => { to_lower!($d, $src, 12); $d[12] = table[$src[12] as usize]; }; - ($d:ident, $src:ident, 14) => { to_lower!($d, $src, 13); $d[13] = table[$src[13] as usize]; }; - ($d:ident, $src:ident, 15) => { to_lower!($d, $src, 14); $d[14] = table[$src[14] as usize]; }; - ($d:ident, $src:ident, 16) => { to_lower!($d, $src, 15); $d[15] = table[$src[15] as usize]; }; - ($d:ident, $src:ident, 17) => { to_lower!($d, $src, 16); $d[16] = table[$src[16] as usize]; }; - ($d:ident, $src:ident, 18) => { to_lower!($d, $src, 17); $d[17] = table[$src[17] as usize]; }; - ($d:ident, $src:ident, 19) => { to_lower!($d, $src, 18); $d[18] = table[$src[18] as usize]; }; - ($d:ident, $src:ident, 20) => { to_lower!($d, $src, 19); $d[19] = table[$src[19] as usize]; }; - ($d:ident, $src:ident, 21) => { to_lower!($d, $src, 20); $d[20] = table[$src[20] as usize]; }; - ($d:ident, $src:ident, 22) => { to_lower!($d, $src, 21); $d[21] = table[$src[21] as usize]; }; - ($d:ident, $src:ident, 23) => { to_lower!($d, $src, 22); $d[22] = table[$src[22] as usize]; }; - ($d:ident, $src:ident, 24) => { to_lower!($d, $src, 23); $d[23] = table[$src[23] as usize]; }; - ($d:ident, $src:ident, 25) => { to_lower!($d, $src, 24); $d[24] = table[$src[24] as usize]; }; - ($d:ident, $src:ident, 26) => { to_lower!($d, $src, 25); $d[25] = table[$src[25] as usize]; }; - ($d:ident, $src:ident, 27) => { to_lower!($d, $src, 26); $d[26] = table[$src[26] as usize]; }; - ($d:ident, $src:ident, 28) => { to_lower!($d, $src, 27); $d[27] = table[$src[27] as usize]; }; - ($d:ident, $src:ident, 29) => { to_lower!($d, $src, 28); $d[28] = table[$src[28] as usize]; }; - ($d:ident, $src:ident, 30) => { to_lower!($d, $src, 29); $d[29] = table[$src[29] as usize]; }; - ($d:ident, $src:ident, 31) => { to_lower!($d, $src, 30); $d[30] = table[$src[30] as usize]; }; - ($d:ident, $src:ident, 32) => { to_lower!($d, $src, 31); $d[31] = table[$src[31] as usize]; }; - ($d:ident, $src:ident, 33) => { to_lower!($d, $src, 32); $d[32] = table[$src[32] as usize]; }; - ($d:ident, $src:ident, 34) => { to_lower!($d, $src, 33); $d[33] = table[$src[33] as usize]; }; - ($d:ident, $src:ident, 35) => { to_lower!($d, $src, 34); $d[34] = table[$src[34] as usize]; }; + ($d:ident, $src:ident, 1) => { unsafe {*($d[0].as_mut_ptr()) = table[$src[0] as usize];} }; + ($d:ident, $src:ident, 2) => { to_lower!($d, $src, 1); unsafe {*($d[1].as_mut_ptr()) = table[$src[1] as usize];} }; + ($d:ident, $src:ident, 3) => { to_lower!($d, $src, 2); unsafe {*($d[2].as_mut_ptr()) = table[$src[2] as usize];} }; + ($d:ident, $src:ident, 4) => { to_lower!($d, $src, 3); unsafe {*($d[3].as_mut_ptr()) = table[$src[3] as usize];} }; + ($d:ident, $src:ident, 5) => { to_lower!($d, $src, 4); unsafe {*($d[4].as_mut_ptr()) = table[$src[4] as usize];} }; + ($d:ident, $src:ident, 6) => { to_lower!($d, $src, 5); unsafe {*($d[5].as_mut_ptr()) = table[$src[5] as usize];} }; + ($d:ident, $src:ident, 7) => { to_lower!($d, $src, 6); unsafe {*($d[6].as_mut_ptr()) = table[$src[6] as usize];} }; + ($d:ident, $src:ident, 8) => { to_lower!($d, $src, 7); unsafe {*($d[7].as_mut_ptr()) = table[$src[7] as usize];} }; + ($d:ident, $src:ident, 9) => { to_lower!($d, $src, 8); unsafe {*($d[8].as_mut_ptr()) = table[$src[8] as usize];} }; + ($d:ident, $src:ident, 10) => { to_lower!($d, $src, 9); unsafe {*($d[9].as_mut_ptr()) = table[$src[9] as usize];} }; + ($d:ident, $src:ident, 11) => { to_lower!($d, $src, 10); unsafe {*($d[10].as_mut_ptr()) = table[$src[10] as usize];} }; + ($d:ident, $src:ident, 12) => { to_lower!($d, $src, 11); unsafe {*($d[11].as_mut_ptr()) = table[$src[11] as usize];} }; + ($d:ident, $src:ident, 13) => { to_lower!($d, $src, 12); unsafe {*($d[12].as_mut_ptr()) = table[$src[12] as usize];} }; + ($d:ident, $src:ident, 14) => { to_lower!($d, $src, 13); unsafe {*($d[13].as_mut_ptr()) = table[$src[13] as usize];} }; + ($d:ident, $src:ident, 15) => { to_lower!($d, $src, 14); unsafe {*($d[14].as_mut_ptr()) = table[$src[14] as usize];} }; + ($d:ident, $src:ident, 16) => { to_lower!($d, $src, 15); unsafe {*($d[15].as_mut_ptr()) = table[$src[15] as usize];} }; + ($d:ident, $src:ident, 17) => { to_lower!($d, $src, 16); unsafe {*($d[16].as_mut_ptr()) = table[$src[16] as usize];} }; + ($d:ident, $src:ident, 18) => { to_lower!($d, $src, 17); unsafe {*($d[17].as_mut_ptr()) = table[$src[17] as usize];} }; + ($d:ident, $src:ident, 19) => { to_lower!($d, $src, 18); unsafe {*($d[18].as_mut_ptr()) = table[$src[18] as usize];} }; + ($d:ident, $src:ident, 20) => { to_lower!($d, $src, 19); unsafe {*($d[19].as_mut_ptr()) = table[$src[19] as usize];} }; + ($d:ident, $src:ident, 21) => { to_lower!($d, $src, 20); unsafe {*($d[20].as_mut_ptr()) = table[$src[20] as usize];} }; + ($d:ident, $src:ident, 22) => { to_lower!($d, $src, 21); unsafe {*($d[21].as_mut_ptr()) = table[$src[21] as usize];} }; + ($d:ident, $src:ident, 23) => { to_lower!($d, $src, 22); unsafe {*($d[22].as_mut_ptr()) = table[$src[22] as usize];} }; + ($d:ident, $src:ident, 24) => { to_lower!($d, $src, 23); unsafe {*($d[23].as_mut_ptr()) = table[$src[23] as usize];} }; + ($d:ident, $src:ident, 25) => { to_lower!($d, $src, 24); unsafe {*($d[24].as_mut_ptr()) = table[$src[24] as usize];} }; + ($d:ident, $src:ident, 26) => { to_lower!($d, $src, 25); unsafe {*($d[25].as_mut_ptr()) = table[$src[25] as usize];} }; + ($d:ident, $src:ident, 27) => { to_lower!($d, $src, 26); unsafe {*($d[26].as_mut_ptr()) = table[$src[26] as usize];} }; + ($d:ident, $src:ident, 28) => { to_lower!($d, $src, 27); unsafe {*($d[27].as_mut_ptr()) = table[$src[27] as usize];} }; + ($d:ident, $src:ident, 29) => { to_lower!($d, $src, 28); unsafe {*($d[28].as_mut_ptr()) = table[$src[28] as usize];} }; + ($d:ident, $src:ident, 30) => { to_lower!($d, $src, 29); unsafe {*($d[29].as_mut_ptr()) = table[$src[29] as usize];} }; + ($d:ident, $src:ident, 31) => { to_lower!($d, $src, 30); unsafe {*($d[30].as_mut_ptr()) = table[$src[30] as usize];} }; + ($d:ident, $src:ident, 32) => { to_lower!($d, $src, 31); unsafe {*($d[31].as_mut_ptr()) = table[$src[31] as usize];} }; + ($d:ident, $src:ident, 33) => { to_lower!($d, $src, 32); unsafe {*($d[32].as_mut_ptr()) = table[$src[32] as usize];} }; + ($d:ident, $src:ident, 34) => { to_lower!($d, $src, 33); unsafe {*($d[33].as_mut_ptr()) = table[$src[33] as usize];} }; + ($d:ident, $src:ident, 35) => { to_lower!($d, $src, 34); unsafe {*($d[34].as_mut_ptr()) = table[$src[34] as usize];} }; } assert!(len < super::MAX_HEADER_NAME_LEN, @@ -1128,7 +1129,7 @@ fn parse_hdr<'a>( if eq!(b == b't' b'e') { Ok(Te.into()) } else { - validate(b, len) + validate(&b[..len]) } } 3 => { @@ -1141,7 +1142,7 @@ fn parse_hdr<'a>( } else if eq!(b == b'd' b'n' b't') { Ok(Dnt.into()) } else { - validate(b, len) + validate(&b[..len]) } } 4 => { @@ -1160,7 +1161,7 @@ fn parse_hdr<'a>( } else if eq!(b == b'v' b'a' b'r' b'y') { Ok(Vary.into()) } else { - validate(b, len) + validate(&b[..len]) } } 5 => { @@ -1171,7 +1172,7 @@ fn parse_hdr<'a>( } else if eq!(b == b'r' b'a' b'n' b'g' b'e') { Ok(Range.into()) } else { - validate(b, len) + validate(&b[..len]) } } 6 => { @@ -1187,13 +1188,13 @@ fn parse_hdr<'a>( return Ok(Origin.into()); } else if eq!(b == b'p' b'r' b'a' b'g' b'm' b'a') { return Ok(Pragma.into()); - } else if b[0] == b's' { + } else if unsafe {*(b[0].as_ptr())} == b's' { if eq!(b[1] == b'e' b'r' b'v' b'e' b'r') { return Ok(Server.into()); } } - validate(b, len) + validate(&b[..len]) } 7 => { to_lower!(b, data, 7); @@ -1213,7 +1214,7 @@ fn parse_hdr<'a>( } else if eq!(b == b'w' b'a' b'r' b'n' b'i' b'n' b'g') { Ok(Warning.into()) } else { - validate(b, len) + validate(&b[..len]) } } 8 => { @@ -1229,7 +1230,7 @@ fn parse_hdr<'a>( return Ok(Location.into()); } - validate(b, len) + validate(&b[..len]) } 9 => { to_lower!(b, data, 9); @@ -1237,7 +1238,7 @@ fn parse_hdr<'a>( if eq!(b == b'f' b'o' b'r' b'w' b'a' b'r' b'd' b'e' b'd') { Ok(Forwarded.into()) } else { - validate(b, len) + validate(&b[..len]) } } 10 => { @@ -1250,7 +1251,7 @@ fn parse_hdr<'a>( } else if eq!(b == b'u' b's' b'e' b'r' b'-' b'a' b'g' b'e' b'n' b't') { Ok(UserAgent.into()) } else { - validate(b, len) + validate(&b[..len]) } } 11 => { @@ -1259,7 +1260,7 @@ fn parse_hdr<'a>( if eq!(b == b'r' b'e' b't' b'r' b'y' b'-' b'a' b'f' b't' b'e' b'r') { Ok(RetryAfter.into()) } else { - validate(b, len) + validate(&b[..len]) } } 12 => { @@ -1270,19 +1271,19 @@ fn parse_hdr<'a>( } else if eq!(b == b'm' b'a' b'x' b'-' b'f' b'o' b'r' b'w' b'a' b'r' b'd' b's') { Ok(MaxForwards.into()) } else { - validate(b, len) + validate(&b[..len]) } } 13 => { to_lower!(b, data, 13); - if b[0] == b'a' { + if unsafe {*(b[0].as_ptr())} == b'a' { if eq!(b[1] == b'c' b'c' b'e' b'p' b't' b'-' b'r' b'a' b'n' b'g' b'e' b's') { return Ok(AcceptRanges.into()); } else if eq!(b[1] == b'u' b't' b'h' b'o' b'r' b'i' b'z' b'a' b't' b'i' b'o' b'n') { return Ok(Authorization.into()); } - } else if b[0] == b'c' { + } else if unsafe {*(b[0].as_ptr())} == b'c' { if eq!(b[1] == b'a' b'c' b'h' b'e' b'-' b'c' b'o' b'n' b't' b'r' b'o' b'l') { return Ok(CacheControl.into()); } else if eq!(b[1] == b'o' b'n' b't' b'e' b'n' b't' b'-' b'r' b'a' b'n' b'g' b'e' ) @@ -1295,7 +1296,7 @@ fn parse_hdr<'a>( return Ok(LastModified.into()); } - validate(b, len) + validate(&b[..len]) } 14 => { to_lower!(b, data, 14); @@ -1306,7 +1307,7 @@ fn parse_hdr<'a>( { Ok(ContentLength.into()) } else { - validate(b, len) + validate(&b[..len]) } } 15 => { @@ -1327,7 +1328,7 @@ fn parse_hdr<'a>( return Ok(ReferrerPolicy.into()) } - validate(b, len) + validate(&b[..len]) } 16 => { to_lower!(b, data, 16); @@ -1346,7 +1347,7 @@ fn parse_hdr<'a>( return Ok(XXssProtection.into()) } - validate(b, len) + validate(&b[..len]) } 17 => { to_lower!(b, data, 17); @@ -1358,7 +1359,7 @@ fn parse_hdr<'a>( } else if eq!(b == b's' b'e' b'c' b'-' b'w' b'e' b'b' b's' b'o' b'c' b'k' b'e' b't' b'-' b'k' b'e' b'y') { Ok(SecWebSocketKey.into()) } else { - validate(b, len) + validate(&b[..len]) } } 18 => { @@ -1367,7 +1368,7 @@ fn parse_hdr<'a>( if eq!(b == b'p' b'r' b'o' b'x' b'y' b'-' b'a' b'u' b't' b'h' b'e' b'n' b't' b'i' b'c' b'a' b't' b'e') { Ok(ProxyAuthenticate.into()) } else { - validate(b, len) + validate(&b[..len]) } } 19 => { @@ -1380,7 +1381,7 @@ fn parse_hdr<'a>( } else if eq!(b == b'p' b'r' b'o' b'x' b'y' b'-' b'a' b'u' b't' b'h' b'o' b'r' b'i' b'z' b'a' b't' b'i' b'o' b'n') { Ok(ProxyAuthorization.into()) } else { - validate(b, len) + validate(&b[..len]) } } 20 => { @@ -1389,7 +1390,7 @@ fn parse_hdr<'a>( if eq!(b == b's' b'e' b'c' b'-' b'w' b'e' b'b' b's' b'o' b'c' b'k' b'e' b't' b'-' b'a' b'c' b'c' b'e' b'p' b't') { Ok(SecWebSocketAccept.into()) } else { - validate(b, len) + validate(&b[..len]) } } 21 => { @@ -1398,7 +1399,7 @@ fn parse_hdr<'a>( if eq!(b == b's' b'e' b'c' b'-' b'w' b'e' b'b' b's' b'o' b'c' b'k' b'e' b't' b'-' b'v' b'e' b'r' b's' b'i' b'o' b'n') { Ok(SecWebSocketVersion.into()) } else { - validate(b, len) + validate(&b[..len]) } } 22 => { @@ -1413,7 +1414,7 @@ fn parse_hdr<'a>( } else if eq!(b == b's' b'e' b'c' b'-' b'w' b'e' b'b' b's' b'o' b'c' b'k' b'e' b't' b'-' b'p' b'r' b'o' b't' b'o' b'c' b'o' b'l') { Ok(SecWebSocketProtocol.into()) } else { - validate(b, len) + validate(&b[..len]) } } 23 => { @@ -1422,7 +1423,7 @@ fn parse_hdr<'a>( if eq!(b == b'c' b'o' b'n' b't' b'e' b'n' b't' b'-' b's' b'e' b'c' b'u' b'r' b'i' b't' b'y' b'-' b'p' b'o' b'l' b'i' b'c' b'y') { Ok(ContentSecurityPolicy.into()) } else { - validate(b, len) + validate(&b[..len]) } } 24 => { @@ -1431,7 +1432,7 @@ fn parse_hdr<'a>( if eq!(b == b's' b'e' b'c' b'-' b'w' b'e' b'b' b's' b'o' b'c' b'k' b'e' b't' b'-' b'e' b'x' b't' b'e' b'n' b's' b'i' b'o' b'n' b's') { Ok(SecWebSocketExtensions.into()) } else { - validate(b, len) + validate(&b[..len]) } } 25 => { @@ -1442,7 +1443,7 @@ fn parse_hdr<'a>( } else if eq!(b == b'u' b'p' b'g' b'r' b'a' b'd' b'e' b'-' b'i' b'n' b's' b'e' b'c' b'u' b'r' b'e' b'-' b'r' b'e' b'q' b'u' b'e' b's' b't' b's') { Ok(UpgradeInsecureRequests.into()) } else { - validate(b, len) + validate(&b[..len]) } } 27 => { @@ -1453,7 +1454,7 @@ fn parse_hdr<'a>( } else if eq!(b == b'p' b'u' b'b' b'l' b'i' b'c' b'-' b'k' b'e' b'y' b'-' b'p' b'i' b'n' b's' b'-' b'r' b'e' b'p' b'o' b'r' b't' b'-' b'o' b'n' b'l' b'y') { Ok(PublicKeyPinsReportOnly.into()) } else { - validate(b, len) + validate(&b[..len]) } } 28 => { @@ -1467,7 +1468,7 @@ fn parse_hdr<'a>( } } - validate(b, len) + validate(&b[..len]) } 29 => { to_lower!(b, data, 29); @@ -1480,7 +1481,7 @@ fn parse_hdr<'a>( } } - validate(b, len) + validate(&b[..len]) } 30 => { to_lower!(b, data, 30); @@ -1488,7 +1489,7 @@ fn parse_hdr<'a>( if eq!(b == b'a' b'c' b'c' b'e' b's' b's' b'-' b'c' b'o' b'n' b't' b'r' b'o' b'l' b'-' b'r' b'e' b'q' b'u' b'e' b's' b't' b'-' b'h' b'e' b'a' b'd' b'e' b'r' b's') { Ok(AccessControlRequestHeaders.into()) } else { - validate(b, len) + validate(&b[..len]) } } 32 => { @@ -1497,7 +1498,7 @@ fn parse_hdr<'a>( if eq!(b == b'a' b'c' b'c' b'e' b's' b's' b'-' b'c' b'o' b'n' b't' b'r' b'o' b'l' b'-' b'a' b'l' b'l' b'o' b'w' b'-' b'c' b'r' b'e' b'd' b'e' b'n' b't' b'i' b'a' b'l' b's') { Ok(AccessControlAllowCredentials.into()) } else { - validate(b, len) + validate(&b[..len]) } } 35 => { @@ -1506,16 +1507,16 @@ fn parse_hdr<'a>( if eq!(b == b'c' b'o' b'n' b't' b'e' b'n' b't' b'-' b's' b'e' b'c' b'u' b'r' b'i' b't' b'y' b'-' b'p' b'o' b'l' b'i' b'c' b'y' b'-' b'r' b'e' b'p' b'o' b'r' b't' b'-' b'o' b'n' b'l' b'y') { Ok(ContentSecurityPolicyReportOnly.into()) } else { - validate(b, len) + validate(&b[..len]) } } _ => { if len < 64 { for i in 0..len { - b[i] = table[data[i] as usize]; + unsafe {*(b[i].as_mut_ptr()) = table[data[i] as usize]; } } - validate(b, len) + validate(&b[..len]) } else { Ok(HdrName::custom(data, false)) } @@ -1527,7 +1528,7 @@ fn parse_hdr<'a>( /// This version works best in debug mode in wasm fn parse_hdr<'a>( data: &'a [u8], - b: &'a mut [u8; 64], + b: &'a mut [MaybeUninit; SCRATCH_BUF_SIZE], table: &[u8; 256], ) -> Result, InvalidHeaderName> { use self::StandardHeader::*; @@ -1554,7 +1555,8 @@ fn parse_hdr<'a>( len if len > 64 => Ok(HdrName::custom(data, false)), len => { // Read from data into the buffer - transforming using `table` as we go - data.iter().zip(b.iter_mut()).for_each(|(index, out)| *out = table[*index as usize]); + data.iter().zip(b.iter_mut()).for_each(|(index, out)| unsafe {*(out.as_mut_ptr()) = table[*index as usize]}); + let b = unsafe {slice_assume_init(&b[..len])}; match &b[0..len] { b"te" => Ok(Te.into()), b"age" => Ok(Age.into()), @@ -1655,10 +1657,8 @@ impl HeaderName { /// Converts a slice of bytes to an HTTP header name. /// /// This function normalizes the input. - #[allow(deprecated)] pub fn from_bytes(src: &[u8]) -> Result { - #[allow(deprecated)] - let mut buf = unsafe { mem::uninitialized() }; + let mut buf = uninit_u8_array(); match parse_hdr(src, &mut buf, &HEADER_CHARS)?.inner { Repr::Standard(std) => Ok(std.into()), Repr::Custom(MaybeLower { buf, lower: true }) => { @@ -1705,10 +1705,8 @@ impl HeaderName { /// // Parsing a header that contains uppercase characters /// assert!(HeaderName::from_lowercase(b"Content-Length").is_err()); /// ``` - #[allow(deprecated)] pub fn from_lowercase(src: &[u8]) -> Result { - #[allow(deprecated)] - let mut buf = unsafe { mem::uninitialized() }; + let mut buf = uninit_u8_array(); match parse_hdr(src, &mut buf, &HEADER_CHARS_H2)?.inner { Repr::Standard(std) => Ok(std.into()), Repr::Custom(MaybeLower { buf, lower: true }) => { @@ -1765,11 +1763,9 @@ impl HeaderName { /// let a = HeaderName::from_static("foobar"); /// let b = HeaderName::from_static("FOOBAR"); // This line panics! /// ``` - #[allow(deprecated)] pub fn from_static(src: &'static str) -> HeaderName { let bytes = src.as_bytes(); - #[allow(deprecated)] - let mut buf = unsafe { mem::uninitialized() }; + let mut buf = uninit_u8_array(); match parse_hdr(bytes, &mut buf, &HEADER_CHARS_H2) { Ok(hdr_name) => match hdr_name.inner { Repr::Standard(std) => std.into(), @@ -2021,23 +2017,19 @@ impl<'a> HdrName<'a> { } } - #[allow(deprecated)] pub fn from_bytes(hdr: &[u8], f: F) -> Result where F: FnOnce(HdrName<'_>) -> U, { - #[allow(deprecated)] - let mut buf = unsafe { mem::uninitialized() }; + let mut buf = uninit_u8_array(); let hdr = parse_hdr(hdr, &mut buf, &HEADER_CHARS)?; Ok(f(hdr)) } - #[allow(deprecated)] pub fn from_static(hdr: &'static str, f: F) -> U where F: FnOnce(HdrName<'_>) -> U, { - #[allow(deprecated)] - let mut buf = unsafe { mem::uninitialized() }; + let mut buf = uninit_u8_array(); let hdr = parse_hdr(hdr.as_bytes(), &mut buf, &HEADER_CHARS).expect("static str is invalid name"); f(hdr) @@ -2137,6 +2129,16 @@ fn eq_ignore_ascii_case(lower: &[u8], s: &[u8]) -> bool { }) } +const SCRATCH_BUF_SIZE: usize = 64; + +fn uninit_u8_array() -> [MaybeUninit; SCRATCH_BUF_SIZE] { + unsafe { MaybeUninit::<[MaybeUninit; SCRATCH_BUF_SIZE]>::uninit().assume_init() } +} + +unsafe fn slice_assume_init(slice: &[MaybeUninit]) -> &[T] { + &*(slice as *const [MaybeUninit] as *const [T]) + } + #[cfg(test)] mod tests { use super::*; From 0a9f51b7c326d90743c8e5775c3596e11465546a Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sun, 10 May 2020 08:59:02 -0400 Subject: [PATCH 2/8] Refactor MaybeUninit support in mod header::name The general pattern follows the "Initilizing an array element-by-element" example in the documentation of MaybeUninit. This change removes some unnecessary use of unsafe. --- src/header/name.rs | 103 ++++++++++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 43 deletions(-) diff --git a/src/header/name.rs b/src/header/name.rs index 270c3bc5..0658e76e 100644 --- a/src/header/name.rs +++ b/src/header/name.rs @@ -1046,7 +1046,7 @@ macro_rules! eq { $($cmp) && * }; (($($cmp:expr,)*) $v:ident[$n:expr] == $a:tt $($rest:tt)*) => { - eq!(($($cmp,)* unsafe {*($v[$n].as_ptr())} == $a,) $v[$n+1] == $($rest)*) + eq!(($($cmp,)* unsafe {assume_init_eq($v[$n], $a)} ,) $v[$n+1] == $($rest)*) }; ($v:ident == $($rest:tt)+) => { eq!(() $v[0] == $($rest)+) @@ -1080,41 +1080,41 @@ fn parse_hdr<'a>( macro_rules! to_lower { - ($d:ident, $src:ident, 1) => { unsafe {*($d[0].as_mut_ptr()) = table[$src[0] as usize];} }; - ($d:ident, $src:ident, 2) => { to_lower!($d, $src, 1); unsafe {*($d[1].as_mut_ptr()) = table[$src[1] as usize];} }; - ($d:ident, $src:ident, 3) => { to_lower!($d, $src, 2); unsafe {*($d[2].as_mut_ptr()) = table[$src[2] as usize];} }; - ($d:ident, $src:ident, 4) => { to_lower!($d, $src, 3); unsafe {*($d[3].as_mut_ptr()) = table[$src[3] as usize];} }; - ($d:ident, $src:ident, 5) => { to_lower!($d, $src, 4); unsafe {*($d[4].as_mut_ptr()) = table[$src[4] as usize];} }; - ($d:ident, $src:ident, 6) => { to_lower!($d, $src, 5); unsafe {*($d[5].as_mut_ptr()) = table[$src[5] as usize];} }; - ($d:ident, $src:ident, 7) => { to_lower!($d, $src, 6); unsafe {*($d[6].as_mut_ptr()) = table[$src[6] as usize];} }; - ($d:ident, $src:ident, 8) => { to_lower!($d, $src, 7); unsafe {*($d[7].as_mut_ptr()) = table[$src[7] as usize];} }; - ($d:ident, $src:ident, 9) => { to_lower!($d, $src, 8); unsafe {*($d[8].as_mut_ptr()) = table[$src[8] as usize];} }; - ($d:ident, $src:ident, 10) => { to_lower!($d, $src, 9); unsafe {*($d[9].as_mut_ptr()) = table[$src[9] as usize];} }; - ($d:ident, $src:ident, 11) => { to_lower!($d, $src, 10); unsafe {*($d[10].as_mut_ptr()) = table[$src[10] as usize];} }; - ($d:ident, $src:ident, 12) => { to_lower!($d, $src, 11); unsafe {*($d[11].as_mut_ptr()) = table[$src[11] as usize];} }; - ($d:ident, $src:ident, 13) => { to_lower!($d, $src, 12); unsafe {*($d[12].as_mut_ptr()) = table[$src[12] as usize];} }; - ($d:ident, $src:ident, 14) => { to_lower!($d, $src, 13); unsafe {*($d[13].as_mut_ptr()) = table[$src[13] as usize];} }; - ($d:ident, $src:ident, 15) => { to_lower!($d, $src, 14); unsafe {*($d[14].as_mut_ptr()) = table[$src[14] as usize];} }; - ($d:ident, $src:ident, 16) => { to_lower!($d, $src, 15); unsafe {*($d[15].as_mut_ptr()) = table[$src[15] as usize];} }; - ($d:ident, $src:ident, 17) => { to_lower!($d, $src, 16); unsafe {*($d[16].as_mut_ptr()) = table[$src[16] as usize];} }; - ($d:ident, $src:ident, 18) => { to_lower!($d, $src, 17); unsafe {*($d[17].as_mut_ptr()) = table[$src[17] as usize];} }; - ($d:ident, $src:ident, 19) => { to_lower!($d, $src, 18); unsafe {*($d[18].as_mut_ptr()) = table[$src[18] as usize];} }; - ($d:ident, $src:ident, 20) => { to_lower!($d, $src, 19); unsafe {*($d[19].as_mut_ptr()) = table[$src[19] as usize];} }; - ($d:ident, $src:ident, 21) => { to_lower!($d, $src, 20); unsafe {*($d[20].as_mut_ptr()) = table[$src[20] as usize];} }; - ($d:ident, $src:ident, 22) => { to_lower!($d, $src, 21); unsafe {*($d[21].as_mut_ptr()) = table[$src[21] as usize];} }; - ($d:ident, $src:ident, 23) => { to_lower!($d, $src, 22); unsafe {*($d[22].as_mut_ptr()) = table[$src[22] as usize];} }; - ($d:ident, $src:ident, 24) => { to_lower!($d, $src, 23); unsafe {*($d[23].as_mut_ptr()) = table[$src[23] as usize];} }; - ($d:ident, $src:ident, 25) => { to_lower!($d, $src, 24); unsafe {*($d[24].as_mut_ptr()) = table[$src[24] as usize];} }; - ($d:ident, $src:ident, 26) => { to_lower!($d, $src, 25); unsafe {*($d[25].as_mut_ptr()) = table[$src[25] as usize];} }; - ($d:ident, $src:ident, 27) => { to_lower!($d, $src, 26); unsafe {*($d[26].as_mut_ptr()) = table[$src[26] as usize];} }; - ($d:ident, $src:ident, 28) => { to_lower!($d, $src, 27); unsafe {*($d[27].as_mut_ptr()) = table[$src[27] as usize];} }; - ($d:ident, $src:ident, 29) => { to_lower!($d, $src, 28); unsafe {*($d[28].as_mut_ptr()) = table[$src[28] as usize];} }; - ($d:ident, $src:ident, 30) => { to_lower!($d, $src, 29); unsafe {*($d[29].as_mut_ptr()) = table[$src[29] as usize];} }; - ($d:ident, $src:ident, 31) => { to_lower!($d, $src, 30); unsafe {*($d[30].as_mut_ptr()) = table[$src[30] as usize];} }; - ($d:ident, $src:ident, 32) => { to_lower!($d, $src, 31); unsafe {*($d[31].as_mut_ptr()) = table[$src[31] as usize];} }; - ($d:ident, $src:ident, 33) => { to_lower!($d, $src, 32); unsafe {*($d[32].as_mut_ptr()) = table[$src[32] as usize];} }; - ($d:ident, $src:ident, 34) => { to_lower!($d, $src, 33); unsafe {*($d[33].as_mut_ptr()) = table[$src[33] as usize];} }; - ($d:ident, $src:ident, 35) => { to_lower!($d, $src, 34); unsafe {*($d[34].as_mut_ptr()) = table[$src[34] as usize];} }; + ($d:ident, $src:ident, 1) => { $d[0] = MaybeUninit::new(table[$src[0] as usize]); }; + ($d:ident, $src:ident, 2) => { to_lower!($d, $src, 1); $d[1] = MaybeUninit::new(table[$src[1] as usize]); }; + ($d:ident, $src:ident, 3) => { to_lower!($d, $src, 2); $d[2] = MaybeUninit::new(table[$src[2] as usize]); }; + ($d:ident, $src:ident, 4) => { to_lower!($d, $src, 3); $d[3] = MaybeUninit::new(table[$src[3] as usize]); }; + ($d:ident, $src:ident, 5) => { to_lower!($d, $src, 4); $d[4] = MaybeUninit::new(table[$src[4] as usize]); }; + ($d:ident, $src:ident, 6) => { to_lower!($d, $src, 5); $d[5] = MaybeUninit::new(table[$src[5] as usize]); }; + ($d:ident, $src:ident, 7) => { to_lower!($d, $src, 6); $d[6] = MaybeUninit::new(table[$src[6] as usize]); }; + ($d:ident, $src:ident, 8) => { to_lower!($d, $src, 7); $d[7] = MaybeUninit::new(table[$src[7] as usize]); }; + ($d:ident, $src:ident, 9) => { to_lower!($d, $src, 8); $d[8] = MaybeUninit::new(table[$src[8] as usize]); }; + ($d:ident, $src:ident, 10) => { to_lower!($d, $src, 9); $d[9] = MaybeUninit::new(table[$src[9] as usize]); }; + ($d:ident, $src:ident, 11) => { to_lower!($d, $src, 10); $d[10] = MaybeUninit::new(table[$src[10] as usize]); }; + ($d:ident, $src:ident, 12) => { to_lower!($d, $src, 11); $d[11] = MaybeUninit::new(table[$src[11] as usize]); }; + ($d:ident, $src:ident, 13) => { to_lower!($d, $src, 12); $d[12] = MaybeUninit::new(table[$src[12] as usize]); }; + ($d:ident, $src:ident, 14) => { to_lower!($d, $src, 13); $d[13] = MaybeUninit::new(table[$src[13] as usize]); }; + ($d:ident, $src:ident, 15) => { to_lower!($d, $src, 14); $d[14] = MaybeUninit::new(table[$src[14] as usize]); }; + ($d:ident, $src:ident, 16) => { to_lower!($d, $src, 15); $d[15] = MaybeUninit::new(table[$src[15] as usize]); }; + ($d:ident, $src:ident, 17) => { to_lower!($d, $src, 16); $d[16] = MaybeUninit::new(table[$src[16] as usize]); }; + ($d:ident, $src:ident, 18) => { to_lower!($d, $src, 17); $d[17] = MaybeUninit::new(table[$src[17] as usize]); }; + ($d:ident, $src:ident, 19) => { to_lower!($d, $src, 18); $d[18] = MaybeUninit::new(table[$src[18] as usize]); }; + ($d:ident, $src:ident, 20) => { to_lower!($d, $src, 19); $d[19] = MaybeUninit::new(table[$src[19] as usize]); }; + ($d:ident, $src:ident, 21) => { to_lower!($d, $src, 20); $d[20] = MaybeUninit::new(table[$src[20] as usize]); }; + ($d:ident, $src:ident, 22) => { to_lower!($d, $src, 21); $d[21] = MaybeUninit::new(table[$src[21] as usize]); }; + ($d:ident, $src:ident, 23) => { to_lower!($d, $src, 22); $d[22] = MaybeUninit::new(table[$src[22] as usize]); }; + ($d:ident, $src:ident, 24) => { to_lower!($d, $src, 23); $d[23] = MaybeUninit::new(table[$src[23] as usize]); }; + ($d:ident, $src:ident, 25) => { to_lower!($d, $src, 24); $d[24] = MaybeUninit::new(table[$src[24] as usize]); }; + ($d:ident, $src:ident, 26) => { to_lower!($d, $src, 25); $d[25] = MaybeUninit::new(table[$src[25] as usize]); }; + ($d:ident, $src:ident, 27) => { to_lower!($d, $src, 26); $d[26] = MaybeUninit::new(table[$src[26] as usize]); }; + ($d:ident, $src:ident, 28) => { to_lower!($d, $src, 27); $d[27] = MaybeUninit::new(table[$src[27] as usize]); }; + ($d:ident, $src:ident, 29) => { to_lower!($d, $src, 28); $d[28] = MaybeUninit::new(table[$src[28] as usize]); }; + ($d:ident, $src:ident, 30) => { to_lower!($d, $src, 29); $d[29] = MaybeUninit::new(table[$src[29] as usize]); }; + ($d:ident, $src:ident, 31) => { to_lower!($d, $src, 30); $d[30] = MaybeUninit::new(table[$src[30] as usize]); }; + ($d:ident, $src:ident, 32) => { to_lower!($d, $src, 31); $d[31] = MaybeUninit::new(table[$src[31] as usize]); }; + ($d:ident, $src:ident, 33) => { to_lower!($d, $src, 32); $d[32] = MaybeUninit::new(table[$src[32] as usize]); }; + ($d:ident, $src:ident, 34) => { to_lower!($d, $src, 33); $d[33] = MaybeUninit::new(table[$src[33] as usize]); }; + ($d:ident, $src:ident, 35) => { to_lower!($d, $src, 34); $d[34] = MaybeUninit::new(table[$src[34] as usize]); }; } assert!(len < super::MAX_HEADER_NAME_LEN, @@ -1188,7 +1188,7 @@ fn parse_hdr<'a>( return Ok(Origin.into()); } else if eq!(b == b'p' b'r' b'a' b'g' b'm' b'a') { return Ok(Pragma.into()); - } else if unsafe {*(b[0].as_ptr())} == b's' { + } else if unsafe {assume_init_eq(b[0], b's')} { if eq!(b[1] == b'e' b'r' b'v' b'e' b'r') { return Ok(Server.into()); } @@ -1277,13 +1277,13 @@ fn parse_hdr<'a>( 13 => { to_lower!(b, data, 13); - if unsafe {*(b[0].as_ptr())} == b'a' { + if unsafe {assume_init_eq(b[0], b'a')} { if eq!(b[1] == b'c' b'c' b'e' b'p' b't' b'-' b'r' b'a' b'n' b'g' b'e' b's') { return Ok(AcceptRanges.into()); } else if eq!(b[1] == b'u' b't' b'h' b'o' b'r' b'i' b'z' b'a' b't' b'i' b'o' b'n') { return Ok(Authorization.into()); } - } else if unsafe {*(b[0].as_ptr())} == b'c' { + } else if unsafe {assume_init_eq(b[0], b'c')} { if eq!(b[1] == b'a' b'c' b'h' b'e' b'-' b'c' b'o' b'n' b't' b'r' b'o' b'l') { return Ok(CacheControl.into()); } else if eq!(b[1] == b'o' b'n' b't' b'e' b'n' b't' b'-' b'r' b'a' b'n' b'g' b'e' ) @@ -1513,7 +1513,7 @@ fn parse_hdr<'a>( _ => { if len < 64 { for i in 0..len { - unsafe {*(b[i].as_mut_ptr()) = table[data[i] as usize]; } + b[i] = MaybeUninit::new(table[data[i] as usize]); } validate(&b[..len]) @@ -1555,7 +1555,7 @@ fn parse_hdr<'a>( len if len > 64 => Ok(HdrName::custom(data, false)), len => { // Read from data into the buffer - transforming using `table` as we go - data.iter().zip(b.iter_mut()).for_each(|(index, out)| unsafe {*(out.as_mut_ptr()) = table[*index as usize]}); + data.iter().zip(b.iter_mut()).for_each(|(index, out)| *out = MaybeUninit::new(table[*index as usize])); let b = unsafe {slice_assume_init(&b[..len])}; match &b[0..len] { b"te" => Ok(Te.into()), @@ -2129,15 +2129,32 @@ fn eq_ignore_ascii_case(lower: &[u8], s: &[u8]) -> bool { }) } +// Utility functions for MaybeUninit<>. These are drawn from unstable API's on +// MaybeUninit<> itself. const SCRATCH_BUF_SIZE: usize = 64; fn uninit_u8_array() -> [MaybeUninit; SCRATCH_BUF_SIZE] { - unsafe { MaybeUninit::<[MaybeUninit; SCRATCH_BUF_SIZE]>::uninit().assume_init() } + let arr = MaybeUninit::<[MaybeUninit; SCRATCH_BUF_SIZE]>::uninit(); + // Safety: assume_init() is claiming that an array of MaybeUninit<> + // has been initilized, but MaybeUninit<>'s do not require initilizaton. + unsafe { arr.assume_init() } } +// Assuming all the elements are initilized, get a slice of them. +// +// Safety: All elements of `slice` must be initilized to prevent +// undefined behavior. unsafe fn slice_assume_init(slice: &[MaybeUninit]) -> &[T] { &*(slice as *const [MaybeUninit] as *const [T]) - } +} + +// Compare `rhs` to `lhs` assuming the latter is initilized. +// +// Safety: `lhs` must be initilized to avoid undefined behavior. +#[cfg(any(not(debug_assertions), not(target_arch = "wasm32")))] +unsafe fn assume_init_eq(lhs: MaybeUninit, rhs: T) -> bool { + *(lhs.as_ptr()) == rhs +} #[cfg(test)] mod tests { From 9a61c45d170a2a111da2affff4980768fea41a45 Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sun, 10 May 2020 18:37:03 -0400 Subject: [PATCH 3/8] Add criterion benchmark for header::HeaderName This introduces a dev-dependency on the criterion crate. The new benchmark for HeaderName uses different standard header names of increasing lengths. --- Cargo.toml | 6 +++++ benches/header_name2.rs | 52 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 benches/header_name2.rs diff --git a/Cargo.toml b/Cargo.toml index 0357da6f..c813f18f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,6 +34,7 @@ seahash = "3.0.5" serde = "1.0" serde_json = "1.0" doc-comment = "0.3" +criterion = "0.3.2" [[bench]] name = "header_map" @@ -43,6 +44,11 @@ path = "benches/header_map/mod.rs" name = "header_name" path = "benches/header_name.rs" +[[bench]] +name = "header_name2" +path = "benches/header_name2.rs" +harness = false + [[bench]] name = "header_value" path = "benches/header_value.rs" diff --git a/benches/header_name2.rs b/benches/header_name2.rs new file mode 100644 index 00000000..4562fd66 --- /dev/null +++ b/benches/header_name2.rs @@ -0,0 +1,52 @@ +use criterion::{criterion_group, criterion_main, BenchmarkId,Criterion, Throughput}; +use http::header::HeaderName; + +// This is a list of some of the standard headers ordered by increasing size. +// It has exactly one standard header per size (some sizes don't have a standard +// header). +const STANDARD_HEADERS_BY_SIZE: &[&str] = &[ + "te", + "age", + "date", + "allow", + "accept", + "alt-svc", + "if-match", + "forwarded", + "connection", + "retry-after", + "content-type", + "accept-ranges", + "accept-charset", + "accept-encoding", + "content-encoding", + "if-modified-since", + "proxy-authenticate", + "content-disposition", + "sec-websocket-accept", + "sec-websocket-version", + "access-control-max-age", + "content-security-policy", + "sec-websocket-extensions", + "strict-transport-security", + "access-control-allow-origin", + "access-control-allow-headers", + "access-control-expose-headers", + "access-control-request-headers", + "access-control-allow-credentials", + "content-security-policy-report-only", +]; + +fn header_name_by_size(c: &mut Criterion) { + let mut group = c.benchmark_group("std_hdr"); + for name in STANDARD_HEADERS_BY_SIZE { + group.throughput(Throughput::Bytes(name.len() as u64)); + group.bench_with_input(BenchmarkId::from_parameter(name), name, |b, name| { + b.iter(|| HeaderName::from_static(name) ); + }); + } + group.finish(); +} + +criterion_group!(benches, header_name_by_size); +criterion_main!(benches); From c154b5a7a96d197287622a30f948b2e870722f24 Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Wed, 13 May 2020 20:18:28 -0400 Subject: [PATCH 4/8] Fix performance regression in header::name A previous commit had extracted the comparison of a presumed-initialized MaybeUninit and a static u8 to its own function. While clearer, this lead to a performance regression so this commit manually inlines this method again. This restores most (but not yet all) of the performance that regressed. --- src/header/name.rs | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/header/name.rs b/src/header/name.rs index 0658e76e..99d186eb 100644 --- a/src/header/name.rs +++ b/src/header/name.rs @@ -1046,7 +1046,7 @@ macro_rules! eq { $($cmp) && * }; (($($cmp:expr,)*) $v:ident[$n:expr] == $a:tt $($rest:tt)*) => { - eq!(($($cmp,)* unsafe {assume_init_eq($v[$n], $a)} ,) $v[$n+1] == $($rest)*) + eq!(($($cmp,)* unsafe {*($v[$n].as_ptr())} == $a ,) $v[$n+1] == $($rest)*) }; ($v:ident == $($rest:tt)+) => { eq!(() $v[0] == $($rest)+) @@ -1188,7 +1188,7 @@ fn parse_hdr<'a>( return Ok(Origin.into()); } else if eq!(b == b'p' b'r' b'a' b'g' b'm' b'a') { return Ok(Pragma.into()); - } else if unsafe {assume_init_eq(b[0], b's')} { + } else if unsafe {*(b[0].as_ptr())} == b's' { if eq!(b[1] == b'e' b'r' b'v' b'e' b'r') { return Ok(Server.into()); } @@ -1277,13 +1277,13 @@ fn parse_hdr<'a>( 13 => { to_lower!(b, data, 13); - if unsafe {assume_init_eq(b[0], b'a')} { + if unsafe {*(b[0].as_ptr())} == b'a' { if eq!(b[1] == b'c' b'c' b'e' b'p' b't' b'-' b'r' b'a' b'n' b'g' b'e' b's') { return Ok(AcceptRanges.into()); } else if eq!(b[1] == b'u' b't' b'h' b'o' b'r' b'i' b'z' b'a' b't' b'i' b'o' b'n') { return Ok(Authorization.into()); } - } else if unsafe {assume_init_eq(b[0], b'c')} { + } else if unsafe {*(b[0].as_ptr())} == b'c' { if eq!(b[1] == b'a' b'c' b'h' b'e' b'-' b'c' b'o' b'n' b't' b'r' b'o' b'l') { return Ok(CacheControl.into()); } else if eq!(b[1] == b'o' b'n' b't' b'e' b'n' b't' b'-' b'r' b'a' b'n' b'g' b'e' ) @@ -2148,14 +2148,6 @@ unsafe fn slice_assume_init(slice: &[MaybeUninit]) -> &[T] { &*(slice as *const [MaybeUninit] as *const [T]) } -// Compare `rhs` to `lhs` assuming the latter is initilized. -// -// Safety: `lhs` must be initilized to avoid undefined behavior. -#[cfg(any(not(debug_assertions), not(target_arch = "wasm32")))] -unsafe fn assume_init_eq(lhs: MaybeUninit, rhs: T) -> bool { - *(lhs.as_ptr()) == rhs -} - #[cfg(test)] mod tests { use super::*; From 46c9e77396b8314c2b857a0d8115178864425705 Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Fri, 29 May 2020 20:57:17 -0400 Subject: [PATCH 5/8] Rearrange code in header::name mod Move the eq! macro into the parsh_hdr() function which locallized all of the unsafe code related to MaybeUninit. --- src/header/name.rs | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/header/name.rs b/src/header/name.rs index 99d186eb..53e39891 100644 --- a/src/header/name.rs +++ b/src/header/name.rs @@ -1040,22 +1040,6 @@ const HEADER_CHARS_H2: [u8; 256] = [ 0, 0, 0, 0, 0, 0 // 25x ]; -#[cfg(any(not(debug_assertions), not(target_arch = "wasm32")))] -macro_rules! eq { - (($($cmp:expr,)*) $v:ident[$n:expr] ==) => { - $($cmp) && * - }; - (($($cmp:expr,)*) $v:ident[$n:expr] == $a:tt $($rest:tt)*) => { - eq!(($($cmp,)* unsafe {*($v[$n].as_ptr())} == $a ,) $v[$n+1] == $($rest)*) - }; - ($v:ident == $($rest:tt)+) => { - eq!(() $v[0] == $($rest)+) - }; - ($v:ident[$n:expr] == $($rest:tt)+) => { - eq!(() $v[$n] == $($rest)+) - }; -} - #[cfg(any(not(debug_assertions), not(target_arch = "wasm32")))] /// This version is best under optimized mode, however in a wasm debug compile, /// the `eq` macro expands to 1 + 1 + 1 + 1... and wasm explodes when this chain gets too long @@ -1078,6 +1062,21 @@ fn parse_hdr<'a>( } }; + macro_rules! eq { + (($($cmp:expr,)*) $v:ident[$n:expr] ==) => { + $($cmp) && * + }; + (($($cmp:expr,)*) $v:ident[$n:expr] == $a:tt $($rest:tt)*) => { + eq!(($($cmp,)* unsafe {*($v[$n].as_ptr())} == $a ,) $v[$n+1] == $($rest)*) + }; + ($v:ident == $($rest:tt)+) => { + eq!(() $v[0] == $($rest)+) + }; + ($v:ident[$n:expr] == $($rest:tt)+) => { + eq!(() $v[$n] == $($rest)+) + }; + } + macro_rules! to_lower { ($d:ident, $src:ident, 1) => { $d[0] = MaybeUninit::new(table[$src[0] as usize]); }; From 4aa98febea18354f188249934ec6c917084450bc Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sat, 30 May 2020 14:34:16 -0400 Subject: [PATCH 6/8] Add safety comments to header::name module The comments describe the pre-condition and post-conditions on the various different parts of the two parse_hdr() implementations that combine to make the use of MaybeUninit in that function sound. The process of assessing the soundness of the use of MaybeUninit also included manually checking that the number of parameters to each all eq!() invocation matches the number of bytes initilized by the immediatly preceeding invocation of to_lower!() (which is necessary to avoid undefined behavior). To avoid being overly repetative the general pattern that assures soundness is documented in the comments but not each instance of that pattern. Each instance, though, was checked. --- src/header/name.rs | 68 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/src/header/name.rs b/src/header/name.rs index 53e39891..c02a259f 100644 --- a/src/header/name.rs +++ b/src/header/name.rs @@ -1053,7 +1053,9 @@ fn parse_hdr<'a>( let len = data.len(); + // Precondition: each element of buf must be intitialized let validate = |buf: &'a [MaybeUninit]| { + // Safety: follows from the precondtion let buf = unsafe {slice_assume_init(buf)}; if buf.iter().any(|&b| b == 0) { Err(InvalidHeaderName::new()) @@ -1062,11 +1064,18 @@ fn parse_hdr<'a>( } }; + // Called as either eq!(b == b'a' b'b' b'c') or eq!(b[i] == b'a' b'b' b'c') + // Precondition: the first n elements of b (or the first n starting at i) + // must be intitialized, where n is the number of bytes listed after the '==' + // in the invocation. macro_rules! eq { (($($cmp:expr,)*) $v:ident[$n:expr] ==) => { $($cmp) && * }; (($($cmp:expr,)*) $v:ident[$n:expr] == $a:tt $($rest:tt)*) => { + // Safety: this arm is matched once for each byte after the '==' in + // the invocation (starting at 0 or i depending on the form of the call). + // By the precondtion $v[$n] is intitialized for each such match. eq!(($($cmp,)* unsafe {*($v[$n].as_ptr())} == $a ,) $v[$n+1] == $($rest)*) }; ($v:ident == $($rest:tt)+) => { @@ -1078,6 +1087,10 @@ fn parse_hdr<'a>( } + // Post-condition: the first n elements of $d are intitialized where n is the + // third paramter to the macro. Note that this macro overwrite the first n elements + // of $d without dropping the existing contents (if any) but the elements of $d + // are u8's so no drop is necessary. macro_rules! to_lower { ($d:ident, $src:ident, 1) => { $d[0] = MaybeUninit::new(table[$src[0] as usize]); }; ($d:ident, $src:ident, 2) => { to_lower!($d, $src, 1); $d[1] = MaybeUninit::new(table[$src[1] as usize]); }; @@ -1120,14 +1133,30 @@ fn parse_hdr<'a>( "header name too long -- max length is {}", super::MAX_HEADER_NAME_LEN); + // Most of the arms of the match below have a variation of the following pattern: + // to_lower!(b, data, n); + // if eq!(b == b'1' b'2' ... b'n') { + // Ok(StandardHeaderElement.into()) + // } else { + // validate(&b[..n]) + // } + // The soundness of the arms following this pattern is described once in the + // match arm for 2. The soundness of exception to this pattern are described in + // each such match arm. match len { 0 => Err(InvalidHeaderName::new()), 2 => { to_lower!(b, data, 2); + // Precondition: the post-condition on to_lower!() ensures the first 2 + // elements of b are intitialized and the eq!() call lists 2 bytes + // after the ==. if eq!(b == b't' b'e') { Ok(Te.into()) } else { + // Precondition: the post-condition on to_lower!() ensures that the + // first 2 elements of b are intitialized. len == 2 so all of + // b[..len] is intitialized. validate(&b[..len]) } } @@ -1175,6 +1204,7 @@ fn parse_hdr<'a>( } } 6 => { + // this arm mostly follows the pattern except as indicated to_lower!(b, data, 6); if eq!(b == b'a' b'c' b'c' b'e' b'p' b't') { @@ -1187,7 +1217,12 @@ fn parse_hdr<'a>( return Ok(Origin.into()); } else if eq!(b == b'p' b'r' b'a' b'g' b'm' b'a') { return Ok(Pragma.into()); + // Safety: the post-condtion on to_lower!() means the first 6 + // elements of b are intitialized so, in particular, b[0] is. } else if unsafe {*(b[0].as_ptr())} == b's' { + // Precondition: the post-condtion on to_lower!() means the + // first 6 elements of b (and hence the first 5 elements starting + // at b[1]) are intitialized. if eq!(b[1] == b'e' b'r' b'v' b'e' b'r') { return Ok(Server.into()); } @@ -1219,7 +1254,11 @@ fn parse_hdr<'a>( 8 => { to_lower!(b, data, 8); + // Precondition: the post-condition on to_lower!() means the first + // 8 elements of b are intitialized so, in particular, the first 3 are. if eq!(b == b'i' b'f' b'-') { + // Precondition: (here and next eq!()) the first 5 elements of b + // starting at b[3] are intitialized because the first 8 are. if eq!(b[3] == b'm' b'a' b't' b'c' b'h') { return Ok(IfMatch.into()); } else if eq!(b[3] == b'r' b'a' b'n' b'g' b'e') { @@ -1276,7 +1315,12 @@ fn parse_hdr<'a>( 13 => { to_lower!(b, data, 13); + // Safety: (here and next else if) The post-condition on to_lower!() + // means the first 13 bytes of b are intitialized so b[0] is. if unsafe {*(b[0].as_ptr())} == b'a' { + // Precondition: (here and next calls of eq!() with b[1]) the + // first 13 bytes of b are intitialized so the first 12 starting + // at b[1] are. if eq!(b[1] == b'c' b'c' b'e' b'p' b't' b'-' b'r' b'a' b'n' b'g' b'e' b's') { return Ok(AcceptRanges.into()); } else if eq!(b[1] == b'u' b't' b'h' b'o' b'r' b'i' b'z' b'a' b't' b'i' b'o' b'n') { @@ -1312,7 +1356,11 @@ fn parse_hdr<'a>( 15 => { to_lower!(b, data, 15); + // Precondition: The post-condition on to_lower!() ensures the first 15 + // bytes of b are intitialized so, in particular the first 7 are. if eq!(b == b'a' b'c' b'c' b'e' b'p' b't' b'-') { // accept- + // Precondition: The first 15 bytes of 5 are intitialized so the + // first 8 starting at b[7] are. if eq!(b[7] == b'e' b'n' b'c' b'o' b'd' b'i' b'n' b'g') { return Ok(AcceptEncoding.into()) } else if eq!(b[7] == b'l' b'a' b'n' b'g' b'u' b'a' b'g' b'e') { @@ -1332,7 +1380,12 @@ fn parse_hdr<'a>( 16 => { to_lower!(b, data, 16); + // Precondition: The post-condition on to_lower!() means that the first + // 16 bytes of b are intitialized so, in particular, the first 8 bytes + // are. if eq!(b == b'c' b'o' b'n' b't' b'e' b'n' b't' b'-') { + // Precondition: The first 16 bytes of b are intitialized so the + // first 8 bytes starting at b[8] are. if eq!(b[8] == b'l' b'a' b'n' b'g' b'u' b'a' b'g' b'e') { return Ok(ContentLanguage.into()) } else if eq!(b[8] == b'l' b'o' b'c' b'a' b't' b'i' b'o' b'n') { @@ -1459,7 +1512,11 @@ fn parse_hdr<'a>( 28 => { to_lower!(b, data, 28); + // Precondition: The post-condition of to_lower!() ensures that the first 28 bytes of b + // are intitialized so, in particular, the first 21 are. if eq!(b == b'a' b'c' b'c' b'e' b's' b's' b'-' b'c' b'o' b'n' b't' b'r' b'o' b'l' b'-' b'a' b'l' b'l' b'o' b'w' b'-') { + // Precondition: The first 28 bytes of b are intitialized so the first 7 bytes + // starting at b[21] are. if eq!(b[21] == b'h' b'e' b'a' b'd' b'e' b'r' b's') { return Ok(AccessControlAllowHeaders.into()) } else if eq!(b[21] == b'm' b'e' b't' b'h' b'o' b'd' b's') { @@ -1472,7 +1529,11 @@ fn parse_hdr<'a>( 29 => { to_lower!(b, data, 29); + // Precondition: The post-condition of to_lower!() ensures the fist 29 bytes of b are + // intitialized so, in particular, the first 15 bytes are. if eq!(b == b'a' b'c' b'c' b'e' b's' b's' b'-' b'c' b'o' b'n' b't' b'r' b'o' b'l' b'-') { + // Precondition: The fisr 29 bytes of b are intitialized so the first 14 bytes + // starting at b[15] are. if eq!(b[15] == b'e' b'x' b'p' b'o' b's' b'e' b'-' b'h' b'e' b'a' b'd' b'e' b'r' b's') { return Ok(AccessControlExposeHeaders.into()) } else if eq!(b[15] == b'r' b'e' b'q' b'u' b'e' b's' b't' b'-' b'm' b'e' b't' b'h' b'o' b'd') { @@ -1515,6 +1576,8 @@ fn parse_hdr<'a>( b[i] = MaybeUninit::new(table[data[i] as usize]); } + // Precondition: the first len bytes of b are intitialized in the loop above so + // b[..len] is intitialized. validate(&b[..len]) } else { Ok(HdrName::custom(data, false)) @@ -1553,8 +1616,11 @@ fn parse_hdr<'a>( 0 => Err(InvalidHeaderName::new()), len if len > 64 => Ok(HdrName::custom(data, false)), len => { - // Read from data into the buffer - transforming using `table` as we go + // Read from data into the buffer - transforming using `table` as we go. + // The assignment to *out ensures that each byte is intitialized. Since + // *out is a u8 it doesn't matter that we are not dropping *out before accessing it. data.iter().zip(b.iter_mut()).for_each(|(index, out)| *out = MaybeUninit::new(table[*index as usize])); + // Safety: We just intitialized the first len bytes of b in the previous line. let b = unsafe {slice_assume_init(&b[..len])}; match &b[0..len] { b"te" => Ok(Te.into()), From a3a7800c30521d07146329e37450dc767720a4e7 Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sat, 30 May 2020 14:44:32 -0400 Subject: [PATCH 7/8] Fix spelling error in one comment --- src/header/name.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/header/name.rs b/src/header/name.rs index c02a259f..533e389f 100644 --- a/src/header/name.rs +++ b/src/header/name.rs @@ -1359,7 +1359,7 @@ fn parse_hdr<'a>( // Precondition: The post-condition on to_lower!() ensures the first 15 // bytes of b are intitialized so, in particular the first 7 are. if eq!(b == b'a' b'c' b'c' b'e' b'p' b't' b'-') { // accept- - // Precondition: The first 15 bytes of 5 are intitialized so the + // Precondition: The first 15 bytes of b are intitialized so the // first 8 starting at b[7] are. if eq!(b[7] == b'e' b'n' b'c' b'o' b'd' b'i' b'n' b'g') { return Ok(AcceptEncoding.into()) From ccf88892adf634d16fda239e6086e666f0d96b13 Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sat, 30 May 2020 17:15:03 -0400 Subject: [PATCH 8/8] Add safety comments to header::name module The comments document the invariant, preconditions, and post-conditions that together ensure that the use of unsafe related to UTF-8 assumptions (in calls to ByteStr::from_utf8_unchecked()) are sound. --- src/header/name.rs | 83 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 19 deletions(-) diff --git a/src/header/name.rs b/src/header/name.rs index 533e389f..2e7cc232 100644 --- a/src/header/name.rs +++ b/src/header/name.rs @@ -51,6 +51,7 @@ enum Repr { struct Custom(ByteStr); #[derive(Debug, Clone)] +// Invariant: If lower then buf is valid UTF-8. struct MaybeLower<'a> { buf: &'a [u8], lower: bool, @@ -979,6 +980,8 @@ standard_headers! { /// / DIGIT / ALPHA /// ; any VCHAR, except delimiters /// ``` +// HEADER_CHARS maps every byte that is 128 or larger to 0 so everything that is +// mapped by HEADER_CHARS, maps to a valid single-byte UTF-8 codepoint. const HEADER_CHARS: [u8; 256] = [ // 0 1 2 3 4 5 6 7 8 9 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x @@ -1010,6 +1013,8 @@ const HEADER_CHARS: [u8; 256] = [ ]; /// Valid header name characters for HTTP/2.0 and HTTP/3.0 +// HEADER_CHARS_H2 maps every byte that is 128 or larger to 0 so everything that is +// mapped by HEADER_CHARS_H2, maps to a valid single-byte UTF-8 codepoint. const HEADER_CHARS_H2: [u8; 256] = [ // 0 1 2 3 4 5 6 7 8 9 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x @@ -1044,6 +1049,7 @@ const HEADER_CHARS_H2: [u8; 256] = [ /// This version is best under optimized mode, however in a wasm debug compile, /// the `eq` macro expands to 1 + 1 + 1 + 1... and wasm explodes when this chain gets too long /// See https://github.com/DenisKolodin/yew/issues/478 +// Precondition: table maps all bytes that are not valid single-byte UTF-8 to something that is. fn parse_hdr<'a>( data: &'a [u8], b: &'a mut [MaybeUninit; SCRATCH_BUF_SIZE], @@ -1053,30 +1059,33 @@ fn parse_hdr<'a>( let len = data.len(); - // Precondition: each element of buf must be intitialized + // Precondition: each element of buf must be intitialized and must be + // a valid single-byte UTF-8 codepoint. let validate = |buf: &'a [MaybeUninit]| { // Safety: follows from the precondtion let buf = unsafe {slice_assume_init(buf)}; if buf.iter().any(|&b| b == 0) { Err(InvalidHeaderName::new()) } else { + // Precondition: satified by the precondition of validate. Ok(HdrName::custom(buf, true)) } }; // Called as either eq!(b == b'a' b'b' b'c') or eq!(b[i] == b'a' b'b' b'c') - // Precondition: the first n elements of b (or the first n starting at i) - // must be intitialized, where n is the number of bytes listed after the '==' - // in the invocation. + // Precondition: the first n elements of b (or the first n starting at i) must be + // intitialized, where n is the number of bytes listed after the '==' in the + // invocation. macro_rules! eq { (($($cmp:expr,)*) $v:ident[$n:expr] ==) => { $($cmp) && * }; (($($cmp:expr,)*) $v:ident[$n:expr] == $a:tt $($rest:tt)*) => { - // Safety: this arm is matched once for each byte after the '==' in - // the invocation (starting at 0 or i depending on the form of the call). - // By the precondtion $v[$n] is intitialized for each such match. - eq!(($($cmp,)* unsafe {*($v[$n].as_ptr())} == $a ,) $v[$n+1] == $($rest)*) + // Safety: this arm is matched once for each byte after the '==' in the + // invocation (starting at 0 or i depending on the form of the call). By + // the precondtion $v[$n] is intitialized for each such match. + eq!(($($cmp,)* unsafe {*($v[$n].as_ptr())} == $a ,) $v[$n+1] == + $($rest)*) }; ($v:ident == $($rest:tt)+) => { eq!(() $v[0] == $($rest)+) @@ -1086,11 +1095,12 @@ fn parse_hdr<'a>( }; } - - // Post-condition: the first n elements of $d are intitialized where n is the - // third paramter to the macro. Note that this macro overwrite the first n elements - // of $d without dropping the existing contents (if any) but the elements of $d - // are u8's so no drop is necessary. + // Post-condition: the first n elements of $d are intitialized to a valid + // single-byte UTF-8 codepoint where n is the third paramter to the macro. Note + // that this macro overwrite the first n elements of $d without dropping the + // existing contents (if any) but the elements of $d are u8's so no drop is + // necessary. The UTF-8 part of the post-condition follows from the precondition + // on table that is a part of parse_hdr(). macro_rules! to_lower { ($d:ident, $src:ident, 1) => { $d[0] = MaybeUninit::new(table[$src[0] as usize]); }; ($d:ident, $src:ident, 2) => { to_lower!($d, $src, 1); $d[1] = MaybeUninit::new(table[$src[1] as usize]); }; @@ -1155,8 +1165,9 @@ fn parse_hdr<'a>( Ok(Te.into()) } else { // Precondition: the post-condition on to_lower!() ensures that the - // first 2 elements of b are intitialized. len == 2 so all of - // b[..len] is intitialized. + // first 2 elements of b are intitialized and are valid single-byte + // UTF-8. len == 2 so all of b[..len] is intitialized and is valid + // UTF-8. validate(&b[..len]) } } @@ -1573,11 +1584,13 @@ fn parse_hdr<'a>( _ => { if len < 64 { for i in 0..len { + // The precondition on table for parse_hdr() means that b[i] is + // intitialized to a valid single-byte UTF-8 codepoint. b[i] = MaybeUninit::new(table[data[i] as usize]); } // Precondition: the first len bytes of b are intitialized in the loop above so - // b[..len] is intitialized. + // b[..len] is intitialized and is valid UTF-8. validate(&b[..len]) } else { Ok(HdrName::custom(data, false)) @@ -1588,6 +1601,7 @@ fn parse_hdr<'a>( #[cfg(all(debug_assertions, target_arch = "wasm32"))] /// This version works best in debug mode in wasm +// Precondition: table maps all bytes that are not valid single-byte UTF-8 to something that is. fn parse_hdr<'a>( data: &'a [u8], b: &'a mut [MaybeUninit; SCRATCH_BUF_SIZE], @@ -1597,11 +1611,13 @@ fn parse_hdr<'a>( let len = data.len(); + // Precondition: the first len bytes of buf are valid UTF-8. let validate = |buf: &'a [u8], len: usize| { let buf = &buf[..len]; if buf.iter().any(|&b| b == 0) { Err(InvalidHeaderName::new()) } else { + // Precondition: follows from the precondtion on validate. Ok(HdrName::custom(buf, true)) } }; @@ -1617,9 +1633,13 @@ fn parse_hdr<'a>( len if len > 64 => Ok(HdrName::custom(data, false)), len => { // Read from data into the buffer - transforming using `table` as we go. - // The assignment to *out ensures that each byte is intitialized. Since - // *out is a u8 it doesn't matter that we are not dropping *out before accessing it. - data.iter().zip(b.iter_mut()).for_each(|(index, out)| *out = MaybeUninit::new(table[*index as usize])); + // The assignment to *out ensures that each byte is intitialized. Since + // *out is a u8 it doesn't matter that we are not dropping *out before + // accessing it. The precondition on table for parse_hdr() means that + // each intitialized byte of b is valid UTF-8. + data.iter().zip(b.iter_mut()).for_each(|(index, out)| *out = + MaybeUninit::new(table[*index as + usize])); // Safety: We just intitialized the first len bytes of b in the previous line. let b = unsafe {slice_assume_init(&b[..len])}; match &b[0..len] { @@ -1704,6 +1724,8 @@ fn parse_hdr<'a>( b"content-security-policy-report-only" => { Ok(ContentSecurityPolicyReportOnly.into()) } + // Precondition: other is the first len bytes of b which was + // initialized to valid UTF-8 above. other => validate(other, len), } } @@ -1724,10 +1746,12 @@ impl HeaderName { /// This function normalizes the input. pub fn from_bytes(src: &[u8]) -> Result { let mut buf = uninit_u8_array(); + // Precondition: HEADER_CHARS is a valid table for parse_hdr(). match parse_hdr(src, &mut buf, &HEADER_CHARS)?.inner { Repr::Standard(std) => Ok(std.into()), Repr::Custom(MaybeLower { buf, lower: true }) => { let buf = Bytes::copy_from_slice(buf); + // Safety: the invariant on MaybeLower ensures buf is valid UTF-8. let val = unsafe { ByteStr::from_utf8_unchecked(buf) }; Ok(Custom(val).into()) } @@ -1736,6 +1760,7 @@ impl HeaderName { let mut dst = BytesMut::with_capacity(buf.len()); for b in buf.iter() { + // HEADER_CHARS maps all bytes to valid single-byte UTF-8 let b = HEADER_CHARS[*b as usize]; if b == 0 { @@ -1745,6 +1770,9 @@ impl HeaderName { dst.put_u8(b); } + // Safety: the loop above maps all bytes in buf to valid single byte + // UTF-8 before copying them into dst. This means that dst (and hence + // dst.freeze()) is valid UTF-8. let val = unsafe { ByteStr::from_utf8_unchecked(dst.freeze()) }; Ok(Custom(val).into()) @@ -1772,21 +1800,27 @@ impl HeaderName { /// ``` pub fn from_lowercase(src: &[u8]) -> Result { let mut buf = uninit_u8_array(); + // Precondition: HEADER_CHARS_H2 is a valid table for parse_hdr() match parse_hdr(src, &mut buf, &HEADER_CHARS_H2)?.inner { Repr::Standard(std) => Ok(std.into()), Repr::Custom(MaybeLower { buf, lower: true }) => { let buf = Bytes::copy_from_slice(buf); + // Safety: the invariant on MaybeLower ensures buf is valid UTF-8. let val = unsafe { ByteStr::from_utf8_unchecked(buf) }; Ok(Custom(val).into()) } Repr::Custom(MaybeLower { buf, lower: false }) => { for &b in buf.iter() { + // HEADER_CHARS maps all bytes that are not valid single-byte + // UTF-8 to 0 so this check returns an error for invalid UTF-8. if b != HEADER_CHARS[b as usize] { return Err(InvalidHeaderName::new()); } } let buf = Bytes::copy_from_slice(buf); + // Safety: the loop above checks that each byte of buf (either + // version) is valid UTF-8. let val = unsafe { ByteStr::from_utf8_unchecked(buf) }; Ok(Custom(val).into()) } @@ -1831,6 +1865,7 @@ impl HeaderName { pub fn from_static(src: &'static str) -> HeaderName { let bytes = src.as_bytes(); let mut buf = uninit_u8_array(); + // Precondition: HEADER_CHARS_H2 is a valid table for parse_hdr() match parse_hdr(bytes, &mut buf, &HEADER_CHARS_H2) { Ok(hdr_name) => match hdr_name.inner { Repr::Standard(std) => std.into(), @@ -2073,8 +2108,10 @@ impl Error for InvalidHeaderName {} // ===== HdrName ===== impl<'a> HdrName<'a> { + // Precondition: if lower then buf is valid UTF-8 fn custom(buf: &'a [u8], lower: bool) -> HdrName<'a> { HdrName { + // Invariant (on MaybeLower): follows from the precondition inner: Repr::Custom(MaybeLower { buf: buf, lower: lower, @@ -2086,6 +2123,7 @@ impl<'a> HdrName<'a> { where F: FnOnce(HdrName<'_>) -> U, { let mut buf = uninit_u8_array(); + // Precondition: HEADER_CHARS is a valid table for parse_hdr(). let hdr = parse_hdr(hdr, &mut buf, &HEADER_CHARS)?; Ok(f(hdr)) } @@ -2096,6 +2134,7 @@ impl<'a> HdrName<'a> { { let mut buf = uninit_u8_array(); let hdr = + // Precondition: HEADER_CHARS is a valid table for parse_hdr(). parse_hdr(hdr.as_bytes(), &mut buf, &HEADER_CHARS).expect("static str is invalid name"); f(hdr) } @@ -2111,6 +2150,7 @@ impl<'a> From> for HeaderName { Repr::Custom(maybe_lower) => { if maybe_lower.lower { let buf = Bytes::copy_from_slice(&maybe_lower.buf[..]); + // Safety: the invariant on MaybeLower ensures buf is valid UTF-8. let byte_str = unsafe { ByteStr::from_utf8_unchecked(buf) }; HeaderName { @@ -2121,9 +2161,14 @@ impl<'a> From> for HeaderName { let mut dst = BytesMut::with_capacity(maybe_lower.buf.len()); for b in maybe_lower.buf.iter() { + // HEADER_CHARS maps each byte to a valid single-byte UTF-8 + // codepoint. dst.put_u8(HEADER_CHARS[*b as usize]); } + // Safety: the loop above maps each byte of maybe_lower.buf to a + // valid single-byte UTF-8 codepoint before copying it into dst. + // dst (and hence dst.freeze()) is thus valid UTF-8. let buf = unsafe { ByteStr::from_utf8_unchecked(dst.freeze()) }; HeaderName {