From e6a097914acc84939080f7db23d5cf5b58cec907 Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Sat, 9 Mar 2024 20:46:44 +0100 Subject: [PATCH] perf: don't alloc zeroed in encode --- README.md | 148 ++++++++++++++++++++++++++--------------------------- src/lib.rs | 25 ++++++--- 2 files changed, 91 insertions(+), 82 deletions(-) diff --git a/README.md b/README.md index 9b49a05..2190525 100644 --- a/README.md +++ b/README.md @@ -33,94 +33,94 @@ You can run these benchmarks with `cargo bench --features std` on a nightly compiler. ```log -test decode::const_hex::bench1_32b ... bench: 14 ns/iter (+/- 0) -test decode::const_hex::bench2_256b ... bench: 37 ns/iter (+/- 4) -test decode::const_hex::bench3_2k ... bench: 226 ns/iter (+/- 7) -test decode::const_hex::bench4_16k ... bench: 1,636 ns/iter (+/- 13) -test decode::const_hex::bench5_128k ... bench: 12,644 ns/iter (+/- 84) -test decode::const_hex::bench6_1m ... bench: 102,836 ns/iter (+/- 3,236) +test decode::const_hex::bench1_32b ... bench: 16 ns/iter (+/- 5) +test decode::const_hex::bench2_256b ... bench: 37 ns/iter (+/- 0) +test decode::const_hex::bench3_2k ... bench: 232 ns/iter (+/- 2) +test decode::const_hex::bench4_16k ... bench: 1,672 ns/iter (+/- 12) +test decode::const_hex::bench5_128k ... bench: 12,979 ns/iter (+/- 91) +test decode::const_hex::bench6_1m ... bench: 104,751 ns/iter (+/- 2,068) test decode::faster_hex::bench1_32b ... bench: 15 ns/iter (+/- 0) -test decode::faster_hex::bench2_256b ... bench: 50 ns/iter (+/- 1) -test decode::faster_hex::bench3_2k ... bench: 244 ns/iter (+/- 4) -test decode::faster_hex::bench4_16k ... bench: 1,782 ns/iter (+/- 31) -test decode::faster_hex::bench5_128k ... bench: 13,745 ns/iter (+/- 66) -test decode::faster_hex::bench6_1m ... bench: 115,126 ns/iter (+/- 1,544) -test decode::hex::bench1_32b ... bench: 101 ns/iter (+/- 6) -test decode::hex::bench2_256b ... bench: 701 ns/iter (+/- 12) -test decode::hex::bench3_2k ... 
bench: 5,287 ns/iter (+/- 97) -test decode::hex::bench4_16k ... bench: 41,743 ns/iter (+/- 1,420) -test decode::hex::bench5_128k ... bench: 782,327 ns/iter (+/- 18,876) -test decode::hex::bench6_1m ... bench: 6,283,181 ns/iter (+/- 88,813) +test decode::faster_hex::bench2_256b ... bench: 54 ns/iter (+/- 1) +test decode::faster_hex::bench3_2k ... bench: 253 ns/iter (+/- 3) +test decode::faster_hex::bench4_16k ... bench: 1,831 ns/iter (+/- 20) +test decode::faster_hex::bench5_128k ... bench: 14,120 ns/iter (+/- 57) +test decode::faster_hex::bench6_1m ... bench: 115,291 ns/iter (+/- 1,325) +test decode::hex::bench1_32b ... bench: 104 ns/iter (+/- 1) +test decode::hex::bench2_256b ... bench: 697 ns/iter (+/- 7) +test decode::hex::bench3_2k ... bench: 5,189 ns/iter (+/- 86) +test decode::hex::bench4_16k ... bench: 42,355 ns/iter (+/- 21,853) +test decode::hex::bench5_128k ... bench: 765,278 ns/iter (+/- 4,091) +test decode::hex::bench6_1m ... bench: 6,161,416 ns/iter (+/- 64,954) test decode_to_slice::const_hex::bench1_32b ... bench: 5 ns/iter (+/- 0) -test decode_to_slice::const_hex::bench2_256b ... bench: 25 ns/iter (+/- 0) -test decode_to_slice::const_hex::bench3_2k ... bench: 201 ns/iter (+/- 3) -test decode_to_slice::const_hex::bench4_16k ... bench: 1,600 ns/iter (+/- 17) -test decode_to_slice::const_hex::bench5_128k ... bench: 12,732 ns/iter (+/- 119) -test decode_to_slice::const_hex::bench6_1m ... bench: 103,414 ns/iter (+/- 2,402) +test decode_to_slice::const_hex::bench2_256b ... bench: 26 ns/iter (+/- 0) +test decode_to_slice::const_hex::bench3_2k ... bench: 210 ns/iter (+/- 10) +test decode_to_slice::const_hex::bench4_16k ... bench: 1,667 ns/iter (+/- 13) +test decode_to_slice::const_hex::bench5_128k ... bench: 13,043 ns/iter (+/- 19) +test decode_to_slice::const_hex::bench6_1m ... bench: 105,883 ns/iter (+/- 1,427) test decode_to_slice::faster_hex::bench1_32b ... bench: 6 ns/iter (+/- 0) test decode_to_slice::faster_hex::bench2_256b ... 
bench: 28 ns/iter (+/- 0) -test decode_to_slice::faster_hex::bench3_2k ... bench: 206 ns/iter (+/- 3) -test decode_to_slice::faster_hex::bench4_16k ... bench: 1,640 ns/iter (+/- 13) -test decode_to_slice::faster_hex::bench5_128k ... bench: 13,065 ns/iter (+/- 92) -test decode_to_slice::faster_hex::bench6_1m ... bench: 105,963 ns/iter (+/- 2,831) -test decode_to_slice::hex::bench1_32b ... bench: 37 ns/iter (+/- 0) -test decode_to_slice::hex::bench2_256b ... bench: 298 ns/iter (+/- 6) -test decode_to_slice::hex::bench3_2k ... bench: 2,552 ns/iter (+/- 27) -test decode_to_slice::hex::bench4_16k ... bench: 20,335 ns/iter (+/- 581) -test decode_to_slice::hex::bench5_128k ... bench: 611,494 ns/iter (+/- 11,531) -test decode_to_slice::hex::bench6_1m ... bench: 4,941,477 ns/iter (+/- 180,172) - -test encode::const_hex::bench1_32b ... bench: 10 ns/iter (+/- 0) -test encode::const_hex::bench2_256b ... bench: 27 ns/iter (+/- 0) -test encode::const_hex::bench3_2k ... bench: 97 ns/iter (+/- 0) -test encode::const_hex::bench4_16k ... bench: 644 ns/iter (+/- 8) -test encode::const_hex::bench5_128k ... bench: 4,967 ns/iter (+/- 52) -test encode::const_hex::bench6_1m ... bench: 45,424 ns/iter (+/- 1,922) +test decode_to_slice::faster_hex::bench3_2k ... bench: 214 ns/iter (+/- 2) +test decode_to_slice::faster_hex::bench4_16k ... bench: 1,710 ns/iter (+/- 6) +test decode_to_slice::faster_hex::bench5_128k ... bench: 13,304 ns/iter (+/- 37) +test decode_to_slice::faster_hex::bench6_1m ... bench: 110,276 ns/iter (+/- 3,475) +test decode_to_slice::hex::bench1_32b ... bench: 38 ns/iter (+/- 2) +test decode_to_slice::hex::bench2_256b ... bench: 300 ns/iter (+/- 185) +test decode_to_slice::hex::bench3_2k ... bench: 2,717 ns/iter (+/- 64) +test decode_to_slice::hex::bench4_16k ... bench: 19,257 ns/iter (+/- 530) +test decode_to_slice::hex::bench5_128k ... bench: 624,172 ns/iter (+/- 15,725) +test decode_to_slice::hex::bench6_1m ... 
bench: 5,333,915 ns/iter (+/- 298,093) + +test encode::const_hex::bench1_32b ... bench: 6 ns/iter (+/- 0) +test encode::const_hex::bench2_256b ... bench: 10 ns/iter (+/- 0) +test encode::const_hex::bench3_2k ... bench: 72 ns/iter (+/- 1) +test encode::const_hex::bench4_16k ... bench: 462 ns/iter (+/- 4) +test encode::const_hex::bench5_128k ... bench: 3,600 ns/iter (+/- 28) +test encode::const_hex::bench6_1m ... bench: 29,447 ns/iter (+/- 858) test encode::faster_hex::bench1_32b ... bench: 17 ns/iter (+/- 0) -test encode::faster_hex::bench2_256b ... bench: 36 ns/iter (+/- 0) -test encode::faster_hex::bench3_2k ... bench: 95 ns/iter (+/- 1) -test encode::faster_hex::bench4_16k ... bench: 597 ns/iter (+/- 10) -test encode::faster_hex::bench5_128k ... bench: 4,538 ns/iter (+/- 180) -test encode::faster_hex::bench6_1m ... bench: 41,513 ns/iter (+/- 779) -test encode::hex::bench1_32b ... bench: 97 ns/iter (+/- 0) -test encode::hex::bench2_256b ... bench: 694 ns/iter (+/- 4) -test encode::hex::bench3_2k ... bench: 5,476 ns/iter (+/- 28) -test encode::hex::bench4_16k ... bench: 43,617 ns/iter (+/- 215) -test encode::hex::bench5_128k ... bench: 348,646 ns/iter (+/- 1,155) -test encode::hex::bench6_1m ... bench: 2,895,775 ns/iter (+/- 95,699) +test encode::faster_hex::bench2_256b ... bench: 37 ns/iter (+/- 3) +test encode::faster_hex::bench3_2k ... bench: 102 ns/iter (+/- 1) +test encode::faster_hex::bench4_16k ... bench: 614 ns/iter (+/- 6) +test encode::faster_hex::bench5_128k ... bench: 4,764 ns/iter (+/- 12) +test encode::faster_hex::bench6_1m ... bench: 40,894 ns/iter (+/- 1,223) +test encode::hex::bench1_32b ... bench: 112 ns/iter (+/- 0) +test encode::hex::bench2_256b ... bench: 812 ns/iter (+/- 5) +test encode::hex::bench3_2k ... bench: 6,404 ns/iter (+/- 26) +test encode::hex::bench4_16k ... bench: 51,039 ns/iter (+/- 595) +test encode::hex::bench5_128k ... bench: 408,378 ns/iter (+/- 23,022) +test encode::hex::bench6_1m ... 
bench: 3,571,916 ns/iter (+/- 142,828) test encode_to_slice::const_hex::bench1_32b ... bench: 1 ns/iter (+/- 0) test encode_to_slice::const_hex::bench2_256b ... bench: 6 ns/iter (+/- 0) -test encode_to_slice::const_hex::bench3_2k ... bench: 59 ns/iter (+/- 0) -test encode_to_slice::const_hex::bench4_16k ... bench: 438 ns/iter (+/- 2) -test encode_to_slice::const_hex::bench5_128k ... bench: 3,414 ns/iter (+/- 10) -test encode_to_slice::const_hex::bench6_1m ... bench: 28,947 ns/iter (+/- 546) +test encode_to_slice::const_hex::bench3_2k ... bench: 53 ns/iter (+/- 0) +test encode_to_slice::const_hex::bench4_16k ... bench: 452 ns/iter (+/- 3) +test encode_to_slice::const_hex::bench5_128k ... bench: 3,550 ns/iter (+/- 10) +test encode_to_slice::const_hex::bench6_1m ... bench: 29,605 ns/iter (+/- 916) test encode_to_slice::faster_hex::bench1_32b ... bench: 4 ns/iter (+/- 0) test encode_to_slice::faster_hex::bench2_256b ... bench: 7 ns/iter (+/- 0) -test encode_to_slice::faster_hex::bench3_2k ... bench: 63 ns/iter (+/- 0) -test encode_to_slice::faster_hex::bench4_16k ... bench: 390 ns/iter (+/- 5) -test encode_to_slice::faster_hex::bench5_128k ... bench: 3,012 ns/iter (+/- 22) -test encode_to_slice::faster_hex::bench6_1m ... bench: 26,138 ns/iter (+/- 596) +test encode_to_slice::faster_hex::bench3_2k ... bench: 47 ns/iter (+/- 0) +test encode_to_slice::faster_hex::bench4_16k ... bench: 402 ns/iter (+/- 5) +test encode_to_slice::faster_hex::bench5_128k ... bench: 3,121 ns/iter (+/- 25) +test encode_to_slice::faster_hex::bench6_1m ... bench: 26,171 ns/iter (+/- 573) test encode_to_slice::hex::bench1_32b ... bench: 11 ns/iter (+/- 0) -test encode_to_slice::hex::bench2_256b ... bench: 116 ns/iter (+/- 0) -test encode_to_slice::hex::bench3_2k ... bench: 971 ns/iter (+/- 6) -test encode_to_slice::hex::bench4_16k ... bench: 7,821 ns/iter (+/- 48) -test encode_to_slice::hex::bench5_128k ... bench: 61,907 ns/iter (+/- 377) -test encode_to_slice::hex::bench6_1m ... 
bench: 499,203 ns/iter (+/- 3,771) +test encode_to_slice::hex::bench2_256b ... bench: 118 ns/iter (+/- 0) +test encode_to_slice::hex::bench3_2k ... bench: 994 ns/iter (+/- 4) +test encode_to_slice::hex::bench4_16k ... bench: 8,065 ns/iter (+/- 31) +test encode_to_slice::hex::bench5_128k ... bench: 63,982 ns/iter (+/- 2,026) +test encode_to_slice::hex::bench6_1m ... bench: 515,171 ns/iter (+/- 2,789) -test format::const_hex::bench1_32b ... bench: 10 ns/iter (+/- 1) +test format::const_hex::bench1_32b ... bench: 9 ns/iter (+/- 0) test format::const_hex::bench2_256b ... bench: 18 ns/iter (+/- 0) -test format::const_hex::bench3_2k ... bench: 134 ns/iter (+/- 2) -test format::const_hex::bench4_16k ... bench: 1,151 ns/iter (+/- 5) -test format::const_hex::bench5_128k ... bench: 9,298 ns/iter (+/- 83) -test format::const_hex::bench6_1m ... bench: 83,611 ns/iter (+/- 1,530) -test format::std::bench1_32b ... bench: 359 ns/iter (+/- 6) -test format::std::bench2_256b ... bench: 2,773 ns/iter (+/- 44) -test format::std::bench3_2k ... bench: 22,620 ns/iter (+/- 213) -test format::std::bench4_16k ... bench: 183,197 ns/iter (+/- 1,512) -test format::std::bench5_128k ... bench: 1,481,851 ns/iter (+/- 9,791) -test format::std::bench6_1m ... bench: 11,947,054 ns/iter (+/- 132,579) +test format::const_hex::bench3_2k ... bench: 119 ns/iter (+/- 1) +test format::const_hex::bench4_16k ... bench: 1,157 ns/iter (+/- 3) +test format::const_hex::bench5_128k ... bench: 9,560 ns/iter (+/- 443) +test format::const_hex::bench6_1m ... bench: 85,479 ns/iter (+/- 1,498) +test format::std::bench1_32b ... bench: 374 ns/iter (+/- 6) +test format::std::bench2_256b ... bench: 2,952 ns/iter (+/- 10) +test format::std::bench3_2k ... bench: 23,767 ns/iter (+/- 61) +test format::std::bench4_16k ... bench: 183,579 ns/iter (+/- 2,078) +test format::std::bench5_128k ... bench: 1,498,391 ns/iter (+/- 8,445) +test format::std::bench6_1m ... 
bench: 11,965,082 ns/iter (+/- 43,784) ``` ## Acknowledgements diff --git a/src/lib.rs b/src/lib.rs index 72c931b..ad810b4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -48,6 +48,7 @@ )] #[cfg(feature = "alloc")] +#[allow(unused_imports)] #[macro_use] extern crate alloc; @@ -576,16 +577,24 @@ pub fn decode_to_array, const N: usize>(input: T) -> Result<[u8; #[cfg(feature = "alloc")] fn encode_inner(data: &[u8]) -> String { - let mut buf = vec![0; (PREFIX as usize + data.len()) * 2]; - let output = if PREFIX { - buf[0] = b'0'; - buf[1] = b'x'; - &mut buf[2..] - } else { - &mut buf[..] + let capacity = PREFIX as usize * 2 + data.len() * 2; + let mut buf = Vec::::with_capacity(capacity); + // SAFETY: The entire vec is never read from, and gets dropped if encoding fails. + #[allow(clippy::uninit_vec)] + unsafe { + buf.set_len(capacity) }; + let mut output = buf.as_mut_ptr(); + if PREFIX { + // SAFETY: `output` is long enough. + unsafe { + output.add(0).write(b'0'); + output.add(1).write(b'x'); + output = output.add(2); + } + } // SAFETY: `output` is long enough (input.len() * 2). - unsafe { imp::encode::(data, output.as_mut_ptr()) }; + unsafe { imp::encode::(data, output) }; // SAFETY: We only write only ASCII bytes. unsafe { String::from_utf8_unchecked(buf) } }