Skip to content

Commit

Permalink
perf: don't alloc zeroed in encode
Browse files Browse the repository at this point in the history
  • Loading branch information
DaniPopes committed Mar 9, 2024
1 parent 3ecac1e commit e6a0979
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 82 deletions.
148 changes: 74 additions & 74 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,94 +33,94 @@ You can run these benchmarks with `cargo bench --features std` on a nightly
compiler.

```log
test decode::const_hex::bench1_32b ... bench: 14 ns/iter (+/- 0)
test decode::const_hex::bench2_256b ... bench: 37 ns/iter (+/- 4)
test decode::const_hex::bench3_2k ... bench: 226 ns/iter (+/- 7)
test decode::const_hex::bench4_16k ... bench: 1,636 ns/iter (+/- 13)
test decode::const_hex::bench5_128k ... bench: 12,644 ns/iter (+/- 84)
test decode::const_hex::bench6_1m ... bench: 102,836 ns/iter (+/- 3,236)
test decode::const_hex::bench1_32b ... bench: 16 ns/iter (+/- 5)
test decode::const_hex::bench2_256b ... bench: 37 ns/iter (+/- 0)
test decode::const_hex::bench3_2k ... bench: 232 ns/iter (+/- 2)
test decode::const_hex::bench4_16k ... bench: 1,672 ns/iter (+/- 12)
test decode::const_hex::bench5_128k ... bench: 12,979 ns/iter (+/- 91)
test decode::const_hex::bench6_1m ... bench: 104,751 ns/iter (+/- 2,068)
test decode::faster_hex::bench1_32b ... bench: 15 ns/iter (+/- 0)
test decode::faster_hex::bench2_256b ... bench: 50 ns/iter (+/- 1)
test decode::faster_hex::bench3_2k ... bench: 244 ns/iter (+/- 4)
test decode::faster_hex::bench4_16k ... bench: 1,782 ns/iter (+/- 31)
test decode::faster_hex::bench5_128k ... bench: 13,745 ns/iter (+/- 66)
test decode::faster_hex::bench6_1m ... bench: 115,126 ns/iter (+/- 1,544)
test decode::hex::bench1_32b ... bench: 101 ns/iter (+/- 6)
test decode::hex::bench2_256b ... bench: 701 ns/iter (+/- 12)
test decode::hex::bench3_2k ... bench: 5,287 ns/iter (+/- 97)
test decode::hex::bench4_16k ... bench: 41,743 ns/iter (+/- 1,420)
test decode::hex::bench5_128k ... bench: 782,327 ns/iter (+/- 18,876)
test decode::hex::bench6_1m ... bench: 6,283,181 ns/iter (+/- 88,813)
test decode::faster_hex::bench2_256b ... bench: 54 ns/iter (+/- 1)
test decode::faster_hex::bench3_2k ... bench: 253 ns/iter (+/- 3)
test decode::faster_hex::bench4_16k ... bench: 1,831 ns/iter (+/- 20)
test decode::faster_hex::bench5_128k ... bench: 14,120 ns/iter (+/- 57)
test decode::faster_hex::bench6_1m ... bench: 115,291 ns/iter (+/- 1,325)
test decode::hex::bench1_32b ... bench: 104 ns/iter (+/- 1)
test decode::hex::bench2_256b ... bench: 697 ns/iter (+/- 7)
test decode::hex::bench3_2k ... bench: 5,189 ns/iter (+/- 86)
test decode::hex::bench4_16k ... bench: 42,355 ns/iter (+/- 21,853)
test decode::hex::bench5_128k ... bench: 765,278 ns/iter (+/- 4,091)
test decode::hex::bench6_1m ... bench: 6,161,416 ns/iter (+/- 64,954)
test decode_to_slice::const_hex::bench1_32b ... bench: 5 ns/iter (+/- 0)
test decode_to_slice::const_hex::bench2_256b ... bench: 25 ns/iter (+/- 0)
test decode_to_slice::const_hex::bench3_2k ... bench: 201 ns/iter (+/- 3)
test decode_to_slice::const_hex::bench4_16k ... bench: 1,600 ns/iter (+/- 17)
test decode_to_slice::const_hex::bench5_128k ... bench: 12,732 ns/iter (+/- 119)
test decode_to_slice::const_hex::bench6_1m ... bench: 103,414 ns/iter (+/- 2,402)
test decode_to_slice::const_hex::bench2_256b ... bench: 26 ns/iter (+/- 0)
test decode_to_slice::const_hex::bench3_2k ... bench: 210 ns/iter (+/- 10)
test decode_to_slice::const_hex::bench4_16k ... bench: 1,667 ns/iter (+/- 13)
test decode_to_slice::const_hex::bench5_128k ... bench: 13,043 ns/iter (+/- 19)
test decode_to_slice::const_hex::bench6_1m ... bench: 105,883 ns/iter (+/- 1,427)
test decode_to_slice::faster_hex::bench1_32b ... bench: 6 ns/iter (+/- 0)
test decode_to_slice::faster_hex::bench2_256b ... bench: 28 ns/iter (+/- 0)
test decode_to_slice::faster_hex::bench3_2k ... bench: 206 ns/iter (+/- 3)
test decode_to_slice::faster_hex::bench4_16k ... bench: 1,640 ns/iter (+/- 13)
test decode_to_slice::faster_hex::bench5_128k ... bench: 13,065 ns/iter (+/- 92)
test decode_to_slice::faster_hex::bench6_1m ... bench: 105,963 ns/iter (+/- 2,831)
test decode_to_slice::hex::bench1_32b ... bench: 37 ns/iter (+/- 0)
test decode_to_slice::hex::bench2_256b ... bench: 298 ns/iter (+/- 6)
test decode_to_slice::hex::bench3_2k ... bench: 2,552 ns/iter (+/- 27)
test decode_to_slice::hex::bench4_16k ... bench: 20,335 ns/iter (+/- 581)
test decode_to_slice::hex::bench5_128k ... bench: 611,494 ns/iter (+/- 11,531)
test decode_to_slice::hex::bench6_1m ... bench: 4,941,477 ns/iter (+/- 180,172)
test encode::const_hex::bench1_32b ... bench: 10 ns/iter (+/- 0)
test encode::const_hex::bench2_256b ... bench: 27 ns/iter (+/- 0)
test encode::const_hex::bench3_2k ... bench: 97 ns/iter (+/- 0)
test encode::const_hex::bench4_16k ... bench: 644 ns/iter (+/- 8)
test encode::const_hex::bench5_128k ... bench: 4,967 ns/iter (+/- 52)
test encode::const_hex::bench6_1m ... bench: 45,424 ns/iter (+/- 1,922)
test decode_to_slice::faster_hex::bench3_2k ... bench: 214 ns/iter (+/- 2)
test decode_to_slice::faster_hex::bench4_16k ... bench: 1,710 ns/iter (+/- 6)
test decode_to_slice::faster_hex::bench5_128k ... bench: 13,304 ns/iter (+/- 37)
test decode_to_slice::faster_hex::bench6_1m ... bench: 110,276 ns/iter (+/- 3,475)
test decode_to_slice::hex::bench1_32b ... bench: 38 ns/iter (+/- 2)
test decode_to_slice::hex::bench2_256b ... bench: 300 ns/iter (+/- 185)
test decode_to_slice::hex::bench3_2k ... bench: 2,717 ns/iter (+/- 64)
test decode_to_slice::hex::bench4_16k ... bench: 19,257 ns/iter (+/- 530)
test decode_to_slice::hex::bench5_128k ... bench: 624,172 ns/iter (+/- 15,725)
test decode_to_slice::hex::bench6_1m ... bench: 5,333,915 ns/iter (+/- 298,093)
test encode::const_hex::bench1_32b ... bench: 6 ns/iter (+/- 0)
test encode::const_hex::bench2_256b ... bench: 10 ns/iter (+/- 0)
test encode::const_hex::bench3_2k ... bench: 72 ns/iter (+/- 1)
test encode::const_hex::bench4_16k ... bench: 462 ns/iter (+/- 4)
test encode::const_hex::bench5_128k ... bench: 3,600 ns/iter (+/- 28)
test encode::const_hex::bench6_1m ... bench: 29,447 ns/iter (+/- 858)
test encode::faster_hex::bench1_32b ... bench: 17 ns/iter (+/- 0)
test encode::faster_hex::bench2_256b ... bench: 36 ns/iter (+/- 0)
test encode::faster_hex::bench3_2k ... bench: 95 ns/iter (+/- 1)
test encode::faster_hex::bench4_16k ... bench: 597 ns/iter (+/- 10)
test encode::faster_hex::bench5_128k ... bench: 4,538 ns/iter (+/- 180)
test encode::faster_hex::bench6_1m ... bench: 41,513 ns/iter (+/- 779)
test encode::hex::bench1_32b ... bench: 97 ns/iter (+/- 0)
test encode::hex::bench2_256b ... bench: 694 ns/iter (+/- 4)
test encode::hex::bench3_2k ... bench: 5,476 ns/iter (+/- 28)
test encode::hex::bench4_16k ... bench: 43,617 ns/iter (+/- 215)
test encode::hex::bench5_128k ... bench: 348,646 ns/iter (+/- 1,155)
test encode::hex::bench6_1m ... bench: 2,895,775 ns/iter (+/- 95,699)
test encode::faster_hex::bench2_256b ... bench: 37 ns/iter (+/- 3)
test encode::faster_hex::bench3_2k ... bench: 102 ns/iter (+/- 1)
test encode::faster_hex::bench4_16k ... bench: 614 ns/iter (+/- 6)
test encode::faster_hex::bench5_128k ... bench: 4,764 ns/iter (+/- 12)
test encode::faster_hex::bench6_1m ... bench: 40,894 ns/iter (+/- 1,223)
test encode::hex::bench1_32b ... bench: 112 ns/iter (+/- 0)
test encode::hex::bench2_256b ... bench: 812 ns/iter (+/- 5)
test encode::hex::bench3_2k ... bench: 6,404 ns/iter (+/- 26)
test encode::hex::bench4_16k ... bench: 51,039 ns/iter (+/- 595)
test encode::hex::bench5_128k ... bench: 408,378 ns/iter (+/- 23,022)
test encode::hex::bench6_1m ... bench: 3,571,916 ns/iter (+/- 142,828)
test encode_to_slice::const_hex::bench1_32b ... bench: 1 ns/iter (+/- 0)
test encode_to_slice::const_hex::bench2_256b ... bench: 6 ns/iter (+/- 0)
test encode_to_slice::const_hex::bench3_2k ... bench: 59 ns/iter (+/- 0)
test encode_to_slice::const_hex::bench4_16k ... bench: 438 ns/iter (+/- 2)
test encode_to_slice::const_hex::bench5_128k ... bench: 3,414 ns/iter (+/- 10)
test encode_to_slice::const_hex::bench6_1m ... bench: 28,947 ns/iter (+/- 546)
test encode_to_slice::const_hex::bench3_2k ... bench: 53 ns/iter (+/- 0)
test encode_to_slice::const_hex::bench4_16k ... bench: 452 ns/iter (+/- 3)
test encode_to_slice::const_hex::bench5_128k ... bench: 3,550 ns/iter (+/- 10)
test encode_to_slice::const_hex::bench6_1m ... bench: 29,605 ns/iter (+/- 916)
test encode_to_slice::faster_hex::bench1_32b ... bench: 4 ns/iter (+/- 0)
test encode_to_slice::faster_hex::bench2_256b ... bench: 7 ns/iter (+/- 0)
test encode_to_slice::faster_hex::bench3_2k ... bench: 63 ns/iter (+/- 0)
test encode_to_slice::faster_hex::bench4_16k ... bench: 390 ns/iter (+/- 5)
test encode_to_slice::faster_hex::bench5_128k ... bench: 3,012 ns/iter (+/- 22)
test encode_to_slice::faster_hex::bench6_1m ... bench: 26,138 ns/iter (+/- 596)
test encode_to_slice::faster_hex::bench3_2k ... bench: 47 ns/iter (+/- 0)
test encode_to_slice::faster_hex::bench4_16k ... bench: 402 ns/iter (+/- 5)
test encode_to_slice::faster_hex::bench5_128k ... bench: 3,121 ns/iter (+/- 25)
test encode_to_slice::faster_hex::bench6_1m ... bench: 26,171 ns/iter (+/- 573)
test encode_to_slice::hex::bench1_32b ... bench: 11 ns/iter (+/- 0)
test encode_to_slice::hex::bench2_256b ... bench: 116 ns/iter (+/- 0)
test encode_to_slice::hex::bench3_2k ... bench: 971 ns/iter (+/- 6)
test encode_to_slice::hex::bench4_16k ... bench: 7,821 ns/iter (+/- 48)
test encode_to_slice::hex::bench5_128k ... bench: 61,907 ns/iter (+/- 377)
test encode_to_slice::hex::bench6_1m ... bench: 499,203 ns/iter (+/- 3,771)
test encode_to_slice::hex::bench2_256b ... bench: 118 ns/iter (+/- 0)
test encode_to_slice::hex::bench3_2k ... bench: 994 ns/iter (+/- 4)
test encode_to_slice::hex::bench4_16k ... bench: 8,065 ns/iter (+/- 31)
test encode_to_slice::hex::bench5_128k ... bench: 63,982 ns/iter (+/- 2,026)
test encode_to_slice::hex::bench6_1m ... bench: 515,171 ns/iter (+/- 2,789)
test format::const_hex::bench1_32b ... bench: 10 ns/iter (+/- 1)
test format::const_hex::bench1_32b ... bench: 9 ns/iter (+/- 0)
test format::const_hex::bench2_256b ... bench: 18 ns/iter (+/- 0)
test format::const_hex::bench3_2k ... bench: 134 ns/iter (+/- 2)
test format::const_hex::bench4_16k ... bench: 1,151 ns/iter (+/- 5)
test format::const_hex::bench5_128k ... bench: 9,298 ns/iter (+/- 83)
test format::const_hex::bench6_1m ... bench: 83,611 ns/iter (+/- 1,530)
test format::std::bench1_32b ... bench: 359 ns/iter (+/- 6)
test format::std::bench2_256b ... bench: 2,773 ns/iter (+/- 44)
test format::std::bench3_2k ... bench: 22,620 ns/iter (+/- 213)
test format::std::bench4_16k ... bench: 183,197 ns/iter (+/- 1,512)
test format::std::bench5_128k ... bench: 1,481,851 ns/iter (+/- 9,791)
test format::std::bench6_1m ... bench: 11,947,054 ns/iter (+/- 132,579)
test format::const_hex::bench3_2k ... bench: 119 ns/iter (+/- 1)
test format::const_hex::bench4_16k ... bench: 1,157 ns/iter (+/- 3)
test format::const_hex::bench5_128k ... bench: 9,560 ns/iter (+/- 443)
test format::const_hex::bench6_1m ... bench: 85,479 ns/iter (+/- 1,498)
test format::std::bench1_32b ... bench: 374 ns/iter (+/- 6)
test format::std::bench2_256b ... bench: 2,952 ns/iter (+/- 10)
test format::std::bench3_2k ... bench: 23,767 ns/iter (+/- 61)
test format::std::bench4_16k ... bench: 183,579 ns/iter (+/- 2,078)
test format::std::bench5_128k ... bench: 1,498,391 ns/iter (+/- 8,445)
test format::std::bench6_1m ... bench: 11,965,082 ns/iter (+/- 43,784)
```

## Acknowledgements
Expand Down
25 changes: 17 additions & 8 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
)]

#[cfg(feature = "alloc")]
#[allow(unused_imports)]
#[macro_use]
extern crate alloc;

Expand Down Expand Up @@ -576,16 +577,24 @@ pub fn decode_to_array<T: AsRef<[u8]>, const N: usize>(input: T) -> Result<[u8;

/// Encodes `data` into a freshly allocated hex [`String`], writing a leading
/// `0x` first when `PREFIX` is `true`.
///
/// `UPPER` is forwarded unchanged to `imp::encode`; presumably it selects
/// upper-case digits (NOTE(review): confirm against `imp`).
///
/// The returned string is `PREFIX as usize * 2 + data.len() * 2` bytes long.
/// The buffer is allocated without being zeroed: it is filled through a raw
/// pointer into the vector's spare capacity, and the length is only set once
/// every byte has been written, so the `Vec` never exposes uninitialized
/// memory (and no `clippy::uninit_vec` suppression is needed).
#[cfg(feature = "alloc")]
fn encode_inner<const UPPER: bool, const PREFIX: bool>(data: &[u8]) -> String {
    let capacity = PREFIX as usize * 2 + data.len() * 2;
    let mut buf = Vec::<u8>::with_capacity(capacity);
    // Cursor into the allocation; `buf.len()` stays 0 until everything is written.
    let mut output = buf.as_mut_ptr();
    if PREFIX {
        // SAFETY: `PREFIX` is true, so the allocation holds at least
        // `capacity >= 2` bytes and both writes are in bounds.
        unsafe {
            output.write(b'0');
            output.add(1).write(b'x');
            output = output.add(2);
        }
    }
    // SAFETY: `output` points at the `data.len() * 2` writable bytes that remain
    // after the optional prefix, which is the space `imp::encode` requires.
    unsafe { imp::encode::<UPPER>(data, output) };
    // SAFETY: `capacity` does not exceed the allocated capacity, and all
    // `capacity` bytes are now initialized: two by the prefix (if any) and
    // `data.len() * 2` by `imp::encode` (relies on it filling its whole output).
    unsafe { buf.set_len(capacity) };
    // SAFETY: Only ASCII bytes were written (`0`, `x`, and the hex digits
    // produced by `imp::encode`), so the buffer is valid UTF-8.
    unsafe { String::from_utf8_unchecked(buf) }
}
Expand Down

0 comments on commit e6a0979

Please sign in to comment.