From d8cd0f6d33e18de54c41546254a1a9b9015a5bf6 Mon Sep 17 00:00:00 2001 From: Declan Kelly Date: Tue, 8 Oct 2024 21:21:11 -0700 Subject: [PATCH] Simplify opaque node pointer casting to concrete **Description** - Remove dependency on `sptr` by adding shim functions in the nightly module as replacement - Remove some `#[inline(always)]` complete and change all other instances to `#[inline]` - Modify `count_words` example to use nul-terminated string internally - Remove the `TaggedPointer::::cast` cast function in favour of casting directly on the resulting `NonNull` **Motivation** - The `sptr` crate hasn't been updated to include strict provenance methods for `NonNull`, see Gankra/sptr#17 - `#[inline(always)]` is generally too strong of a requirement, I'd rather leave it to the compiler heuristic without strong evidence to the contrary. - The `count_words` example was failing on some inputs because of words that were prefixes of other words - The `cast` function was showing up in a hot part of the call stack and it seemed inefficient to carry the data tag over for the cast for my use-case **Testing Done** `./scripts/full-test.sh nightly` See PR for benchmark results --- .gitignore | 3 + Cargo.toml | 1 - README.md | 18 ++- benches/iai_callgrind.rs | 5 +- benches/tree/generated_get.rs | 3 - benches/tree/generated_insert.rs | 1 - examples/count_words.rs | 36 +++--- fuzz/fuzz_targets/fuzz_tree_map_api.rs | 28 ++--- scripts/bench-against.sh | 18 +++ src/collections/map/iterators/fuzzy.rs | 8 +- src/nodes/operations/delete.rs | 4 +- src/nodes/operations/insert.rs | 1 + src/nodes/operations/lookup.rs | 1 + src/nodes/operations/minmax.rs | 4 +- src/nodes/representation.rs | 99 ++++++++------- src/nodes/representation/header.rs | 28 ++--- src/rust_nightly_apis.rs | 82 ++++++++++++- src/tagged_pointer.rs | 162 +++++++------------------ 18 files changed, 282 insertions(+), 220 deletions(-) create mode 100755 scripts/bench-against.sh diff --git a/.gitignore b/.gitignore index 3a8da652..9d1cb581 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,6 @@ profile.json # coverage info lcov.info + +# perf data +perf.data* \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 86faf015..eac975be 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,7 +28,6 @@ autobenches = false [dependencies] bytemuck = { version = "1.16.1", features = ["min_const_generics"] } paste = "1.0.15" -sptr = "0.3.2" [features] nightly = [] diff --git a/README.md b/README.md index cb6b9072..9cdca801 100644 --- a/README.md +++ b/README.md @@ -100,6 +100,22 @@ cargo +nightly fuzz coverage fuzz_tree_map_api && cargo cov -- show fuzz/target/ > index.html ``` +```bash +TARGET_TRIPLE="x86_64-unknown-linux-gnu" +/home/declan/.rustup/toolchains/nightly-x86_64-unknown-linux-gnu/lib/rustlib/x86_64-unknown-linux-gnu/bin/llvm-cov show -format=html \ + -instr-profile=fuzz/coverage/fuzz_tree_map_api/coverage.profdata \ + -Xdemangler=rustfilt \ + -ignore-filename-regex=\.cargo/registry \ + fuzz/target/x86_64-unknown-linux-gnu/release/fuzz_tree_map_api + > cov.html + +/home/declan/.rustup/toolchains/nightly-x86_64-unknown-linux-gnu/lib/rustlib/x86_64-unknown-linux-gnu/bin/llvm-cov show \ + --instr-profile=/home/declan/repos/github/declanvk/blart/fuzz/coverage/fuzz_tree_map_api/coverage.profdata \ + --show-instantiations --show-line-counts-or-regions --Xdemangler=rustfilt \ + --format=html --ignore-filename-regex=/home/declan/.cargo/registry --ignore-filename-regex=/home/declan/.rustup/\ + target/x86_64-unknown-linux-gnu/coverage/x86_64-unknown-linux-gnu/release/fuzz_tree_map_api > coverage.html +``` + ## Benchmarks To run the benchmarks, install [`cargo-criterion`][cargo-criterion], then run: @@ -134,7 +150,7 @@ curl -o data/Ulysses.txt https://www.gutenberg.org/cache/epub/4300/pg4300.txt Then build the word count example using the `profiling` profile: ```bash -cargo build --profile profiling --exampleps +RUSTFLAGS="-C force-frame-pointers=yes" cargo build --profile profiling --examples ``` Then run the count words workload on the downloaded data while profiling: diff --git a/benches/iai_callgrind.rs b/benches/iai_callgrind.rs index 9e3fae2c..79d878bd 100644 --- a/benches/iai_callgrind.rs +++ b/benches/iai_callgrind.rs @@ -5,7 +5,9 @@ use crate::common::{ with_prefixes_tree, }; use blart::{AsBytes, TreeMap}; -use iai_callgrind::{library_benchmark, library_benchmark_group, main, LibraryBenchmarkConfig}; +use iai_callgrind::{ + library_benchmark, library_benchmark_group, main, FlamegraphConfig, LibraryBenchmarkConfig, +}; #[macro_use] mod common; @@ -177,6 +179,7 @@ library_benchmark_group!(name = bench_iterator_group; benchmarks = bench_full_it fn config() -> LibraryBenchmarkConfig { let mut c = LibraryBenchmarkConfig::default(); c.truncate_description(Some(0)); + c.flamegraph(FlamegraphConfig::default()); c } diff --git a/benches/tree/generated_get.rs b/benches/tree/generated_get.rs index 243bc88b..03b338ff 100644 --- a/benches/tree/generated_get.rs +++ b/benches/tree/generated_get.rs @@ -8,7 +8,6 @@ use blart::{ }; use criterion::{criterion_group, measurement::Measurement, BenchmarkGroup, Criterion}; -#[inline(always)] fn run_benchmarks( group: &mut BenchmarkGroup, key_vec: &[Box<[u8]>], @@ -38,7 +37,6 @@ fn run_benchmarks( // - a tree node that is full and will need to grow } -#[inline(always)] fn setup_tree_run_benches_cleanup( c: &mut Criterion, keys: impl Iterator>, @@ -60,7 +58,6 @@ fn setup_tree_run_benches_cleanup( } } -#[inline(always)] fn bench(c: &mut Criterion) { // number of keys = 256 setup_tree_run_benches_cleanup( diff --git a/benches/tree/generated_insert.rs b/benches/tree/generated_insert.rs index 83ae5326..07b4538c 100644 --- a/benches/tree/generated_insert.rs +++ b/benches/tree/generated_insert.rs @@ -26,7 +26,6 @@ fn gen_group(c: &mut Criterion, group: &str, keys: Vec>) { }); } -#[inline(always)] fn bench(c: &mut Criterion) { let skewed: Vec<_> = generate_keys_skewed(u8::MAX as usize).collect(); let fixed_length: Vec<_> = generate_key_fixed_length([2; 8]).map(Box::from).collect(); diff --git a/examples/count_words.rs b/examples/count_words.rs index 9bd284f3..a71daca2 100644 --- a/examples/count_words.rs +++ b/examples/count_words.rs @@ -1,6 +1,8 @@ use argh::FromArgs; use blart::TreeMap; -use std::{collections::BTreeMap, error::Error, fs::OpenOptions, io::Read, path::PathBuf}; +use std::{ + collections::BTreeMap, error::Error, ffi::CString, fs::OpenOptions, io::Read, path::PathBuf, +}; /// Count words in file #[derive(FromArgs)] @@ -50,23 +52,25 @@ fn main() -> Result<(), Box> { #[derive(Debug)] #[allow(dead_code)] // this struct is used for its debug repr -struct WordStats<'b> { +struct WordStats { num_unique: u64, - first_word: &'b [u8], + first_word: CString, first_word_count: u64, - last_word: &'b [u8], + last_word: CString, last_word_count: u64, } fn count_words_blart(contents: &[u8]) -> WordStats { - let mut map = TreeMap::<&[u8], u64>::new(); + let mut map = TreeMap::::new(); for word in contents.split_inclusive(|b| *b == SPLIT_BYTE) { - if let Some(count) = map.get_mut(word) { - *count += 1; - } else { - map.try_insert(word, 1).unwrap(); - } + let word = CString::new(word).unwrap(); + + map.entry(word) + .and_modify(|count| { + *count += 1; + }) + .or_insert(1); } let (first_word, first_word_count) = map @@ -79,9 +83,9 @@ fn count_words_blart(contents: &[u8]) -> WordStats { WordStats { num_unique: map.len() as u64, - last_word, + last_word: last_word.clone(), last_word_count: *last_word_count, - first_word, + first_word: first_word.clone(), first_word_count: *first_word_count, } } @@ -89,9 +93,11 @@ fn count_words_blart(contents: &[u8]) -> WordStats { const SPLIT_BYTE: u8 = b' '; fn count_words_std(contents: &[u8]) -> WordStats { - let mut map = BTreeMap::<&[u8], u64>::new(); + let mut map = BTreeMap::::new(); for word in contents.split_inclusive(|b| *b == SPLIT_BYTE) { + let word = CString::new(word).unwrap(); + map.entry(word) .and_modify(|count| { *count += 1; @@ -109,9 +115,9 @@ fn count_words_std(contents: &[u8]) -> WordStats { WordStats { num_unique: map.len() as u64, - last_word, + last_word: last_word.clone(), last_word_count: *last_word_count, - first_word, + first_word: first_word.clone(), first_word_count: *first_word_count, } } diff --git a/fuzz/fuzz_targets/fuzz_tree_map_api.rs b/fuzz/fuzz_targets/fuzz_tree_map_api.rs index b01e05a0..43902f7e 100644 --- a/fuzz/fuzz_targets/fuzz_tree_map_api.rs +++ b/fuzz/fuzz_targets/fuzz_tree_map_api.rs @@ -45,7 +45,7 @@ enum Action { libfuzzer_sys::fuzz_target!(|actions: Vec| { let mut tree = TreeMap::<_, u32>::new(); - let mut next_key = 0; + let mut next_value = 0; for action in actions { match action { @@ -58,25 +58,25 @@ libfuzzer_sys::fuzz_target!(|actions: Vec| { Action::GetMinimum => { let min = tree.first_key_value(); if let Some((_, min_value)) = min { - assert!(*min_value < next_key); + assert!(*min_value < next_value); } }, Action::PopMinimum => { let min = tree.pop_first(); if let Some((_, min_value)) = min { - assert!(min_value < next_key); + assert!(min_value < next_value); } }, Action::GetMaximum => { let max = tree.last_key_value(); if let Some((_, max_value)) = max { - assert!(*max_value < next_key); + assert!(*max_value < next_value); } }, Action::PopMaximum => { let max = tree.pop_last(); if let Some((_, max_value)) = max { - assert!(max_value < next_key); + assert!(max_value < next_value); } }, Action::GetKey(key) => { @@ -104,19 +104,19 @@ libfuzzer_sys::fuzz_target!(|actions: Vec| { }, Action::Remove(key) => { if let Some(value) = tree.remove(key.as_ref()) { - assert!(value < next_key); + assert!(value < next_value); } }, Action::TryInsert(key) => { - let value = next_key; - next_key += 1; + let value = next_value; + next_value += 1; let _ = tree.try_insert(key, value); }, Action::Extend(new_keys) => { for key in new_keys { - let value = next_key; - next_key += 1; + let value = next_value; + next_value += 1; let _ = tree.try_insert(key, value); } @@ -137,8 +137,8 @@ libfuzzer_sys::fuzz_target!(|actions: Vec| { }, Action::Entry(ea, key) => { if let Ok(entry) = tree.try_entry(key) { - let value = next_key; - next_key += 1; + let value = next_value; + next_value += 1; match ea { EntryAction::AndModify => { entry.and_modify(|v| *v = v.saturating_sub(1)); @@ -174,8 +174,8 @@ libfuzzer_sys::fuzz_target!(|actions: Vec| { }, Action::EntryRef(ea, key) => { if let Ok(entry) = tree.try_entry_ref(&key) { - let value = next_key; - next_key += 1; + let value = next_value; + next_value += 1; match ea { EntryAction::AndModify => { entry.and_modify(|v| *v = v.saturating_sub(1)); diff --git a/scripts/bench-against.sh b/scripts/bench-against.sh new file mode 100755 index 00000000..9593e405 --- /dev/null +++ b/scripts/bench-against.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +set -o errexit # make script exit when a command fails +set -o nounset # make script exit when using undeclared variables +set -o pipefail # make script exit when command fails in a pipe + +git switch --quiet --detach "$1" + +BASELINE_NAME="$(git rev-parse --short HEAD)" + +# Create the baseline benchmark and don't output the summary +cargo bench --quiet --bench iai_callgrind -- --save-baseline="${BASELINE_NAME}" > /dev/null + +# Using '-' will switch back to the previous branch or git checkout +git switch --quiet - + +# Run the benchmark again with comparison to baseline +cargo bench --quiet --bench iai_callgrind -- --baseline="${BASELINE_NAME}" \ No newline at end of file diff --git a/src/collections/map/iterators/fuzzy.rs b/src/collections/map/iterators/fuzzy.rs index 5a0d965d..2f16fc88 100644 --- a/src/collections/map/iterators/fuzzy.rs +++ b/src/collections/map/iterators/fuzzy.rs @@ -91,7 +91,7 @@ impl StackArena { /// SAFETY: `old` and `new` must have the same length, and be >= 1 /// /// SAFETY: `key` length + 1 == `new` or `old` length -#[inline(always)] +#[inline] fn edit_dist( key: &[u8], c: u8, @@ -142,7 +142,7 @@ fn edit_dist( } /// SAFETY: `old_row` length == `new_row` length -#[inline(always)] +#[inline] unsafe fn swap(old_row: &mut &mut [usize], new_row: &mut &mut [MaybeUninit]) { // SAFETY: It's safe to transmute initialized data to uninitialized let temp = unsafe { @@ -162,7 +162,7 @@ trait FuzzySearch { max_edit_dist: usize, ) -> bool; - #[inline(always)] + #[inline] fn fuzzy_search_prefix( &self, key: &[u8], @@ -355,7 +355,7 @@ macro_rules! gen_iter { { type Item = $ret; - #[inline(always)] + #[inline] fn next(&mut self) -> Option { let mut old_row = self.old_row.as_mut(); let mut new_row = self.new_row.as_mut(); diff --git a/src/nodes/operations/delete.rs b/src/nodes/operations/delete.rs index ab808f7c..595ef4bb 100644 --- a/src/nodes/operations/delete.rs +++ b/src/nodes/operations/delete.rs @@ -401,7 +401,7 @@ where /// - This function cannot be called concurrently with any mutating operation /// on `root` or any child node of `root`. This function will arbitrarily /// read to any child in the given tree. -#[inline(always)] +#[inline] pub unsafe fn find_minimum_to_delete( root: OpaqueNodePtr, ) -> DeletePoint { @@ -439,7 +439,7 @@ pub unsafe fn find_minimum_to_delete( /// - This function cannot be called concurrently with any mutating operation /// on `root` or any child node of `root`. This function will arbitrarily /// read to any child in the given tree. -#[inline(always)] +#[inline] pub unsafe fn find_maximum_to_delete( root: OpaqueNodePtr, ) -> DeletePoint { diff --git a/src/nodes/operations/insert.rs b/src/nodes/operations/insert.rs index 217bddac..6395de6e 100644 --- a/src/nodes/operations/insert.rs +++ b/src/nodes/operations/insert.rs @@ -607,6 +607,7 @@ pub unsafe fn search_for_insert_point( where K: AsBytes, { + #[inline] fn test_prefix_identify_insert( inner_ptr: NodePtr, key: &[u8], diff --git a/src/nodes/operations/lookup.rs b/src/nodes/operations/lookup.rs index 8f979ebc..aebb1a84 100644 --- a/src/nodes/operations/lookup.rs +++ b/src/nodes/operations/lookup.rs @@ -59,6 +59,7 @@ impl PrefixMatchBehavior { /// This function will choose between different "match prefix" methods /// depending on the current behavior and attempt to match the prefix of the /// given inner node against the given key. + #[inline] pub fn match_prefix( &mut self, inner_node: &impl InnerNode, diff --git a/src/nodes/operations/minmax.rs b/src/nodes/operations/minmax.rs index a4d22ae7..ac721b95 100644 --- a/src/nodes/operations/minmax.rs +++ b/src/nodes/operations/minmax.rs @@ -6,7 +6,7 @@ use crate::{ConcreteNodePtr, InnerNode, LeafNode, NodePtr, OpaqueNodePtr}; /// - This function cannot be called concurrently with any mutating operation /// on `root` or any child node of `root`. This function will arbitrarily /// read to any child in the given tree. -#[inline(always)] +#[inline] pub unsafe fn minimum_unchecked( root: OpaqueNodePtr, ) -> NodePtr> { @@ -31,7 +31,7 @@ pub unsafe fn minimum_unchecked( /// - This function cannot be called concurrently with any mutating operation /// on `root` or any child node of `root`. This function will arbitrarily /// read to any child in the given tree. -#[inline(always)] +#[inline] pub unsafe fn maximum_unchecked( root: OpaqueNodePtr, ) -> NodePtr> { diff --git a/src/nodes/representation.rs b/src/nodes/representation.rs index b7cd2b49..9a2a51df 100644 --- a/src/nodes/representation.rs +++ b/src/nodes/representation.rs @@ -149,7 +149,7 @@ impl OpaqueNodePtr { where N: Node, { - let mut tagged_ptr = TaggedPointer::from(pointer).cast::(); + let mut tagged_ptr = TaggedPointer::from(pointer.cast::()); tagged_ptr.set_data(N::TYPE as usize); OpaqueNodePtr(tagged_ptr, PhantomData) @@ -164,9 +164,10 @@ impl OpaqueNodePtr { /// Create a non-opaque node pointer that will eliminate future type /// assertions, if the type of the pointed node matches the given /// node type. + #[inline] pub fn cast>(self) -> Option> { if self.is::() { - Some(NodePtr(self.0.cast::().into())) + Some(NodePtr(self.0.to_ptr().cast::())) } else { None } @@ -176,25 +177,30 @@ impl OpaqueNodePtr { /// concrete node type. pub fn to_node_ptr(self) -> ConcreteNodePtr { match self.node_type() { - NodeType::Node4 => ConcreteNodePtr::Node4(NodePtr( - self.0.cast::>().into(), - )), + NodeType::Node4 => { + ConcreteNodePtr::Node4(NodePtr( + self.0.to_ptr().cast::>(), + )) + }, NodeType::Node16 => ConcreteNodePtr::Node16(NodePtr( - self.0.cast::>().into(), + self.0.to_ptr().cast::>(), )), NodeType::Node48 => ConcreteNodePtr::Node48(NodePtr( - self.0.cast::>().into(), + self.0.to_ptr().cast::>(), )), NodeType::Node256 => ConcreteNodePtr::Node256(NodePtr( - self.0.cast::>().into(), - )), - NodeType::Leaf => ConcreteNodePtr::LeafNode(NodePtr( - self.0.cast::>().into(), + self.0.to_ptr().cast::>(), )), + NodeType::Leaf => { + ConcreteNodePtr::LeafNode(NodePtr( + self.0.to_ptr().cast::>(), + )) + }, } } /// Retrieve the runtime node type information. + #[inline] pub fn node_type(self) -> NodeType { // SAFETY: We know that we can convert the usize into a `NodeType` because // we have only stored `NodeType` values into this pointer @@ -237,7 +243,7 @@ impl OpaqueNodePtr { /// lifetime, the memory the pointer points to must not get accessed /// (read or written) through any other pointer. pub(crate) unsafe fn header_mut_unchecked<'h>(self) -> &'h mut Header { - unsafe { &mut *self.0.cast::>().to_ptr() } + unsafe { self.0.to_ptr().cast::>().as_mut() } } /// Get a shared reference to the header, this doesn't check if the pointer @@ -251,7 +257,7 @@ impl OpaqueNodePtr { /// lifetime, the memory the pointer points to must not be mutated /// through any other pointer. pub(crate) unsafe fn header_ref_unchecked<'h>(self) -> &'h Header { - unsafe { &*self.0.cast::>().to_ptr() } + unsafe { self.0.to_ptr().cast::>().as_ref() } } } @@ -725,6 +731,7 @@ pub trait InnerNode: Node + Sized + fmt::De Self: 'a; /// Create an empty [`InnerNode`], with no children and no prefix + #[inline] fn empty() -> Self { Self::from_header(Header::empty()) } @@ -818,6 +825,7 @@ pub trait InnerNode: Node + Sized + fmt::De /// the key. The caller who uses this function must perform a final check /// against the leaf key bytes to make sure that the search key matches the /// found key. + #[inline] fn optimistic_match_prefix( &self, truncated_key: &[u8], @@ -851,7 +859,7 @@ pub trait InnerNode: Node + Sized + fmt::De /// reaches a leaf node using these results, then the caller must perform a /// final check against the leaf key bytes to make sure that the search /// key matches the found key. - #[inline(always)] + #[inline] fn attempt_pessimistic_match_prefix( &self, truncated_key: &[u8], @@ -895,7 +903,7 @@ pub trait InnerNode: Node + Sized + fmt::De /// /// # Safety /// - `current_depth` > key len - #[inline(always)] + #[inline] fn match_full_prefix( &self, key: &[u8], @@ -933,7 +941,7 @@ pub trait InnerNode: Node + Sized + fmt::De /// Read the prefix as a whole, by reconstructing it if necessary from a /// leaf - #[inline(always)] + #[inline] fn read_full_prefix( &self, current_depth: usize, @@ -1232,38 +1240,43 @@ impl Node for LeafNode, 3>::new(Box::into_raw(Box::new( - LeafNode::with_no_siblings([], 3u8), - ))) - .unwrap() - .cast::(); + let mut p0 = TaggedPointer::::new( + Box::into_raw(Box::>::new( + LeafNode::with_no_siblings([], 3u8), + )) + .cast::(), + ) + .unwrap(); p0.set_data(0b001); #[repr(align(64))] struct LargeAlignment; - let mut p1 = TaggedPointer::, 3>::new(Box::into_raw( - Box::new(LeafNode::with_no_siblings(LargeAlignment, 2u16)), - )) - .unwrap() - .cast::(); + let mut p1 = TaggedPointer::::new( + Box::into_raw(Box::>::new( + LeafNode::with_no_siblings(LargeAlignment, 2u16), + )) + .cast::(), + ) + .unwrap(); p1.set_data(0b011); - let mut p2 = TaggedPointer::, 3>::new(Box::into_raw( - Box::new(LeafNode::with_no_siblings(1u64, LargeAlignment)), - )) - .unwrap() - .cast::(); + let mut p2 = TaggedPointer::::new( + Box::into_raw(Box::>::new( + LeafNode::with_no_siblings(1u64, LargeAlignment), + )) + .cast::(), + ) + .unwrap(); p2.set_data(0b111); unsafe { @@ -1272,13 +1285,17 @@ mod tests { // are required, even though the tests pass under normal execution otherwise (I // guess normal test execution doesn't care about leaked memory?) drop(Box::from_raw( - p0.cast::>().to_ptr(), + p0.to_ptr().cast::>().as_ptr(), )); drop(Box::from_raw( - p1.cast::>().to_ptr(), + p1.to_ptr() + .cast::>() + .as_ptr(), )); drop(Box::from_raw( - p2.cast::>().to_ptr(), + p2.to_ptr() + .cast::>() + .as_ptr(), )); } } @@ -1384,10 +1401,10 @@ mod tests { let n48 = InnerNode4::, (), 16>::empty(); let n256 = InnerNode4::, (), 16>::empty(); - let n4_ptr = (&n4 as *const InnerNode4, (), 16>).addr(); - let n16_ptr = (&n16 as *const InnerNode4, (), 16>).addr(); - let n48_ptr = (&n48 as *const InnerNode4, (), 16>).addr(); - let n256_ptr = (&n256 as *const InnerNode4, (), 16>).addr(); + let n4_ptr = const_addr(&n4 as *const InnerNode4, (), 16>); + let n16_ptr = const_addr(&n16 as *const InnerNode4, (), 16>); + let n48_ptr = const_addr(&n48 as *const InnerNode4, (), 16>); + let n256_ptr = const_addr(&n256 as *const InnerNode4, (), 16>); // Ensure that there are 3 bits of unused space in the node pointer because of // the alignment. diff --git a/src/nodes/representation/header.rs b/src/nodes/representation/header.rs index b00eeb26..5c94bfce 100644 --- a/src/nodes/representation/header.rs +++ b/src/nodes/representation/header.rs @@ -26,7 +26,7 @@ pub struct Header { } impl Header { - #[inline(always)] + #[inline] pub fn new(prefix: &[u8], prefix_len: usize) -> Self { let mut header = Self { num_children: 0, @@ -40,7 +40,7 @@ impl Header { } /// Create a new `Header` for an empty node. - #[inline(always)] + #[inline] pub fn empty() -> Self { Self { num_children: 0, @@ -50,25 +50,25 @@ impl Header { } /// Read the initialized portion of the prefix present in the header. - #[inline(always)] + #[inline] pub fn read_prefix(&self) -> &[u8] { &self.prefix[0..self.capped_prefix_len()] } /// Get the number of bytes in the prefix. - #[inline(always)] + #[inline] pub fn prefix_len(&self) -> usize { self.prefix_len as usize } /// Minimum between [`Self::prefix_len`] and `PREFIX_LEN`. - #[inline(always)] + #[inline] pub fn capped_prefix_len(&self) -> usize { (self.prefix_len as usize).min(PREFIX_LEN) } /// Return the number of children of this node. - #[inline(always)] + #[inline] pub fn num_children(&self) -> usize { usize::from(self.num_children) } @@ -78,7 +78,7 @@ impl Header { /// /// # Panics /// - If `len` > length of the prefix - #[inline(always)] + #[inline] pub fn ltrim_by(&mut self, len: usize) { assert!( (len as u32) <= self.prefix_len, @@ -106,7 +106,7 @@ impl Header { /// Set the length of the prefix to 0 and returns a copy of the /// prefix, length and capped length - #[inline(always)] + #[inline] pub fn clear_prefix(&mut self) -> ([u8; PREFIX_LEN], usize, usize) { let len = self.prefix_len(); let capped_len = self.capped_prefix_len(); @@ -116,7 +116,7 @@ impl Header { } /// Append `new` to the prefix and sums `new_len` to the prefix length - #[inline(always)] + #[inline] pub fn push_prefix(&mut self, new: &[u8], new_len: usize) { let begin = self.capped_prefix_len(); let end = (begin + new.len()).min(PREFIX_LEN); @@ -126,24 +126,24 @@ impl Header { } /// Increments the number of children - #[inline(always)] + #[inline] pub fn inc_num_children(&mut self) { self.num_children += 1; } /// Decrements the number of children - #[inline(always)] + #[inline] pub fn dec_num_children(&mut self) { self.num_children -= 1; } /// Reset the number of children to 0. - #[inline(always)] + #[inline] pub fn reset_num_children(&mut self) { self.num_children = 0; } - #[inline(always)] + #[inline] pub fn ltrim_by_with_leaf( &mut self, len: usize, @@ -177,7 +177,7 @@ impl Header { self.prefix[..len.min(PREFIX_LEN)].copy_from_slice(leaf_key) } - #[inline(always)] + #[inline] pub fn inner_read_full_prefix<'a, N: InnerNode>( &'a self, node: &'a N, diff --git a/src/rust_nightly_apis.rs b/src/rust_nightly_apis.rs index 8dc748ce..d28f3208 100644 --- a/src/rust_nightly_apis.rs +++ b/src/rust_nightly_apis.rs @@ -17,7 +17,7 @@ /// issue is [#63569][issue-63569]** /// /// [issue-63569]: https://github.com/rust-lang/rust/issues/63569 -#[inline(always)] +#[inline] pub const unsafe fn maybe_uninit_slice_assume_init_ref( slice: &[std::mem::MaybeUninit], ) -> &[T] { @@ -54,7 +54,7 @@ pub const unsafe fn maybe_uninit_slice_assume_init_ref( /// issue is [#63569][issue-63569]** /// /// [issue-63569]: https://github.com/rust-lang/rust/issues/63569 -#[inline(always)] +#[inline] pub unsafe fn maybe_uninit_slice_assume_init_mut( slice: &mut [std::mem::MaybeUninit], ) -> &mut [T] { @@ -99,7 +99,7 @@ pub unsafe fn maybe_uninit_slice_assume_init_mut( /// issue is [#96762][issue-96762]** /// /// [issue-96762]: https://github.com/rust-lang/rust/issues/96762 -#[inline(always)] +#[inline] pub fn hasher_write_length_prefix(state: &mut H, num_entries: usize) { #[cfg(feature = "nightly")] { @@ -125,7 +125,7 @@ pub fn hasher_write_length_prefix(state: &mut H, num_entri /// /// [issue-96097]: https://github.com/rust-lang/rust/issues/96097 #[cfg(feature = "nightly")] -#[inline(always)] +#[inline] pub const fn maybe_uninit_uninit_array() -> [std::mem::MaybeUninit; N] { std::mem::MaybeUninit::uninit_array() } @@ -143,7 +143,7 @@ pub const fn maybe_uninit_uninit_array() -> [std::mem::MaybeU /// /// [issue-96097]: https://github.com/rust-lang/rust/issues/96097 #[cfg(not(feature = "nightly"))] -#[inline(always)] +#[inline] pub fn maybe_uninit_uninit_array() -> [std::mem::MaybeUninit; N] { std::array::from_fn(|_| std::mem::MaybeUninit::uninit()) } @@ -154,7 +154,7 @@ pub fn maybe_uninit_uninit_array() -> [std::mem::MaybeUninit< /// issue is [#63291][issue-63291]** /// /// [issue-63291]: https://github.com/rust-lang/rust/issues/63291 -#[inline(always)] +#[inline] pub fn box_new_uninit_slice(len: usize) -> Box<[std::mem::MaybeUninit]> { #[cfg(feature = "nightly")] { @@ -272,3 +272,73 @@ macro_rules! unlikely { } pub(crate) use unlikely; + +pub(crate) mod ptr { + //! This module contains shim functions for strict-provenance stuff related + //! to pointers + + use std::{num::NonZeroUsize, ptr::NonNull}; + + #[inline] + #[cfg(not(feature = "nightly"))] + #[allow(clippy::transmutes_expressible_as_ptr_casts)] + pub fn mut_addr(ptr: *mut T) -> usize { + // FIXME(strict_provenance_magic): I am magic and should be a compiler + // intrinsic. SAFETY: Pointer-to-integer transmutes are valid (if you + // are okay with losing the provenance). + unsafe { std::mem::transmute(ptr.cast::<()>()) } + } + + #[inline] + #[cfg(feature = "nightly")] + pub fn mut_addr(ptr: *mut T) -> usize { + ptr.addr() + } + + #[cfg(test)] + #[inline] + #[cfg(not(feature = "nightly"))] + pub fn const_addr(ptr: *const T) -> usize { + // FIXME(strict_provenance_magic): I am magic and should be a compiler + // intrinsic. SAFETY: Pointer-to-integer transmutes are valid (if you + // are okay with losing the provenance). + unsafe { std::mem::transmute(ptr.cast::<()>()) } + } + + #[cfg(test)] + #[inline] + #[cfg(feature = "nightly")] + pub fn const_addr(ptr: *const T) -> usize { + ptr.addr() + } + + #[inline] + #[cfg(not(feature = "nightly"))] + pub fn nonnull_map_addr( + ptr: NonNull, + f: impl FnOnce(NonZeroUsize) -> NonZeroUsize, + ) -> NonNull { + let ptr = ptr.as_ptr(); + let old_addr = mut_addr(ptr); + let new_addr = f(unsafe { + // SAFETY: TODO + NonZeroUsize::new_unchecked(old_addr) + }); + let offset = new_addr.get().wrapping_sub(old_addr); + + // This is the canonical desugaring of this operation + let new_ptr = ptr.wrapping_byte_offset(offset as isize); + + // SAFETY: TODO + unsafe { NonNull::new_unchecked(new_ptr) } + } + + #[inline] + #[cfg(feature = "nightly")] + pub fn nonnull_map_addr( + ptr: NonNull, + f: impl FnOnce(NonZeroUsize) -> NonZeroUsize, + ) -> NonNull { + ptr.map_addr(f) + } +} diff --git a/src/tagged_pointer.rs b/src/tagged_pointer.rs index a54aed83..f049d886 100644 --- a/src/tagged_pointer.rs +++ b/src/tagged_pointer.rs @@ -8,10 +8,9 @@ //! pointed-to type, so that it can store several bits of information. For a //! type with alignment `A`, the number of available bits is `log_2(A)`. -use std::{fmt, mem::align_of, ptr::NonNull}; +use std::{fmt, mem::align_of, num::NonZeroUsize, ptr::NonNull}; -#[cfg(not(feature = "nightly"))] -use sptr::Strict; +use crate::rust_nightly_apis::ptr; /// A non-null pointer type which carries several bits of metadata. /// @@ -85,7 +84,7 @@ impl TaggedPointer { // assumes that null is always a zero value. let unchecked_ptr = unsafe { NonNull::new_unchecked(pointer) }; - let ptr_addr = unchecked_ptr.as_ptr().addr(); + let ptr_addr = ptr::mut_addr(unchecked_ptr.as_ptr()); // Double-check that there are no existing bits stored in the data-carrying // positions @@ -117,15 +116,17 @@ impl TaggedPointer { /// memory location. /// /// This pointer is guaranteed to be non-null. - pub fn to_ptr(self) -> *mut P { - self.0 - .as_ptr() - .map_addr(|ptr_addr| ptr_addr & Self::POINTER_MASK) + #[inline] + pub fn to_ptr(self) -> NonNull

{ + ptr::nonnull_map_addr(self.0, |ptr_addr| + // SAFETY: TODO + unsafe { NonZeroUsize::new_unchecked(ptr_addr.get() & Self::POINTER_MASK) }) } /// Consume this tagged pointer and produce the data it carries. + #[inline] pub fn to_data(self) -> usize { - let ptr_addr = self.0.as_ptr().addr(); + let ptr_addr = ptr::mut_addr(self.0.as_ptr()); ptr_addr & Self::DATA_MASK } @@ -142,42 +143,16 @@ impl TaggedPointer { ); let data = data & Self::DATA_MASK; - let ptr_with_new_data = self - .0 - .as_ptr() - .map_addr(|ptr_addr| (ptr_addr & Self::POINTER_MASK) | data); - - // The `ptr_with_new_data` is guaranteed to be non-null because it's pointer - // address was derived from a non-null pointer using operations that would not - // have zeroed out the address. - // - // The bit-and operation combining the original pointer address (non-null) - // combined with the POINTER_MASK would not have produced a zero value because - // the POINTER_MASK value must have the high bits set. - self.0 = unsafe { NonNull::new_unchecked(ptr_with_new_data) }; - } - - /// Casts to a [`TaggedPointer`] of another type. - /// - /// This function will transfer the data bits from the original pointer to - /// the new pointer. - /// - /// # Safety - /// - The alignment of the new type must be equal to the alignment of the - /// existing type. This is because the number of data-carrying bits could - /// be different. - pub fn cast(self) -> TaggedPointer { - let data = self.to_data(); - let raw_ptr = self.to_ptr(); - let cast_raw_ptr = raw_ptr.cast::(); - - // SAFETY: The `cast_raw_ptr` is guaranteed to be non-null because it is derived - // from an existing `TaggedPointer` which carries that guarantee. - let mut new_tagged = unsafe { TaggedPointer::new_unchecked(cast_raw_ptr) }; - - new_tagged.set_data(data); - - new_tagged + self.0 = ptr::nonnull_map_addr(self.0, |ptr_addr| unsafe { + // SAFETY: The `ptr_with_new_data` is guaranteed to be non-null because it's + // pointer address was derived from a non-null pointer using + // operations that would not have zeroed out the address. + // + // The bit-and operation combining the original pointer address (non-null) + // combined with the POINTER_MASK would not have produced a zero value because + // the POINTER_MASK value must have the high bits set. + NonZeroUsize::new_unchecked(ptr_addr.get() & Self::POINTER_MASK) | data + }); } } @@ -193,7 +168,7 @@ impl From> for NonNull

{ fn from(pointer: TaggedPointer) -> Self { // SAFETY: The pointer produced by the `TaggedPointer::to_ptr` is guaranteed to // be non-null. - unsafe { NonNull::new_unchecked(pointer.to_ptr()) } + unsafe { NonNull::new_unchecked(pointer.to_ptr().as_ptr()) } } } @@ -270,40 +245,17 @@ mod tests { let mut tagged_pointer = TaggedPointer::<&str, 3>::new_with_data(pointer, tag_data).unwrap(); - assert_eq!(unsafe { *tagged_pointer.to_ptr() }, "Hello world!"); + assert_eq!(unsafe { *tagged_pointer.to_ptr().as_ptr() }, "Hello world!"); assert_eq!(tagged_pointer.to_data(), 0b101); tagged_pointer.set_data(0b010); - assert_eq!(unsafe { *tagged_pointer.to_ptr() }, "Hello world!"); + assert_eq!(unsafe { *tagged_pointer.to_ptr().as_ptr() }, "Hello world!"); assert_eq!(tagged_pointer.to_data(), 0b010); // Collecting the data into `Box` to safely drop it unsafe { - drop(Box::from_raw(tagged_pointer.to_ptr())); - } - } - - #[test] - fn cast_and_back() { - let pointee = u64::MAX; - let pointer = Box::into_raw(Box::new(pointee)); - let tag_data = 0b010usize; - - let mut tagged_pointer = TaggedPointer::::new_with_data(pointer, tag_data).unwrap(); - - assert_eq!(unsafe { *tagged_pointer.to_ptr() }, u64::MAX); - assert_eq!(tagged_pointer.to_data(), 0b010); - - tagged_pointer.set_data(0b101); - - let new_tagged_pointer = tagged_pointer.cast::(); - - assert_eq!(unsafe { *new_tagged_pointer.to_ptr() }, -1); - assert_eq!(new_tagged_pointer.to_data(), 0b101); - - unsafe { - drop(Box::from_raw(tagged_pointer.to_ptr())); + drop(Box::from_raw(tagged_pointer.to_ptr().as_ptr())); } } @@ -316,14 +268,14 @@ mod tests { p.set_data(1); assert_eq!(p.to_data(), 1); - assert_eq!(unsafe { *p.to_ptr() }, 10); + assert_eq!(unsafe { *p.to_ptr().as_ptr() }, 10); p.set_data(3); assert_eq!(p.to_data(), 3); - assert_eq!(unsafe { *p.to_ptr() }, 10); + assert_eq!(unsafe { *p.to_ptr().as_ptr() }, 10); unsafe { - let _ = Box::from_raw(p.to_ptr()); + let _ = Box::from_raw(p.to_ptr().as_ptr()); }; } @@ -333,14 +285,14 @@ mod tests { let mut p = TaggedPointer::<_, 2>::new_with_data(raw_pointer, 3).unwrap(); assert_eq!(p.to_data(), 3); - assert_eq!(unsafe { *p.to_ptr() }, 30); + assert_eq!(unsafe { *p.to_ptr().as_ptr() }, 30); p.set_data(0); - assert_eq!(unsafe { *p.to_ptr() }, 30); + assert_eq!(unsafe { *p.to_ptr().as_ptr() }, 30); assert_eq!(p.to_data(), 0); unsafe { - let _ = Box::from_raw(p.to_ptr()); + let _ = Box::from_raw(p.to_ptr().as_ptr()); }; } @@ -394,47 +346,47 @@ mod tests { let mut p5 = TaggedPointer::<_, 3>::new(Box::into_raw(Box::new(5u64))).unwrap(); assert_eq!(p0.to_data(), 0); - assert_eq!(unsafe { *p0.to_ptr() }.len(), 0); + assert_eq!(unsafe { *p0.to_ptr().as_ptr() }.len(), 0); p0.set_data(0); - assert_eq!(unsafe { *p0.to_ptr() }.len(), 0); + assert_eq!(unsafe { *p0.to_ptr().as_ptr() }.len(), 0); assert_eq!(p0.to_data(), 0); assert_eq!(p1.to_data(), 0); - assert!(unsafe { !*p1.to_ptr() }); + assert!(unsafe { !*p1.to_ptr().as_ptr() }); p1.set_data(0); assert_eq!(p1.to_data(), 0); - assert!(unsafe { !*p1.to_ptr() }); + assert!(unsafe { !*p1.to_ptr().as_ptr() }); assert_eq!(p2.to_data(), 0); - assert_eq!(unsafe { *p2.to_ptr() }, 2); + assert_eq!(unsafe { *p2.to_ptr().as_ptr() }, 2); p2.set_data(0); assert_eq!(p2.to_data(), 0); - assert_eq!(unsafe { *p2.to_ptr() }, 2); + assert_eq!(unsafe { *p2.to_ptr().as_ptr() }, 2); assert_eq!(p3.to_data(), 0); - assert_eq!(unsafe { *p3.to_ptr() }, 3); + assert_eq!(unsafe { *p3.to_ptr().as_ptr() }, 3); p3.set_data(1); assert_eq!(p3.to_data(), 1); - assert_eq!(unsafe { *p3.to_ptr() }, 3); + assert_eq!(unsafe { *p3.to_ptr().as_ptr() }, 3); assert_eq!(p4.to_data(), 0); - assert_eq!(unsafe { *p4.to_ptr() }, 4); + assert_eq!(unsafe { *p4.to_ptr().as_ptr() }, 4); p4.set_data(3); assert_eq!(p4.to_data(), 3); - assert_eq!(unsafe { *p4.to_ptr() }, 4); + assert_eq!(unsafe { *p4.to_ptr().as_ptr() }, 4); assert_eq!(p5.to_data(), 0); - assert_eq!(unsafe { *p5.to_ptr() }, 5); + assert_eq!(unsafe { *p5.to_ptr().as_ptr() }, 5); p5.set_data(7); assert_eq!(p5.to_data(), 7); - assert_eq!(unsafe { *p5.to_ptr() }, 5); + assert_eq!(unsafe { *p5.to_ptr().as_ptr() }, 5); unsafe { - drop(Box::from_raw(p1.to_ptr())); - drop(Box::from_raw(p2.to_ptr())); - drop(Box::from_raw(p3.to_ptr())); - drop(Box::from_raw(p4.to_ptr())); - drop(Box::from_raw(p5.to_ptr())); + drop(Box::from_raw(p1.to_ptr().as_ptr())); + drop(Box::from_raw(p2.to_ptr().as_ptr())); + drop(Box::from_raw(p3.to_ptr().as_ptr())); + drop(Box::from_raw(p4.to_ptr().as_ptr())); + drop(Box::from_raw(p5.to_ptr().as_ptr())); } } @@ -491,24 +443,4 @@ mod tests { 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_0000_usize ); } - - #[test] - fn cast_checks_alignment() { - let mut p1 = TaggedPointer::<_, 3>::new(Box::into_raw(Box::new(1u64))).unwrap(); - let mut p2 = TaggedPointer::<_, 2>::new(Box::into_raw(Box::new(2u32))).unwrap(); - - p1.set_data(1); - p2.set_data(2); - - let p1_i64 = p1.cast::(); - assert_eq!(p1_i64.to_data(), 1); - - let p2_i32 = p2.cast::(); - assert_eq!(p2_i32.to_data(), 2); - - unsafe { - drop(Box::from_raw(p1.to_ptr())); - drop(Box::from_raw(p2.to_ptr())); - } - } }