From e5f0a2337886f74a4dddd398b1d406d0752547f6 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Mon, 29 Jun 2020 13:13:31 -0700 Subject: [PATCH] Encapsulate unsafe code in a raw module --- src/lib.rs | 4 +- src/map.rs | 6 +- src/{map_core.rs => map/core.rs} | 265 +---------------------------- src/map/core/raw.rs | 276 +++++++++++++++++++++++++++++++ 4 files changed, 287 insertions(+), 264 deletions(-) rename src/{map_core.rs => map/core.rs} (56%) create mode 100644 src/map/core/raw.rs diff --git a/src/lib.rs b/src/lib.rs index 1fa09ea8..996c8d89 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,4 @@ -// We *mostly* avoid unsafe code, but `mod map_core` allows it to use `RawTable`. +// We *mostly* avoid unsafe code, but `map::core::raw` allows it to use `RawTable` buckets. #![deny(unsafe_code)] #![doc(html_root_url = "https://docs.rs/indexmap/1/")] #![cfg_attr(not(has_std), no_std)] @@ -80,8 +80,6 @@ mod mutable_keys; mod serde; mod util; -mod map_core; - pub mod map; pub mod set; diff --git a/src/map.rs b/src/map.rs index b387c255..cc660ee3 100644 --- a/src/map.rs +++ b/src/map.rs @@ -1,6 +1,8 @@ //! `IndexMap` is a hash table where the iteration order of the key-value //! pairs is independent of the hash values of the keys. +mod core; + #[cfg(not(has_std))] use std::vec::Vec; @@ -21,12 +23,12 @@ use std::collections::hash_map::RandomState; use std::cmp::Ordering; use std::fmt; +use self::core::IndexMapCore; use equivalent::Equivalent; -use map_core::IndexMapCore; use util::third; use {Bucket, Entries, HashValue}; -pub use map_core::{Entry, OccupiedEntry, VacantEntry}; +pub use self::core::{Entry, OccupiedEntry, VacantEntry}; /// A hash table where the iteration order of the key-value pairs is independent /// of the hash values of the keys. diff --git a/src/map_core.rs b/src/map/core.rs similarity index 56% rename from src/map_core.rs rename to src/map/core.rs index 20c00d1e..e5b82679 100644 --- a/src/map_core.rs +++ b/src/map/core.rs @@ -1,4 +1,3 @@ -#![allow(unsafe_code)] //! This is the core implementation that doesn't depend on the hasher at all. //! //! The methods of `IndexMapCore` don't use any Hash properties of K. @@ -8,6 +7,8 @@ //! //! However, we should probably not let this show in the public API or docs. +mod raw; + #[cfg(not(has_std))] use std::vec::Vec; @@ -23,8 +24,6 @@ use equivalent::Equivalent; use util::enumerate; use {Bucket, Entries, HashValue}; -type RawBucket = hashbrown::raw::Bucket; - /// Core of the map that does not depend on S pub(crate) struct IndexMapCore { /// indices mapping from the entry hash to its index. @@ -67,16 +66,8 @@ where V: fmt::Debug, { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - struct DebugIndices<'a>(&'a RawTable); - impl fmt::Debug for DebugIndices<'_> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let indices = unsafe { self.0.iter().map(|raw_bucket| raw_bucket.read()) }; - f.debug_list().entries(indices).finish() - } - } - f.debug_struct("IndexMapCore") - .field("indices", &DebugIndices(&self.indices)) + .field("indices", &raw::DebugIndices(&self.indices)) .field("entries", &self.entries) .finish() } @@ -168,8 +159,7 @@ impl IndexMapCore { pub(crate) fn pop(&mut self) -> Option<(K, V)> { if let Some(entry) = self.entries.pop() { let last = self.entries.len(); - let raw_bucket = self.find_index(entry.hash, last).unwrap(); - unsafe { self.indices.erase_no_drop(&raw_bucket) }; + self.erase_index(entry.hash, last); Some((entry.key, entry.value)) } else { None @@ -190,17 +180,6 @@ impl IndexMapCore { i } - /// Return the index in `entries` where an equivalent key can be found - pub(crate) fn get_index_of(&self, hash: HashValue, key: &Q) -> Option - where - Q: ?Sized + Equivalent, - { - match self.find_equivalent(hash, key) { - Some(raw_bucket) => Some(unsafe { raw_bucket.read() }), - None => None, - } - } - pub(crate) fn insert_full(&mut self, hash: HashValue, key: K, value: V) -> (usize, Option) where K: Eq, @@ -211,150 +190,6 @@ impl IndexMapCore { } } - pub(crate) fn entry(&mut self, hash: HashValue, key: K) -> Entry - where - K: Eq, - { - match self.find_equivalent(hash, &key) { - // Safety: The entry is created with a live raw bucket, at the same time we have a &mut - // reference to the map, so it can not be modified further. - Some(raw_bucket) => Entry::Occupied(OccupiedEntry { - map: self, - raw_bucket, - key, - }), - None => Entry::Vacant(VacantEntry { - map: self, - hash, - key, - }), - } - } - - /// Return the raw bucket with an equivalent key - fn find_equivalent(&self, hash: HashValue, key: &Q) -> Option - where - Q: ?Sized + Equivalent, - { - self.indices.find(hash.get(), { - |&i| Q::equivalent(key, &self.entries[i].key) - }) - } - - /// Return the raw bucket for the given index - fn find_index(&self, hash: HashValue, index: usize) -> Option { - self.indices.find(hash.get(), |&i| i == index) - } - - /// Remove an entry by shifting all entries that follow it - pub(crate) fn shift_remove_full(&mut self, hash: HashValue, key: &Q) -> Option<(usize, K, V)> - where - Q: ?Sized + Equivalent, - { - match self.find_equivalent(hash, key) { - Some(raw_bucket) => unsafe { Some(self.shift_remove_bucket(raw_bucket)) }, - None => None, - } - } - - /// Remove an entry by shifting all entries that follow it - pub(crate) fn shift_remove_index(&mut self, index: usize) -> Option<(K, V)> { - let raw_bucket = match self.entries.get(index) { - Some(entry) => self.find_index(entry.hash, index).unwrap(), - None => return None, - }; - unsafe { - let (_, key, value) = self.shift_remove_bucket(raw_bucket); - Some((key, value)) - } - } - - /// Remove an entry by shifting all entries that follow it - /// - /// Safety: The caller must pass a live `raw_bucket`. - #[allow(unused_unsafe)] - unsafe fn shift_remove_bucket(&mut self, raw_bucket: RawBucket) -> (usize, K, V) { - // use Vec::remove, but then we need to update the indices that point - // to all of the other entries that have to move - let index = unsafe { - self.indices.erase_no_drop(&raw_bucket); - raw_bucket.read() - }; - let entry = self.entries.remove(index); - - // correct indices that point to the entries that followed the removed entry. - // use a heuristic between a full sweep vs. a `find()` for every shifted item. - let raw_capacity = self.indices.buckets(); - let shifted_entries = &self.entries[index..]; - if shifted_entries.len() > raw_capacity / 2 { - // shift all indices greater than `index` - unsafe { - for bucket in self.indices.iter() { - let i = bucket.read(); - if i > index { - bucket.write(i - 1); - } - } - } - } else { - // find each following entry to shift its index - for (i, entry) in (index + 1..).zip(shifted_entries) { - let shifted_bucket = self.find_index(entry.hash, i).unwrap(); - unsafe { shifted_bucket.write(i - 1) }; - } - } - - (index, entry.key, entry.value) - } - - /// Remove an entry by swapping it with the last - pub(crate) fn swap_remove_full(&mut self, hash: HashValue, key: &Q) -> Option<(usize, K, V)> - where - Q: ?Sized + Equivalent, - { - match self.find_equivalent(hash, key) { - Some(raw_bucket) => unsafe { Some(self.swap_remove_bucket(raw_bucket)) }, - None => None, - } - } - - /// Remove an entry by swapping it with the last - pub(crate) fn swap_remove_index(&mut self, index: usize) -> Option<(K, V)> { - let raw_bucket = match self.entries.get(index) { - Some(entry) => self.find_index(entry.hash, index).unwrap(), - None => return None, - }; - unsafe { - let (_, key, value) = self.swap_remove_bucket(raw_bucket); - Some((key, value)) - } - } - - /// Remove an entry by swapping it with the last - /// - /// Safety: The caller must pass a live `raw_bucket`. - #[allow(unused_unsafe)] - unsafe fn swap_remove_bucket(&mut self, raw_bucket: RawBucket) -> (usize, K, V) { - // use swap_remove, but then we need to update the index that points - // to the other entry that has to move - let index = unsafe { - self.indices.erase_no_drop(&raw_bucket); - raw_bucket.read() - }; - let entry = self.entries.swap_remove(index); - - // correct index that points to the entry that had to swap places - if let Some(entry) = self.entries.get(index) { - // was not last element - // examine new element in `index` and find it in indices - let last = self.entries.len(); - let swapped_bucket = self.find_index(entry.hash, last).unwrap(); - unsafe { swapped_bucket.write(index) }; - } - - (index, entry.key, entry.value) - } - pub(crate) fn retain_in_order(&mut self, mut keep: F) where F: FnMut(&mut K, &mut V) -> bool, @@ -381,20 +216,6 @@ impl IndexMapCore { } } - pub(crate) fn reverse(&mut self) { - self.entries.reverse(); - - // No need to save hash indices, can easily calculate what they should - // be, given that this is an in-place reversal. - let len = self.entries.len(); - unsafe { - for raw_bucket in self.indices.iter() { - let i = raw_bucket.read(); - raw_bucket.write(len - i - 1); - } - } - } - fn rebuild_hash_table(&mut self) { self.indices.clear_no_drop(); debug_assert!(self.indices.capacity() >= self.entries.len()); @@ -487,52 +308,10 @@ impl<'a, K: 'a + fmt::Debug, V: 'a + fmt::Debug> fmt::Debug for Entry<'a, K, V> } } -/// A view into an occupied entry in a `IndexMap`. -/// It is part of the [`Entry`] enum. -/// -/// [`Entry`]: enum.Entry.html -pub struct OccupiedEntry<'a, K: 'a, V: 'a> { - map: &'a mut IndexMapCore, - raw_bucket: RawBucket, - key: K, -} - -// `hashbrown::raw::Bucket` is only `Send`, not `Sync`. -// SAFETY: `&self` only accesses the bucket to read it. -unsafe impl Sync for OccupiedEntry<'_, K, V> {} +pub use self::raw::OccupiedEntry; +// Extra methods that don't threaten the unsafe encapsulation. impl<'a, K, V> OccupiedEntry<'a, K, V> { - pub fn key(&self) -> &K { - &self.key - } - - pub fn get(&self) -> &V { - &self.map.entries[self.index()].value - } - - pub fn get_mut(&mut self) -> &mut V { - let index = self.index(); - &mut self.map.entries[index].value - } - - /// Put the new key in the occupied entry's key slot - pub(crate) fn replace_key(self) -> K { - let index = self.index(); - let old_key = &mut self.map.entries[index].key; - replace(old_key, self.key) - } - - /// Return the index of the key-value pair - #[inline] - pub fn index(&self) -> usize { - unsafe { self.raw_bucket.read() } - } - - pub fn into_mut(self) -> &'a mut V { - let index = self.index(); - &mut self.map.entries[index].value - } - /// Sets the value of the entry to `value`, and returns the entry's old value. pub fn insert(&mut self, value: V) -> V { replace(self.get_mut(), value) @@ -573,38 +352,6 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> { pub fn remove_entry(self) -> (K, V) { self.swap_remove_entry() } - - /// Remove and return the key, value pair stored in the map for this entry - /// - /// Like `Vec::swap_remove`, the pair is removed by swapping it with the - /// last element of the map and popping it off. **This perturbs - /// the postion of what used to be the last element!** - /// - /// Computes in **O(1)** time (average). - pub fn swap_remove_entry(self) -> (K, V) { - // This is safe because it can only happen once (self is consumed) - // and map.indices have not been modified since entry construction - unsafe { - let (_, key, value) = self.map.swap_remove_bucket(self.raw_bucket); - (key, value) - } - } - - /// Remove and return the key, value pair stored in the map for this entry - /// - /// Like `Vec::remove`, the pair is removed by shifting all of the - /// elements that follow it, preserving their relative order. - /// **This perturbs the index of all of those elements!** - /// - /// Computes in **O(n)** time (average). - pub fn shift_remove_entry(self) -> (K, V) { - // This is safe because it can only happen once (self is consumed) - // and map.indices have not been modified since entry construction - unsafe { - let (_, key, value) = self.map.shift_remove_bucket(self.raw_bucket); - (key, value) - } - } } impl<'a, K: 'a + fmt::Debug, V: 'a + fmt::Debug> fmt::Debug for OccupiedEntry<'a, K, V> { diff --git a/src/map/core/raw.rs b/src/map/core/raw.rs new file mode 100644 index 00000000..8d7ba82a --- /dev/null +++ b/src/map/core/raw.rs @@ -0,0 +1,276 @@ +#![allow(unsafe_code)] +//! This module encapsulates the `unsafe` access to `hashbrown::raw::RawTable`, +//! mostly in dealing with its bucket "pointers". + +use super::{Entry, Equivalent, HashValue, IndexMapCore, VacantEntry}; +use hashbrown::raw::RawTable; +use std::fmt; +use std::mem::replace; + +type RawBucket = hashbrown::raw::Bucket; + +pub(super) struct DebugIndices<'a>(pub &'a RawTable); +impl fmt::Debug for DebugIndices<'_> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let indices = unsafe { self.0.iter().map(|raw_bucket| raw_bucket.read()) }; + f.debug_list().entries(indices).finish() + } +} + +impl IndexMapCore { + /// Return the raw bucket with an equivalent key + fn find_equivalent(&self, hash: HashValue, key: &Q) -> Option + where + Q: ?Sized + Equivalent, + { + self.indices.find(hash.get(), { + move |&i| Q::equivalent(key, &self.entries[i].key) + }) + } + + /// Return the raw bucket for the given index + fn find_index(&self, hash: HashValue, index: usize) -> Option { + self.indices.find(hash.get(), move |&i| i == index) + } + + /// Return the index in `entries` where an equivalent key can be found + pub(crate) fn get_index_of(&self, hash: HashValue, key: &Q) -> Option + where + Q: ?Sized + Equivalent, + { + match self.find_equivalent(hash, key) { + Some(raw_bucket) => Some(unsafe { raw_bucket.read() }), + None => None, + } + } + + pub(super) fn erase_index(&mut self, hash: HashValue, index: usize) { + let raw_bucket = self.find_index(hash, index).unwrap(); + unsafe { self.indices.erase_no_drop(&raw_bucket) }; + } + + pub(crate) fn entry(&mut self, hash: HashValue, key: K) -> Entry + where + K: Eq, + { + match self.find_equivalent(hash, &key) { + // Safety: The entry is created with a live raw bucket, at the same time we have a &mut + // reference to the map, so it can not be modified further. + Some(raw_bucket) => Entry::Occupied(OccupiedEntry { + map: self, + raw_bucket, + key, + }), + None => Entry::Vacant(VacantEntry { + map: self, + hash, + key, + }), + } + } + + /// Remove an entry by shifting all entries that follow it + pub(crate) fn shift_remove_full(&mut self, hash: HashValue, key: &Q) -> Option<(usize, K, V)> + where + Q: ?Sized + Equivalent, + { + match self.find_equivalent(hash, key) { + Some(raw_bucket) => unsafe { Some(self.shift_remove_bucket(raw_bucket)) }, + None => None, + } + } + + /// Remove an entry by shifting all entries that follow it + pub(crate) fn shift_remove_index(&mut self, index: usize) -> Option<(K, V)> { + let raw_bucket = match self.entries.get(index) { + Some(entry) => self.find_index(entry.hash, index).unwrap(), + None => return None, + }; + unsafe { + let (_, key, value) = self.shift_remove_bucket(raw_bucket); + Some((key, value)) + } + } + + /// Remove an entry by shifting all entries that follow it + /// + /// Safety: The caller must pass a live `raw_bucket`. + #[allow(unused_unsafe)] + unsafe fn shift_remove_bucket(&mut self, raw_bucket: RawBucket) -> (usize, K, V) { + // use Vec::remove, but then we need to update the indices that point + // to all of the other entries that have to move + let index = unsafe { + self.indices.erase_no_drop(&raw_bucket); + raw_bucket.read() + }; + let entry = self.entries.remove(index); + + // correct indices that point to the entries that followed the removed entry. + // use a heuristic between a full sweep vs. a `find()` for every shifted item. + let raw_capacity = self.indices.buckets(); + let shifted_entries = &self.entries[index..]; + if shifted_entries.len() > raw_capacity / 2 { + // shift all indices greater than `index` + unsafe { + for bucket in self.indices.iter() { + let i = bucket.read(); + if i > index { + bucket.write(i - 1); + } + } + } + } else { + // find each following entry to shift its index + for (i, entry) in (index + 1..).zip(shifted_entries) { + let shifted_bucket = self.find_index(entry.hash, i).unwrap(); + unsafe { shifted_bucket.write(i - 1) }; + } + } + + (index, entry.key, entry.value) + } + + /// Remove an entry by swapping it with the last + pub(crate) fn swap_remove_full(&mut self, hash: HashValue, key: &Q) -> Option<(usize, K, V)> + where + Q: ?Sized + Equivalent, + { + match self.find_equivalent(hash, key) { + Some(raw_bucket) => unsafe { Some(self.swap_remove_bucket(raw_bucket)) }, + None => None, + } + } + + /// Remove an entry by swapping it with the last + pub(crate) fn swap_remove_index(&mut self, index: usize) -> Option<(K, V)> { + let raw_bucket = match self.entries.get(index) { + Some(entry) => self.find_index(entry.hash, index).unwrap(), + None => return None, + }; + unsafe { + let (_, key, value) = self.swap_remove_bucket(raw_bucket); + Some((key, value)) + } + } + + /// Remove an entry by swapping it with the last + /// + /// Safety: The caller must pass a live `raw_bucket`. + #[allow(unused_unsafe)] + unsafe fn swap_remove_bucket(&mut self, raw_bucket: RawBucket) -> (usize, K, V) { + // use swap_remove, but then we need to update the index that points + // to the other entry that has to move + let index = unsafe { + self.indices.erase_no_drop(&raw_bucket); + raw_bucket.read() + }; + let entry = self.entries.swap_remove(index); + + // correct index that points to the entry that had to swap places + if let Some(entry) = self.entries.get(index) { + // was not last element + // examine new element in `index` and find it in indices + let last = self.entries.len(); + let swapped_bucket = self.find_index(entry.hash, last).unwrap(); + unsafe { swapped_bucket.write(index) }; + } + + (index, entry.key, entry.value) + } + + pub(crate) fn reverse(&mut self) { + self.entries.reverse(); + + // No need to save hash indices, can easily calculate what they should + // be, given that this is an in-place reversal. + let len = self.entries.len(); + unsafe { + for raw_bucket in self.indices.iter() { + let i = raw_bucket.read(); + raw_bucket.write(len - i - 1); + } + } + } +} + +/// A view into an occupied entry in a `IndexMap`. +/// It is part of the [`Entry`] enum. +/// +/// [`Entry`]: enum.Entry.html +// SAFETY: The lifetime of the map reference also constrains the raw bucket, +// which is essentially a raw pointer into the map indices. +pub struct OccupiedEntry<'a, K: 'a, V: 'a> { + map: &'a mut IndexMapCore, + raw_bucket: RawBucket, + key: K, +} + +// `hashbrown::raw::Bucket` is only `Send`, not `Sync`. +// SAFETY: `&self` only accesses the bucket to read it. +unsafe impl Sync for OccupiedEntry<'_, K, V> {} + +// The parent module also adds methods that don't threaten the unsafe encapsulation. +impl<'a, K, V> OccupiedEntry<'a, K, V> { + pub fn key(&self) -> &K { + &self.key + } + + pub fn get(&self) -> &V { + &self.map.entries[self.index()].value + } + + pub fn get_mut(&mut self) -> &mut V { + let index = self.index(); + &mut self.map.entries[index].value + } + + /// Put the new key in the occupied entry's key slot + pub(crate) fn replace_key(self) -> K { + let index = self.index(); + let old_key = &mut self.map.entries[index].key; + replace(old_key, self.key) + } + + /// Return the index of the key-value pair + #[inline] + pub fn index(&self) -> usize { + unsafe { self.raw_bucket.read() } + } + + pub fn into_mut(self) -> &'a mut V { + let index = self.index(); + &mut self.map.entries[index].value + } + + /// Remove and return the key, value pair stored in the map for this entry + /// + /// Like `Vec::swap_remove`, the pair is removed by swapping it with the + /// last element of the map and popping it off. **This perturbs + /// the postion of what used to be the last element!** + /// + /// Computes in **O(1)** time (average). + pub fn swap_remove_entry(self) -> (K, V) { + // This is safe because it can only happen once (self is consumed) + // and map.indices have not been modified since entry construction + unsafe { + let (_, key, value) = self.map.swap_remove_bucket(self.raw_bucket); + (key, value) + } + } + + /// Remove and return the key, value pair stored in the map for this entry + /// + /// Like `Vec::remove`, the pair is removed by shifting all of the + /// elements that follow it, preserving their relative order. + /// **This perturbs the index of all of those elements!** + /// + /// Computes in **O(n)** time (average). + pub fn shift_remove_entry(self) -> (K, V) { + // This is safe because it can only happen once (self is consumed) + // and map.indices have not been modified since entry construction + unsafe { + let (_, key, value) = self.map.shift_remove_bucket(self.raw_bucket); + (key, value) + } + } +}