diff --git a/src/bfuse16.rs b/src/bfuse16.rs index 81b2ea8..17ba865 100644 --- a/src/bfuse16.rs +++ b/src/bfuse16.rs @@ -1,6 +1,10 @@ //! Implements BinaryFuse16 filters. -use crate::{bfuse_contains_impl, bfuse_from_impl, Filter}; +use crate::{ + bfuse_contains_impl, bfuse_from_impl, + prelude::bfuse::{parse_bfuse_descriptor, serialize_bfuse_descriptor, Descriptor}, + DmaSerializable, Filter, FilterRef, +}; use alloc::{boxed::Box, vec::Vec}; use core::convert::TryFrom; @@ -60,10 +64,8 @@ use bincode::{Decode, Encode}; #[cfg_attr(feature = "bincode", derive(Encode, Decode))] #[derive(Debug, Clone)] pub struct BinaryFuse16 { - seed: u64, - segment_length: u32, - segment_length_mask: u32, - segment_count_length: u32, + #[cfg_attr(feature = "serde", serde(flatten))] + descriptor: Descriptor, /// The fingerprints for the filter pub fingerprints: Box<[u16]>, } @@ -120,9 +122,73 @@ impl TryFrom> for BinaryFuse16 { } } +impl DmaSerializable for BinaryFuse16 { + const DESCRIPTOR_LEN: usize = Descriptor::DMA_LEN; + + fn dma_copy_descriptor_to(&self, out: &mut [u8]) { + serialize_bfuse_descriptor(&self.descriptor, out) + } + + fn dma_fingerprints(&self) -> &[u8] { + let fingerprints = self.fingerprints.as_ref(); + #[allow(clippy::manual_slice_size_calculation)] + let len = fingerprints.len() * core::mem::size_of::(); + unsafe { core::slice::from_raw_parts(fingerprints.as_ptr() as *const u8, len) } + } +} + +/// Like [`BinaryFuse16`] except that it can be constructed 0-copy from external buffers. +#[derive(Debug)] +pub struct BinaryFuse16Ref<'a> { + descriptor: Descriptor, + fingerprints: &'a [u16], +} + +impl<'a> Filter for BinaryFuse16Ref<'a> { + /// Returns `true` if the filter contains the specified key. + /// Has a false positive rate of <0.4%. + /// Has no false negatives. 
+ fn contains(&self, key: &u64) -> bool { + bfuse_contains_impl!(*key, self, fingerprint u16) + } + + fn len(&self) -> usize { + self.fingerprints.len() + } +} + +impl<'a> FilterRef<'a, u64> for BinaryFuse16Ref<'a> { + const FINGERPRINT_ALIGNMENT: usize = 2; + + fn from_dma(descriptor: &[u8], fingerprints: &'a [u8]) -> Self { + assert_eq!( + fingerprints + .as_ptr() + .align_offset(core::mem::align_of::()), + 0, + "Invalid fingerprint pointer provided - must be u16 aligned" + ); + assert_eq!( + fingerprints.len() % core::mem::align_of::(), + 0, + "Invalid fingerprint buffer provided - length must be a multiple of u16" + ); + + // #[allow(clippy::manual_slice_size_calculation)] + let len = fingerprints.len() / core::mem::size_of::(); + let fingerprints = + unsafe { core::slice::from_raw_parts(fingerprints.as_ptr() as *const u16, len) }; + + Self { + descriptor: parse_bfuse_descriptor(descriptor), + fingerprints, + } + } +} + #[cfg(test)] mod test { - use crate::{BinaryFuse16, Filter}; + use crate::{bfuse16::BinaryFuse16Ref, BinaryFuse16, DmaSerializable, Filter, FilterRef}; use core::convert::TryFrom; use alloc::vec::Vec; @@ -177,4 +243,63 @@ mod test { fn test_debug_assert_duplicates() { let _ = BinaryFuse16::try_from(vec![1, 2, 1]); } + + #[test] + fn test_dma_roundtrip() { + const SAMPLE_SIZE: usize = 1_000_000; + let mut rng = rand::thread_rng(); + let keys: Vec = (0..SAMPLE_SIZE).map(|_| rng.gen()).collect(); + + let filter = BinaryFuse16::try_from(&keys).unwrap(); + + // Unaligned descriptor is fine. 
+ let mut descriptor = [0; BinaryFuse16::DESCRIPTOR_LEN + 1]; + filter.dma_copy_descriptor_to(&mut descriptor[1..]); + + let filter_ref = BinaryFuse16Ref::from_dma(&descriptor[1..], filter.dma_fingerprints()); + assert_eq!(filter_ref.descriptor, filter.descriptor); + + for key in &keys { + assert!(filter_ref.contains(key)); + } + } + + #[test] + #[should_panic(expected = "Invalid fingerprint pointer provided - must be u16 aligned")] + fn test_dma_unaligned_fingerprints() { + const SAMPLE_SIZE: usize = 1_000_000; + let mut rng = rand::thread_rng(); + let keys: Vec = (0..SAMPLE_SIZE).map(|_| rng.gen()).collect(); + + let filter = BinaryFuse16::try_from(&keys).unwrap(); + + let mut descriptor = [0; BinaryFuse16::DESCRIPTOR_LEN + 1]; + filter.dma_copy_descriptor_to(&mut descriptor[1..]); + + let mut as_vec = vec![1]; + as_vec.extend_from_slice(filter.dma_fingerprints()); + + let filter_ref = BinaryFuse16Ref::from_dma(&descriptor[1..], &as_vec[1..]); + assert_eq!(filter_ref.descriptor, filter.descriptor); + } + + #[test] + #[should_panic( + expected = "Invalid fingerprint buffer provided - length must be a multiple of u16" + )] + fn test_dma_unaligned_fingerprints_len() { + const SAMPLE_SIZE: usize = 1_000_000; + let mut rng = rand::thread_rng(); + let keys: Vec = (0..SAMPLE_SIZE).map(|_| rng.gen()).collect(); + + let filter = BinaryFuse16::try_from(&keys).unwrap(); + + let mut descriptor = [0; BinaryFuse16::DESCRIPTOR_LEN + 1]; + filter.dma_copy_descriptor_to(&mut descriptor[1..]); + + let serialized = filter.dma_fingerprints(); + let serialized = &serialized[..serialized.len() - 1]; + + BinaryFuse16Ref::from_dma(&descriptor[1..], serialized); + } } diff --git a/src/bfuse32.rs b/src/bfuse32.rs index a4a5ae2..ecdca3e 100644 --- a/src/bfuse32.rs +++ b/src/bfuse32.rs @@ -1,6 +1,10 @@ //! Implements BinaryFuse16 filters. 
-use crate::{bfuse_contains_impl, bfuse_from_impl, Filter}; +use crate::{ + bfuse_contains_impl, bfuse_from_impl, + prelude::bfuse::{parse_bfuse_descriptor, serialize_bfuse_descriptor, Descriptor}, + DmaSerializable, Filter, FilterRef, +}; use alloc::{boxed::Box, vec::Vec}; use core::convert::TryFrom; @@ -61,10 +65,8 @@ use bincode::{Decode, Encode}; #[cfg_attr(feature = "bincode", derive(Encode, Decode))] #[derive(Debug, Clone)] pub struct BinaryFuse32 { - seed: u64, - segment_length: u32, - segment_length_mask: u32, - segment_count_length: u32, + #[cfg_attr(feature = "serde", serde(flatten))] + descriptor: Descriptor, /// The fingerprints for the filter pub fingerprints: Box<[u32]>, } @@ -121,9 +123,73 @@ impl TryFrom> for BinaryFuse32 { } } +impl DmaSerializable for BinaryFuse32 { + const DESCRIPTOR_LEN: usize = Descriptor::DMA_LEN; + + fn dma_copy_descriptor_to(&self, out: &mut [u8]) { + serialize_bfuse_descriptor(&self.descriptor, out) + } + + fn dma_fingerprints(&self) -> &[u8] { + let fingerprints = self.fingerprints.as_ref(); + #[allow(clippy::manual_slice_size_calculation)] + let len = fingerprints.len() * core::mem::size_of::(); + unsafe { core::slice::from_raw_parts(fingerprints.as_ptr() as *const u8, len) } + } +} + +/// Like [`BinaryFuse32`] except that it can be constructed 0-copy from external buffers. +#[derive(Debug)] +pub struct BinaryFuse32Ref<'a> { + descriptor: Descriptor, + fingerprints: &'a [u32], +} + +impl<'a> Filter for BinaryFuse32Ref<'a> { + /// Returns `true` if the filter contains the specified key. + /// Has a false positive rate of <0.4%. + /// Has no false negatives. 
+ fn contains(&self, key: &u64) -> bool { + bfuse_contains_impl!(*key, self, fingerprint u32) + } + + fn len(&self) -> usize { + self.fingerprints.len() + } +} + +impl<'a> FilterRef<'a, u64> for BinaryFuse32Ref<'a> { + const FINGERPRINT_ALIGNMENT: usize = 4; + + fn from_dma(descriptor: &[u8], fingerprints: &'a [u8]) -> Self { + assert_eq!( + fingerprints + .as_ptr() + .align_offset(core::mem::align_of::()), + 0, + "Invalid fingerprint pointer provided - must be u32 aligned" + ); + assert_eq!( + fingerprints.len() % core::mem::align_of::(), + 0, + "Invalid fingerprint buffer provided - length must be a multiple of u32" + ); + + // #[allow(clippy::manual_slice_size_calculation)] + let len = fingerprints.len() / core::mem::size_of::(); + let fingerprints = + unsafe { core::slice::from_raw_parts(fingerprints.as_ptr() as *const u32, len) }; + + Self { + descriptor: parse_bfuse_descriptor(descriptor), + fingerprints, + } + } +} + #[cfg(test)] mod test { - use crate::{BinaryFuse32, Filter}; + use crate::{bfuse32::BinaryFuse32Ref, BinaryFuse32, DmaSerializable, Filter, FilterRef}; use core::convert::TryFrom; use alloc::vec::Vec; @@ -182,4 +248,58 @@ mod test { fn test_debug_assert_duplicates() { let _ = BinaryFuse32::try_from(vec![1, 2, 1]); } + + #[test] + fn test_dma_roundtrip() { + const SAMPLE_SIZE: usize = 1_000_000; + let mut rng = rand::thread_rng(); + let keys: Vec = (0..SAMPLE_SIZE).map(|_| rng.gen()).collect(); + + let filter = BinaryFuse32::try_from(&keys).unwrap(); + + // Unaligned descriptor is fine. 
+ let mut descriptor = [0; BinaryFuse32::DESCRIPTOR_LEN + 1]; + filter.dma_copy_descriptor_to(&mut descriptor[1..]); + + let filter_ref = BinaryFuse32Ref::from_dma(&descriptor[1..], filter.dma_fingerprints()); + assert_eq!(filter_ref.descriptor, filter.descriptor); + } + + #[test] + #[should_panic(expected = "Invalid fingerprint pointer provided - must be u32 aligned")] + fn test_dma_unaligned_fingerprints() { + const SAMPLE_SIZE: usize = 1_000_000; + let mut rng = rand::thread_rng(); + let keys: Vec = (0..SAMPLE_SIZE).map(|_| rng.gen()).collect(); + + let filter = BinaryFuse32::try_from(&keys).unwrap(); + + let mut descriptor = [0; BinaryFuse32::DESCRIPTOR_LEN + 1]; + filter.dma_copy_descriptor_to(&mut descriptor[1..]); + + let mut as_vec = vec![1]; + as_vec.extend_from_slice(filter.dma_fingerprints()); + + BinaryFuse32Ref::from_dma(&descriptor[1..], &as_vec[1..]); + } + + #[test] + #[should_panic( + expected = "Invalid fingerprint buffer provided - length must be a multiple of u32" + )] + fn test_dma_unaligned_fingerprints_len() { + const SAMPLE_SIZE: usize = 1_000_000; + let mut rng = rand::thread_rng(); + let keys: Vec = (0..SAMPLE_SIZE).map(|_| rng.gen()).collect(); + + let filter = BinaryFuse32::try_from(&keys).unwrap(); + + let mut descriptor = [0; BinaryFuse32::DESCRIPTOR_LEN + 1]; + filter.dma_copy_descriptor_to(&mut descriptor[1..]); + + let serialized = filter.dma_fingerprints(); + let serialized = &serialized[..serialized.len() - 1]; + + BinaryFuse32Ref::from_dma(&descriptor[1..], serialized); + } } diff --git a/src/bfuse8.rs b/src/bfuse8.rs index c031bab..c0c0d71 100644 --- a/src/bfuse8.rs +++ b/src/bfuse8.rs @@ -1,6 +1,10 @@ //! Implements BinaryFuse8 filters. 
-use crate::{bfuse_contains_impl, bfuse_from_impl, Filter}; +use crate::{ + bfuse_contains_impl, bfuse_from_impl, + prelude::bfuse::{parse_bfuse_descriptor, serialize_bfuse_descriptor, Descriptor}, + DmaSerializable, Filter, FilterRef, +}; use alloc::{boxed::Box, vec::Vec}; use core::convert::TryFrom; @@ -61,10 +65,8 @@ use bincode::{Decode, Encode}; #[cfg_attr(feature = "bincode", derive(Encode, Decode))] #[derive(Debug, Clone)] pub struct BinaryFuse8 { - seed: u64, - segment_length: u32, - segment_length_mask: u32, - segment_count_length: u32, + #[cfg_attr(feature = "serde", serde(flatten))] + descriptor: Descriptor, /// The fingerprints for the filter pub fingerprints: Box<[u8]>, } @@ -121,9 +123,52 @@ impl TryFrom> for BinaryFuse8 { } } +impl DmaSerializable for BinaryFuse8 { + const DESCRIPTOR_LEN: usize = Descriptor::DMA_LEN; + + fn dma_copy_descriptor_to(&self, out: &mut [u8]) { + serialize_bfuse_descriptor(&self.descriptor, out) + } + + fn dma_fingerprints(&self) -> &[u8] { + self.fingerprints.as_ref() + } +} + +/// Like [`BinaryFuse8`] except that it can be constructed 0-copy from external buffers. +#[derive(Debug)] +pub struct BinaryFuse8Ref<'a> { + descriptor: Descriptor, + fingerprints: &'a [u8], +} + +impl<'a> Filter for BinaryFuse8Ref<'a> { + /// Returns `true` if the filter contains the specified key. + /// Has a false positive rate of <0.4%. + /// Has no false negatives. 
+ fn contains(&self, key: &u64) -> bool { + bfuse_contains_impl!(*key, self, fingerprint u8) + } + + fn len(&self) -> usize { + self.fingerprints.len() + } +} + +impl<'a> FilterRef<'a, u64> for BinaryFuse8Ref<'a> { + const FINGERPRINT_ALIGNMENT: usize = 1; + + fn from_dma(descriptor: &[u8], fingerprints: &'a [u8]) -> Self { + Self { + descriptor: parse_bfuse_descriptor(descriptor), + fingerprints, + } + } +} + #[cfg(test)] mod test { - use crate::{BinaryFuse8, Filter}; + use crate::{bfuse8::BinaryFuse8Ref, BinaryFuse8, DmaSerializable, Filter, FilterRef}; use core::convert::TryFrom; use alloc::vec::Vec; @@ -185,4 +230,20 @@ mod test { let filter = BinaryFuse8::try_from(vec![key]).unwrap(); assert!(filter.contains(&key)); } + + #[test] + fn test_dma_roundtrip() { + const SAMPLE_SIZE: usize = 1_000_000; + let mut rng = rand::thread_rng(); + let keys: Vec = (0..SAMPLE_SIZE).map(|_| rng.gen()).collect(); + + let filter = BinaryFuse8::try_from(&keys).unwrap(); + + // Unaligned descriptor is fine. + let mut descriptor = [0; BinaryFuse8::DESCRIPTOR_LEN + 1]; + filter.dma_copy_descriptor_to(&mut descriptor[1..]); + + let filter_ref = BinaryFuse8Ref::from_dma(&descriptor[1..], filter.dma_fingerprints()); + assert_eq!(filter_ref.descriptor, filter.descriptor); + } } diff --git a/src/lib.rs b/src/lib.rs index ad8c714..baca3fd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -94,11 +94,11 @@ mod xor32; mod xor8; #[cfg(feature = "binary-fuse")] -pub use bfuse16::BinaryFuse16; +pub use bfuse16::{BinaryFuse16, BinaryFuse16Ref}; #[cfg(feature = "binary-fuse")] -pub use bfuse32::BinaryFuse32; +pub use bfuse32::{BinaryFuse32, BinaryFuse32Ref}; #[cfg(feature = "binary-fuse")] -pub use bfuse8::BinaryFuse8; +pub use bfuse8::{BinaryFuse8, BinaryFuse8Ref}; #[allow(deprecated)] pub use fuse16::Fuse16; #[allow(deprecated)] @@ -121,3 +121,31 @@ pub trait Filter { /// Returns the number of fingerprints in the filter. 
fn len(&self) -> usize; } + +/// Equivalent to Filter except that it represents a reference to fingerprints stored elsewhere. +pub trait FilterRef<'a, Type>: Filter { + /// The alignment required of the fingerprints slice. + const FINGERPRINT_ALIGNMENT: usize; + + /// Create a filter from memory slices. These slices can be mmap'd from a file. The descriptor + /// is eagerly destructured while the fingerprints reference is retained. If the fingerprints + /// slice provided doesn't have an alignment of `FINGERPRINT_ALIGNMENT`, this function will + /// panic. + fn from_dma(descriptor: &[u8], fingerprints: &'a [u8]) -> Self; +} + +/// DMA serializable filters are ones that can be essentially directly accessed into/out of DMA buffers. +/// This isn't a true 0-copy implementation; instead we make the following simplification. +/// A DMA serializable filter has two components - the "fixed" descriptor and the variable length fingerprints. +/// The fixed descriptor is small (a few words at most) and is copied into / out of the serialized form. +/// The variable length fingerprints, however, are referenced directly. +pub trait DmaSerializable { + /// The serialized length of the descriptor. Very small and safe to allocate on-stack if needed. + const DESCRIPTOR_LEN: usize; + + /// Copies the small fixed-length descriptor part of the filter to an output buffer. + fn dma_copy_descriptor_to(&self, out: &mut [u8]); + + /// Obtains the raw byte slice of the fingerprints to serialize to disk. + fn dma_fingerprints(&self) -> &[u8]; +} diff --git a/src/prelude/bfuse.rs b/src/prelude/bfuse.rs index 24fe40b..fc1e901 100644 --- a/src/prelude/bfuse.rs +++ b/src/prelude/bfuse.rs @@ -1,8 +1,16 @@ //! Implements Binary Fuse filters.
// Port of https://github.com/FastFilter/xorfilter/blob/master/binaryfusefilter.go +use core::convert::TryInto; + use libm::{floor, fmax, log}; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +#[cfg(feature = "bincode")] +use bincode::{Decode, Encode}; + #[inline] pub fn segment_length(arity: u32, size: u32) -> u32 { if size == 0 { @@ -53,6 +61,38 @@ pub const fn mod3(x: u8) -> u8 { } } +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "bincode", derive(Encode, Decode))] +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Descriptor { + pub seed: u64, + pub segment_length: u32, + pub segment_length_mask: u32, + pub segment_count_length: u32, +} + +impl Descriptor { + pub const DMA_LEN: usize = u64::BITS as usize / 8 + (u32::BITS as usize / 8) * 3; +} + +#[inline] +pub fn parse_bfuse_descriptor(descriptor: &[u8]) -> Descriptor { + Descriptor { + seed: u64::from_le_bytes(descriptor[0..8].try_into().unwrap()), + segment_length: u32::from_le_bytes(descriptor[8..12].try_into().unwrap()), + segment_length_mask: u32::from_le_bytes(descriptor[12..16].try_into().unwrap()), + segment_count_length: u32::from_le_bytes(descriptor[16..20].try_into().unwrap()), + } +} + +#[inline] +pub fn serialize_bfuse_descriptor(descriptor: &Descriptor, out: &mut [u8]) { + out[0..8].copy_from_slice(&descriptor.seed.to_le_bytes()); + out[8..12].copy_from_slice(&descriptor.segment_length.to_le_bytes()); + out[12..16].copy_from_slice(&descriptor.segment_length_mask.to_le_bytes()); + out[16..20].copy_from_slice(&descriptor.segment_count_length.to_le_bytes()); +} + /// Implements `try_from(&[u64])` for an binary fuse filter of fingerprint type `$fpty`. #[doc(hidden)] #[macro_export] @@ -265,10 +305,10 @@ macro_rules! bfuse_from_impl( } Ok(Self { - seed, + descriptor: Descriptor{seed, segment_length, segment_length_mask, - segment_count_length, + segment_count_length,}, fingerprints, }) } @@ -288,9 +328,9 @@ macro_rules! 
bfuse_contains_impl( bfuse::hash_of_hash }, }; - let hash = mix($key, $self.seed); + let hash = mix($key, $self.descriptor.seed); let mut f = fingerprint!(hash) as $fpty; - let (h0, h1, h2) = hash_of_hash(hash, $self.segment_length, $self.segment_length_mask, $self.segment_count_length); + let (h0, h1, h2) = hash_of_hash(hash, $self.descriptor.segment_length, $self.descriptor.segment_length_mask, $self.descriptor.segment_count_length); f ^= $self.fingerprints[h0 as usize] ^ $self.fingerprints[h1 as usize] ^ $self.fingerprints[h2 as usize];