diff --git a/README.md b/README.md index 1c8565c..13de90c 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ A mortifying serialization library for Rust Abomonation (spelling intentional) is a serialization library for Rust based on the very simple idea that if someone presents data for serialization it will copy those exact bits, and then follow any pointers and copy those bits, and so on. When deserializing it recovers the exact bits, and then corrects pointers to aim at the serialized forms of the chased data. -**Warning**: Abomonation should not be used on any data you care strongly about, or from any computer you value the data on. The `encode` and `decode` methods do things that may be undefined behavior, and you shouldn't stand for that. Specifically, `encode` exposes padding bytes to `memcpy`, and `decode` doesn't much respect alignment. +**Warning**: Abomonation should not be used on any data you care strongly about, or from any computer you value the data on. The `encode` and `decode` methods do things that may be undefined behavior, and you shouldn't stand for that. Specifically, `encode` exposes padding bytes to `memcpy`, and `decode` doesn't much respect alignment and may need to construct Rust references to invalid data. Please consult the [abomonation documentation](https://frankmcsherry.github.com/abomonation) for more specific information. @@ -49,7 +49,7 @@ Be warned that these numbers are not *goodput*, but rather the total number of b ## unsafe_abomonate! -Abomonation comes with the `unsafe_abomonate!` macro implementing `Abomonation` for structs which are essentially equivalent to a tuple of other `Abomonable` types. To use the macro, you must put the `#[macro_use]` modifier before `extern crate abomonation;`. +Abomonation comes with the `unsafe_abomonate!` macro implementing `Abomonation` for structs which are essentially equivalent to a tuple of other `Abomonation` types. 
To use the macro, you must put the `#[macro_use]` modifier before `extern crate abomonation;`. Please note that `unsafe_abomonate!` synthesizes unsafe implementations of `Abomonation`, and it should be considered unsafe to invoke. @@ -82,4 +82,4 @@ if let Some((result, remaining)) = unsafe { decode::(&mut bytes) } { } ``` -Be warned that implementing `Abomonable` for types can be a giant disaster and is entirely discouraged. +Be warned that implementing `Abomonation` for types can be a giant disaster and is entirely discouraged. diff --git a/src/lib.rs b/src/lib.rs index 2e617dc..e5750eb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -40,6 +40,7 @@ use std::io::Write; // for bytes.write_all; push_all is unstable and extend is s use std::io::Result as IOResult; use std::marker::PhantomData; use std::num::*; +use std::ptr::NonNull; pub mod abomonated; @@ -124,9 +125,9 @@ pub unsafe fn decode(bytes: &mut [u8]) -> Option<(&T, &mut [u8]) if bytes.len() < mem::size_of::() { None } else { let (split1, split2) = bytes.split_at_mut(mem::size_of::()); - let result: &mut T = mem::transmute(split1.get_unchecked_mut(0)); - if let Some(remaining) = result.exhume(split2) { - Some((result, remaining)) + let result: NonNull = mem::transmute(split1.get_unchecked_mut(0)); + if let Some(remaining) = T::exhume(result, split2) { + Some((&*result.as_ptr(), remaining)) } else { None @@ -165,10 +166,17 @@ pub trait Abomonation { /// reports any failures in writing to `write`. #[inline(always)] unsafe fn entomb(&self, _write: &mut W) -> IOResult<()> { Ok(()) } - /// Recover any information for `&mut self` not evident from its binary representation. + /// Recover any information for `self_` not evident from its binary representation. /// /// Most commonly this populates pointers with valid references into `bytes`. 
- #[inline(always)] unsafe fn exhume<'a,'b>(&'a mut self, bytes: &'b mut [u8]) -> Option<&'b mut [u8]> { Some(bytes) } + /// + /// Implementors should take note that `self_` is initially in an invalid state, as its inner + /// pointers may be dangling. As Rust references come with a data validity invariant, building + /// references to invalid state is undefined behavior, so one should strive to implement + /// `exhume` using raw pointer operations as much as feasible. + // + // FIXME: Replace self_ with self once Rust has arbitrary self types + #[inline(always)] unsafe fn exhume<'a>(_self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { Some(bytes) } /// Reports the number of further bytes required to entomb `self`. #[inline(always)] fn extent(&self) -> usize { 0 } @@ -227,10 +235,17 @@ macro_rules! unsafe_abomonate { $( self.$field.entomb(write)?; )* Ok(()) } - #[inline] unsafe fn exhume<'a,'b>(&'a mut self, mut bytes: &'b mut [u8]) -> Option<&'b mut [u8]> { - $( let temp = bytes; bytes = self.$field.exhume(temp)?; )* + + #[inline] unsafe fn exhume<'a>(self_: ::std::ptr::NonNull, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { + $( + // FIXME: This (briefly) constructs an &mut _ to invalid data, which is UB. + // The proposed &raw mut operator would allow avoiding this. + let field_ptr: ::std::ptr::NonNull<_> = From::from(&mut (*self_.as_ptr()).$field); + bytes = Abomonation::exhume(field_ptr, bytes)?; + )* Some(bytes) } + #[inline] fn extent(&self) -> usize { let mut size = 0; $( size += self.$field.extent(); )* @@ -242,25 +257,33 @@ macro_rules! unsafe_abomonate { // general code for tuples (can't use '0', '1', ... as field identifiers) macro_rules! 
tuple_abomonate { - ( $($name:ident)+) => ( - impl<$($name: Abomonation),*> Abomonation for ($($name,)*) { + ( $($ty:ident)+) => ( + impl<$($ty: Abomonation),*> Abomonation for ($($ty,)*) { #[allow(non_snake_case)] #[inline(always)] unsafe fn entomb(&self, write: &mut WRITE) -> IOResult<()> { - let ($(ref $name,)*) = *self; - $($name.entomb(write)?;)* + let ($(ref $ty,)*) = *self; + $($ty.entomb(write)?;)* Ok(()) } + #[allow(non_snake_case)] - #[inline(always)] unsafe fn exhume<'a,'b>(&'a mut self, mut bytes: &'b mut [u8]) -> Option<&'b mut [u8]> { - let ($(ref mut $name,)*) = *self; - $( let temp = bytes; bytes = $name.exhume(temp)?; )* + #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { + // FIXME: This (briefly) constructs a "ref mut" to invalid data, which is UB. + // I think avoiding this would require a cleaner way to iterate over tuple fields. + // One possibility would be a C++11-style combination of variadic generics and recursion. + let ($(ref mut $ty,)*) = *self_.as_ptr(); + $( + let field_ptr : NonNull<$ty> = From::from($ty); + bytes = $ty::exhume(field_ptr, bytes)?; + )* Some(bytes) } + #[allow(non_snake_case)] #[inline(always)] fn extent(&self) -> usize { let mut size = 0; - let ($(ref $name,)*) = *self; - $( size += $name.extent(); )* + let ($(ref $ty,)*) = *self; + $( size += $ty.extent(); )* size } } @@ -314,12 +337,17 @@ impl Abomonation for Option { } Ok(()) } - #[inline(always)] unsafe fn exhume<'a, 'b>(&'a mut self, mut bytes: &'b mut[u8]) -> Option<&'b mut [u8]> { - if let &mut Some(ref mut inner) = self { - let tmp = bytes; bytes = inner.exhume(tmp)?; + + #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { + // FIXME: This (briefly) constructs a "ref mut" to invalid data, which is UB. + // I'm not sure if this can be fully resolved without relying on enum implementation details. 
+ if let Some(ref mut inner) = *self_.as_ptr() { + let inner_ptr : NonNull = From::from(inner); + bytes = T::exhume(inner_ptr, bytes)?; } Some(bytes) } + #[inline] fn extent(&self) -> usize { self.as_ref().map(|inner| inner.extent()).unwrap_or(0) } @@ -333,12 +361,22 @@ impl Abomonation for Result { }; Ok(()) } - #[inline(always)] unsafe fn exhume<'a, 'b>(&'a mut self, bytes: &'b mut[u8]) -> Option<&'b mut [u8]> { - match self { - &mut Ok(ref mut inner) => inner.exhume(bytes), - &mut Err(ref mut inner) => inner.exhume(bytes), + + #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { + // FIXME: This (briefly) constructs a "ref mut" to invalid data, which is UB. + // I'm not sure if this can be fully resolved without relying on enum implementation details. + match *self_.as_ptr() { + Ok(ref mut inner) => { + let inner_ptr : NonNull = From::from(inner); + T::exhume(inner_ptr, bytes) + } + Err(ref mut inner) => { + let inner_ptr : NonNull = From::from(inner); + E::exhume(inner_ptr, bytes) + } } } + #[inline] fn extent(&self) -> usize { match self { &Ok(ref inner) => inner.extent(), @@ -386,22 +424,16 @@ macro_rules! 
array_abomonate { impl Abomonation for [T; $size] { #[inline(always)] unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { - for element in self { element.entomb(write)?; } - Ok(()) + entomb_slice(&self[..], write) } + #[inline(always)] - unsafe fn exhume<'a, 'b>(&'a mut self, mut bytes: &'b mut[u8]) -> Option<&'b mut [u8]> { - for element in self { - let tmp = bytes; bytes = element.exhume(tmp)?; - } - Some(bytes) + unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { + exhume_slice(self_.as_ptr() as *mut T, $size, bytes) } + #[inline(always)] fn extent(&self) -> usize { - let mut size = 0; - for element in self { - size += element.extent(); - } - size + slice_extent(&self[..]) } } ) @@ -447,15 +479,20 @@ impl Abomonation for String { write.write_all(self.as_bytes())?; Ok(()) } + #[inline] - unsafe fn exhume<'a,'b>(&'a mut self, bytes: &'b mut [u8]) -> Option<&'b mut [u8]> { - if self.len() > bytes.len() { None } + unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { + // FIXME: This (briefly) constructs an &String to invalid data, which is UB. + // I'm not sure if this can be fully resolved without relying on String implementation details. 
+ let self_len = self_.as_ref().len(); + if self_len > bytes.len() { None } else { - let (mine, rest) = bytes.split_at_mut(self.len()); - std::ptr::write(self, String::from_raw_parts(mem::transmute(mine.as_ptr()), self.len(), self.len())); + let (mine, rest) = bytes.split_at_mut(self_len); + self_.as_ptr().write(String::from_raw_parts(mine.as_mut_ptr(), self_len, self_len)); Some(rest) } } + #[inline] fn extent(&self) -> usize { self.len() } @@ -465,33 +502,28 @@ impl Abomonation for Vec { #[inline] unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write.write_all(typed_to_bytes(&self[..]))?; - for element in self.iter() { element.entomb(write)?; } - Ok(()) + entomb_slice(&self[..], write) } - #[inline] - unsafe fn exhume<'a,'b>(&'a mut self, bytes: &'b mut [u8]) -> Option<&'b mut [u8]> { - // extract memory from bytes to back our vector - let binary_len = self.len() * mem::size_of::(); + #[inline] + unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { + // FIXME: This (briefly) constructs an &Vec to invalid data, which is UB. + // I'm not sure if this can be fully resolved without relying on Vec implementation details. + let self_len = self_.as_ref().len(); + let binary_len = self_len * mem::size_of::(); if binary_len > bytes.len() { None } else { let (mine, mut rest) = bytes.split_at_mut(binary_len); - let slice = std::slice::from_raw_parts_mut(mine.as_mut_ptr() as *mut T, self.len()); - std::ptr::write(self, Vec::from_raw_parts(slice.as_mut_ptr(), self.len(), self.len())); - for element in self.iter_mut() { - let temp = rest; // temp variable explains lifetimes (mysterious!) 
- rest = element.exhume(temp)?; - } + let first_ptr = mine.as_mut_ptr() as *mut T; + rest = exhume_slice(first_ptr, self_len, rest)?; + self_.as_ptr().write(Vec::from_raw_parts(first_ptr, self_len, self_len)); Some(rest) } } + #[inline] fn extent(&self) -> usize { - let mut sum = mem::size_of::() * self.len(); - for element in self.iter() { - sum += element.extent(); - } - sum + mem::size_of::() * self.len() + slice_extent(&self[..]) } } @@ -502,17 +534,20 @@ impl Abomonation for Box { (**self).entomb(bytes)?; Ok(()) } + #[inline] - unsafe fn exhume<'a,'b>(&'a mut self, bytes: &'b mut [u8]) -> Option<&'b mut [u8]> { + unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { let binary_len = mem::size_of::(); if binary_len > bytes.len() { None } else { let (mine, mut rest) = bytes.split_at_mut(binary_len); - std::ptr::write(self, mem::transmute(mine.as_mut_ptr() as *mut T)); - let temp = rest; rest = (**self).exhume(temp)?; + let box_target : NonNull = NonNull::new_unchecked(mine.as_mut_ptr() as *mut T); + rest = T::exhume(box_target, rest)?; + self_.as_ptr().write(Box::from_raw(box_target.as_ptr())); Some(rest) } } + #[inline] fn extent(&self) -> usize { mem::size_of::() + (&**self).extent() } @@ -523,6 +558,28 @@ impl Abomonation for Box { std::slice::from_raw_parts(slice.as_ptr() as *const u8, slice.len() * mem::size_of::()) } +// Common subset of "entomb" for all [T]-like types +unsafe fn entomb_slice(slice: &[T], write: &mut W) -> IOResult<()> { + for element in slice { element.entomb(write)?; } + Ok(()) +} + +// Common subset of "exhume" for all [T]-like types +// (I'd gladly take a NonNull<[T]>, but it is too difficult to build raw pointers to slices) +#[inline] +unsafe fn exhume_slice<'a, T: Abomonation>(first_ptr: *mut T, length: usize, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { + for i in 0..length { + let element_ptr: NonNull = NonNull::new_unchecked(first_ptr.add(i)); + bytes = T::exhume(element_ptr, bytes)?; + } + 
Some(bytes) +} + +// Common subset of "extent" for all [T]-like types +fn slice_extent(slice: &[T]) -> usize { + slice.iter().map(T::extent).sum() +} + mod network { use Abomonation; use std::net::{SocketAddr, SocketAddrV4, SocketAddrV6, IpAddr, Ipv4Addr, Ipv6Addr}; diff --git a/tests/tests.rs b/tests/tests.rs index 340e776..3ff0dec 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -1,6 +1,7 @@ extern crate abomonation; use abomonation::*; +use std::fmt::Debug; #[test] fn test_array() { _test_pass(vec![[0, 1, 2]; 1024]); } #[test] fn test_nonzero() { _test_pass(vec![[std::num::NonZeroI32::new(1)]; 1024]); } @@ -38,21 +39,21 @@ fn test_phantom_data_for_non_abomonatable_type() { _test_pass(PhantomData::::default()); } -fn _test_pass(record: T) { +fn _test_pass(record: T) { let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } { let (result, rest) = unsafe { decode::(&mut bytes[..]) }.unwrap(); - assert!(&record == result); - assert!(rest.len() == 0); + assert_eq!(&record, result); + assert_eq!(rest.len(), 0); } } -fn _test_fail(record: T) { +fn _test_fail(record: T) { let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } bytes.pop(); - assert!(unsafe { decode::(&mut bytes[..]) }.is_none()); + assert_eq!(unsafe { decode::(&mut bytes[..]) }, None); } fn _test_size(record: T) { @@ -62,7 +63,7 @@ fn _test_size(record: T) { } -#[derive(Eq, PartialEq)] +#[derive(Debug, Eq, PartialEq)] struct MyStruct { a: String, b: u64, @@ -82,8 +83,8 @@ fn test_macro() { // decode a &Vec<(u64, String)> from binary data if let Some((result, rest)) = unsafe { decode::(&mut bytes) } { - assert!(result == &record); - assert!(rest.len() == 0); + assert_eq!(result, &record); + assert_eq!(rest.len(), 0); } } @@ -106,10 +107,10 @@ fn test_multiple_encode_decode() { unsafe { encode(&vec![1,2,3], &mut bytes).unwrap(); } unsafe { encode(&"grawwwwrr".to_owned(), &mut bytes).unwrap(); } - let (t, r) = unsafe { decode::(&mut bytes) }.unwrap(); assert!(*t 
== 0); - let (t, r) = unsafe { decode::(r) }.unwrap(); assert!(*t == 7); - let (t, r) = unsafe { decode::>(r) }.unwrap(); assert!(*t == vec![1,2,3]); - let (t, _r) = unsafe { decode::(r) }.unwrap(); assert!(*t == "grawwwwrr".to_owned()); + let (t, r) = unsafe { decode::(&mut bytes) }.unwrap(); assert_eq!(*t, 0); + let (t, r) = unsafe { decode::(r) }.unwrap(); assert_eq!(*t, 7); + let (t, r) = unsafe { decode::>(r) }.unwrap(); assert_eq!(*t, vec![1,2,3]); + let (t, _r) = unsafe { decode::(r) }.unwrap(); assert_eq!(*t, "grawwwwrr".to_owned()); } #[test] @@ -125,6 +126,6 @@ fn test_net_types() { unsafe { encode(&socket_addr4, &mut bytes).unwrap(); } unsafe { encode(&socket_addr6, &mut bytes).unwrap(); } - let (t, r) = unsafe { decode::(&mut bytes) }.unwrap(); assert!(*t == socket_addr4); - let (t, _r) = unsafe { decode::(r) }.unwrap(); assert!(*t == socket_addr6); + let (t, r) = unsafe { decode::(&mut bytes) }.unwrap(); assert_eq!(*t, socket_addr4); + let (t, _r) = unsafe { decode::(r) }.unwrap(); assert_eq!(*t, socket_addr6); }