Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix TensorStorage memory deallocation #145

Merged
merged 8 commits into from
Sep 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions crates/kornia-core/src/allocator.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::alloc::{GlobalAlloc, Layout, System};
use std::alloc;
use std::alloc::Layout;

use thiserror::Error;

Expand Down Expand Up @@ -55,7 +56,7 @@ impl TensorAllocator for CpuAllocator {
///
/// A non-null pointer to the allocated memory if successful, otherwise an error.
fn alloc(&self, layout: Layout) -> Result<*mut u8, TensorAllocatorError> {
let ptr = unsafe { System.alloc(layout) };
let ptr = unsafe { alloc::alloc(layout) };
if ptr.is_null() {
Err(TensorAllocatorError::NullPointer)?
}
Expand All @@ -74,7 +75,7 @@ impl TensorAllocator for CpuAllocator {
/// The pointer must be non-null and the layout must be correct.
#[allow(clippy::not_unsafe_ptr_arg_deref)]
fn dealloc(&self, ptr: *mut u8, layout: Layout) {
unsafe { System.dealloc(ptr, layout) }
unsafe { alloc::dealloc(ptr, layout) }
}
}

Expand Down
5 changes: 3 additions & 2 deletions crates/kornia-core/src/serde.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@ use crate::{
use serde::ser::SerializeStruct;
use serde::Deserialize;

impl<T, const N: usize, A: TensorAllocator> serde::Serialize for Tensor<T, N, A>
impl<T, const N: usize, A> serde::Serialize for Tensor<T, N, A>
where
T: serde::Serialize + SafeTensorType,
A: TensorAllocator + 'static,
{
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
Expand All @@ -23,7 +24,7 @@ where
}
}

impl<'de, T, const N: usize, A: TensorAllocator + Default> serde::Deserialize<'de>
impl<'de, T, const N: usize, A: TensorAllocator + Default + 'static> serde::Deserialize<'de>
for Tensor<T, N, A>
where
T: serde::Deserialize<'de> + SafeTensorType,
Expand Down
141 changes: 127 additions & 14 deletions crates/kornia-core/src/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,30 @@ impl SafeTensorType for i64 {}
impl SafeTensorType for f32 {}
impl SafeTensorType for f64 {}

/// Represents the owner of custom Arrow Buffer memory allocations.
///
/// This struct is used to facilitate the automatic deallocation of the memory it owns,
/// using the `Drop` trait.
pub struct TensorCustomAllocationOwner<A: TensorAllocator> {
/// The allocator used to allocate the tensor storage.
alloc: Arc<A>,
/// The layout used for the allocation.
layout: Layout,
/// The pointer to the allocated memory
ptr: NonNull<u8>,
}

// SAFETY: TensorCustomAllocationOwner is never modifed from multiple threads.
impl<A: TensorAllocator> std::panic::RefUnwindSafe for TensorCustomAllocationOwner<A> {}
unsafe impl<A: TensorAllocator> Sync for TensorCustomAllocationOwner<A> {}
unsafe impl<A: TensorAllocator> Send for TensorCustomAllocationOwner<A> {}

impl<A: TensorAllocator> Drop for TensorCustomAllocationOwner<A> {
fn drop(&mut self) {
self.alloc.dealloc(self.ptr.as_ptr(), self.layout);
}
}

/// Represents a contiguous memory region that can be shared with other buffers and across thread boundaries.
///
/// This struct provides methods to create, access, and manage tensor storage using a custom allocator.
Expand All @@ -32,12 +56,13 @@ where
/// The buffer containing the tensor storage.
data: ScalarBuffer<T>,
/// The allocator used to allocate the tensor storage.
alloc: A,
alloc: Arc<A>,
}

impl<T, A: TensorAllocator> TensorStorage<T, A>
impl<T, A> TensorStorage<T, A>
where
T: SafeTensorType + Clone,
A: TensorAllocator + 'static,
{
/// Creates a new tensor storage with the given length and allocator.
///
Expand All @@ -51,17 +76,19 @@ where
/// A new tensor storage if successful, otherwise an error.
pub fn new(len: usize, alloc: A) -> Result<Self, TensorAllocatorError> {
// allocate memory for tensor storage
let ptr =
alloc.alloc(Layout::array::<T>(len).map_err(TensorAllocatorError::LayoutError)?)?;
let layout = Layout::array::<T>(len).map_err(TensorAllocatorError::LayoutError)?;
let ptr = NonNull::new(alloc.alloc(layout)?).ok_or(TensorAllocatorError::NullPointer)?;
let alloc = Arc::new(alloc);
let owner = TensorCustomAllocationOwner {
alloc: alloc.clone(),
emilmgeorge marked this conversation as resolved.
Show resolved Hide resolved
layout,
ptr,
};

// create the buffer
let buffer = unsafe {
// SAFETY: `ptr` is non-null and properly aligned, and `len` is the correct size.
Buffer::from_custom_allocation(
NonNull::new_unchecked(ptr),
len * std::mem::size_of::<T>(),
Arc::new(Vec::<T>::with_capacity(len)),
)
Buffer::from_custom_allocation(ptr, len * std::mem::size_of::<T>(), Arc::new(owner))
};

Ok(Self {
Expand Down Expand Up @@ -99,7 +126,7 @@ where
// create tensor storage
Self {
data: buffer.into(),
alloc,
alloc: Arc::new(alloc),
}
}

Expand Down Expand Up @@ -223,13 +250,13 @@ where
let buffer = Buffer::from_custom_allocation(
NonNull::new_unchecked(ptr as *mut u8),
len * std::mem::size_of::<T>(),
Arc::new(Vec::<T>::with_capacity(len)),
Arc::new(()),
);

// create tensor storage
Self {
data: buffer.into(),
alloc: alloc.clone(),
alloc: Arc::new(alloc.clone()),
}
}
}
Expand All @@ -238,10 +265,10 @@ where
impl<T, A> Clone for TensorStorage<T, A>
where
T: SafeTensorType + Clone,
A: TensorAllocator + Clone,
A: TensorAllocator + Clone + 'static,
{
fn clone(&self) -> Self {
let mut new_storage = Self::new(self.len(), self.alloc.clone())
let mut new_storage = Self::new(self.len(), (*self.alloc).clone())
.expect("Failed to allocate memory for cloned TensorStorage");
new_storage.as_mut_slice().clone_from_slice(self.as_slice());
new_storage
Expand All @@ -253,6 +280,8 @@ mod tests {
use super::*;
use crate::allocator::CpuAllocator;
use std::alloc::Layout;
use std::cell::RefCell;
use std::rc::Rc;

#[test]
fn test_tensor_storage() -> Result<(), TensorAllocatorError> {
Expand Down Expand Up @@ -365,4 +394,88 @@ mod tests {
assert_eq!(result_vec.capacity(), original_vec_capacity);
assert!(std::ptr::eq(result_vec.as_ptr(), original_vec_ptr));
}

#[test]
fn test_tensor_storage_allocator() -> Result<(), TensorAllocatorError> {
// A test TensorAllocator that keeps a count of the bytes that are allocated but not yet
// deallocated via the allocator.
#[derive(Clone)]
struct TestAllocator {
bytes_allocated: Rc<RefCell<i32>>,
}
impl TensorAllocator for TestAllocator {
fn alloc(&self, layout: Layout) -> Result<*mut u8, TensorAllocatorError> {
*self.bytes_allocated.borrow_mut() += layout.size() as i32;
CpuAllocator.alloc(layout)
}
fn dealloc(&self, ptr: *mut u8, layout: Layout) {
*self.bytes_allocated.borrow_mut() -= layout.size() as i32;
CpuAllocator.dealloc(ptr, layout)
}
}

let allocator = TestAllocator {
bytes_allocated: Rc::new(RefCell::new(0)),
};
let len = 1024;

// TensorStorage::new()
// Deallocation should happen when `storage` goes out of scope.
{
let _storage = TensorStorage::<u8, _>::new(len, allocator.clone())?;
assert_eq!(*allocator.bytes_allocated.borrow(), len as i32);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No pointer test here ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case the storage is not created from another buffer so there is no original pointer to compare to. Are there any other checks I should have added?

}
assert_eq!(*allocator.bytes_allocated.borrow(), 0);

// TensorStorage::new() -> TensorStorage::into_vec()
// TensorStorage::into_vec() consumes the storage and creates a copy (in this case).
edgarriba marked this conversation as resolved.
Show resolved Hide resolved
// This should cause deallocation of the original memory.
{
let storage = TensorStorage::<u8, _>::new(len, allocator.clone())?;
assert_eq!(*allocator.bytes_allocated.borrow(), len as i32);

let _vec = storage.into_vec();
assert_eq!(*allocator.bytes_allocated.borrow(), 0);
}
assert_eq!(*allocator.bytes_allocated.borrow(), 0);

// TensorStorage::from_vec() -> TensorStorage::into_vec()
// TensorStorage::from_vec() currently does not use the custom allocator, so the
// bytes_allocated value should not change.
{
let original_vec = Vec::<u8>::with_capacity(len);
let original_vec_ptr = original_vec.as_ptr();
let original_vec_capacity = original_vec.capacity();

let storage = TensorStorage::<u8, _>::from_vec(original_vec, allocator.clone());
assert_eq!(*allocator.bytes_allocated.borrow(), 0);

let result_vec = storage.into_vec();
assert_eq!(*allocator.bytes_allocated.borrow(), 0);

assert_eq!(result_vec.capacity(), original_vec_capacity);
assert!(std::ptr::eq(result_vec.as_ptr(), original_vec_ptr));
}
assert_eq!(*allocator.bytes_allocated.borrow(), 0);

// TensorStorage::from_ptr()
// TensorStorage::from_ptr() does not take ownership of buffer. So the memory should not be
// deallocated when the TensorStorage goes out of scope.
// In this case, the memory will be deallocated when the vector goes out of scope.
{
let mut original_vec = Vec::<u8>::with_capacity(len);
let original_ptr = original_vec.as_ptr();
{
let storage = unsafe {
TensorStorage::<u8, _>::from_ptr(original_vec.as_mut_ptr(), len, &allocator)
};
assert_eq!(*allocator.bytes_allocated.borrow(), 0);

assert_eq!(storage.as_ptr(), original_ptr);
}
assert_eq!(*allocator.bytes_allocated.borrow(), 0);
}

Ok(())
}
}
6 changes: 3 additions & 3 deletions crates/kornia-core/src/tensor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ where
impl<T, const N: usize, A> Tensor<T, N, A>
where
T: SafeTensorType,
A: TensorAllocator,
A: TensorAllocator + 'static,
{
/// Create a new `Tensor` with uninitialized data.
///
Expand Down Expand Up @@ -875,7 +875,7 @@ where
impl<T, const N: usize, A> Clone for Tensor<T, N, A>
where
T: SafeTensorType + Clone,
A: TensorAllocator + Clone,
A: TensorAllocator + Clone + 'static,
{
fn clone(&self) -> Self {
Self {
Expand All @@ -889,7 +889,7 @@ where
impl<T, const N: usize, A> std::fmt::Display for Tensor<T, N, A>
where
T: SafeTensorType + std::fmt::Display + std::fmt::LowerExp,
A: TensorAllocator,
A: TensorAllocator + 'static,
{
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let width = self
Expand Down
2 changes: 1 addition & 1 deletion crates/kornia-core/src/view.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ pub struct TensorView<'a, T: SafeTensorType, const N: usize, A: TensorAllocator>
pub strides: [usize; N],
}

impl<'a, T: SafeTensorType, const N: usize, A: TensorAllocator> TensorView<'a, T, N, A> {
impl<'a, T: SafeTensorType, const N: usize, A: TensorAllocator + 'static> TensorView<'a, T, N, A> {
/// Returns the data slice of the tensor.
#[inline]
pub fn as_slice(&self) -> &[T] {
Expand Down
Loading