From a9555c58c6f9e01608d24bba777fee14bdc45ab9 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 27 Jun 2024 05:53:06 +0000 Subject: [PATCH 01/24] Add _ref versions of get/set_current_device_resource and friends. --- include/rmm/mr/device/per_device_resource.hpp | 205 +++++++++++++++++- 1 file changed, 199 insertions(+), 6 deletions(-) diff --git a/include/rmm/mr/device/per_device_resource.hpp b/include/rmm/mr/device/per_device_resource.hpp index a56a784a1..67aac51ac 100644 --- a/include/rmm/mr/device/per_device_resource.hpp +++ b/include/rmm/mr/device/per_device_resource.hpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -121,6 +122,25 @@ RMM_EXPORT inline auto& get_map() return device_id_to_resource; } +/** + * @briefreturn{Reference to the lock} + */ +inline std::mutex& ref_map_lock() +{ + static std::mutex ref_map_lock; + return ref_map_lock; +} + +// This symbol must have default visibility, see: https://github.com/rapidsai/rmm/issues/826 +/** + * @briefreturn{Reference to the map from device id -> resource_ref} + */ +RMM_EXPORT inline auto& get_ref_map() +{ + static std::map device_id_to_resource_ref; + return device_id_to_resource_ref; +} + } // namespace detail /** @@ -129,20 +149,21 @@ RMM_EXPORT inline auto& get_map() * Returns a pointer to the `device_memory_resource` for the specified device. The initial * resource is a `cuda_memory_resource`. * - * `id.value()` must be in the range `[0, cudaGetDeviceCount())`, otherwise behavior is undefined. + * `device_id.value()` must be in the range `[0, cudaGetDeviceCount())`, otherwise behavior is + * undefined. * * This function is thread-safe with respect to concurrent calls to `set_per_device_resource`, * `get_per_device_resource`, `get_current_device_resource`, and `set_current_device_resource`. * Concurrent calls to any of these functions will result in a valid state, but the order of * execution is undefined. * - * @note The returned `device_memory_resource` should only be used when CUDA device `id` is the - * current device (e.g. set using `cudaSetDevice()`). The behavior of a device_memory_resource is - * undefined if used while the active CUDA device is a different device from the one that was active - * when the device_memory_resource was created. + * @note The returned `device_memory_resource` should only be used when CUDA device `device_id` is + * the current device (e.g. set using `cudaSetDevice()`). The behavior of a + * `device_memory_resource` is undefined if used while the active CUDA device is a different device + * from the one that was active when the `device_memory_resource` was created. * * @param device_id The id of the target device - * @return Pointer to the current `device_memory_resource` for device `id` + * @return Pointer to the current `device_memory_resource` for device `device_id` */ inline device_memory_resource* get_per_device_resource(cuda_device_id device_id) { @@ -247,5 +268,177 @@ inline device_memory_resource* set_current_device_resource(device_memory_resourc { return set_per_device_resource(rmm::get_current_cuda_device(), new_mr); } + +/** + * @brief Get the `device_async_resource_ref for the specified device. + * + * Returns a `device_async_resource_ref` for the specified device. The initial resource_ref + * references a `cuda_memory_resource`. + * + * `device_id.value()` must be in the range `[0, cudaGetDeviceCount())`, otherwise behavior is + * undefined. 
+ * + * This function is thread-safe with respect to concurrent calls to `set_per_device_resource_ref`, + * `get_per_device_resource_ref`, `get_current_device_resource_ref`, + * `set_current_device_resource_ref` and `reset_current_device_resource_ref. Concurrent calls to any + * of these functions will result in a valid state, but the order of execution is undefined. + * + * @note The returned `device_async_resource_ref` should only be used when CUDA device `device_id` + * is the current device (e.g. set using `cudaSetDevice()`). The behavior of a + * `device_async_resource_ref` is undefined if used while the active CUDA device is a different + * device from the one that was active when the memory resource was created. + * + * @param device_id The id of the target device + * @return The current `device_async_resource_ref` for device `device_id` + */ +inline device_async_resource_ref get_per_device_resource_ref(cuda_device_id device_id) +{ + std::lock_guard lock{detail::ref_map_lock()}; + auto& map = detail::get_ref_map(); + // If a resource was never set for `id`, set to the initial resource + auto const found = map.find(device_id.value()); + if (found == map.end()) { + // + auto item = map.insert({device_id.value(), detail::initial_resource()}); + return item.first->second; + } + return found->second; +} + +/** + * @brief Set the `device_async_resource_ref` for the specified device to `new_resource_ref` + * + * `device_id.value()` must be in the range `[0, cudaGetDeviceCount())`, otherwise behavior is + * undefined. + * + * The object referenced by `new_resource_ref` must outlive the last use of the resource, otherwise + * behavior is undefined. It is the caller's responsibility to maintain the lifetime of the resource + * object. + * + * This function is thread-safe with respect to concurrent calls to `set_per_device_resource_ref`, + * `get_per_device_resource_ref`, `get_current_device_resource_ref`, + * `set_current_device_resource_ref` and `reset_current_device_resource_ref. Concurrent calls to any + * of these functions will result in a valid state, but the order of execution is undefined. + * + * @note The resource passed in `new_resource_ref` must have been created when device `device_id` + * was the current CUDA device (e.g. set using `cudaSetDevice()`). The behavior of a + * `device_async_resource_ref` is undefined if used while the active CUDA device is a different + * device from the one that was active when the memory resource was created. 
+ * + * @param device_id The id of the target device + * @param new_resource_ref new `device_async_resource_ref` to use as new resource for `device_id` + * @return The previous `device_async_resource_ref` for `device_id` + */ +inline device_async_resource_ref set_per_device_resource_ref( + cuda_device_id device_id, device_async_resource_ref new_resource_ref) +{ + std::lock_guard lock{detail::ref_map_lock()}; + auto& map = detail::get_ref_map(); + auto const old_itr = map.find(device_id.value()); + // If a resource didn't previously exist for `id`, return pointer to initial_resource + // Note: because resource_ref is not default-constructible, we can't use std::map::operator[] + if (old_itr == map.end()) { + map.insert({device_id.value(), new_resource_ref}); + std::cout << "returning initial resource in set_per_device_resource_ref\n"; + return device_async_resource_ref{detail::initial_resource()}; + } + + auto old_resource_ref = old_itr->second; + old_itr->second = new_resource_ref; // update map directly via iterator + return old_resource_ref; +} + +/** + * @brief Get the `device_async_resource_ref` for the current device. + * + * Returns the `device_async_resource_ref` set for the current device. The initial resource_ref + * references a `cuda_memory_resource`. + * + * The "current device" is the device returned by `cudaGetDevice`. + * + * This function is thread-safe with respect to concurrent calls to `set_per_device_resource_ref`, + * `get_per_device_resource_ref`, `get_current_device_resource_ref`, + * `set_current_device_resource_ref` and `reset_current_device_resource_ref. Concurrent calls to any + * of these functions will result in a valid state, but the order of execution is undefined. + + * + * @note The returned `device_async_resource_ref` should only be used with the current CUDA device. + * Changing the current device (e.g. using `cudaSetDevice()`) and then using the returned + * `resource_ref` can result in undefined behavior. The behavior of a `device_async_resource_ref` is + * undefined if used while the active CUDA device is a different device from the one that was active + * when the memory resource was created. + * + * @return `device_async_resource_ref` active for the current device + */ +inline device_async_resource_ref get_current_device_resource_ref() +{ + return get_per_device_resource_ref(rmm::get_current_cuda_device()); +} + +/** + * @brief Set the `device_async_resource_ref` for the current device. + * + * The "current device" is the device returned by `cudaGetDevice`. + * + * The object referenced by `new_resource_ref` must outlive the last use of the resource, otherwise + * behavior is undefined. It is the caller's responsibility to maintain the lifetime of the resource + * object. + * + * This function is thread-safe with respect to concurrent calls to `set_per_device_resource_ref`, + * `get_per_device_resource_ref`, `get_current_device_resource_ref`, + * `set_current_device_resource_ref` and `reset_current_device_resource_ref. Concurrent calls to any + * of these functions will result in a valid state, but the order of execution is undefined. + * + * @note The resource passed in `new_resource` must have been created for the current CUDA device. + * The behavior of a `device_async_resource_ref` is undefined if used while the active CUDA device + * is a different device from the one that was active when the memory resource was created. 
+ * + * @param new_resource_ref New `device_async_resource_ref` to use for the current device + * @return Previous `device_async_resource_ref` for the current device + */ +inline device_async_resource_ref set_current_device_resource_ref( + device_async_resource_ref new_resource_ref) +{ + return set_per_device_resource_ref(rmm::get_current_cuda_device(), new_resource_ref); +} + +/** + * @brief Reset the `device_async_resource_ref` for the specified device to the initial resource. + * + * Resets to a reference to the initial `cuda_memory_resource`. + * + * `device_id.value()` must be in the range `[0, cudaGetDeviceCount())`, otherwise behavior is + * undefined. + * + * This function is thread-safe with respect to concurrent calls to `set_per_device_resource_ref`, + * `get_per_device_resource_ref`, `get_current_device_resource_ref`, + * `set_current_device_resource_ref` and `reset_current_device_resource_ref. Concurrent calls to any + * of these functions will result in a valid state, but the order of execution is undefined. + * + * @param device_id The id of the target device + * @return Previous `device_async_resource_ref` for `device_id` + */ +inline device_async_resource_ref reset_per_device_resource_ref(cuda_device_id device_id) +{ + return set_per_device_resource_ref(device_id, detail::initial_resource()); +} + +/** + * @brief Reset the `device_async_resource_ref` for the current device to the initial resource. + * + * Resets to a reference to the initial `cuda_memory_resource`. The "current device" is the device + * returned by `cudaGetDevice`. + * + * This function is thread-safe with respect to concurrent calls to `set_per_device_resource_ref`, + * `get_per_device_resource_ref`, `get_current_device_resource_ref`, + * `set_current_device_resource_ref` and `reset_current_device_resource_ref. Concurrent calls to any + * of these functions will result in a valid state, but the order of execution is undefined. + * + * @return Previous `device_async_resource_ref` for `device_id` + */ +inline device_async_resource_ref reset_current_device_resource_ref() +{ + return reset_per_device_resource_ref(rmm::get_current_cuda_device()); +} /** @} */ // end of group } // namespace rmm::mr From d8936988d6d35e860d8bd4b83bda91566d3668b3 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 20 Aug 2024 05:56:19 +0000 Subject: [PATCH 02/24] doc: explain that set_current_device_resource is in transition --- include/rmm/mr/device/per_device_resource.hpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/rmm/mr/device/per_device_resource.hpp b/include/rmm/mr/device/per_device_resource.hpp index 67aac51ac..15823cfe9 100644 --- a/include/rmm/mr/device/per_device_resource.hpp +++ b/include/rmm/mr/device/per_device_resource.hpp @@ -53,9 +53,16 @@ * `get_per_device_resource(id)` will return a pointer to a `cuda_memory_resource`. * * To fetch and modify the resource for the current CUDA device, `get_current_device_resource()` and - * `set_current_device_resource()` will automatically use the current CUDA device id from + * `set_current_device_resource()` automatically use the current CUDA device id from * `cudaGetDevice()`. * + * RMM is in transition to use `cuda::mr::async_resource_ref` in place of raw pointers to + * `device_memory_resource`. 
The `set_per_device_resource_ref`, `get_per_device_resource_ref`, + * `get_current_device_resource_ref`, `set_current_device_resource_ref`, and + * `reset_current_device_resource_ref` functions provide the same functionality as their + * `device_memory_resource` counterparts, but with `device_async_resource_ref` objects. + * The raw pointer versions are expected to be deprecated and removed in a future release. + * * Creating a device_memory_resource for each device requires care to set the current device * before creating each resource, and to maintain the lifetime of the resources as long as they * are set as per-device resources. Here is an example loop that creates `unique_ptr`s to From b068ded2a7ea07e9ce04a6a5617b1924ecce2cbc Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 20 Aug 2024 06:26:22 +0000 Subject: [PATCH 03/24] Update/add tests for get/set_current_device_resource_ref --- .../mr/device/mr_ref_multithreaded_tests.cpp | 86 +++++++++++-------- tests/mr/device/mr_ref_test.hpp | 13 ++- tests/mr/device/mr_ref_tests.cpp | 36 +++++--- 3 files changed, 87 insertions(+), 48 deletions(-) diff --git a/tests/mr/device/mr_ref_multithreaded_tests.cpp b/tests/mr/device/mr_ref_multithreaded_tests.cpp index 3ba32445f..7d749efd1 100644 --- a/tests/mr/device/mr_ref_multithreaded_tests.cpp +++ b/tests/mr/device/mr_ref_multithreaded_tests.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -69,6 +70,8 @@ void spawn(Task task, Arguments&&... args) TEST(DefaultTest, UseCurrentDeviceResource_mt) { spawn(test_get_current_device_resource); } +TEST(DefaultTest, UseCurrentDeviceResourceRef_mt) { spawn(test_get_current_device_resource_ref); } + TEST(DefaultTest, CurrentDeviceResourceIsCUDA_mt) { spawn([]() { @@ -77,6 +80,14 @@ TEST(DefaultTest, CurrentDeviceResourceIsCUDA_mt) }); } +TEST(DefaultTest, CurrentDeviceResourceRefIsCUDA_mt) +{ + spawn([]() { + EXPECT_EQ(rmm::mr::get_current_device_resource_ref(), + rmm::device_async_resource_ref{rmm::mr::detail::initial_resource()}); + }); +} + TEST(DefaultTest, GetCurrentDeviceResource_mt) { spawn([]() { @@ -86,57 +97,64 @@ TEST(DefaultTest, GetCurrentDeviceResource_mt) }); } -// Disable until we support resource_ref with set_current_device_resource -/*TEST_P(mr_ref_test_mt, SetCurrentDeviceResource_mt) +TEST(DefaultTest, GetCurrentDeviceResourceRef_mt) { - // single thread changes default resource, then multiple threads use it + spawn([]() { + auto mr = rmm::mr::get_current_device_resource_ref(); + EXPECT_EQ(mr, rmm::device_async_resource_ref{rmm::mr::detail::initial_resource()}); + }); +} - rmm::mr::device_memory_resource* old = rmm::mr::set_current_device_resource(this->mr.get()); - EXPECT_NE(nullptr, old); +TEST_P(mr_ref_test_mt, SetCurrentDeviceResourceRef_mt) +{ + // single thread changes default resource, then multiple threads use it + auto old = rmm::mr::set_current_device_resource_ref(this->ref); - spawn([mr = this->mr.get()]() { - EXPECT_EQ(mr, rmm::mr::get_current_device_resource()); - test_get_current_device_resource(); // test allocating with the new default resource + spawn([mr = this->ref]() { + EXPECT_EQ(mr, rmm::mr::get_current_device_resource_ref()); + test_get_current_device_resource_ref(); // test allocating with the new default resource }); - // setting default resource w/ nullptr should reset to initial - rmm::mr::set_current_device_resource(nullptr); - EXPECT_TRUE(old->is_equal(*rmm::mr::get_current_device_resource())); -}*/ + // resetting default resource should reset to initial + 
rmm::mr::reset_current_device_resource_ref(); + EXPECT_EQ(old, rmm::mr::get_current_device_resource_ref()); +} -/*TEST_P(mr_ref_test_mt, SetCurrentDeviceResourcePerThread_mt) +TEST_P(mr_ref_test_mt, SetCurrentDeviceResourceRefPerThread_mt) { int num_devices{}; RMM_CUDA_TRY(cudaGetDeviceCount(&num_devices)); std::vector threads; threads.reserve(num_devices); + + auto mr = this->ref; + for (int i = 0; i < num_devices; ++i) { - threads.emplace_back(std::thread{[mr = this->mr.get()](auto dev_id) { - RMM_CUDA_TRY(cudaSetDevice(dev_id)); - rmm::mr::device_memory_resource* old = - rmm::mr::set_current_device_resource(mr); - EXPECT_NE(nullptr, old); - // initial resource for this device should be CUDA mr - EXPECT_TRUE(old->is_equal(rmm::mr::cuda_memory_resource{})); - // get_current_device_resource should equal the resource we - // just set - EXPECT_EQ(mr, rmm::mr::get_current_device_resource()); - // Setting current dev resource to nullptr should reset to - // cuda MR and return the MR we previously set - old = rmm::mr::set_current_device_resource(nullptr); - EXPECT_NE(nullptr, old); - EXPECT_EQ(old, mr); - EXPECT_TRUE(rmm::mr::get_current_device_resource()->is_equal( - rmm::mr::cuda_memory_resource{})); - }, - i}); + threads.emplace_back( + [mr](auto dev_id) { + RMM_CUDA_TRY(cudaSetDevice(dev_id)); + auto cuda_ref = rmm::mr::get_current_device_resource_ref(); + auto old = rmm::mr::set_current_device_resource_ref(mr); + + // initial resource for this device should be CUDA mr + EXPECT_EQ(old, cuda_ref); + // get_current_device_resource_ref should equal the resource we + // just set + EXPECT_EQ(mr, rmm::mr::get_current_device_resource_ref()); + // Resetting current dev resource ref should make it + // cuda MR and return the MR we previously set + old = rmm::mr::reset_current_device_resource_ref(); + EXPECT_EQ(old, mr); + EXPECT_EQ(cuda_ref, rmm::mr::get_current_device_resource_ref()); + }, + i); } for (auto& thread : threads) { thread.join(); } -}*/ +} TEST_P(mr_ref_test_mt, Allocate) { spawn(test_various_allocations, this->ref); } @@ -247,7 +265,7 @@ void test_allocate_async_free_different_threads(rmm::device_async_resource_ref r std::mutex mtx; std::condition_variable allocations_ready; std::list allocations; - cudaEvent_t event; + cudaEvent_t event{}; RMM_CUDA_TRY(cudaEventCreate(&event)); diff --git a/tests/mr/device/mr_ref_test.hpp b/tests/mr/device/mr_ref_test.hpp index df0045d2b..6e63b3838 100644 --- a/tests/mr/device/mr_ref_test.hpp +++ b/tests/mr/device/mr_ref_test.hpp @@ -73,6 +73,15 @@ inline void test_get_current_device_resource() rmm::mr::get_current_device_resource()->deallocate(ptr, 1_MiB); } +inline void test_get_current_device_resource_ref() +{ + void* ptr = rmm::mr::get_current_device_resource_ref().allocate(1_MiB); + EXPECT_NE(nullptr, ptr); + EXPECT_TRUE(is_properly_aligned(ptr)); + EXPECT_TRUE(is_device_accessible_memory(ptr)); + rmm::mr::get_current_device_resource_ref().deallocate(ptr, 1_MiB); +} + inline void test_allocate(resource_ref ref, std::size_t bytes) { try { @@ -392,7 +401,7 @@ inline auto make_binning() struct mr_factory_base { std::string name{}; ///< Name to associate with tests that use this factory - resource_ref mr{rmm::mr::get_current_device_resource()}; + resource_ref mr{rmm::mr::get_current_device_resource_ref()}; bool skip_test{false}; }; @@ -468,7 +477,7 @@ struct mr_ref_test : public ::testing::TestWithParam { } std::shared_ptr factory_obj{}; - resource_ref ref{rmm::mr::get_current_device_resource()}; + resource_ref 
ref{rmm::mr::get_current_device_resource_ref()}; rmm::cuda_stream stream{}; }; diff --git a/tests/mr/device/mr_ref_tests.cpp b/tests/mr/device/mr_ref_tests.cpp index d94817bef..55e91d765 100644 --- a/tests/mr/device/mr_ref_tests.cpp +++ b/tests/mr/device/mr_ref_tests.cpp @@ -16,7 +16,9 @@ #include "mr_ref_test.hpp" +#include #include +#include #include @@ -63,6 +65,8 @@ TEST(DefaultTest, CurrentDeviceResourceIsCUDA) TEST(DefaultTest, UseCurrentDeviceResource) { test_get_current_device_resource(); } +TEST(DefaultTest, UseCurrentDeviceResourceRef) { test_get_current_device_resource_ref(); } + TEST(DefaultTest, GetCurrentDeviceResource) { auto* mr = rmm::mr::get_current_device_resource(); @@ -70,25 +74,33 @@ TEST(DefaultTest, GetCurrentDeviceResource) EXPECT_TRUE(mr->is_equal(rmm::mr::cuda_memory_resource{})); } -// Disable until we support resource_ref with set_current_device_resource -/*TEST_P(mr_ref_test, SetCurrentDeviceResource) +TEST(DefaultTest, GetCurrentDeviceResourceRef) +{ + auto mr = rmm::mr::get_current_device_resource_ref(); + EXPECT_EQ(mr, rmm::device_async_resource_ref{rmm::mr::detail::initial_resource()}); +} + +TEST_P(mr_ref_test, SetCurrentDeviceResourceRef) { - rmm::mr::device_memory_resource* old{}; - old = rmm::mr::set_current_device_resource(this->mr.get()); - EXPECT_NE(nullptr, old); + rmm::mr::cuda_memory_resource cuda_mr{}; + auto cuda_ref = rmm::device_async_resource_ref{cuda_mr}; + + rmm::mr::set_current_device_resource_ref(cuda_ref); + auto old = rmm::mr::set_current_device_resource_ref(this->ref); // old mr should equal a cuda mr - EXPECT_TRUE(old->is_equal(rmm::mr::cuda_memory_resource{})); + EXPECT_EQ(old, cuda_ref); // current dev resource should equal this resource - EXPECT_TRUE(this->mr->is_equal(*rmm::mr::get_current_device_resource())); + EXPECT_EQ(this->ref, rmm::mr::get_current_device_resource_ref()); - test_get_current_device_resource(); + test_get_current_device_resource_ref(); - // setting to `nullptr` should reset to initial cuda resource - rmm::mr::set_current_device_resource(nullptr); - EXPECT_TRUE(rmm::mr::get_current_device_resource()->is_equal(rmm::mr::cuda_memory_resource{})); -}*/ + // Resetting should reset to initial cuda resource + rmm::mr::reset_current_device_resource_ref(); + EXPECT_EQ(rmm::device_async_resource_ref{rmm::mr::detail::initial_resource()}, + rmm::mr::get_current_device_resource_ref()); +} TEST_P(mr_ref_test, SelfEquality) { EXPECT_TRUE(this->ref == this->ref); } From c7d443d3c015677ddc8e25bfaf45380377174cf1 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Tue, 20 Aug 2024 22:37:34 +1000 Subject: [PATCH 04/24] Apply suggestions from code review Co-authored-by: Lawrence Mitchell --- include/rmm/mr/device/per_device_resource.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/rmm/mr/device/per_device_resource.hpp b/include/rmm/mr/device/per_device_resource.hpp index 15823cfe9..e9fbc74b6 100644 --- a/include/rmm/mr/device/per_device_resource.hpp +++ b/include/rmm/mr/device/per_device_resource.hpp @@ -305,7 +305,6 @@ inline device_async_resource_ref get_per_device_resource_ref(cuda_device_id devi // If a resource was never set for `id`, set to the initial resource auto const found = map.find(device_id.value()); if (found == map.end()) { - // auto item = map.insert({device_id.value(), detail::initial_resource()}); return item.first->second; } @@ -342,11 +341,10 @@ inline device_async_resource_ref set_per_device_resource_ref( std::lock_guard 
lock{detail::ref_map_lock()}; auto& map = detail::get_ref_map(); auto const old_itr = map.find(device_id.value()); - // If a resource didn't previously exist for `id`, return pointer to initial_resource + // If a resource didn't previously exist for `device_id`, return pointer to initial_resource // Note: because resource_ref is not default-constructible, we can't use std::map::operator[] if (old_itr == map.end()) { map.insert({device_id.value(), new_resource_ref}); - std::cout << "returning initial resource in set_per_device_resource_ref\n"; return device_async_resource_ref{detail::initial_resource()}; } From 3cd6917cbb44f734d98d6a598a37e00d7c6739cd Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 21 Aug 2024 10:37:17 +1000 Subject: [PATCH 05/24] RMM_EXPORT ref_map_lock() Co-authored-by: Lawrence Mitchell --- include/rmm/mr/device/per_device_resource.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rmm/mr/device/per_device_resource.hpp b/include/rmm/mr/device/per_device_resource.hpp index e9fbc74b6..2e963b5fd 100644 --- a/include/rmm/mr/device/per_device_resource.hpp +++ b/include/rmm/mr/device/per_device_resource.hpp @@ -132,7 +132,7 @@ RMM_EXPORT inline auto& get_map() /** * @briefreturn{Reference to the lock} */ -inline std::mutex& ref_map_lock() +RMM_EXPORT inline std::mutex& ref_map_lock() { static std::mutex ref_map_lock; return ref_map_lock; From adbf7f666c8d2a8105b1f9deaafeb4baa5d02ac8 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 21 Aug 2024 10:39:01 +1000 Subject: [PATCH 06/24] Apply suggestions from code review Co-authored-by: Bradley Dice --- include/rmm/mr/device/per_device_resource.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/rmm/mr/device/per_device_resource.hpp b/include/rmm/mr/device/per_device_resource.hpp index 2e963b5fd..a2ca1819e 100644 --- a/include/rmm/mr/device/per_device_resource.hpp +++ b/include/rmm/mr/device/per_device_resource.hpp @@ -277,7 +277,7 @@ inline device_memory_resource* set_current_device_resource(device_memory_resourc } /** - * @brief Get the `device_async_resource_ref for the specified device. + * @brief Get the `device_async_resource_ref` for the specified device. * * Returns a `device_async_resource_ref` for the specified device. The initial resource_ref * references a `cuda_memory_resource`. @@ -287,7 +287,7 @@ inline device_memory_resource* set_current_device_resource(device_memory_resourc * * This function is thread-safe with respect to concurrent calls to `set_per_device_resource_ref`, * `get_per_device_resource_ref`, `get_current_device_resource_ref`, - * `set_current_device_resource_ref` and `reset_current_device_resource_ref. Concurrent calls to any + * `set_current_device_resource_ref` and `reset_current_device_resource_ref`. Concurrent calls to any * of these functions will result in a valid state, but the order of execution is undefined. * * @note The returned `device_async_resource_ref` should only be used when CUDA device `device_id` From b5a831ab4f4541dd9d19e53579a4fc130d292e7a Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 21 Aug 2024 02:16:49 +0000 Subject: [PATCH 07/24] Explicit RMM_EXPORT on all functions that return a function-local static. 
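Context for this change: with hidden symbol visibility, an inline function that returns a
function-local static can be instantiated separately in every shared library that includes the
header, so each library sees its own mutex or map (see
https://github.com/rapidsai/rmm/issues/826, already noted for get_map()). Marking the accessor
RMM_EXPORT gives it default visibility so the whole process shares one instance. A minimal
standalone sketch of the pattern follows; the EXAMPLE_EXPORT macro is illustrative and is not
RMM's actual RMM_EXPORT definition.

    // Illustrative accessor for a function-local static with default visibility,
    // so all shared libraries in a process share the same instance.
    #include <mutex>

    #if defined(__GNUC__) || defined(__clang__)
    #define EXAMPLE_EXPORT __attribute__((visibility("default")))
    #else
    #define EXAMPLE_EXPORT
    #endif

    EXAMPLE_EXPORT inline std::mutex& example_lock()
    {
      static std::mutex lock;  // one instance per process, not one per shared library
      return lock;
    }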
--- include/rmm/mr/device/per_device_resource.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/rmm/mr/device/per_device_resource.hpp b/include/rmm/mr/device/per_device_resource.hpp index a2ca1819e..7d747395f 100644 --- a/include/rmm/mr/device/per_device_resource.hpp +++ b/include/rmm/mr/device/per_device_resource.hpp @@ -104,7 +104,7 @@ namespace detail { * * @return Pointer to the static cuda_memory_resource used as the initial, default resource */ -inline device_memory_resource* initial_resource() +RMM_EXPORT inline device_memory_resource* initial_resource() { static cuda_memory_resource mr{}; return &mr; @@ -113,7 +113,7 @@ inline device_memory_resource* initial_resource() /** * @briefreturn{Reference to the lock} */ -inline std::mutex& map_lock() +RMM_EXPORT inline std::mutex& map_lock() { static std::mutex map_lock; return map_lock; @@ -287,8 +287,8 @@ inline device_memory_resource* set_current_device_resource(device_memory_resourc * * This function is thread-safe with respect to concurrent calls to `set_per_device_resource_ref`, * `get_per_device_resource_ref`, `get_current_device_resource_ref`, - * `set_current_device_resource_ref` and `reset_current_device_resource_ref`. Concurrent calls to any - * of these functions will result in a valid state, but the order of execution is undefined. + * `set_current_device_resource_ref` and `reset_current_device_resource_ref`. Concurrent calls to + * any of these functions will result in a valid state, but the order of execution is undefined. * * @note The returned `device_async_resource_ref` should only be used when CUDA device `device_id` * is the current device (e.g. set using `cudaSetDevice()`). The behavior of a From 812d08d115203e78705f01d252182db7753963ec Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 21 Aug 2024 02:20:46 +0000 Subject: [PATCH 08/24] Doc: ref and ptr versions are not interchangeable. --- include/rmm/mr/device/per_device_resource.hpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/rmm/mr/device/per_device_resource.hpp b/include/rmm/mr/device/per_device_resource.hpp index 7d747395f..6b78ee463 100644 --- a/include/rmm/mr/device/per_device_resource.hpp +++ b/include/rmm/mr/device/per_device_resource.hpp @@ -60,8 +60,10 @@ * `device_memory_resource`. The `set_per_device_resource_ref`, `get_per_device_resource_ref`, * `get_current_device_resource_ref`, `set_current_device_resource_ref`, and * `reset_current_device_resource_ref` functions provide the same functionality as their - * `device_memory_resource` counterparts, but with `device_async_resource_ref` objects. - * The raw pointer versions are expected to be deprecated and removed in a future release. + * `device_memory_resource` counterparts, but with `device_async_resource_ref` objects. The raw + * pointer versions and the `resource_ref` versions maintain distinc state and are not + * interchangeable. The raw pointer versions are expected to be deprecated and removed in a future + * release. * * Creating a device_memory_resource for each device requires care to set the current device * before creating each resource, and to maintain the lifetime of the resources as long as they From 1c43d63da1b6776a83ec9473b7f2871299af4916 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 22 Aug 2024 03:12:40 +0000 Subject: [PATCH 09/24] Use resource_ref in MR adaptors and related tests. 
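The adaptor changes below replace stored Upstream* pointers with device_async_resource_ref.
An untested usage sketch of the new constructor form, using limiting_resource_adaptor as an
example (header paths are assumed from RMM's public layout, and a GPU is required to actually
run it):

    #include <rmm/mr/device/cuda_memory_resource.hpp>
    #include <rmm/mr/device/limiting_resource_adaptor.hpp>
    #include <rmm/resource_ref.hpp>

    int main()
    {
      rmm::mr::cuda_memory_resource cuda_mr{};
      // The adaptor can now be constructed from a device_async_resource_ref instead of a raw
      // pointer. The caller still owns cuda_mr and must keep it alive while the adaptor is used.
      rmm::mr::limiting_resource_adaptor<rmm::mr::cuda_memory_resource> limited{
        rmm::device_async_resource_ref{cuda_mr}, 1 << 20};

      void* ptr = limited.allocate(256);
      limited.deallocate(ptr, 256);
      return 0;
    }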
--- .../mr/device/aligned_resource_adaptor.hpp | 53 +++++++++++++------ .../rmm/mr/device/arena_memory_resource.hpp | 35 ++++++++++-- .../mr/device/callback_memory_resource.hpp | 13 ++--- include/rmm/mr/device/detail/arena.hpp | 31 ++++++----- .../failure_callback_resource_adaptor.hpp | 42 ++++++++++----- .../mr/device/limiting_resource_adaptor.hpp | 42 ++++++++++----- .../rmm/mr/device/pool_memory_resource.hpp | 41 +++++++++++--- .../mr/device/statistics_resource_adaptor.hpp | 27 +++++----- .../mr/device/thrust_allocator_adaptor.hpp | 2 +- .../mr/device/tracking_resource_adaptor.hpp | 37 ++++++++----- tests/container_multidevice_tests.cu | 24 ++++----- tests/cuda_stream_tests.cpp | 2 +- tests/device_check_resource_adaptor.hpp | 16 +++--- tests/mock_resource.hpp | 5 ++ tests/mr/device/aligned_mr_tests.cpp | 4 +- tests/mr/device/arena_mr_tests.cpp | 37 +++++++------ tests/mr/device/callback_mr_tests.cpp | 37 +++++++------ tests/mr/device/limiting_mr_tests.cpp | 14 ++--- tests/mr/device/pool_mr_tests.cpp | 40 +++++--------- tests/mr/device/statistics_mr_tests.cpp | 20 +++---- tests/mr/device/tracking_mr_tests.cpp | 24 ++++----- tests/mr/host/mr_ref_tests.cpp | 3 ++ 22 files changed, 335 insertions(+), 214 deletions(-) diff --git a/include/rmm/mr/device/aligned_resource_adaptor.hpp b/include/rmm/mr/device/aligned_resource_adaptor.hpp index a91056dfa..088d28413 100644 --- a/include/rmm/mr/device/aligned_resource_adaptor.hpp +++ b/include/rmm/mr/device/aligned_resource_adaptor.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -65,12 +66,36 @@ class aligned_resource_adaptor final : public device_memory_resource { * @param alignment_threshold Only allocations with a size larger than or equal to this threshold * are aligned. */ - explicit aligned_resource_adaptor(Upstream* upstream, + explicit aligned_resource_adaptor(device_async_resource_ref upstream, std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT, std::size_t alignment_threshold = default_alignment_threshold) : upstream_{upstream}, alignment_{alignment}, alignment_threshold_{alignment_threshold} { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + RMM_EXPECTS(rmm::is_supported_alignment(alignment), + "Allocation alignment is not a power of 2."); + } + + /** + * @brief Construct an aligned resource adaptor using `upstream` to satisfy allocation requests. + * + * @throws rmm::logic_error if `upstream == nullptr` + * @throws rmm::logic_error if `allocation_alignment` is not a power of 2 + * + * @param upstream The resource used for allocating/deallocating device memory. + * @param alignment The size used for allocation alignment. + * @param alignment_threshold Only allocations with a size larger than or equal to this threshold + * are aligned. 
+ */ + explicit aligned_resource_adaptor(Upstream* upstream, + std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT, + std::size_t alignment_threshold = default_alignment_threshold) + : upstream_{[upstream]() { + RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + return device_async_resource_ref{*upstream}; + }()}, + alignment_{alignment}, + alignment_threshold_{alignment_threshold} + { RMM_EXPECTS(rmm::is_supported_alignment(alignment), "Allocation alignment is not a power of 2."); } @@ -90,11 +115,6 @@ class aligned_resource_adaptor final : public device_memory_resource { return upstream_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } - /** * @brief The default alignment used by the adaptor. */ @@ -104,8 +124,8 @@ class aligned_resource_adaptor final : public device_memory_resource { using lock_guard = std::lock_guard; /** - * @brief Allocates memory of size at least `bytes` using the upstream resource with the specified - * alignment. + * @brief Allocates memory of size at least `bytes` using the upstream resource with the + * specified alignment. * * @throws rmm::bad_alloc if the requested allocation could not be fulfilled * by the upstream resource. @@ -117,10 +137,10 @@ class aligned_resource_adaptor final : public device_memory_resource { void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { - return upstream_->allocate(bytes, stream); + return get_upstream_resource().allocate_async(bytes, 1, stream); } auto const size = upstream_allocation_size(bytes); - void* pointer = upstream_->allocate(size, stream); + void* pointer = get_upstream_resource().allocate_async(size, 1, stream); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) auto const address = reinterpret_cast(pointer); auto const aligned_address = rmm::align_up(address, alignment_); @@ -143,7 +163,7 @@ class aligned_resource_adaptor final : public device_memory_resource { void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { - upstream_->deallocate(ptr, bytes, stream); + get_upstream_resource().deallocate_async(ptr, bytes, 1, stream); } else { { lock_guard lock(mtx_); @@ -153,7 +173,7 @@ class aligned_resource_adaptor final : public device_memory_resource { pointers_.erase(iter); } } - upstream_->deallocate(ptr, upstream_allocation_size(bytes), stream); + get_upstream_resource().deallocate_async(ptr, upstream_allocation_size(bytes), 1, stream); } } @@ -174,8 +194,8 @@ class aligned_resource_adaptor final : public device_memory_resource { } /** - * @brief Calculate the allocation size needed from upstream to account for alignments of both the - * size and the base pointer. + * @brief Calculate the allocation size needed from upstream to account for alignments of both + * the size and the base pointer. * * @param bytes The requested allocation size. * @return Allocation size needed from upstream to align both the size and the base pointer. 
@@ -186,7 +206,8 @@ class aligned_resource_adaptor final : public device_memory_resource { return aligned_size + alignment_ - rmm::CUDA_ALLOCATION_ALIGNMENT; } - Upstream* upstream_; ///< The upstream resource used for satisfying allocation requests + /// The upstream resource used for satisfying allocation requests + device_async_resource_ref upstream_{rmm::mr::get_current_device_resource_ref()}; std::unordered_map pointers_; ///< Map of aligned pointers to upstream pointers. std::size_t alignment_; ///< The size used for allocation alignment std::size_t alignment_threshold_; ///< The size above which allocations should be aligned diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index b274e0c18..0860a46d5 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -80,6 +81,26 @@ namespace rmm::mr { template class arena_memory_resource final : public device_memory_resource { public: + /** + * @brief Construct an `arena_memory_resource`. + * + * @param upstream_mr The memory resource from which to allocate blocks for the global arena. + * @param arena_size Size in bytes of the global arena. Defaults to half of the available + * memory on the current device. + * @param dump_log_on_failure If true, dump memory log when running out of memory. + */ + explicit arena_memory_resource(device_async_resource_ref upstream_mr, + std::optional arena_size = std::nullopt, + bool dump_log_on_failure = false) + : global_arena_{upstream_mr, arena_size}, dump_log_on_failure_{dump_log_on_failure} + { + if (dump_log_on_failure_) { + logger_ = spdlog::basic_logger_mt("arena_memory_dump", "rmm_arena_memory_dump.log"); + // Set the level to `debug` for more detailed output. + logger_->set_level(spdlog::level::info); + } + } + /** * @brief Construct an `arena_memory_resource`. * @@ -93,7 +114,13 @@ class arena_memory_resource final : public device_memory_resource { explicit arena_memory_resource(Upstream* upstream_mr, std::optional arena_size = std::nullopt, bool dump_log_on_failure = false) - : global_arena_{upstream_mr, arena_size}, dump_log_on_failure_{dump_log_on_failure} + : global_arena_{[upstream_mr]() { + RMM_EXPECTS(upstream_mr != nullptr, + "Unexpected null upstream memory resource."); + return device_async_resource_ref{*upstream_mr}; + }(), + arena_size}, + dump_log_on_failure_{dump_log_on_failure} { if (dump_log_on_failure_) { logger_ = spdlog::basic_logger_mt("arena_memory_dump", "rmm_arena_memory_dump.log"); @@ -111,8 +138,8 @@ class arena_memory_resource final : public device_memory_resource { arena_memory_resource& operator=(arena_memory_resource&&) noexcept = delete; private: - using global_arena = rmm::mr::detail::arena::global_arena; - using arena = rmm::mr::detail::arena::arena; + using global_arena = rmm::mr::detail::arena::global_arena; + using arena = rmm::mr::detail::arena::arena; /** * @brief Allocates memory of size at least `bytes`. 
@@ -272,7 +299,7 @@ class arena_memory_resource final : public device_memory_resource { std::unique_lock lock(map_mtx_); auto thread_arena = std::make_shared(global_arena_); thread_arenas_.emplace(thread_id, thread_arena); - thread_local detail::arena::arena_cleaner cleaner{thread_arena}; + thread_local detail::arena::arena_cleaner cleaner{thread_arena}; return *thread_arena; } } diff --git a/include/rmm/mr/device/callback_memory_resource.hpp b/include/rmm/mr/device/callback_memory_resource.hpp index 1483925de..74af8679a 100644 --- a/include/rmm/mr/device/callback_memory_resource.hpp +++ b/include/rmm/mr/device/callback_memory_resource.hpp @@ -84,12 +84,13 @@ class callback_memory_resource final : public device_memory_resource { * It is the caller's responsibility to maintain the lifetime of the pointed-to data * for the duration of the lifetime of the `callback_memory_resource`. */ - callback_memory_resource(allocate_callback_t allocate_callback, - deallocate_callback_t deallocate_callback, - void* allocate_callback_arg = nullptr, - void* deallocate_callback_arg = nullptr) noexcept - : allocate_callback_(allocate_callback), - deallocate_callback_(deallocate_callback), + callback_memory_resource( + allocate_callback_t allocate_callback, + deallocate_callback_t deallocate_callback, + void* allocate_callback_arg = nullptr, // NOLINT(bugprone-easily-swappable-parameters) + void* deallocate_callback_arg = nullptr) noexcept + : allocate_callback_(std::move(allocate_callback)), + deallocate_callback_(std::move(deallocate_callback)), allocate_callback_arg_(allocate_callback_arg), deallocate_callback_arg_(deallocate_callback_arg) { diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index c7965ca34..324b9c928 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -492,7 +493,6 @@ inline auto max_free_size(std::set const& superblocks) * @tparam Upstream Memory resource to use for allocating the arena. Implements * rmm::mr::device_memory_resource interface. */ -template class global_arena final { public: /** @@ -504,10 +504,9 @@ class global_arena final { * @param arena_size Size in bytes of the global arena. Defaults to half of the available memory * on the current device. */ - global_arena(Upstream* upstream_mr, std::optional arena_size) + global_arena(device_async_resource_ref upstream_mr, std::optional arena_size) : upstream_mr_{upstream_mr} { - RMM_EXPECTS(nullptr != upstream_mr_, "Unexpected null upstream pointer."); auto const size = rmm::align_down(arena_size.value_or(default_size()), rmm::CUDA_ALLOCATION_ALIGNMENT); RMM_EXPECTS(size >= superblock::minimum_size, @@ -528,7 +527,7 @@ class global_arena final { ~global_arena() { std::lock_guard lock(mtx_); - upstream_mr_->deallocate(upstream_block_.pointer(), upstream_block_.size()); + upstream_mr_.deallocate(upstream_block_.pointer(), upstream_block_.size()); } /** @@ -537,7 +536,7 @@ class global_arena final { * @param size The size in bytes of the allocation. * @return bool True if the allocation should be handled by the global arena. */ - bool handles(std::size_t size) const { return size > superblock::minimum_size; } + static bool handles(std::size_t size) { return size > superblock::minimum_size; } /** * @brief Acquire a superblock that can fit a block of the given size. @@ -608,7 +607,7 @@ class global_arena final { * @param stream Stream on which to perform deallocation. 
* @return bool true if the allocation is found, false otherwise. */ - bool deallocate(void* ptr, std::size_t size, cuda_stream_view stream) + bool deallocate_async(void* ptr, std::size_t size, cuda_stream_view stream) { RMM_LOGGING_ASSERT(handles(size)); stream.synchronize_no_throw(); @@ -690,7 +689,7 @@ class global_arena final { * @brief Default size of the global arena if unspecified. * @return the default global arena size. */ - constexpr std::size_t default_size() const + static std::size_t default_size() { auto const [free, total] = rmm::available_device_memory(); return free / 2; @@ -703,7 +702,7 @@ class global_arena final { */ void initialize(std::size_t size) { - upstream_block_ = {upstream_mr_->allocate(size), size}; + upstream_block_ = {upstream_mr_.allocate(size), size}; superblocks_.emplace(upstream_block_.pointer(), size); } @@ -775,7 +774,7 @@ class global_arena final { } /// The upstream resource to allocate memory from. - Upstream* upstream_mr_; + device_async_resource_ref upstream_mr_; /// Block allocated from upstream so that it can be quickly freed. block upstream_block_; /// Address-ordered set of superblocks. @@ -793,7 +792,6 @@ class global_arena final { * @tparam Upstream Memory resource to use for allocating the global arena. Implements * rmm::mr::device_memory_resource interface. */ -template class arena { public: /** @@ -801,7 +799,7 @@ class arena { * * @param global_arena The global arena from which to allocate superblocks. */ - explicit arena(global_arena& global_arena) : global_arena_{global_arena} {} + explicit arena(global_arena& global_arena) : global_arena_{global_arena} {} // Disable copy (and move) semantics. arena(arena const&) = delete; @@ -835,7 +833,9 @@ class arena { */ bool deallocate(void* ptr, std::size_t size, cuda_stream_view stream) { - if (global_arena_.handles(size) && global_arena_.deallocate(ptr, size, stream)) { return true; } + if (global_arena::handles(size) && global_arena_.deallocate_async(ptr, size, stream)) { + return true; + } return deallocate(ptr, size); } @@ -959,7 +959,7 @@ class arena { } /// The global arena to allocate superblocks from. - global_arena& global_arena_; + global_arena& global_arena_; /// Acquired superblocks. std::set superblocks_; /// Mutex for exclusive lock. @@ -974,10 +974,9 @@ class arena { * @tparam Upstream Memory resource to use for allocating the global arena. Implements * rmm::mr::device_memory_resource interface. */ -template class arena_cleaner { public: - explicit arena_cleaner(std::shared_ptr> const& arena) : arena_(arena) {} + explicit arena_cleaner(std::shared_ptr const& arena) : arena_(arena) {} // Disable copy (and move) semantics. arena_cleaner(arena_cleaner const&) = delete; @@ -995,7 +994,7 @@ class arena_cleaner { private: /// A non-owning pointer to the arena that may need cleaning. 
- std::weak_ptr> arena_; + std::weak_ptr arena_; }; } // namespace rmm::mr::detail::arena diff --git a/include/rmm/mr/device/failure_callback_resource_adaptor.hpp b/include/rmm/mr/device/failure_callback_resource_adaptor.hpp index 53bc572c2..f1a2178ee 100644 --- a/include/rmm/mr/device/failure_callback_resource_adaptor.hpp +++ b/include/rmm/mr/device/failure_callback_resource_adaptor.hpp @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -79,7 +80,7 @@ using failure_callback_t = std::function; * { * bool retried{false}; * failure_callback_adaptor mr{ - * rmm::mr::get_current_device_resource(), failure_handler, &retried + * rmm::mr::get_current_device_resource_ref(), failure_handler, &retried * }; * rmm::mr::set_current_device_resource(&mr); * } @@ -103,12 +104,33 @@ class failure_callback_resource_adaptor final : public device_memory_resource { * @param callback Callback function @see failure_callback_t * @param callback_arg Extra argument passed to `callback` */ - failure_callback_resource_adaptor(Upstream* upstream, + failure_callback_resource_adaptor(device_async_resource_ref upstream, failure_callback_t callback, void* callback_arg) : upstream_{upstream}, callback_{std::move(callback)}, callback_arg_{callback_arg} { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + } + + /** + * @brief Construct a new `failure_callback_resource_adaptor` using `upstream` to satisfy + * allocation requests. + * + * @throws rmm::logic_error if `upstream == nullptr` + * + * @param upstream The resource used for allocating/deallocating device memory + * @param callback Callback function @see failure_callback_t + * @param callback_arg Extra argument passed to `callback` + */ + failure_callback_resource_adaptor(Upstream* upstream, + failure_callback_t callback, + void* callback_arg) + : upstream_{[upstream]() { + RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + return device_async_resource_ref{*upstream}; + }()}, + callback_{std::move(callback)}, + callback_arg_{callback_arg} + { } failure_callback_resource_adaptor() = delete; @@ -128,11 +150,6 @@ class failure_callback_resource_adaptor final : public device_memory_resource { return upstream_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } - private: /** * @brief Allocates memory of size at least `bytes` using the upstream @@ -151,7 +168,7 @@ class failure_callback_resource_adaptor final : public device_memory_resource { while (true) { try { - ret = upstream_->allocate(bytes, stream); + ret = get_upstream_resource().allocate_async(bytes, stream); break; } catch (exception_type const& e) { if (!callback_(bytes, callback_arg_)) { throw; } @@ -169,7 +186,7 @@ class failure_callback_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - upstream_->deallocate(ptr, bytes, stream); + get_upstream_resource().deallocate_async(ptr, bytes, stream); } /** @@ -183,11 +200,12 @@ class failure_callback_resource_adaptor final : public device_memory_resource { { if (this == &other) { return true; } auto cast = dynamic_cast const*>(&other); - if (cast == nullptr) { return upstream_->is_equal(other); } + if (cast == nullptr) { return false; } return get_upstream_resource() == cast->get_upstream_resource(); } - Upstream* upstream_; // the upstream resource used for satisfying allocation requests + // the 
upstream resource used for satisfying allocation requests + device_async_resource_ref upstream_{rmm::mr::get_current_device_resource_ref()}; failure_callback_t callback_; void* callback_arg_; }; diff --git a/include/rmm/mr/device/limiting_resource_adaptor.hpp b/include/rmm/mr/device/limiting_resource_adaptor.hpp index 76a5a31c1..933682ead 100644 --- a/include/rmm/mr/device/limiting_resource_adaptor.hpp +++ b/include/rmm/mr/device/limiting_resource_adaptor.hpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -53,7 +54,7 @@ class limiting_resource_adaptor final : public device_memory_resource { * @param allocation_limit Maximum memory allowed for this allocator * @param alignment Alignment in bytes for the start of each allocated buffer */ - limiting_resource_adaptor(Upstream* upstream, + limiting_resource_adaptor(device_async_resource_ref upstream, std::size_t allocation_limit, std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT) : allocation_limit_{allocation_limit}, @@ -61,7 +62,29 @@ class limiting_resource_adaptor final : public device_memory_resource { alignment_(alignment), upstream_{upstream} { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + } + + /** + * @brief Construct a new limiting resource adaptor using `upstream` to satisfy + * allocation requests and limiting the total allocation amount possible. + * + * @throws rmm::logic_error if `upstream == nullptr` + * + * @param upstream The resource used for allocating/deallocating device memory + * @param allocation_limit Maximum memory allowed for this allocator + * @param alignment Alignment in bytes for the start of each allocated buffer + */ + limiting_resource_adaptor(Upstream* upstream, + std::size_t allocation_limit, + std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT) + : allocation_limit_{allocation_limit}, + allocated_bytes_(0), + alignment_(alignment), + upstream_{[upstream]() { + RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + return device_async_resource_ref{*upstream}; + }()} + { } limiting_resource_adaptor() = delete; @@ -81,11 +104,6 @@ class limiting_resource_adaptor final : public device_memory_resource { return upstream_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } - /** * @brief Query the number of bytes that have been allocated. Note that * this can not be used to know how large of an allocation is possible due @@ -126,7 +144,7 @@ class limiting_resource_adaptor final : public device_memory_resource { auto const old = allocated_bytes_.fetch_add(proposed_size); if (old + proposed_size <= allocation_limit_) { try { - return upstream_->allocate(bytes, stream); + return get_upstream_resource().allocate_async(bytes, stream); } catch (...) 
{ allocated_bytes_ -= proposed_size; throw; @@ -147,7 +165,7 @@ class limiting_resource_adaptor final : public device_memory_resource { void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { std::size_t allocated_size = rmm::align_up(bytes, alignment_); - upstream_->deallocate(ptr, bytes, stream); + get_upstream_resource().deallocate_async(ptr, bytes, stream); allocated_bytes_ -= allocated_size; } @@ -162,7 +180,7 @@ class limiting_resource_adaptor final : public device_memory_resource { { if (this == &other) { return true; } auto const* cast = dynamic_cast const*>(&other); - if (cast == nullptr) { return upstream_->is_equal(other); } + if (cast == nullptr) { return false; } return get_upstream_resource() == cast->get_upstream_resource(); } @@ -175,8 +193,8 @@ class limiting_resource_adaptor final : public device_memory_resource { // todo: should be some way to ask the upstream... std::size_t alignment_; - Upstream* upstream_; ///< The upstream resource used for satisfying - ///< allocation requests + // The upstream resource used for satisfying allocation requests + device_async_resource_ref upstream_{rmm::mr::get_current_device_resource_ref()}; }; /** diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index a3a972904..5e76aaf74 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -112,6 +113,34 @@ class pool_memory_resource final friend class detail::stream_ordered_memory_resource, detail::coalescing_free_list>; + /** + * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using + * `upstream_mr`. + * + * @throws rmm::logic_error if `upstream_mr == nullptr` + * @throws rmm::logic_error if `initial_pool_size` is not aligned to a multiple of + * pool_memory_resource::allocation_alignment bytes. + * @throws rmm::logic_error if `maximum_pool_size` is neither the default nor aligned to a + * multiple of pool_memory_resource::allocation_alignment bytes. + * + * @param upstream_mr The memory_resource from which to allocate blocks for the pool. + * @param initial_pool_size Minimum size, in bytes, of the initial pool. + * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all + * of the available from the upstream resource. + */ + explicit pool_memory_resource(device_async_resource_ref upstream_mr, + std::size_t initial_pool_size, + std::optional maximum_pool_size = std::nullopt) + : upstream_mr_{upstream_mr} + { + RMM_EXPECTS(rmm::is_aligned(initial_pool_size, rmm::CUDA_ALLOCATION_ALIGNMENT), + "Error, Initial pool size required to be a multiple of 256 bytes"); + RMM_EXPECTS(rmm::is_aligned(maximum_pool_size.value_or(0), rmm::CUDA_ALLOCATION_ALIGNMENT), + "Error, Maximum pool size required to be a multiple of 256 bytes"); + + initialize_pool(initial_pool_size, maximum_pool_size); + } + /** * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using * `upstream_mr`. 
@@ -132,7 +161,7 @@ class pool_memory_resource final std::optional maximum_pool_size = std::nullopt) : upstream_mr_{[upstream_mr]() { RMM_EXPECTS(nullptr != upstream_mr, "Unexpected null upstream pointer."); - return upstream_mr; + return device_async_resource_ref{*upstream_mr}; }()} { RMM_EXPECTS(rmm::is_aligned(initial_pool_size, rmm::CUDA_ALLOCATION_ALIGNMENT), @@ -182,16 +211,11 @@ class pool_memory_resource final /** * @briefreturn{rmm::device_async_resource_ref to the upstream resource} */ - [[nodiscard]] rmm::device_async_resource_ref get_upstream_resource() const noexcept + [[nodiscard]] device_async_resource_ref get_upstream_resource() const noexcept { return upstream_mr_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_mr_; } - /** * @brief Computes the size of the current pool * @@ -464,7 +488,8 @@ class pool_memory_resource final } private: - Upstream* upstream_mr_; // The "heap" to allocate the pool from + // The "heap" to allocate the pool from + device_async_resource_ref upstream_mr_; std::size_t current_pool_size_{}; std::optional maximum_pool_size_{}; diff --git a/include/rmm/mr/device/statistics_resource_adaptor.hpp b/include/rmm/mr/device/statistics_resource_adaptor.hpp index cbf1b87d2..fcb364ea8 100644 --- a/include/rmm/mr/device/statistics_resource_adaptor.hpp +++ b/include/rmm/mr/device/statistics_resource_adaptor.hpp @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include @@ -111,6 +112,8 @@ class statistics_resource_adaptor final : public device_memory_resource { } }; + statistics_resource_adaptor(device_async_resource_ref upstream) : upstream_{upstream} {} + /** * @brief Construct a new statistics resource adaptor using `upstream` to satisfy * allocation requests. 
@@ -119,9 +122,12 @@ class statistics_resource_adaptor final : public device_memory_resource { * * @param upstream The resource used for allocating/deallocating device memory */ - statistics_resource_adaptor(Upstream* upstream) : upstream_{upstream} + statistics_resource_adaptor(Upstream* upstream) + : upstream_{[upstream]() { + RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + return device_async_resource_ref{*upstream}; + }()} { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); } statistics_resource_adaptor() = delete; @@ -141,11 +147,6 @@ class statistics_resource_adaptor final : public device_memory_resource { return upstream_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } - /** * @brief Returns a `counter` struct for this adaptor containing the current, * peak, and total number of allocated bytes for this @@ -224,7 +225,7 @@ class statistics_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - void* ptr = upstream_->allocate(bytes, stream); + void* ptr = upstream_.allocate_async(bytes, stream); // increment the stats { @@ -247,7 +248,7 @@ class statistics_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - upstream_->deallocate(ptr, bytes, stream); + upstream_.deallocate_async(ptr, bytes, stream); { write_lock_t lock(mtx_); @@ -269,7 +270,7 @@ class statistics_resource_adaptor final : public device_memory_resource { { if (this == &other) { return true; } auto cast = dynamic_cast const*>(&other); - if (cast == nullptr) { return upstream_->is_equal(other); } + if (cast == nullptr) { return false; } return get_upstream_resource() == cast->get_upstream_resource(); } @@ -277,14 +278,14 @@ class statistics_resource_adaptor final : public device_memory_resource { // Invariant: the stack always contains at least one entry std::stack> counter_stack_{{std::make_pair(counter{}, counter{})}}; std::shared_mutex mutable mtx_; // mutex for thread safe access to allocations_ - Upstream* upstream_; // the upstream resource used for satisfying allocation requests + // the upstream resource used for satisfying allocation requests + device_async_resource_ref upstream_{rmm::mr::get_current_device_resource_ref()}; }; /** * @brief Convenience factory to return a `statistics_resource_adaptor` around the * upstream resource `upstream`. * - * @tparam Upstream Type of the upstream `device_memory_resource`. 
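
Since `get_upstream()` is removed above, callers that previously held the raw upstream pointer would instead hold a non-owning ref. A hedged migration sketch (size and stream values are arbitrary), using the same two-argument `allocate_async`/`deallocate_async` calls the adaptors themselves use:

    rmm::mr::statistics_resource_adaptor<rmm::mr::device_memory_resource> stats{
      rmm::mr::get_current_device_resource_ref()};

    // Before: Upstream* raw = stats.get_upstream(); raw->allocate(bytes, stream);
    // After: hold a non-owning device_async_resource_ref instead.
    rmm::device_async_resource_ref upstream = stats.get_upstream_resource();
    rmm::cuda_stream_view stream{};
    void* ptr = upstream.allocate_async(256, stream);
    upstream.deallocate_async(ptr, 256, stream);
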
* @param upstream Pointer to the upstream resource * @return The new statistics resource adaptor */ @@ -295,7 +296,7 @@ template "instead.")]] statistics_resource_adaptor make_statistics_adaptor(Upstream* upstream) { - return statistics_resource_adaptor{upstream}; + return statistics_resource_adaptor{upstream}; } /** @} */ // end of group diff --git a/include/rmm/mr/device/thrust_allocator_adaptor.hpp b/include/rmm/mr/device/thrust_allocator_adaptor.hpp index 3bfd65996..bb60ea522 100644 --- a/include/rmm/mr/device/thrust_allocator_adaptor.hpp +++ b/include/rmm/mr/device/thrust_allocator_adaptor.hpp @@ -148,7 +148,7 @@ class thrust_allocator : public thrust::device_malloc_allocator { private: cuda_stream_view _stream{}; - rmm::device_async_resource_ref _mr{rmm::mr::get_current_device_resource()}; + rmm::device_async_resource_ref _mr{rmm::mr::get_current_device_resource_ref()}; cuda_device_id _device{get_current_cuda_device()}; }; /** @} */ // end of group diff --git a/include/rmm/mr/device/tracking_resource_adaptor.hpp b/include/rmm/mr/device/tracking_resource_adaptor.hpp index 0d3046973..f5bc43ee6 100644 --- a/include/rmm/mr/device/tracking_resource_adaptor.hpp +++ b/include/rmm/mr/device/tracking_resource_adaptor.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -90,10 +91,26 @@ class tracking_resource_adaptor final : public device_memory_resource { * @param upstream The resource used for allocating/deallocating device memory * @param capture_stacks If true, capture stacks for allocation calls */ - tracking_resource_adaptor(Upstream* upstream, bool capture_stacks = false) + tracking_resource_adaptor(device_async_resource_ref upstream, bool capture_stacks = false) : capture_stacks_{capture_stacks}, allocated_bytes_{0}, upstream_{upstream} { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + } + + /** + * @brief Construct a new tracking resource adaptor using `upstream` to satisfy + * allocation requests. + * + * @throws rmm::logic_error if `upstream == nullptr` + * + * @param upstream The resource used for allocating/deallocating device memory + * @param capture_stacks If true, capture stacks for allocation calls + */ + tracking_resource_adaptor(Upstream* upstream, bool capture_stacks = false) + : capture_stacks_{capture_stacks}, allocated_bytes_{0}, upstream_{[upstream]() { + RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + return device_async_resource_ref{*upstream}; + }()} + { } tracking_resource_adaptor() = delete; @@ -113,11 +130,6 @@ class tracking_resource_adaptor final : public device_memory_resource { return upstream_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } - /** * @brief Get the outstanding allocations map * @@ -197,8 +209,7 @@ class tracking_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - void* ptr = upstream_->allocate(bytes, stream); - + void* ptr = upstream_.allocate_async(bytes, stream); // track it. 
{ write_lock_t lock(mtx_); @@ -218,7 +229,7 @@ class tracking_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - upstream_->deallocate(ptr, bytes, stream); + upstream_.deallocate_async(ptr, bytes, stream); { write_lock_t lock(mtx_); @@ -263,7 +274,7 @@ class tracking_resource_adaptor final : public device_memory_resource { { if (this == &other) { return true; } auto cast = dynamic_cast const*>(&other); - if (cast == nullptr) { return upstream_->is_equal(other); } + if (cast == nullptr) { return false; } return get_upstream_resource() == cast->get_upstream_resource(); } @@ -271,7 +282,9 @@ class tracking_resource_adaptor final : public device_memory_resource { std::map allocations_; // map of active allocations std::atomic allocated_bytes_; // number of bytes currently allocated std::shared_mutex mutable mtx_; // mutex for thread safe access to allocations_ - Upstream* upstream_; // the upstream resource used for satisfying allocation requests + device_async_resource_ref upstream_{ + rmm::mr::get_current_device_resource_ref()}; // the upstream resource used for satisfying + // allocation requests }; /** diff --git a/tests/container_multidevice_tests.cu b/tests/container_multidevice_tests.cu index e58ba53a2..4d00173ac 100644 --- a/tests/container_multidevice_tests.cu +++ b/tests/container_multidevice_tests.cu @@ -42,9 +42,9 @@ TYPED_TEST(ContainerMultiDeviceTest, CreateDestroyDifferentActiveDevice) // only run on multidevice systems if (num_devices >= 2) { rmm::cuda_set_device_raii dev{rmm::cuda_device_id{0}}; - auto* orig_mr = rmm::mr::get_current_device_resource(); + auto orig_mr = rmm::mr::get_current_device_resource_ref(); auto check_mr = device_check_resource_adaptor{orig_mr}; - rmm::mr::set_current_device_resource(&check_mr); + rmm::mr::set_current_device_resource_ref(check_mr); { if constexpr (std::is_same_v>) { @@ -57,7 +57,7 @@ TYPED_TEST(ContainerMultiDeviceTest, CreateDestroyDifferentActiveDevice) } RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(0)); - rmm::mr::set_current_device_resource(orig_mr); + rmm::mr::set_current_device_resource_ref(orig_mr); } } @@ -69,9 +69,9 @@ TYPED_TEST(ContainerMultiDeviceTest, CreateMoveDestroyDifferentActiveDevice) // only run on multidevice systems if (num_devices >= 2) { rmm::cuda_set_device_raii dev{rmm::cuda_device_id{0}}; - auto* orig_mr = rmm::mr::get_current_device_resource(); + auto orig_mr = rmm::mr::get_current_device_resource_ref(); auto check_mr = device_check_resource_adaptor{orig_mr}; - rmm::mr::set_current_device_resource(&check_mr); + rmm::mr::set_current_device_resource_ref(&check_mr); { auto buf_1 = []() { @@ -97,7 +97,7 @@ TYPED_TEST(ContainerMultiDeviceTest, CreateMoveDestroyDifferentActiveDevice) } RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(0)); - rmm::mr::set_current_device_resource(orig_mr); + rmm::mr::set_current_device_resource_ref(orig_mr); } } @@ -109,9 +109,9 @@ TYPED_TEST(ContainerMultiDeviceTest, ResizeDifferentActiveDevice) // only run on multidevice systems if (num_devices >= 2) { rmm::cuda_set_device_raii dev{rmm::cuda_device_id{0}}; - auto* orig_mr = rmm::mr::get_current_device_resource(); + auto orig_mr = rmm::mr::get_current_device_resource_ref(); auto check_mr = device_check_resource_adaptor{orig_mr}; - rmm::mr::set_current_device_resource(&check_mr); + rmm::mr::set_current_device_resource_ref(&check_mr); if constexpr (not std::is_same_v>) { auto buf = TypeParam(128, rmm::cuda_stream_view{}); @@ -120,7 +120,7 @@ 
TYPED_TEST(ContainerMultiDeviceTest, ResizeDifferentActiveDevice) } RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(0)); - rmm::mr::set_current_device_resource(orig_mr); + rmm::mr::set_current_device_resource_ref(orig_mr); } } @@ -132,9 +132,9 @@ TYPED_TEST(ContainerMultiDeviceTest, ShrinkDifferentActiveDevice) // only run on multidevice systems if (num_devices >= 2) { rmm::cuda_set_device_raii dev{rmm::cuda_device_id{0}}; - auto* orig_mr = rmm::mr::get_current_device_resource(); + auto orig_mr = rmm::mr::get_current_device_resource_ref(); auto check_mr = device_check_resource_adaptor{orig_mr}; - rmm::mr::set_current_device_resource(&check_mr); + rmm::mr::set_current_device_resource_ref(&check_mr); if constexpr (not std::is_same_v>) { auto buf = TypeParam(128, rmm::cuda_stream_view{}); @@ -144,6 +144,6 @@ TYPED_TEST(ContainerMultiDeviceTest, ShrinkDifferentActiveDevice) } RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(0)); - rmm::mr::set_current_device_resource(orig_mr); + rmm::mr::set_current_device_resource_ref(orig_mr); } } diff --git a/tests/cuda_stream_tests.cpp b/tests/cuda_stream_tests.cpp index 1cc068434..c4d0a6b9f 100644 --- a/tests/cuda_stream_tests.cpp +++ b/tests/cuda_stream_tests.cpp @@ -96,6 +96,6 @@ TEST_F(CudaStreamDeathTest, TestSyncNoThrow) // should assert here or in `~cuda_stream()` stream_a.synchronize_no_throw(); }; - EXPECT_DEATH(test(), "Assertion"); + EXPECT_DEATH(test(), ""); } #endif diff --git a/tests/device_check_resource_adaptor.hpp b/tests/device_check_resource_adaptor.hpp index fcb578fdf..c77c078db 100644 --- a/tests/device_check_resource_adaptor.hpp +++ b/tests/device_check_resource_adaptor.hpp @@ -17,13 +17,14 @@ #include #include #include +#include #include #include class device_check_resource_adaptor final : public rmm::mr::device_memory_resource { public: - device_check_resource_adaptor(rmm::mr::device_memory_resource* upstream) + device_check_resource_adaptor(rmm::device_async_resource_ref upstream) : device_id{rmm::get_current_cuda_device()}, upstream_(upstream) { } @@ -36,11 +37,6 @@ class device_check_resource_adaptor final : public rmm::mr::device_memory_resour return upstream_; } - /** - * @briefreturn{device_memory_resource* to the upstream memory resource} - */ - [[nodiscard]] device_memory_resource* get_upstream() const noexcept { return upstream_; } - private: [[nodiscard]] bool check_device_id() const { return device_id == rmm::get_current_cuda_device(); } @@ -48,7 +44,7 @@ class device_check_resource_adaptor final : public rmm::mr::device_memory_resour { bool const is_correct_device = check_device_id(); EXPECT_TRUE(is_correct_device); - if (is_correct_device) { return upstream_->allocate(bytes, stream); } + if (is_correct_device) { return get_upstream_resource().allocate_async(bytes, stream); } return nullptr; } @@ -56,7 +52,7 @@ class device_check_resource_adaptor final : public rmm::mr::device_memory_resour { bool const is_correct_device = check_device_id(); EXPECT_TRUE(is_correct_device); - if (is_correct_device) { upstream_->deallocate(ptr, bytes, stream); } + if (is_correct_device) { get_upstream_resource().deallocate_async(ptr, bytes, stream); } } [[nodiscard]] bool do_is_equal( @@ -64,10 +60,10 @@ class device_check_resource_adaptor final : public rmm::mr::device_memory_resour { if (this == &other) { return true; } auto const* cast = dynamic_cast(&other); - if (cast == nullptr) { return upstream_->is_equal(other); } + if (cast == nullptr) { return false; } return get_upstream_resource() == cast->get_upstream_resource(); } rmm::cuda_device_id device_id; 
- rmm::mr::device_memory_resource* upstream_{}; + rmm::device_async_resource_ref upstream_{rmm::mr::get_current_device_resource()}; }; diff --git a/tests/mock_resource.hpp b/tests/mock_resource.hpp index e06148d3a..555cf0d74 100644 --- a/tests/mock_resource.hpp +++ b/tests/mock_resource.hpp @@ -25,7 +25,12 @@ class mock_resource : public rmm::mr::device_memory_resource { public: MOCK_METHOD(void*, do_allocate, (std::size_t, cuda_stream_view), (override)); MOCK_METHOD(void, do_deallocate, (void*, std::size_t, cuda_stream_view), (override)); + bool operator==(mock_resource const&) const noexcept { return true; } + bool operator!=(mock_resource const&) const { return false; } + friend void get_property(mock_resource const&, cuda::mr::device_accessible) noexcept {} using size_pair = std::pair; }; +static_assert(cuda::mr::async_resource_with); + } // namespace rmm::test diff --git a/tests/mr/device/aligned_mr_tests.cpp b/tests/mr/device/aligned_mr_tests.cpp index b9ecbc8ca..85262c29d 100644 --- a/tests/mr/device/aligned_mr_tests.cpp +++ b/tests/mr/device/aligned_mr_tests.cpp @@ -59,13 +59,13 @@ TEST(AlignedTest, ThrowOnInvalidAllocationAlignment) TEST(AlignedTest, SupportsGetMemInfo) { mock_resource mock; - aligned_mock mr{&mock}; + aligned_mock mr{mock}; } TEST(AlignedTest, DefaultAllocationAlignmentPassthrough) { mock_resource mock; - aligned_mock mr{&mock}; + aligned_mock mr{mock}; cuda_stream_view stream; void* const pointer = int_to_address(123); diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 1eb38888e..6b7468d74 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -15,6 +15,7 @@ */ #include "../../byte_literals.hpp" +#include "cuda/stream_ref" #include #include @@ -23,6 +24,7 @@ #include #include #include +#include #include #include @@ -37,15 +39,22 @@ namespace { class mock_memory_resource { public: - MOCK_METHOD(void*, allocate, (std::size_t)); - MOCK_METHOD(void, deallocate, (void*, std::size_t)); + MOCK_METHOD(void*, allocate, (std::size_t, std::size_t)); + MOCK_METHOD(void, deallocate, (void*, std::size_t, std::size_t)); + MOCK_METHOD(void*, allocate_async, (std::size_t, std::size_t, cuda::stream_ref)); + MOCK_METHOD(void, deallocate_async, (void*, std::size_t, std::size_t, cuda::stream_ref)); + bool operator==(mock_memory_resource const&) const noexcept { return true; } + bool operator!=(mock_memory_resource const&) const { return false; } + friend void get_property(mock_memory_resource const&, cuda::mr::device_accessible) noexcept {} }; +static_assert(cuda::mr::async_resource_with); + using rmm::mr::detail::arena::block; using rmm::mr::detail::arena::byte_span; using rmm::mr::detail::arena::superblock; -using global_arena = rmm::mr::detail::arena::global_arena; -using arena = rmm::mr::detail::arena::arena; +using global_arena = rmm::mr::detail::arena::global_arena; +using arena = rmm::mr::detail::arena::arena; using arena_mr = rmm::mr::arena_memory_resource; using ::testing::Return; @@ -59,9 +68,10 @@ auto const fake_address4 = reinterpret_cast(superblock::minimum_size * 2) struct ArenaTest : public ::testing::Test { void SetUp() override { - EXPECT_CALL(mock_mr, allocate(arena_size)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock_mr, deallocate(fake_address3, arena_size)); - global = std::make_unique(&mock_mr, arena_size); + EXPECT_CALL(mock_mr, allocate(arena_size, ::testing::_)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock_mr, deallocate(fake_address3, arena_size, ::testing::_)); + + 
global = std::make_unique(mock_mr, arena_size); per_thread = std::make_unique(*global); } @@ -293,13 +303,6 @@ TEST_F(ArenaTest, SuperblockMaxFreeSizeWhenFull) // NOLINT /** * Test global_arena. */ - -TEST_F(ArenaTest, GlobalArenaNullUpstream) // NOLINT -{ - auto construct_nullptr = []() { global_arena global{nullptr, std::nullopt}; }; - EXPECT_THROW(construct_nullptr(), rmm::logic_error); // NOLINT(cppcoreguidelines-avoid-goto) -} - TEST_F(ArenaTest, GlobalArenaAcquire) // NOLINT { auto const sblk = global->acquire(256); @@ -378,7 +381,7 @@ TEST_F(ArenaTest, GlobalArenaDeallocate) // NOLINT { auto* ptr = global->allocate(superblock::minimum_size * 2); EXPECT_EQ(ptr, fake_address3); - global->deallocate(ptr, superblock::minimum_size * 2, {}); + global->deallocate_async(ptr, superblock::minimum_size * 2, {}); ptr = global->allocate(superblock::minimum_size * 2); EXPECT_EQ(ptr, fake_address3); } @@ -387,8 +390,8 @@ TEST_F(ArenaTest, GlobalArenaDeallocateAlignUp) // NOLINT { auto* ptr = global->allocate(superblock::minimum_size + 256); auto* ptr2 = global->allocate(superblock::minimum_size + 512); - global->deallocate(ptr, superblock::minimum_size + 256, {}); - global->deallocate(ptr2, superblock::minimum_size + 512, {}); + global->deallocate_async(ptr, superblock::minimum_size + 256, {}); + global->deallocate_async(ptr2, superblock::minimum_size + 512, {}); EXPECT_EQ(global->allocate(arena_size), fake_address3); } diff --git a/tests/mr/device/callback_mr_tests.cpp b/tests/mr/device/callback_mr_tests.cpp index 34a2cc8cc..445e0aed5 100644 --- a/tests/mr/device/callback_mr_tests.cpp +++ b/tests/mr/device/callback_mr_tests.cpp @@ -36,47 +36,50 @@ using ::testing::_; TEST(CallbackTest, TestCallbacksAreInvoked) { - auto base_mr = mock_resource(); + auto base_mr = mock_resource(); + auto base_ref = device_async_resource_ref{base_mr}; EXPECT_CALL(base_mr, do_allocate(10_MiB, cuda_stream_view{})).Times(1); EXPECT_CALL(base_mr, do_deallocate(_, 10_MiB, cuda_stream_view{})).Times(1); auto allocate_callback = [](std::size_t size, cuda_stream_view stream, void* arg) { - auto base_mr = static_cast(arg); - return base_mr->allocate(size, stream); + auto base_mr = *static_cast(arg); + return base_mr.allocate_async(size, stream); }; auto deallocate_callback = [](void* ptr, std::size_t size, cuda_stream_view stream, void* arg) { - auto base_mr = static_cast(arg); - base_mr->deallocate(ptr, size, stream); + auto base_mr = *static_cast(arg); + base_mr.deallocate_async(ptr, size, stream); }; auto mr = - rmm::mr::callback_memory_resource(allocate_callback, deallocate_callback, &base_mr, &base_mr); - auto ptr = mr.allocate(10_MiB); - mr.deallocate(ptr, 10_MiB); + rmm::mr::callback_memory_resource(allocate_callback, deallocate_callback, &base_ref, &base_ref); + auto const size = std::size_t{10_MiB}; + auto* ptr = mr.allocate(size); + mr.deallocate(ptr, size); } TEST(CallbackTest, LoggingTest) { testing::internal::CaptureStdout(); - auto base_mr = rmm::mr::get_current_device_resource(); + auto base_mr = rmm::mr::get_current_device_resource_ref(); auto allocate_callback = [](std::size_t size, cuda_stream_view stream, void* arg) { std::cout << "Allocating " << size << " bytes" << std::endl; - auto base_mr = static_cast(arg); - return base_mr->allocate(size, stream); + auto base_mr = *static_cast(arg); + return base_mr.allocate_async(size, stream); }; auto deallocate_callback = [](void* ptr, std::size_t size, cuda_stream_view stream, void* arg) { std::cout << "Deallocating " << size << " bytes" << std::endl; - auto 
base_mr = static_cast(arg); - base_mr->deallocate(ptr, size, stream); + auto base_mr = *static_cast(arg); + base_mr.deallocate_async(ptr, size, stream); }; auto mr = - rmm::mr::callback_memory_resource(allocate_callback, deallocate_callback, base_mr, base_mr); - auto ptr = mr.allocate(10_MiB); - mr.deallocate(ptr, 10_MiB); + rmm::mr::callback_memory_resource(allocate_callback, deallocate_callback, &base_mr, &base_mr); + auto const size = std::size_t{10_MiB}; + auto* ptr = mr.allocate(size); + mr.deallocate(ptr, size); std::string output = testing::internal::GetCapturedStdout(); - std::string expect = fmt::format("Allocating {} bytes\nDeallocating {} bytes\n", 10_MiB, 10_MiB); + std::string expect = fmt::format("Allocating {} bytes\nDeallocating {} bytes\n", size, size); ASSERT_EQ(expect, output); } diff --git a/tests/mr/device/limiting_mr_tests.cpp b/tests/mr/device/limiting_mr_tests.cpp index 777ce9428..3a6178c95 100644 --- a/tests/mr/device/limiting_mr_tests.cpp +++ b/tests/mr/device/limiting_mr_tests.cpp @@ -25,19 +25,19 @@ namespace rmm::test { namespace { -using Limiting_adaptor = rmm::mr::limiting_resource_adaptor; +using limiting_adaptor = rmm::mr::limiting_resource_adaptor; TEST(LimitingTest, ThrowOnNullUpstream) { auto const max_size{5_MiB}; - auto construct_nullptr = []() { Limiting_adaptor mr{nullptr, max_size}; }; + auto construct_nullptr = []() { limiting_adaptor mr{nullptr, max_size}; }; EXPECT_THROW(construct_nullptr(), rmm::logic_error); } TEST(LimitingTest, TooBig) { auto const max_size{5_MiB}; - Limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size}; + limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size}; EXPECT_THROW(mr.allocate(max_size + 1), rmm::out_of_memory); } @@ -45,15 +45,15 @@ TEST(LimitingTest, UpstreamFailure) { auto const max_size_1{2_MiB}; auto const max_size_2{5_MiB}; - Limiting_adaptor mr1{rmm::mr::get_current_device_resource(), max_size_1}; - Limiting_adaptor mr2{&mr1, max_size_2}; + limiting_adaptor mr1{rmm::mr::get_current_device_resource(), max_size_1}; + limiting_adaptor mr2{&mr1, max_size_2}; EXPECT_THROW(mr2.allocate(4_MiB), rmm::out_of_memory); } TEST(LimitingTest, UnderLimitDueToFrees) { auto const max_size{10_MiB}; - Limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size}; + limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size}; auto const size1{4_MiB}; auto* ptr1 = mr.allocate(size1); auto allocated_bytes = size1; @@ -81,7 +81,7 @@ TEST(LimitingTest, UnderLimitDueToFrees) TEST(LimitingTest, OverLimit) { auto const max_size{10_MiB}; - Limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size}; + limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size}; auto const size1{4_MiB}; auto* ptr1 = mr.allocate(size1); auto allocated_bytes = size1; diff --git a/tests/mr/device/pool_mr_tests.cpp b/tests/mr/device/pool_mr_tests.cpp index c63a61844..9db63eb1b 100644 --- a/tests/mr/device/pool_mr_tests.cpp +++ b/tests/mr/device/pool_mr_tests.cpp @@ -49,19 +49,7 @@ TEST(PoolTest, ThrowMaxLessThanInitial) auto max_less_than_initial = []() { const auto initial{1024}; const auto maximum{256}; - pool_mr mr{rmm::mr::get_current_device_resource(), initial, maximum}; - }; - EXPECT_THROW(max_less_than_initial(), rmm::logic_error); -} - -TEST(PoolTest, ReferenceThrowMaxLessThanInitial) -{ - // Make sure first argument is enough larger than the second that alignment rounding doesn't - // make them equal - auto max_less_than_initial = []() { - const auto initial{1024}; - const auto 
maximum{256}; - pool_mr mr{*rmm::mr::get_current_device_resource(), initial, maximum}; + pool_mr mr{rmm::mr::get_current_device_resource_ref(), initial, maximum}; }; EXPECT_THROW(max_less_than_initial(), rmm::logic_error); } @@ -72,7 +60,7 @@ TEST(PoolTest, AllocateNinetyPercent) auto const [free, total] = rmm::available_device_memory(); (void)total; auto const ninety_percent_pool = rmm::percent_of_free_device_memory(90); - pool_mr mr{rmm::mr::get_current_device_resource(), ninety_percent_pool}; + pool_mr mr{rmm::mr::get_current_device_resource_ref(), ninety_percent_pool}; }; EXPECT_NO_THROW(allocate_ninety()); } @@ -81,7 +69,7 @@ TEST(PoolTest, TwoLargeBuffers) { auto two_large = []() { [[maybe_unused]] auto const [free, total] = rmm::available_device_memory(); - pool_mr mr{rmm::mr::get_current_device_resource(), rmm::percent_of_free_device_memory(50)}; + pool_mr mr{rmm::mr::get_current_device_resource_ref(), rmm::percent_of_free_device_memory(50)}; auto* ptr1 = mr.allocate(free / 4); auto* ptr2 = mr.allocate(free / 4); mr.deallocate(ptr1, free / 4); @@ -116,7 +104,7 @@ TEST(PoolTest, ForceGrowth) TEST(PoolTest, DeletedStream) { - pool_mr mr{rmm::mr::get_current_device_resource(), 0}; + pool_mr mr{rmm::mr::get_current_device_resource_ref(), 0}; cudaStream_t stream{}; // we don't use rmm::cuda_stream here to make destruction more explicit const int size = 10000; EXPECT_EQ(cudaSuccess, cudaStreamCreate(&stream)); @@ -129,7 +117,7 @@ TEST(PoolTest, DeletedStream) TEST(PoolTest, InitialAndMaxPoolSizeEqual) { EXPECT_NO_THROW([]() { - pool_mr mr(rmm::mr::get_current_device_resource(), 1000192, 1000192); + pool_mr mr(rmm::mr::get_current_device_resource_ref(), 1000192, 1000192); mr.allocate(1000); }()); } @@ -138,14 +126,14 @@ TEST(PoolTest, NonAlignedPoolSize) { EXPECT_THROW( []() { - pool_mr mr(rmm::mr::get_current_device_resource(), 1000031, 1000192); + pool_mr mr(rmm::mr::get_current_device_resource_ref(), 1000031, 1000192); mr.allocate(1000); }(), rmm::logic_error); EXPECT_THROW( []() { - pool_mr mr(rmm::mr::get_current_device_resource(), 1000192, 1000200); + pool_mr mr(rmm::mr::get_current_device_resource_ref(), 1000192, 1000200); mr.allocate(1000); }(), rmm::logic_error); @@ -203,18 +191,18 @@ namespace test_properties { class fake_async_resource { public: // To model `async_resource` - void* allocate(std::size_t, std::size_t) { return nullptr; } - void deallocate(void* ptr, std::size_t, std::size_t) {} - void* allocate_async(std::size_t, std::size_t, cuda::stream_ref) { return nullptr; } - void deallocate_async(void* ptr, std::size_t, std::size_t, cuda::stream_ref) {} + static void* allocate(std::size_t, std::size_t) { return nullptr; } + static void deallocate(void* ptr, std::size_t, std::size_t) {} + static void* allocate_async(std::size_t, std::size_t, cuda::stream_ref) { return nullptr; } + static void deallocate_async(void* ptr, std::size_t, std::size_t, cuda::stream_ref) {} bool operator==(const fake_async_resource& other) const { return true; } bool operator!=(const fake_async_resource& other) const { return false; } private: - void* do_allocate(std::size_t bytes, cuda_stream_view) { return nullptr; } - void do_deallocate(void* ptr, std::size_t, cuda_stream_view) {} - [[nodiscard]] bool do_is_equal(fake_async_resource const& other) const noexcept { return true; } + static void* do_allocate(std::size_t bytes, cuda_stream_view) { return nullptr; } + static void do_deallocate(void* ptr, std::size_t, cuda_stream_view) {} + [[nodiscard]] static bool do_is_equal(fake_async_resource 
const& other) noexcept { return true; } }; static_assert(!cuda::has_property); static_assert(!cuda::has_property, diff --git a/tests/mr/device/statistics_mr_tests.cpp b/tests/mr/device/statistics_mr_tests.cpp index 6c5700f0b..1fdf01aa5 100644 --- a/tests/mr/device/statistics_mr_tests.cpp +++ b/tests/mr/device/statistics_mr_tests.cpp @@ -40,7 +40,7 @@ TEST(StatisticsTest, ThrowOnNullUpstream) TEST(StatisticsTest, Empty) { - statistics_adaptor mr{rmm::mr::get_current_device_resource()}; + statistics_adaptor mr{rmm::mr::get_current_device_resource_ref()}; EXPECT_EQ(mr.get_bytes_counter().peak, 0); EXPECT_EQ(mr.get_bytes_counter().total, 0); @@ -53,7 +53,7 @@ TEST(StatisticsTest, Empty) TEST(StatisticsTest, AllFreed) { - statistics_adaptor mr{rmm::mr::get_current_device_resource()}; + statistics_adaptor mr{rmm::mr::get_current_device_resource_ref()}; std::vector allocations; allocations.reserve(num_allocations); @@ -71,7 +71,7 @@ TEST(StatisticsTest, AllFreed) TEST(StatisticsTest, PeakAllocations) { - statistics_adaptor mr{rmm::mr::get_current_device_resource()}; + statistics_adaptor mr{rmm::mr::get_current_device_resource_ref()}; std::vector allocations; for (std::size_t i = 0; i < num_allocations; ++i) { @@ -127,9 +127,9 @@ TEST(StatisticsTest, PeakAllocations) TEST(StatisticsTest, MultiTracking) { - auto* orig_device_resource = rmm::mr::get_current_device_resource(); + auto orig_device_resource = rmm::mr::get_current_device_resource_ref(); statistics_adaptor mr{orig_device_resource}; - rmm::mr::set_current_device_resource(&mr); + rmm::mr::set_current_device_resource_ref(&mr); std::vector> allocations; for (std::size_t i = 0; i < num_allocations; ++i) { @@ -139,8 +139,8 @@ TEST(StatisticsTest, MultiTracking) EXPECT_EQ(mr.get_allocations_counter().value, 10); - statistics_adaptor inner_mr{rmm::mr::get_current_device_resource()}; - rmm::mr::set_current_device_resource(&inner_mr); + statistics_adaptor inner_mr{rmm::mr::get_current_device_resource_ref()}; + rmm::mr::set_current_device_resource_ref(&inner_mr); for (std::size_t i = 0; i < num_more_allocations; ++i) { allocations.emplace_back( @@ -172,7 +172,7 @@ TEST(StatisticsTest, MultiTracking) EXPECT_EQ(inner_mr.get_allocations_counter().peak, 5); // Reset the current device resource - rmm::mr::set_current_device_resource(orig_device_resource); + rmm::mr::set_current_device_resource_ref(orig_device_resource); } TEST(StatisticsTest, NegativeInnerTracking) @@ -180,7 +180,7 @@ TEST(StatisticsTest, NegativeInnerTracking) // This tests the unlikely scenario where pointers are deallocated on an inner // wrapped memory resource. 
This can happen if the MR is not saved with the // memory pointer - statistics_adaptor mr{rmm::mr::get_current_device_resource()}; + statistics_adaptor mr{rmm::mr::get_current_device_resource_ref()}; std::vector allocations; for (std::size_t i = 0; i < num_allocations; ++i) { allocations.push_back(mr.allocate(ten_MiB)); @@ -236,7 +236,7 @@ TEST(StatisticsTest, NegativeInnerTracking) TEST(StatisticsTest, Nested) { - statistics_adaptor mr{rmm::mr::get_current_device_resource()}; + statistics_adaptor mr{rmm::mr::get_current_device_resource_ref()}; void* a0 = mr.allocate(ten_MiB); EXPECT_EQ(mr.get_bytes_counter().value, ten_MiB); EXPECT_EQ(mr.get_allocations_counter().value, 1); diff --git a/tests/mr/device/tracking_mr_tests.cpp b/tests/mr/device/tracking_mr_tests.cpp index 7c2532c60..45284144f 100644 --- a/tests/mr/device/tracking_mr_tests.cpp +++ b/tests/mr/device/tracking_mr_tests.cpp @@ -42,14 +42,14 @@ TEST(TrackingTest, ThrowOnNullUpstream) TEST(TrackingTest, Empty) { - tracking_adaptor mr{rmm::mr::get_current_device_resource()}; + tracking_adaptor mr{rmm::mr::get_current_device_resource_ref()}; EXPECT_EQ(mr.get_outstanding_allocations().size(), 0); EXPECT_EQ(mr.get_allocated_bytes(), 0); } TEST(TrackingTest, AllFreed) { - tracking_adaptor mr{rmm::mr::get_current_device_resource()}; + tracking_adaptor mr{rmm::mr::get_current_device_resource_ref()}; std::vector allocations; allocations.reserve(num_allocations); for (int i = 0; i < num_allocations; ++i) { @@ -64,7 +64,7 @@ TEST(TrackingTest, AllFreed) TEST(TrackingTest, AllocationsLeftWithStacks) { - tracking_adaptor mr{rmm::mr::get_current_device_resource(), true}; + tracking_adaptor mr{rmm::mr::get_current_device_resource_ref(), true}; std::vector allocations; allocations.reserve(num_allocations); for (int i = 0; i < num_allocations; ++i) { @@ -82,7 +82,7 @@ TEST(TrackingTest, AllocationsLeftWithStacks) TEST(TrackingTest, AllocationsLeftWithoutStacks) { - tracking_adaptor mr{rmm::mr::get_current_device_resource()}; + tracking_adaptor mr{rmm::mr::get_current_device_resource_ref()}; std::vector allocations; allocations.reserve(num_allocations); for (int i = 0; i < num_allocations; ++i) { @@ -101,9 +101,9 @@ TEST(TrackingTest, AllocationsLeftWithoutStacks) TEST(TrackingTest, MultiTracking) { - auto* orig_device_resource = rmm::mr::get_current_device_resource(); + auto orig_device_resource = rmm::mr::get_current_device_resource_ref(); tracking_adaptor mr{orig_device_resource, true}; - rmm::mr::set_current_device_resource(&mr); + rmm::mr::set_current_device_resource_ref(&mr); std::vector> allocations; for (std::size_t i = 0; i < num_allocations; ++i) { @@ -113,8 +113,8 @@ TEST(TrackingTest, MultiTracking) EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations); - tracking_adaptor inner_mr{rmm::mr::get_current_device_resource()}; - rmm::mr::set_current_device_resource(&inner_mr); + tracking_adaptor inner_mr{rmm::mr::get_current_device_resource_ref()}; + rmm::mr::set_current_device_resource_ref(&inner_mr); for (std::size_t i = 0; i < num_more_allocations; ++i) { allocations.emplace_back( @@ -141,7 +141,7 @@ TEST(TrackingTest, MultiTracking) EXPECT_EQ(inner_mr.get_allocated_bytes(), 0); // Reset the current device resource - rmm::mr::set_current_device_resource(orig_device_resource); + rmm::mr::set_current_device_resource_ref(orig_device_resource); } TEST(TrackingTest, NegativeInnerTracking) @@ -149,7 +149,7 @@ TEST(TrackingTest, NegativeInnerTracking) // This tests the unlikely scenario where pointers are deallocated on an inner // 
wrapped memory resource. This can happen if the MR is not saved with the // memory pointer - tracking_adaptor mr{rmm::mr::get_current_device_resource()}; + tracking_adaptor mr{rmm::mr::get_current_device_resource_ref()}; std::vector allocations; for (std::size_t i = 0; i < num_allocations; ++i) { allocations.push_back(mr.allocate(ten_MiB)); @@ -181,7 +181,7 @@ TEST(TrackingTest, NegativeInnerTracking) TEST(TrackingTest, DeallocWrongBytes) { - tracking_adaptor mr{rmm::mr::get_current_device_resource()}; + tracking_adaptor mr{rmm::mr::get_current_device_resource_ref()}; std::vector allocations; for (std::size_t i = 0; i < num_allocations; ++i) { allocations.push_back(mr.allocate(ten_MiB)); @@ -207,7 +207,7 @@ TEST(TrackingTest, LogOutstandingAllocations) rmm::logger().sinks().push_back(oss_sink); auto old_level = rmm::logger().level(); - tracking_adaptor mr{rmm::mr::get_current_device_resource()}; + tracking_adaptor mr{rmm::mr::get_current_device_resource_ref()}; std::vector allocations; for (std::size_t i = 0; i < num_allocations; ++i) { allocations.push_back(mr.allocate(ten_MiB)); diff --git a/tests/mr/host/mr_ref_tests.cpp b/tests/mr/host/mr_ref_tests.cpp index 8445ab1f5..071739575 100644 --- a/tests/mr/host/mr_ref_tests.cpp +++ b/tests/mr/host/mr_ref_tests.cpp @@ -233,14 +233,17 @@ TYPED_TEST(MRRefTest, UnsupportedAlignmentTest) for (std::size_t num_trials = 0; num_trials < NUM_TRIALS; ++num_trials) { for (std::size_t alignment = MinTestedAlignment; alignment <= MaxTestedAlignment; alignment *= TestedAlignmentMultiplier) { +#ifdef NDEBUG auto allocation_size = size_distribution(generator); void* ptr{nullptr}; // An unsupported alignment (like an odd number) should result in an // alignment of `alignof(std::max_align_t)` auto const bad_alignment = alignment + 1; + EXPECT_NO_THROW(ptr = this->ref.allocate(allocation_size, bad_alignment)); EXPECT_TRUE(is_aligned(ptr, alignof(std::max_align_t))); EXPECT_NO_THROW(this->ref.deallocate(ptr, allocation_size, bad_alignment)); +#endif } } } From bd70c4284f1c8358f44d8935caaeea93f5d39834 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 22 Aug 2024 03:14:00 +0000 Subject: [PATCH 10/24] Use resource_ref in torch_allocator and polymorphic_allocator --- include/rmm/mr/device/polymorphic_allocator.hpp | 4 ++-- python/rmm/rmm/_lib/_torch_allocator.cpp | 12 ++++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/include/rmm/mr/device/polymorphic_allocator.hpp b/include/rmm/mr/device/polymorphic_allocator.hpp index 863ba6f62..a1152c77e 100644 --- a/include/rmm/mr/device/polymorphic_allocator.hpp +++ b/include/rmm/mr/device/polymorphic_allocator.hpp @@ -50,7 +50,7 @@ class polymorphic_allocator { using value_type = T; ///< T, the value type of objects allocated by this allocator /** * @brief Construct a `polymorphic_allocator` using the return value of - * `rmm::mr::get_current_device_resource()` as the underlying memory resource. + * `rmm::mr::get_current_device_resource_ref()` as the underlying memory resource. 
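
To illustrate the default change above: a default-constructed `polymorphic_allocator` now refers to `get_current_device_resource_ref()`. The allocate/deallocate calls below assume the allocator's existing (element count, stream) signatures and are only a sketch:

    rmm::mr::polymorphic_allocator<int> alloc{};  // refers to get_current_device_resource_ref()
    rmm::cuda_stream_view stream{};
    int* data = alloc.allocate(128, stream);      // space for 128 ints
    alloc.deallocate(data, 128, stream);
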
* */ polymorphic_allocator() = default; @@ -114,7 +114,7 @@ class polymorphic_allocator { private: rmm::device_async_resource_ref mr_{ - get_current_device_resource()}; ///< Underlying resource used for (de)allocation + get_current_device_resource_ref()}; ///< Underlying resource used for (de)allocation }; /** diff --git a/python/rmm/rmm/_lib/_torch_allocator.cpp b/python/rmm/rmm/_lib/_torch_allocator.cpp index dc92e4639..64885cc2f 100644 --- a/python/rmm/rmm/_lib/_torch_allocator.cpp +++ b/python/rmm/rmm/_lib/_torch_allocator.cpp @@ -39,8 +39,9 @@ extern "C" void* allocate(std::size_t size, int device, void* stream) { rmm::cuda_device_id const device_id{device}; rmm::cuda_set_device_raii with_device{device_id}; - auto mr = rmm::mr::get_per_device_resource(device_id); - return mr->allocate(size, rmm::cuda_stream_view{static_cast(stream)}); + auto mr = rmm::mr::get_per_device_resource_ref(device_id); + return mr.allocate_async( + size, rmm::CUDA_ALLOCATION_ALIGNMENT, rmm::cuda_stream_view{static_cast(stream)}); } /** @@ -55,6 +56,9 @@ extern "C" void deallocate(void* ptr, std::size_t size, int device, void* stream { rmm::cuda_device_id const device_id{device}; rmm::cuda_set_device_raii with_device{device_id}; - auto mr = rmm::mr::get_per_device_resource(device_id); - mr->deallocate(ptr, size, rmm::cuda_stream_view{static_cast(stream)}); + auto mr = rmm::mr::get_per_device_resource_ref(device_id); + mr.deallocate_async(ptr, + size, + rmm::CUDA_ALLOCATION_ALIGNMENT, + rmm::cuda_stream_view{static_cast(stream)}); } From 00bd3ac131108ae868f173c56408f4165008dae4 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 22 Aug 2024 04:06:45 +0000 Subject: [PATCH 11/24] Make the raw pointer version of set_per_device_resource update the resource_ref state as well. --- include/rmm/mr/device/per_device_resource.hpp | 44 ++++++++++++++----- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/include/rmm/mr/device/per_device_resource.hpp b/include/rmm/mr/device/per_device_resource.hpp index 6b78ee463..bc4242874 100644 --- a/include/rmm/mr/device/per_device_resource.hpp +++ b/include/rmm/mr/device/per_device_resource.hpp @@ -184,6 +184,30 @@ inline device_memory_resource* get_per_device_resource(cuda_device_id device_id) : found->second; } +namespace detail { + +// The non-thread-safe implementation of `set_per_device_resource_ref`. This exists because +// we need to call this function from two places: the thread-safe version of +// `set_per_device_resource_ref` and the thread-safe version of `set_per_device_resource`, +// both of which take the lock, so we need an implementation that doesn't take the lock. +inline device_async_resource_ref set_per_device_resource_ref_unsafe( + cuda_device_id device_id, device_async_resource_ref new_resource_ref) +{ + auto& map = detail::get_ref_map(); + auto const old_itr = map.find(device_id.value()); + // If a resource didn't previously exist for `device_id`, return pointer to initial_resource + // Note: because resource_ref is not default-constructible, we can't use std::map::operator[] + if (old_itr == map.end()) { + map.insert({device_id.value(), new_resource_ref}); + return device_async_resource_ref{detail::initial_resource()}; + } + + auto old_resource_ref = old_itr->second; + old_itr->second = new_resource_ref; // update map directly via iterator + return old_resource_ref; +} +} // namespace detail + /** * @brief Set the `device_memory_resource` for the specified device. 
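
A small sketch of the per-device resource_ref API that the raw-pointer setter now keeps in sync; `my_mr` is a caller-owned resource that must outlive its last use, and device 0 is assumed to exist:

    rmm::mr::cuda_memory_resource my_mr{};
    rmm::cuda_device_id const dev0{0};

    auto old_ref = rmm::mr::set_per_device_resource_ref(dev0, my_mr);
    auto cur_ref = rmm::mr::get_per_device_resource_ref(dev0);  // now refers to my_mr

    rmm::mr::set_per_device_resource_ref(dev0, old_ref);  // restore the previous resource
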
* @@ -214,6 +238,13 @@ inline device_memory_resource* get_per_device_resource(cuda_device_id device_id) inline device_memory_resource* set_per_device_resource(cuda_device_id device_id, device_memory_resource* new_mr) { + // Note: even though set_per_device_resource() and set_per_device_resource_ref() are not + // interchangeable, we call the latter from the former to maintain resource_ref + // state consistent with the resource pointer state. This is necessary because the + // Python API still uses the raw pointer API. Once the Python API is updated to use + // resource_ref, this call can be removed. + detail::set_per_device_resource_ref_unsafe(device_id, new_mr); + std::lock_guard lock{detail::map_lock()}; auto& map = detail::get_map(); auto const old_itr = map.find(device_id.value()); @@ -341,18 +372,7 @@ inline device_async_resource_ref set_per_device_resource_ref( cuda_device_id device_id, device_async_resource_ref new_resource_ref) { std::lock_guard lock{detail::ref_map_lock()}; - auto& map = detail::get_ref_map(); - auto const old_itr = map.find(device_id.value()); - // If a resource didn't previously exist for `device_id`, return pointer to initial_resource - // Note: because resource_ref is not default-constructible, we can't use std::map::operator[] - if (old_itr == map.end()) { - map.insert({device_id.value(), new_resource_ref}); - return device_async_resource_ref{detail::initial_resource()}; - } - - auto old_resource_ref = old_itr->second; - old_itr->second = new_resource_ref; // update map directly via iterator - return old_resource_ref; + return detail::set_per_device_resource_ref_unsafe(device_id, new_resource_ref); } /** From d7df60efde1f16f8dfb14dafe3ec83983b125384 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 22 Aug 2024 04:09:26 +0000 Subject: [PATCH 12/24] Update containers and tests to use get/set_current_device_resource_ref() --- .../device_uvector/device_uvector_bench.cu | 18 +++++++++--------- .../multi_stream_allocations_bench.cu | 4 ++-- include/rmm/device_buffer.hpp | 14 +++++++------- include/rmm/device_scalar.hpp | 6 +++--- include/rmm/device_uvector.hpp | 4 ++-- include/rmm/exec_policy.hpp | 4 ++-- .../failure_callback_resource_adaptor.hpp | 2 +- tests/device_buffer_tests.cu | 14 +++++++------- tests/device_check_resource_adaptor.hpp | 2 +- tests/device_scalar_tests.cpp | 2 +- tests/device_uvector_tests.cpp | 2 +- tests/mr/device/aligned_mr_tests.cpp | 2 +- tests/mr/device/arena_mr_tests.cpp | 12 ++++++------ tests/mr/device/failure_callback_mr_tests.cpp | 3 ++- tests/mr/device/limiting_mr_tests.cpp | 8 ++++---- tests/mr/device/thrust_allocator_tests.cu | 10 +++++----- 16 files changed, 54 insertions(+), 53 deletions(-) diff --git a/benchmarks/device_uvector/device_uvector_bench.cu b/benchmarks/device_uvector/device_uvector_bench.cu index 36c9183f9..e56926827 100644 --- a/benchmarks/device_uvector/device_uvector_bench.cu +++ b/benchmarks/device_uvector/device_uvector_bench.cu @@ -40,7 +40,7 @@ void BM_UvectorSizeConstruction(benchmark::State& state) rmm::mr::cuda_memory_resource cuda_mr{}; rmm::mr::pool_memory_resource mr{ &cuda_mr, rmm::percent_of_free_device_memory(50)}; - rmm::mr::set_current_device_resource(&mr); + rmm::mr::set_current_device_resource_ref(&mr); for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) rmm::device_uvector vec(state.range(0), rmm::cuda_stream_view{}); @@ -49,7 +49,7 @@ void BM_UvectorSizeConstruction(benchmark::State& state) state.SetItemsProcessed(static_cast(state.iterations())); - 
rmm::mr::set_current_device_resource(nullptr); + rmm::mr::reset_current_device_resource_ref(); } BENCHMARK(BM_UvectorSizeConstruction) @@ -62,7 +62,7 @@ void BM_ThrustVectorSizeConstruction(benchmark::State& state) rmm::mr::cuda_memory_resource cuda_mr{}; rmm::mr::pool_memory_resource mr{ &cuda_mr, rmm::percent_of_free_device_memory(50)}; - rmm::mr::set_current_device_resource(&mr); + rmm::mr::set_current_device_resource_ref(&mr); for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) rmm::device_vector vec(state.range(0)); @@ -71,7 +71,7 @@ void BM_ThrustVectorSizeConstruction(benchmark::State& state) state.SetItemsProcessed(static_cast(state.iterations())); - rmm::mr::set_current_device_resource(nullptr); + rmm::mr::reset_current_device_resource_ref(); } BENCHMARK(BM_ThrustVectorSizeConstruction) @@ -140,7 +140,7 @@ template void BM_VectorWorkflow(benchmark::State& state) { rmm::mr::cuda_async_memory_resource cuda_async_mr{}; - rmm::mr::set_current_device_resource(&cuda_async_mr); + rmm::mr::set_current_device_resource_ref(&cuda_async_mr); rmm::cuda_stream input_stream; std::vector streams(4); @@ -158,7 +158,7 @@ void BM_VectorWorkflow(benchmark::State& state) auto const bytes = num_elements * sizeof(std::int32_t) * num_accesses; state.SetBytesProcessed(static_cast(state.iterations() * bytes)); - rmm::mr::set_current_device_resource(nullptr); + rmm::mr::reset_current_device_resource_ref(); } BENCHMARK_TEMPLATE(BM_VectorWorkflow, thrust_vector) // NOLINT @@ -167,9 +167,9 @@ BENCHMARK_TEMPLATE(BM_VectorWorkflow, thrust_vector) // NOLINT ->Unit(benchmark::kMicrosecond) ->UseManualTime(); -// The only difference here is that `rmm::device_vector` uses `rmm::current_device_resource()` -// for allocation while `thrust::device_vector` uses cudaMalloc/cudaFree. In the benchmarks we use -// `cuda_async_memory_resource`, which is faster. +// The only difference here is that `rmm::device_vector` uses +// `rmm::get_current_device_resource_ref()` for allocation while `thrust::device_vector` uses +// cudaMalloc/cudaFree. In the benchmarks we use `cuda_async_memory_resource`, which is faster. BENCHMARK_TEMPLATE(BM_VectorWorkflow, rmm_vector) // NOLINT ->RangeMultiplier(10) // NOLINT ->Range(100'000, 100'000'000) // NOLINT diff --git a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu index b73ef54f8..86e761c80 100644 --- a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu +++ b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu @@ -75,7 +75,7 @@ static void BM_MultiStreamAllocations(benchmark::State& state, MRFactoryFunc con { auto mr = factory(); - rmm::mr::set_current_device_resource(mr.get()); + rmm::mr::set_current_device_resource_ref(mr.get()); auto num_streams = state.range(0); auto num_kernels = state.range(1); @@ -92,7 +92,7 @@ static void BM_MultiStreamAllocations(benchmark::State& state, MRFactoryFunc con state.SetItemsProcessed(static_cast(state.iterations() * num_kernels)); - rmm::mr::set_current_device_resource(nullptr); + rmm::mr::reset_current_device_resource_ref(); } inline auto make_cuda() { return std::make_shared(); } diff --git a/include/rmm/device_buffer.hpp b/include/rmm/device_buffer.hpp index 4663b5733..64940a78c 100644 --- a/include/rmm/device_buffer.hpp +++ b/include/rmm/device_buffer.hpp @@ -40,7 +40,7 @@ namespace rmm { * * This class allocates untyped and *uninitialized* device memory using a * `device_async_resource_ref`. 
If not explicitly specified, the memory resource - * returned from `get_current_device_resource()` is used. + * returned from `get_current_device_resource_ref()` is used. * * @note Unlike `std::vector` or `thrust::device_vector`, the device memory * allocated by a `device_buffer` is uninitialized. Therefore, it is undefined @@ -94,7 +94,7 @@ class device_buffer { // `__host__ __device__` specifiers to the defaulted constructor when it is called within the // context of both host and device functions. Specifically, the `cudf::type_dispatcher` is a host- // device function. This causes warnings/errors because this ctor invokes host-only functions. - device_buffer() : _mr{rmm::mr::get_current_device_resource()} {} + device_buffer() : _mr{rmm::mr::get_current_device_resource_ref()} {} /** * @brief Constructs a new device buffer of `size` uninitialized bytes @@ -108,7 +108,7 @@ class device_buffer { */ explicit device_buffer(std::size_t size, cuda_stream_view stream, - device_async_resource_ref mr = mr::get_current_device_resource()) + device_async_resource_ref mr = mr::get_current_device_resource_ref()) : _stream{stream}, _mr{mr} { cuda_set_device_raii dev{_device}; @@ -137,7 +137,7 @@ class device_buffer { device_buffer(void const* source_data, std::size_t size, cuda_stream_view stream, - device_async_resource_ref mr = mr::get_current_device_resource()) + device_async_resource_ref mr = mr::get_current_device_resource_ref()) : _stream{stream}, _mr{mr} { cuda_set_device_raii dev{_device}; @@ -168,7 +168,7 @@ class device_buffer { */ device_buffer(device_buffer const& other, cuda_stream_view stream, - device_async_resource_ref mr = mr::get_current_device_resource()) + device_async_resource_ref mr = mr::get_current_device_resource_ref()) : device_buffer{other.data(), other.size(), stream, mr} { } @@ -418,8 +418,8 @@ class device_buffer { cuda_stream_view _stream{}; ///< Stream to use for device memory deallocation rmm::device_async_resource_ref _mr{ - rmm::mr::get_current_device_resource()}; ///< The memory resource used to - ///< allocate/deallocate device memory + rmm::mr::get_current_device_resource_ref()}; ///< The memory resource used to + ///< allocate/deallocate device memory cuda_device_id _device{get_current_cuda_device()}; /** diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index 47eed78b2..37c607e53 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -94,7 +94,7 @@ class device_scalar { * @param mr Optional, resource with which to allocate. 
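
In practice the new defaults above mean the following two buffers use the same resource whenever the current device resource ref has not been changed (sizes and the default stream are illustrative):

    rmm::cuda_stream_view stream{};

    rmm::device_buffer implicit_mr{1024, stream};  // defaults to get_current_device_resource_ref()
    rmm::device_buffer explicit_mr{1024, stream, rmm::mr::get_current_device_resource_ref()};

    rmm::device_scalar<int> scalar{42, stream};    // same default applies to device_scalar
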
*/ explicit device_scalar(cuda_stream_view stream, - device_async_resource_ref mr = mr::get_current_device_resource()) + device_async_resource_ref mr = mr::get_current_device_resource_ref()) : _storage{1, stream, mr} { } @@ -117,7 +117,7 @@ class device_scalar { */ explicit device_scalar(value_type const& initial_value, cuda_stream_view stream, - device_async_resource_ref mr = mr::get_current_device_resource()) + device_async_resource_ref mr = mr::get_current_device_resource_ref()) : _storage{1, stream, mr} { set_value_async(initial_value, stream); @@ -137,7 +137,7 @@ class device_scalar { */ device_scalar(device_scalar const& other, cuda_stream_view stream, - device_async_resource_ref mr = mr::get_current_device_resource()) + device_async_resource_ref mr = mr::get_current_device_resource_ref()) : _storage{other._storage, stream, mr} { } diff --git a/include/rmm/device_uvector.hpp b/include/rmm/device_uvector.hpp index ff6a7d837..111f194c0 100644 --- a/include/rmm/device_uvector.hpp +++ b/include/rmm/device_uvector.hpp @@ -125,7 +125,7 @@ class device_uvector { */ explicit device_uvector(std::size_t size, cuda_stream_view stream, - device_async_resource_ref mr = mr::get_current_device_resource()) + device_async_resource_ref mr = mr::get_current_device_resource_ref()) : _storage{elements_to_bytes(size), stream, mr} { } @@ -141,7 +141,7 @@ class device_uvector { */ explicit device_uvector(device_uvector const& other, cuda_stream_view stream, - device_async_resource_ref mr = mr::get_current_device_resource()) + device_async_resource_ref mr = mr::get_current_device_resource_ref()) : _storage{other._storage, stream, mr} { } diff --git a/include/rmm/exec_policy.hpp b/include/rmm/exec_policy.hpp index a343afb8b..713094441 100644 --- a/include/rmm/exec_policy.hpp +++ b/include/rmm/exec_policy.hpp @@ -56,7 +56,7 @@ class exec_policy : public thrust_exec_policy_t { * @param mr The resource to use for allocating temporary memory */ explicit exec_policy(cuda_stream_view stream = cuda_stream_default, - device_async_resource_ref mr = mr::get_current_device_resource()) + device_async_resource_ref mr = mr::get_current_device_resource_ref()) : thrust_exec_policy_t( thrust::cuda::par(mr::thrust_allocator(stream, mr)).on(stream.value())) { @@ -80,7 +80,7 @@ using thrust_exec_policy_nosync_t = class exec_policy_nosync : public thrust_exec_policy_nosync_t { public: explicit exec_policy_nosync(cuda_stream_view stream = cuda_stream_default, - device_async_resource_ref mr = mr::get_current_device_resource()) + device_async_resource_ref mr = mr::get_current_device_resource_ref()) : thrust_exec_policy_nosync_t( thrust::cuda::par_nosync(mr::thrust_allocator(stream, mr)).on(stream.value())) { diff --git a/include/rmm/mr/device/failure_callback_resource_adaptor.hpp b/include/rmm/mr/device/failure_callback_resource_adaptor.hpp index f1a2178ee..0ed274a75 100644 --- a/include/rmm/mr/device/failure_callback_resource_adaptor.hpp +++ b/include/rmm/mr/device/failure_callback_resource_adaptor.hpp @@ -82,7 +82,7 @@ using failure_callback_t = std::function; * failure_callback_adaptor mr{ * rmm::mr::get_current_device_resource_ref(), failure_handler, &retried * }; - * rmm::mr::set_current_device_resource(&mr); + * rmm::mr::set_current_device_resource_ref(&mr); * } * @endcode * diff --git a/tests/device_buffer_tests.cu b/tests/device_buffer_tests.cu index c095eecf8..5e48504d6 100644 --- a/tests/device_buffer_tests.cu +++ b/tests/device_buffer_tests.cu @@ -75,7 +75,7 @@ TYPED_TEST(DeviceBufferTest, DefaultMemoryResource) 
EXPECT_EQ(this->size, buff.size()); EXPECT_EQ(this->size, buff.ssize()); EXPECT_EQ(this->size, buff.capacity()); - EXPECT_EQ(rmm::device_async_resource_ref{rmm::mr::get_current_device_resource()}, + EXPECT_EQ(rmm::device_async_resource_ref{rmm::mr::get_current_device_resource_ref()}, buff.memory_resource()); EXPECT_EQ(rmm::cuda_stream_view{}, buff.stream()); } @@ -87,7 +87,7 @@ TYPED_TEST(DeviceBufferTest, DefaultMemoryResourceStream) EXPECT_NE(nullptr, buff.data()); EXPECT_EQ(this->size, buff.size()); EXPECT_EQ(this->size, buff.capacity()); - EXPECT_EQ(rmm::device_async_resource_ref{rmm::mr::get_current_device_resource()}, + EXPECT_EQ(rmm::device_async_resource_ref{rmm::mr::get_current_device_resource_ref()}, buff.memory_resource()); EXPECT_EQ(this->stream, buff.stream()); } @@ -121,7 +121,7 @@ TYPED_TEST(DeviceBufferTest, CopyFromRawDevicePointer) EXPECT_NE(nullptr, buff.data()); EXPECT_EQ(this->size, buff.size()); EXPECT_EQ(this->size, buff.capacity()); - EXPECT_EQ(rmm::device_async_resource_ref{rmm::mr::get_current_device_resource()}, + EXPECT_EQ(rmm::device_async_resource_ref{rmm::mr::get_current_device_resource_ref()}, buff.memory_resource()); EXPECT_EQ(rmm::cuda_stream_view{}, buff.stream()); @@ -138,7 +138,7 @@ TYPED_TEST(DeviceBufferTest, CopyFromRawHostPointer) EXPECT_NE(nullptr, buff.data()); EXPECT_EQ(this->size, buff.size()); EXPECT_EQ(this->size, buff.capacity()); - EXPECT_EQ(rmm::device_async_resource_ref{rmm::mr::get_current_device_resource()}, + EXPECT_EQ(rmm::device_async_resource_ref{rmm::mr::get_current_device_resource_ref()}, buff.memory_resource()); EXPECT_EQ(rmm::cuda_stream_view{}, buff.stream()); buff.stream().synchronize(); @@ -152,7 +152,7 @@ TYPED_TEST(DeviceBufferTest, CopyFromNullptr) EXPECT_EQ(nullptr, buff.data()); EXPECT_EQ(0, buff.size()); EXPECT_EQ(0, buff.capacity()); - EXPECT_EQ(rmm::device_async_resource_ref{rmm::mr::get_current_device_resource()}, + EXPECT_EQ(rmm::device_async_resource_ref{rmm::mr::get_current_device_resource_ref()}, buff.memory_resource()); EXPECT_EQ(rmm::cuda_stream_view{}, buff.stream()); } @@ -180,7 +180,7 @@ TYPED_TEST(DeviceBufferTest, CopyConstructor) EXPECT_EQ(buff.size(), buff_copy.size()); EXPECT_EQ(buff.capacity(), buff_copy.capacity()); EXPECT_EQ(buff_copy.memory_resource(), - rmm::device_async_resource_ref{rmm::mr::get_current_device_resource()}); + rmm::device_async_resource_ref{rmm::mr::get_current_device_resource_ref()}); EXPECT_EQ(buff_copy.stream(), rmm::cuda_stream_view{}); EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), @@ -223,7 +223,7 @@ TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSize) // The capacity of the copy should be equal to the `size()` of the original EXPECT_EQ(new_size, buff_copy.capacity()); EXPECT_EQ(buff_copy.memory_resource(), - rmm::device_async_resource_ref{rmm::mr::get_current_device_resource()}); + rmm::device_async_resource_ref{rmm::mr::get_current_device_resource_ref()}); EXPECT_EQ(buff_copy.stream(), rmm::cuda_stream_view{}); EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), diff --git a/tests/device_check_resource_adaptor.hpp b/tests/device_check_resource_adaptor.hpp index c77c078db..23b0c4bb1 100644 --- a/tests/device_check_resource_adaptor.hpp +++ b/tests/device_check_resource_adaptor.hpp @@ -65,5 +65,5 @@ class device_check_resource_adaptor final : public rmm::mr::device_memory_resour } rmm::cuda_device_id device_id; - rmm::device_async_resource_ref upstream_{rmm::mr::get_current_device_resource()}; + rmm::device_async_resource_ref 
upstream_{rmm::mr::get_current_device_resource_ref()}; }; diff --git a/tests/device_scalar_tests.cpp b/tests/device_scalar_tests.cpp index 6f80a5de1..d0c5e327f 100644 --- a/tests/device_scalar_tests.cpp +++ b/tests/device_scalar_tests.cpp @@ -38,7 +38,7 @@ struct DeviceScalarTest : public ::testing::Test { std::default_random_engine generator{}; T value{}; rmm::cuda_stream stream{}; - rmm::device_async_resource_ref mr{rmm::mr::get_current_device_resource()}; + rmm::device_async_resource_ref mr{rmm::mr::get_current_device_resource_ref()}; DeviceScalarTest() : value{random_value()} {} diff --git a/tests/device_uvector_tests.cpp b/tests/device_uvector_tests.cpp index 1c93ef138..90955c24c 100644 --- a/tests/device_uvector_tests.cpp +++ b/tests/device_uvector_tests.cpp @@ -39,7 +39,7 @@ TYPED_TEST(TypedUVectorTest, MemoryResource) { rmm::device_uvector vec(128, this->stream()); EXPECT_EQ(vec.memory_resource(), - rmm::device_async_resource_ref{rmm::mr::get_current_device_resource()}); + rmm::device_async_resource_ref{rmm::mr::get_current_device_resource_ref()}); } TYPED_TEST(TypedUVectorTest, ZeroSizeConstructor) diff --git a/tests/mr/device/aligned_mr_tests.cpp b/tests/mr/device/aligned_mr_tests.cpp index 85262c29d..9b90bf751 100644 --- a/tests/mr/device/aligned_mr_tests.cpp +++ b/tests/mr/device/aligned_mr_tests.cpp @@ -204,7 +204,7 @@ TEST(AlignedTest, AlignRealPointer) { auto const alignment{4096}; auto const threshold{65536}; - aligned_real mr{rmm::mr::get_current_device_resource(), alignment, threshold}; + aligned_real mr{rmm::mr::get_current_device_resource_ref(), alignment, threshold}; void* alloc = mr.allocate(threshold); EXPECT_TRUE(rmm::is_pointer_aligned(alloc, alignment)); mr.deallocate(alloc, threshold); diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 6b7468d74..9db623394 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -482,7 +482,7 @@ TEST_F(ArenaTest, ThrowOnNullUpstream) // NOLINT TEST_F(ArenaTest, SizeSmallerThanSuperblockSize) // NOLINT { - auto construct_small = []() { arena_mr mr{rmm::mr::get_current_device_resource(), 256}; }; + auto construct_small = []() { arena_mr mr{rmm::mr::get_current_device_resource_ref(), 256}; }; // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto) EXPECT_THROW(construct_small(), rmm::logic_error); } @@ -493,14 +493,14 @@ TEST_F(ArenaTest, AllocateNinetyPercent) // NOLINT auto const free = rmm::available_device_memory().first; auto const ninety_percent = rmm::align_up( static_cast(static_cast(free) * 0.9), rmm::CUDA_ALLOCATION_ALIGNMENT); - arena_mr mr(rmm::mr::get_current_device_resource(), ninety_percent); + arena_mr mr(rmm::mr::get_current_device_resource_ref(), ninety_percent); }()); } TEST_F(ArenaTest, SmallMediumLarge) // NOLINT { EXPECT_NO_THROW([]() { // NOLINT(cppcoreguidelines-avoid-goto) - arena_mr mr(rmm::mr::get_current_device_resource()); + arena_mr mr(rmm::mr::get_current_device_resource_ref()); auto* small = mr.allocate(256); auto* medium = mr.allocate(64_MiB); auto const free = rmm::available_device_memory().first; @@ -515,7 +515,7 @@ TEST_F(ArenaTest, Defragment) // NOLINT { EXPECT_NO_THROW([]() { // NOLINT(cppcoreguidelines-avoid-goto) auto const arena_size = superblock::minimum_size * 4; - arena_mr mr(rmm::mr::get_current_device_resource(), arena_size); + arena_mr mr(rmm::mr::get_current_device_resource_ref(), arena_size); std::vector threads; std::size_t num_threads{4}; threads.reserve(num_threads); @@ -542,7 +542,7 @@ TEST_F(ArenaTest, 
PerThreadToStreamDealloc) // NOLINT // arena that then moved to global arena during a defragmentation // and then moved to a stream arena. auto const arena_size = superblock::minimum_size * 2; - arena_mr mr(rmm::mr::get_current_device_resource(), arena_size); + arena_mr mr(rmm::mr::get_current_device_resource_ref(), arena_size); // Create an allocation from a per thread arena void* thread_ptr = mr.allocate(256, rmm::cuda_stream_per_thread); // Create an allocation in a stream arena to force global arena @@ -568,7 +568,7 @@ TEST_F(ArenaTest, PerThreadToStreamDealloc) // NOLINT TEST_F(ArenaTest, DumpLogOnFailure) // NOLINT { - arena_mr mr{rmm::mr::get_current_device_resource(), 1_MiB, true}; + arena_mr mr{rmm::mr::get_current_device_resource_ref(), 1_MiB, true}; { // make the log interesting std::vector threads; diff --git a/tests/mr/device/failure_callback_mr_tests.cpp b/tests/mr/device/failure_callback_mr_tests.cpp index 683aee86e..4b3d084d5 100644 --- a/tests/mr/device/failure_callback_mr_tests.cpp +++ b/tests/mr/device/failure_callback_mr_tests.cpp @@ -47,7 +47,8 @@ bool failure_handler(std::size_t /*bytes*/, void* arg) TEST(FailureCallbackTest, RetryAllocationOnce) { bool retried{false}; - failure_callback_adaptor<> mr{rmm::mr::get_current_device_resource(), failure_handler, &retried}; + failure_callback_adaptor<> mr{ + rmm::mr::get_current_device_resource_ref(), failure_handler, &retried}; EXPECT_EQ(retried, false); EXPECT_THROW(mr.allocate(512_GiB), std::bad_alloc); EXPECT_EQ(retried, true); diff --git a/tests/mr/device/limiting_mr_tests.cpp b/tests/mr/device/limiting_mr_tests.cpp index 3a6178c95..58dbfd5f5 100644 --- a/tests/mr/device/limiting_mr_tests.cpp +++ b/tests/mr/device/limiting_mr_tests.cpp @@ -37,7 +37,7 @@ TEST(LimitingTest, ThrowOnNullUpstream) TEST(LimitingTest, TooBig) { auto const max_size{5_MiB}; - limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size}; + limiting_adaptor mr{rmm::mr::get_current_device_resource_ref(), max_size}; EXPECT_THROW(mr.allocate(max_size + 1), rmm::out_of_memory); } @@ -45,7 +45,7 @@ TEST(LimitingTest, UpstreamFailure) { auto const max_size_1{2_MiB}; auto const max_size_2{5_MiB}; - limiting_adaptor mr1{rmm::mr::get_current_device_resource(), max_size_1}; + limiting_adaptor mr1{rmm::mr::get_current_device_resource_ref(), max_size_1}; limiting_adaptor mr2{&mr1, max_size_2}; EXPECT_THROW(mr2.allocate(4_MiB), rmm::out_of_memory); } @@ -53,7 +53,7 @@ TEST(LimitingTest, UpstreamFailure) TEST(LimitingTest, UnderLimitDueToFrees) { auto const max_size{10_MiB}; - limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size}; + limiting_adaptor mr{rmm::mr::get_current_device_resource_ref(), max_size}; auto const size1{4_MiB}; auto* ptr1 = mr.allocate(size1); auto allocated_bytes = size1; @@ -81,7 +81,7 @@ TEST(LimitingTest, UnderLimitDueToFrees) TEST(LimitingTest, OverLimit) { auto const max_size{10_MiB}; - limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size}; + limiting_adaptor mr{rmm::mr::get_current_device_resource_ref(), max_size}; auto const size1{4_MiB}; auto* ptr1 = mr.allocate(size1); auto allocated_bytes = size1; diff --git a/tests/mr/device/thrust_allocator_tests.cu b/tests/mr/device/thrust_allocator_tests.cu index 91ae396ed..84f599957 100644 --- a/tests/mr/device/thrust_allocator_tests.cu +++ b/tests/mr/device/thrust_allocator_tests.cu @@ -37,9 +37,9 @@ namespace { struct allocator_test : public mr_ref_test {}; // Disable until we support resource_ref with set_current_device_resource 
-/*TEST_P(allocator_test, first) +TEST_P(allocator_test, first) { - rmm::mr::set_current_device_resource(this->mr.get()); + rmm::mr::set_current_device_resource_ref(this->ref); auto const num_ints{100}; rmm::device_vector ints(num_ints, 1); EXPECT_EQ(num_ints, thrust::reduce(ints.begin(), ints.end())); @@ -47,12 +47,12 @@ struct allocator_test : public mr_ref_test {}; TEST_P(allocator_test, defaults) { - rmm::mr::set_current_device_resource(this->mr.get()); + rmm::mr::set_current_device_resource_ref(this->ref); rmm::mr::thrust_allocator allocator(rmm::cuda_stream_default); EXPECT_EQ(allocator.stream(), rmm::cuda_stream_default); EXPECT_EQ(allocator.get_upstream_resource(), - rmm::device_async_resource_ref{rmm::mr::get_current_device_resource()}); -}*/ + rmm::device_async_resource_ref{rmm::mr::get_current_device_resource_ref()}); +} TEST_P(allocator_test, multi_device) { From 8d2bc627255e9a991092b0d8db269118d984e6b9 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 28 Aug 2024 01:28:02 +0000 Subject: [PATCH 13/24] Remove an utterance of device_memory_resource* in test. --- tests/mr/device/adaptor_tests.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/mr/device/adaptor_tests.cpp b/tests/mr/device/adaptor_tests.cpp index a757a78b0..286daa841 100644 --- a/tests/mr/device/adaptor_tests.cpp +++ b/tests/mr/device/adaptor_tests.cpp @@ -129,8 +129,7 @@ TYPED_TEST(AdaptorTest, Equality) } { - rmm::mr::device_memory_resource* device_mr = &this->cuda; - auto other_mr = aligned_resource_adaptor{device_mr}; + auto other_mr = aligned_resource_adaptor{&this->cuda}; EXPECT_FALSE(this->mr->is_equal(other_mr)); } } From 3d2ef083801a70294500d52b444202d8b598b867 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 28 Aug 2024 01:28:42 +0000 Subject: [PATCH 14/24] Remove another unnecessary device_memory_resource* --- include/rmm/device_uvector.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rmm/device_uvector.hpp b/include/rmm/device_uvector.hpp index 2dcbc01ff..13f566150 100644 --- a/include/rmm/device_uvector.hpp +++ b/include/rmm/device_uvector.hpp @@ -48,7 +48,7 @@ namespace RMM_NAMESPACE { * * Example: * @code{.cpp} - * rmm::mr::device_memory_resource * mr = new my_custom_resource(); + * auto mr = new my_custom_resource(); * rmm::cuda_stream_view s{}; * * // Allocates *uninitialized* device memory on stream `s` sufficient for 100 ints using the From 13172c5ab12d30f6573217802b97d2d58d95234d Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 28 Aug 2024 01:28:56 +0000 Subject: [PATCH 15/24] Copyrights --- python/rmm/rmm/_lib/_torch_allocator.cpp | 2 +- tests/cuda_stream_tests.cpp | 2 +- tests/device_scalar_tests.cpp | 2 +- tests/mr/device/callback_mr_tests.cpp | 2 +- tests/mr/device/limiting_mr_tests.cpp | 2 +- tests/mr/device/tracking_mr_tests.cpp | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/rmm/rmm/_lib/_torch_allocator.cpp b/python/rmm/rmm/_lib/_torch_allocator.cpp index 64885cc2f..bfe94c2d0 100644 --- a/python/rmm/rmm/_lib/_torch_allocator.cpp +++ b/python/rmm/rmm/_lib/_torch_allocator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/tests/cuda_stream_tests.cpp b/tests/cuda_stream_tests.cpp index c4d0a6b9f..ec7e6c3e9 100644 --- a/tests/cuda_stream_tests.cpp +++ b/tests/cuda_stream_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/tests/device_scalar_tests.cpp b/tests/device_scalar_tests.cpp index d0c5e327f..323894a6a 100644 --- a/tests/device_scalar_tests.cpp +++ b/tests/device_scalar_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/tests/mr/device/callback_mr_tests.cpp b/tests/mr/device/callback_mr_tests.cpp index 445e0aed5..a56efa60c 100644 --- a/tests/mr/device/callback_mr_tests.cpp +++ b/tests/mr/device/callback_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/tests/mr/device/limiting_mr_tests.cpp b/tests/mr/device/limiting_mr_tests.cpp index 58dbfd5f5..e6cc97029 100644 --- a/tests/mr/device/limiting_mr_tests.cpp +++ b/tests/mr/device/limiting_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/tests/mr/device/tracking_mr_tests.cpp b/tests/mr/device/tracking_mr_tests.cpp index 45284144f..6eeb6ccc3 100644 --- a/tests/mr/device/tracking_mr_tests.cpp +++ b/tests/mr/device/tracking_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 4884f536fb22e5447ba1898fdf86b54ee2693904 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 29 Aug 2024 01:20:26 +0000 Subject: [PATCH 16/24] Add `to_device_async_resource_ref_checked()` to simplify adaptor constructors. --- include/rmm/resource_ref.hpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/include/rmm/resource_ref.hpp b/include/rmm/resource_ref.hpp index 08942a040..6a34a6a47 100644 --- a/include/rmm/resource_ref.hpp +++ b/include/rmm/resource_ref.hpp @@ -65,5 +65,21 @@ using host_device_resource_ref = using host_device_async_resource_ref = cuda::mr::async_resource_ref; +/** + * @brief Convert pointer to memory resource into `device_async_resource_ref`, checking for + * `nullptr` + * + * @tparam Resource The type of the memory resource. + * @param res A pointer to the memory resource. + * @return A `device_async_resource_ref` to the memory resource. + * @throws std::logic_error if the memory resource pointer is null. 
+ */ +template +device_async_resource_ref to_device_async_resource_ref_checked(Resource* res) +{ + RMM_EXPECTS(nullptr != res, "Unexpected null resource pointer."); + return device_async_resource_ref{*res}; +} + /** @} */ // end of group } // namespace RMM_NAMESPACE From 1bcb81ad80a07703ed06561f45374aa37f651531 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 29 Aug 2024 01:24:12 +0000 Subject: [PATCH 17/24] Use to_device_async_resource_ref_checked() and more adaptor tidy / conversion to refs --- .../mr/device/aligned_resource_adaptor.hpp | 7 +- .../rmm/mr/device/arena_memory_resource.hpp | 7 +- .../rmm/mr/device/binning_memory_resource.hpp | 28 ++---- .../failure_callback_resource_adaptor.hpp | 7 +- .../mr/device/fixed_size_memory_resource.hpp | 50 +++++++--- .../mr/device/limiting_resource_adaptor.hpp | 33 +++---- .../mr/device/logging_resource_adaptor.hpp | 93 +++++++++++++++---- include/rmm/mr/device/per_device_resource.hpp | 1 + .../rmm/mr/device/pool_memory_resource.hpp | 5 +- .../mr/device/prefetch_resource_adaptor.hpp | 28 +++--- .../mr/device/statistics_resource_adaptor.hpp | 19 ++-- .../device/thread_safe_resource_adaptor.hpp | 28 +++--- .../mr/device/tracking_resource_adaptor.hpp | 16 ++-- tests/device_check_resource_adaptor.hpp | 2 +- 14 files changed, 193 insertions(+), 131 deletions(-) diff --git a/include/rmm/mr/device/aligned_resource_adaptor.hpp b/include/rmm/mr/device/aligned_resource_adaptor.hpp index 40d9e7c33..f99348906 100644 --- a/include/rmm/mr/device/aligned_resource_adaptor.hpp +++ b/include/rmm/mr/device/aligned_resource_adaptor.hpp @@ -91,10 +91,7 @@ class aligned_resource_adaptor final : public device_memory_resource { explicit aligned_resource_adaptor(Upstream* upstream, std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT, std::size_t alignment_threshold = default_alignment_threshold) - : upstream_{[upstream]() { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); - return device_async_resource_ref{*upstream}; - }()}, + : upstream_{to_device_async_resource_ref_checked(upstream)}, alignment_{alignment}, alignment_threshold_{alignment_threshold} { @@ -209,7 +206,7 @@ class aligned_resource_adaptor final : public device_memory_resource { } /// The upstream resource used for satisfying allocation requests - device_async_resource_ref upstream_{rmm::mr::get_current_device_resource_ref()}; + device_async_resource_ref upstream_; std::unordered_map pointers_; ///< Map of aligned pointers to upstream pointers. 
std::size_t alignment_; ///< The size used for allocation alignment std::size_t alignment_threshold_; ///< The size above which allocations should be aligned diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 31d26e98d..417b7d2b4 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -116,12 +116,7 @@ class arena_memory_resource final : public device_memory_resource { explicit arena_memory_resource(Upstream* upstream_mr, std::optional arena_size = std::nullopt, bool dump_log_on_failure = false) - : global_arena_{[upstream_mr]() { - RMM_EXPECTS(upstream_mr != nullptr, - "Unexpected null upstream memory resource."); - return device_async_resource_ref{*upstream_mr}; - }(), - arena_size}, + : global_arena_{to_device_async_resource_ref_checked(upstream_mr), arena_size}, dump_log_on_failure_{dump_log_on_failure} { if (dump_log_on_failure_) { diff --git a/include/rmm/mr/device/binning_memory_resource.hpp b/include/rmm/mr/device/binning_memory_resource.hpp index 773035231..7daa77328 100644 --- a/include/rmm/mr/device/binning_memory_resource.hpp +++ b/include/rmm/mr/device/binning_memory_resource.hpp @@ -57,10 +57,7 @@ class binning_memory_resource final : public device_memory_resource { * @param upstream_resource The upstream memory resource used to allocate bin pools. */ explicit binning_memory_resource(Upstream* upstream_resource) - : upstream_mr_{[upstream_resource]() { - RMM_EXPECTS(nullptr != upstream_resource, "Unexpected null upstream pointer."); - return upstream_resource; - }()} + : upstream_mr_{to_device_async_resource_ref_checked(upstream_resource)} { } @@ -79,10 +76,7 @@ class binning_memory_resource final : public device_memory_resource { binning_memory_resource(Upstream* upstream_resource, int8_t min_size_exponent, // NOLINT(bugprone-easily-swappable-parameters) int8_t max_size_exponent) - : upstream_mr_{[upstream_resource]() { - RMM_EXPECTS(nullptr != upstream_resource, "Unexpected null upstream pointer."); - return upstream_resource; - }()} + : upstream_mr_{to_device_async_resource_ref_checked(upstream_resource)} { for (auto i = min_size_exponent; i <= max_size_exponent; i++) { add_bin(1 << i); @@ -102,18 +96,13 @@ class binning_memory_resource final : public device_memory_resource { binning_memory_resource& operator=(binning_memory_resource&&) = delete; /** - * @briefreturn{rmm::device_async_resource_ref to the upstream resource} + * @briefreturn{device_async_resource_ref to the upstream resource} */ - [[nodiscard]] rmm::device_async_resource_ref get_upstream_resource() const noexcept + [[nodiscard]] device_async_resource_ref get_upstream_resource() const noexcept { return upstream_mr_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_mr_; } - /** * @brief Add a bin allocator to this resource * @@ -132,7 +121,7 @@ class binning_memory_resource final : public device_memory_resource { */ void add_bin(std::size_t allocation_size, device_memory_resource* bin_resource = nullptr) { - allocation_size = rmm::align_up(allocation_size, rmm::CUDA_ALLOCATION_ALIGNMENT); + allocation_size = align_up(allocation_size, CUDA_ALLOCATION_ALIGNMENT); if (nullptr != bin_resource) { resource_bins_.insert({allocation_size, bin_resource}); @@ -153,10 +142,10 @@ class binning_memory_resource final : public device_memory_resource { * @param bytes Requested allocation size in bytes * 
@return Get the resource reference for the requested size. */ - rmm::device_async_resource_ref get_resource_ref(std::size_t bytes) + device_async_resource_ref get_resource_ref(std::size_t bytes) { auto iter = resource_bins_.lower_bound(bytes); - return (iter != resource_bins_.cend()) ? rmm::device_async_resource_ref{iter->second} + return (iter != resource_bins_.cend()) ? device_async_resource_ref{iter->second} : get_upstream_resource(); } @@ -188,7 +177,8 @@ class binning_memory_resource final : public device_memory_resource { get_resource_ref(bytes).deallocate_async(ptr, bytes, stream); } - Upstream* upstream_mr_; // The upstream memory_resource from which to allocate blocks. + device_async_resource_ref + upstream_mr_; // The upstream memory_resource from which to allocate blocks. std::vector>> owned_bin_resources_; diff --git a/include/rmm/mr/device/failure_callback_resource_adaptor.hpp b/include/rmm/mr/device/failure_callback_resource_adaptor.hpp index d8c90b564..264a5a532 100644 --- a/include/rmm/mr/device/failure_callback_resource_adaptor.hpp +++ b/include/rmm/mr/device/failure_callback_resource_adaptor.hpp @@ -126,10 +126,7 @@ class failure_callback_resource_adaptor final : public device_memory_resource { failure_callback_resource_adaptor(Upstream* upstream, failure_callback_t callback, void* callback_arg) - : upstream_{[upstream]() { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); - return device_async_resource_ref{*upstream}; - }()}, + : upstream_{to_device_async_resource_ref_checked(upstream)}, callback_{std::move(callback)}, callback_arg_{callback_arg} { @@ -207,7 +204,7 @@ class failure_callback_resource_adaptor final : public device_memory_resource { } // the upstream resource used for satisfying allocation requests - device_async_resource_ref upstream_{rmm::mr::get_current_device_resource_ref()}; + device_async_resource_ref upstream_; failure_callback_t callback_; void* callback_arg_; }; diff --git a/include/rmm/mr/device/fixed_size_memory_resource.hpp b/include/rmm/mr/device/fixed_size_memory_resource.hpp index 84cb3b0c6..249af77dd 100644 --- a/include/rmm/mr/device/fixed_size_memory_resource.hpp +++ b/include/rmm/mr/device/fixed_size_memory_resource.hpp @@ -65,7 +65,31 @@ class fixed_size_memory_resource /** * @brief Construct a new `fixed_size_memory_resource` that allocates memory from - * `upstream_resource`. + * `upstream_mr`. + * + * When the pool of blocks is all allocated, grows the pool by allocating + * `blocks_to_preallocate` more blocks from `upstream_mr`. + * + * @param upstream_mr The device_async_resource_ref from which to allocate blocks for the pool. + * @param block_size The size of blocks to allocate. + * @param blocks_to_preallocate The number of blocks to allocate to initialize the pool. + */ + explicit fixed_size_memory_resource( + device_async_resource_ref upstream_mr, + // NOLINTNEXTLINE bugprone-easily-swappable-parameters + std::size_t block_size = default_block_size, + std::size_t blocks_to_preallocate = default_blocks_to_preallocate) + : upstream_mr_{upstream_mr}, + block_size_{align_up(block_size, CUDA_ALLOCATION_ALIGNMENT)}, + upstream_chunk_size_{block_size_ * blocks_to_preallocate} + { + // allocate initial blocks and insert into free list + this->insert_blocks(std::move(blocks_from_upstream(cuda_stream_legacy)), cuda_stream_legacy); + } + + /** + * @brief Construct a new `fixed_size_memory_resource` that allocates memory from + * `upstream_mr`. 
* * When the pool of blocks is all allocated, grows the pool by allocating * `blocks_to_preallocate` more blocks from `upstream_mr`. @@ -76,11 +100,12 @@ class fixed_size_memory_resource */ explicit fixed_size_memory_resource( Upstream* upstream_mr, + // NOLINTNEXTLINE bugprone-easily-swappable-parameters std::size_t block_size = default_block_size, std::size_t blocks_to_preallocate = default_blocks_to_preallocate) - : upstream_mr_{upstream_mr}, - block_size_{rmm::align_up(block_size, rmm::CUDA_ALLOCATION_ALIGNMENT)}, - upstream_chunk_size_{block_size * blocks_to_preallocate} + : upstream_mr_{to_device_async_resource_ref_checked(upstream_mr)}, + block_size_{align_up(block_size, CUDA_ALLOCATION_ALIGNMENT)}, + upstream_chunk_size_{block_size_ * blocks_to_preallocate} { // allocate initial blocks and insert into free list this->insert_blocks(std::move(blocks_from_upstream(cuda_stream_legacy)), cuda_stream_legacy); @@ -99,18 +124,13 @@ class fixed_size_memory_resource fixed_size_memory_resource& operator=(fixed_size_memory_resource&&) = delete; /** - * @briefreturn{rmm::device_async_resource_ref to the upstream resource} + * @briefreturn{device_async_resource_ref to the upstream resource} */ - [[nodiscard]] rmm::device_async_resource_ref get_upstream_resource() const noexcept + [[nodiscard]] device_async_resource_ref get_upstream_resource() const noexcept { return upstream_mr_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_mr_; } - /** * @brief Get the size of blocks allocated by this memory resource. * @@ -200,7 +220,7 @@ class fixed_size_memory_resource { // Deallocating a fixed-size block just inserts it in the free list, which is // handled by the parent class - RMM_LOGGING_ASSERT(rmm::align_up(size, rmm::CUDA_ALLOCATION_ALIGNMENT) <= block_size_); + RMM_LOGGING_ASSERT(align_up(size, CUDA_ALLOCATION_ALIGNMENT) <= block_size_); return block_type{ptr}; } @@ -254,10 +274,10 @@ class fixed_size_memory_resource } private: - Upstream* upstream_mr_; // The resource from which to allocate new blocks + device_async_resource_ref upstream_mr_; // The resource from which to allocate new blocks - std::size_t const block_size_; // size of blocks this MR allocates - std::size_t const upstream_chunk_size_; // size of chunks allocated from heap MR + std::size_t block_size_; // size of blocks this MR allocates + std::size_t upstream_chunk_size_; // size of chunks allocated from heap MR // blocks allocated from heap: so they can be easily freed std::vector upstream_blocks_; diff --git a/include/rmm/mr/device/limiting_resource_adaptor.hpp b/include/rmm/mr/device/limiting_resource_adaptor.hpp index 2e9e08e22..eba352e78 100644 --- a/include/rmm/mr/device/limiting_resource_adaptor.hpp +++ b/include/rmm/mr/device/limiting_resource_adaptor.hpp @@ -58,11 +58,11 @@ class limiting_resource_adaptor final : public device_memory_resource { */ limiting_resource_adaptor(device_async_resource_ref upstream, std::size_t allocation_limit, - std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT) - : allocation_limit_{allocation_limit}, + std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) + : upstream_{upstream}, + allocation_limit_{allocation_limit}, allocated_bytes_(0), - alignment_(alignment), - upstream_{upstream} + alignment_(alignment) { } @@ -78,14 +78,11 @@ class limiting_resource_adaptor final : public device_memory_resource { */ limiting_resource_adaptor(Upstream* upstream, std::size_t allocation_limit, - std::size_t 
alignment = rmm::CUDA_ALLOCATION_ALIGNMENT) - : allocation_limit_{allocation_limit}, + std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) + : upstream_{to_device_async_resource_ref_checked(upstream)}, + allocation_limit_{allocation_limit}, allocated_bytes_(0), - alignment_(alignment), - upstream_{[upstream]() { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); - return device_async_resource_ref{*upstream}; - }()} + alignment_(alignment) { } @@ -99,9 +96,9 @@ class limiting_resource_adaptor final : public device_memory_resource { default; ///< @default_move_assignment{limiting_resource_adaptor} /** - * @briefreturn{rmm::device_async_resource_ref to the upstream resource} + * @briefreturn{device_async_resource_ref to the upstream resource} */ - [[nodiscard]] rmm::device_async_resource_ref get_upstream_resource() const noexcept + [[nodiscard]] device_async_resource_ref get_upstream_resource() const noexcept { return upstream_; } @@ -142,7 +139,7 @@ class limiting_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - auto const proposed_size = rmm::align_up(bytes, alignment_); + auto const proposed_size = align_up(bytes, alignment_); auto const old = allocated_bytes_.fetch_add(proposed_size); if (old + proposed_size <= allocation_limit_) { try { @@ -166,7 +163,7 @@ class limiting_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - std::size_t allocated_size = rmm::align_up(bytes, alignment_); + std::size_t allocated_size = align_up(bytes, alignment_); get_upstream_resource().deallocate_async(ptr, bytes, stream); allocated_bytes_ -= allocated_size; } @@ -186,6 +183,9 @@ class limiting_resource_adaptor final : public device_memory_resource { return get_upstream_resource() == cast->get_upstream_resource(); } + // The upstream resource used for satisfying allocation requests + device_async_resource_ref upstream_; + // maximum bytes this allocator is allowed to allocate. std::size_t allocation_limit_; @@ -194,9 +194,6 @@ class limiting_resource_adaptor final : public device_memory_resource { // todo: should be some way to ask the upstream... std::size_t alignment_; - - // The upstream resource used for satisfying allocation requests - device_async_resource_ref upstream_{rmm::mr::get_current_device_resource_ref()}; }; /** diff --git a/include/rmm/mr/device/logging_resource_adaptor.hpp b/include/rmm/mr/device/logging_resource_adaptor.hpp index 6f6cd816a..08fe4dd88 100644 --- a/include/rmm/mr/device/logging_resource_adaptor.hpp +++ b/include/rmm/mr/device/logging_resource_adaptor.hpp @@ -77,10 +77,8 @@ class logging_resource_adaptor final : public device_memory_resource { logging_resource_adaptor(Upstream* upstream, std::string const& filename = get_default_filename(), bool auto_flush = false) - : logger_{make_logger(filename)}, upstream_{upstream} + : logger_{make_logger(filename)}, upstream_{to_device_async_resource_ref_checked(upstream)} { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); - init_logger(auto_flush); } @@ -99,10 +97,8 @@ class logging_resource_adaptor final : public device_memory_resource { * performance. 
*/ logging_resource_adaptor(Upstream* upstream, std::ostream& stream, bool auto_flush = false) - : logger_{make_logger(stream)}, upstream_{upstream} + : logger_{make_logger(stream)}, upstream_{to_device_async_resource_ref_checked(upstream)} { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); - init_logger(auto_flush); } @@ -123,10 +119,76 @@ class logging_resource_adaptor final : public device_memory_resource { logging_resource_adaptor(Upstream* upstream, spdlog::sinks_init_list sinks, bool auto_flush = false) - : logger_{make_logger(sinks)}, upstream_{upstream} + : logger_{make_logger(sinks)}, upstream_{to_device_async_resource_ref_checked(upstream)} + { + init_logger(auto_flush); + } + + /** + * @brief Construct a new logging resource adaptor using `upstream` to satisfy + * allocation requests and logging information about each allocation/free to + * the file specified by `filename`. + * + * The logfile will be written using CSV formatting. + * + * Clears the contents of `filename` if it already exists. + * + * Creating multiple `logging_resource_adaptor`s with the same `filename` will + * result in undefined behavior. + * + * @throws spdlog::spdlog_ex if opening `filename` failed + * + * @param upstream The resource_ref used for allocating/deallocating device memory. + * @param filename Name of file to write log info. If not specified, retrieves + * the file name from the environment variable "RMM_LOG_FILE". + * @param auto_flush If true, flushes the log for every (de)allocation. Warning, this will degrade + * performance. + */ + logging_resource_adaptor(device_async_resource_ref upstream, + std::string const& filename = get_default_filename(), + bool auto_flush = false) + : logger_{make_logger(filename)}, upstream_{upstream} + { + init_logger(auto_flush); + } + + /** + * @brief Construct a new logging resource adaptor using `upstream` to satisfy + * allocation requests and logging information about each allocation/free to + * the ostream specified by `stream`. + * + * The logfile will be written using CSV formatting. + * + * @param upstream The resource_ref used for allocating/deallocating device memory. + * @param stream The ostream to write log info. + * @param auto_flush If true, flushes the log for every (de)allocation. Warning, this will degrade + * performance. + */ + logging_resource_adaptor(device_async_resource_ref upstream, + std::ostream& stream, + bool auto_flush = false) + : logger_{make_logger(stream)}, upstream_{upstream} { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + init_logger(auto_flush); + } + /** + * @brief Construct a new logging resource adaptor using `upstream` to satisfy + * allocation requests and logging information about each allocation/free to + * the ostream specified by `stream`. + * + * The logfile will be written using CSV formatting. + * + * @param upstream The resource_ref used for allocating/deallocating device memory. + * @param sinks A list of logging sinks to which log output will be written. + * @param auto_flush If true, flushes the log for every (de)allocation. Warning, this will degrade + * performance. 
+ */ + logging_resource_adaptor(device_async_resource_ref upstream, + spdlog::sinks_init_list sinks, + bool auto_flush = false) + : logger_{make_logger(sinks)}, upstream_{upstream} + { init_logger(auto_flush); } @@ -147,11 +209,6 @@ class logging_resource_adaptor final : public device_memory_resource { return upstream_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } - /** * @brief Flush logger contents. */ @@ -239,7 +296,7 @@ class logging_resource_adaptor final : public device_memory_resource { void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { try { - auto const ptr = upstream_->allocate(bytes, stream); + auto const ptr = get_upstream_resource().allocate_async(bytes, stream); logger_->info("allocate,{},{},{}", ptr, bytes, fmt::ptr(stream.value())); return ptr; } catch (...) { @@ -265,7 +322,7 @@ class logging_resource_adaptor final : public device_memory_resource { void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { logger_->info("free,{},{},{}", ptr, bytes, fmt::ptr(stream.value())); - upstream_->deallocate(ptr, bytes, stream); + get_upstream_resource().deallocate_async(ptr, bytes, stream); } /** @@ -279,7 +336,7 @@ class logging_resource_adaptor final : public device_memory_resource { { if (this == &other) { return true; } auto const* cast = dynamic_cast const*>(&other); - if (cast == nullptr) { return upstream_->is_equal(other); } + if (cast == nullptr) { return false; } return get_upstream_resource() == cast->get_upstream_resource(); } @@ -295,8 +352,8 @@ class logging_resource_adaptor final : public device_memory_resource { std::shared_ptr logger_; ///< spdlog logger object - Upstream* upstream_; ///< The upstream resource used for satisfying - ///< allocation requests + device_async_resource_ref upstream_; ///< The upstream resource used for satisfying + ///< allocation requests }; /** diff --git a/include/rmm/mr/device/per_device_resource.hpp b/include/rmm/mr/device/per_device_resource.hpp index 9167c4fe6..4198397f4 100644 --- a/include/rmm/mr/device/per_device_resource.hpp +++ b/include/rmm/mr/device/per_device_resource.hpp @@ -191,6 +191,7 @@ namespace detail { // we need to call this function from two places: the thread-safe version of // `set_per_device_resource_ref` and the thread-safe version of `set_per_device_resource`, // both of which take the lock, so we need an implementation that doesn't take the lock. 
+/// @private inline device_async_resource_ref set_per_device_resource_ref_unsafe( cuda_device_id device_id, device_async_resource_ref new_resource_ref) { diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index 4daba5e8e..59a641f05 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -161,10 +161,7 @@ class pool_memory_resource final explicit pool_memory_resource(Upstream* upstream_mr, std::size_t initial_pool_size, std::optional maximum_pool_size = std::nullopt) - : upstream_mr_{[upstream_mr]() { - RMM_EXPECTS(nullptr != upstream_mr, "Unexpected null upstream pointer."); - return device_async_resource_ref{*upstream_mr}; - }()} + : upstream_mr_{to_device_async_resource_ref_checked(upstream_mr)} { RMM_EXPECTS(rmm::is_aligned(initial_pool_size, rmm::CUDA_ALLOCATION_ALIGNMENT), "Error, Initial pool size required to be a multiple of 256 bytes"); diff --git a/include/rmm/mr/device/prefetch_resource_adaptor.hpp b/include/rmm/mr/device/prefetch_resource_adaptor.hpp index 59ce8e036..b38bd0316 100644 --- a/include/rmm/mr/device/prefetch_resource_adaptor.hpp +++ b/include/rmm/mr/device/prefetch_resource_adaptor.hpp @@ -41,6 +41,16 @@ namespace mr { template class prefetch_resource_adaptor final : public device_memory_resource { public: + /** + * @brief Construct a new prefetch resource adaptor using `upstream` to satisfy + * allocation requests. + * + * @throws rmm::logic_error if `upstream == nullptr` + * + * @param upstream The resource_ref used for allocating/deallocating device memory + */ + prefetch_resource_adaptor(device_async_resource_ref upstream) : upstream_{upstream} {} + /** * @brief Construct a new prefetch resource adaptor using `upstream` to satisfy * allocation requests. 
@@ -49,9 +59,9 @@ class prefetch_resource_adaptor final : public device_memory_resource { * * @param upstream The resource used for allocating/deallocating device memory */ - prefetch_resource_adaptor(Upstream* upstream) : upstream_{upstream} + prefetch_resource_adaptor(Upstream* upstream) + : upstream_{to_device_async_resource_ref_checked(upstream)} { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); } prefetch_resource_adaptor() = delete; @@ -71,11 +81,6 @@ class prefetch_resource_adaptor final : public device_memory_resource { return upstream_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } - private: /** * @brief Allocates memory of size at least `bytes` using the upstream @@ -92,7 +97,7 @@ class prefetch_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - void* ptr = upstream_->allocate(bytes, stream); + void* ptr = get_upstream_resource().allocate_async(bytes, stream); rmm::prefetch(ptr, bytes, rmm::get_current_cuda_device(), stream); return ptr; } @@ -106,7 +111,7 @@ class prefetch_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - upstream_->deallocate(ptr, bytes, stream); + get_upstream_resource().deallocate_async(ptr, bytes, stream); } /** @@ -120,11 +125,12 @@ class prefetch_resource_adaptor final : public device_memory_resource { { if (this == &other) { return true; } auto cast = dynamic_cast const*>(&other); - if (cast == nullptr) { return upstream_->is_equal(other); } + if (cast == nullptr) { return false; } return get_upstream_resource() == cast->get_upstream_resource(); } - Upstream* upstream_; // the upstream resource used for satisfying allocation requests + // the upstream resource used for satisfying allocation requests + device_async_resource_ref upstream_; }; /** @} */ // end of group diff --git a/include/rmm/mr/device/statistics_resource_adaptor.hpp b/include/rmm/mr/device/statistics_resource_adaptor.hpp index bb4361049..025c51aa7 100644 --- a/include/rmm/mr/device/statistics_resource_adaptor.hpp +++ b/include/rmm/mr/device/statistics_resource_adaptor.hpp @@ -114,6 +114,12 @@ class statistics_resource_adaptor final : public device_memory_resource { } }; + /** + * @brief Construct a new statistics resource adaptor using `upstream` to satisfy + * allocation requests. + * + * @param upstream The resource_ref used for allocating/deallocating device memory. + */ statistics_resource_adaptor(device_async_resource_ref upstream) : upstream_{upstream} {} /** @@ -122,13 +128,10 @@ class statistics_resource_adaptor final : public device_memory_resource { * * @throws rmm::logic_error if `upstream == nullptr` * - * @param upstream The resource used for allocating/deallocating device memory + * @param upstream The resource used for allocating/deallocating device memory. 
*/ statistics_resource_adaptor(Upstream* upstream) - : upstream_{[upstream]() { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); - return device_async_resource_ref{*upstream}; - }()} + : upstream_{to_device_async_resource_ref_checked(upstream)} { } @@ -227,7 +230,7 @@ class statistics_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - void* ptr = upstream_.allocate_async(bytes, stream); + void* ptr = get_upstream_resource().allocate_async(bytes, stream); // increment the stats { @@ -250,7 +253,7 @@ class statistics_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - upstream_.deallocate_async(ptr, bytes, stream); + get_upstream_resource().deallocate_async(ptr, bytes, stream); { write_lock_t lock(mtx_); @@ -281,7 +284,7 @@ class statistics_resource_adaptor final : public device_memory_resource { std::stack> counter_stack_{{std::make_pair(counter{}, counter{})}}; std::shared_mutex mutable mtx_; // mutex for thread safe access to allocations_ // the upstream resource used for satisfying allocation requests - device_async_resource_ref upstream_{rmm::mr::get_current_device_resource_ref()}; + device_async_resource_ref upstream_; }; /** diff --git a/include/rmm/mr/device/thread_safe_resource_adaptor.hpp b/include/rmm/mr/device/thread_safe_resource_adaptor.hpp index 9979d1e08..6881aa19e 100644 --- a/include/rmm/mr/device/thread_safe_resource_adaptor.hpp +++ b/include/rmm/mr/device/thread_safe_resource_adaptor.hpp @@ -45,6 +45,16 @@ class thread_safe_resource_adaptor final : public device_memory_resource { public: using lock_t = std::lock_guard; ///< Type of lock used to synchronize access + /** + * @brief Construct a new thread safe resource adaptor using `upstream` to satisfy + * allocation requests. + * + * All allocations and frees are protected by a mutex lock + * + * @param upstream The resource used for allocating/deallocating device memory. + */ + thread_safe_resource_adaptor(device_async_resource_ref upstream) : upstream_{upstream} {} + /** * @brief Construct a new thread safe resource adaptor using `upstream` to satisfy * allocation requests. @@ -55,9 +65,9 @@ class thread_safe_resource_adaptor final : public device_memory_resource { * * @param upstream The resource used for allocating/deallocating device memory. 
*/ - thread_safe_resource_adaptor(Upstream* upstream) : upstream_{upstream} + thread_safe_resource_adaptor(Upstream* upstream) + : upstream_{to_device_async_resource_ref_checked(upstream)} { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); } thread_safe_resource_adaptor() = delete; @@ -75,11 +85,6 @@ class thread_safe_resource_adaptor final : public device_memory_resource { return upstream_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } - private: /** * @brief Allocates memory of size at least `bytes` using the upstream @@ -95,7 +100,7 @@ class thread_safe_resource_adaptor final : public device_memory_resource { void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { lock_t lock(mtx); - return upstream_->allocate(bytes, stream); + return get_upstream_resource().allocate_async(bytes, stream); } /** @@ -108,7 +113,7 @@ class thread_safe_resource_adaptor final : public device_memory_resource { void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { lock_t lock(mtx); - upstream_->deallocate(ptr, bytes, stream); + get_upstream_resource().deallocate_async(ptr, bytes, stream); } /** @@ -122,12 +127,13 @@ class thread_safe_resource_adaptor final : public device_memory_resource { { if (this == &other) { return true; } auto cast = dynamic_cast const*>(&other); - if (cast == nullptr) { return upstream_->is_equal(other); } + if (cast == nullptr) { return false; } return get_upstream_resource() == cast->get_upstream_resource(); } std::mutex mutable mtx; // mutex for thread safe access to upstream - Upstream* upstream_; ///< The upstream resource used for satisfying allocation requests + device_async_resource_ref + upstream_; ///< The upstream resource used for satisfying allocation requests }; /** @} */ // end of group diff --git a/include/rmm/mr/device/tracking_resource_adaptor.hpp b/include/rmm/mr/device/tracking_resource_adaptor.hpp index 461b1832d..6a5916e5c 100644 --- a/include/rmm/mr/device/tracking_resource_adaptor.hpp +++ b/include/rmm/mr/device/tracking_resource_adaptor.hpp @@ -88,8 +88,6 @@ class tracking_resource_adaptor final : public device_memory_resource { * @brief Construct a new tracking resource adaptor using `upstream` to satisfy * allocation requests. * - * @throws rmm::logic_error if `upstream == nullptr` - * * @param upstream The resource used for allocating/deallocating device memory * @param capture_stacks If true, capture stacks for allocation calls */ @@ -108,10 +106,9 @@ class tracking_resource_adaptor final : public device_memory_resource { * @param capture_stacks If true, capture stacks for allocation calls */ tracking_resource_adaptor(Upstream* upstream, bool capture_stacks = false) - : capture_stacks_{capture_stacks}, allocated_bytes_{0}, upstream_{[upstream]() { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); - return device_async_resource_ref{*upstream}; - }()} + : capture_stacks_{capture_stacks}, + allocated_bytes_{0}, + upstream_{to_device_async_resource_ref_checked(upstream)} { } @@ -211,7 +208,7 @@ class tracking_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - void* ptr = upstream_.allocate_async(bytes, stream); + void* ptr = get_upstream_resource().allocate_async(bytes, stream); // track it. 
{ write_lock_t lock(mtx_); @@ -231,7 +228,7 @@ class tracking_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - upstream_.deallocate_async(ptr, bytes, stream); + get_upstream_resource().deallocate_async(ptr, bytes, stream); { write_lock_t lock(mtx_); @@ -284,8 +281,7 @@ class tracking_resource_adaptor final : public device_memory_resource { std::map allocations_; // map of active allocations std::atomic allocated_bytes_; // number of bytes currently allocated std::shared_mutex mutable mtx_; // mutex for thread safe access to allocations_ - device_async_resource_ref upstream_{ - rmm::mr::get_current_device_resource_ref()}; // the upstream resource used for satisfying + device_async_resource_ref upstream_; // the upstream resource used for satisfying // allocation requests }; diff --git a/tests/device_check_resource_adaptor.hpp b/tests/device_check_resource_adaptor.hpp index 23b0c4bb1..6780f56d7 100644 --- a/tests/device_check_resource_adaptor.hpp +++ b/tests/device_check_resource_adaptor.hpp @@ -65,5 +65,5 @@ class device_check_resource_adaptor final : public rmm::mr::device_memory_resour } rmm::cuda_device_id device_id; - rmm::device_async_resource_ref upstream_{rmm::mr::get_current_device_resource_ref()}; + rmm::device_async_resource_ref upstream_; }; From 56e5eaa6de50bfebfef2a36b2acdc51f551564ff Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 29 Aug 2024 01:24:36 +0000 Subject: [PATCH 18/24] Make test debug builds use -O0 --- tests/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 75b15a90b..97f13b01b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -40,6 +40,7 @@ function(ConfigureTestInternal TEST_NAME) PUBLIC "SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}") target_compile_options(${TEST_NAME} PUBLIC $<$:-Wall -Werror -Wno-error=deprecated-declarations>) + target_compile_options(${TEST_NAME} PUBLIC "$<$:-O0>") if(DISABLE_DEPRECATION_WARNING) target_compile_options( @@ -48,6 +49,8 @@ function(ConfigureTestInternal TEST_NAME) PUBLIC $<$:-Wno-deprecated-declarations>) endif() + + if(CODE_COVERAGE) if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") From 545323ba325a619b0d4d23fb15195a7cbbe51333 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 29 Aug 2024 01:26:03 +0000 Subject: [PATCH 19/24] Fix quoted include. 
--- tests/mr/device/arena_mr_tests.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 9db623394..bdc0f2438 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -15,7 +15,6 @@ */ #include "../../byte_literals.hpp" -#include "cuda/stream_ref" #include #include @@ -26,6 +25,8 @@ #include #include +#include + #include #include #include From 5598840307c36ed410c450529a1b52c67c9f5b1b Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 29 Aug 2024 02:26:23 +0000 Subject: [PATCH 20/24] cmake style --- tests/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 97f13b01b..28093360a 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -49,8 +49,6 @@ function(ConfigureTestInternal TEST_NAME) PUBLIC $<$:-Wno-deprecated-declarations>) endif() - - if(CODE_COVERAGE) if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") From c1d7b0d0e4d81acf9dc00a7907004cd548d28d8c Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 4 Sep 2024 07:43:37 +0000 Subject: [PATCH 21/24] Don't take the address when passing MR to resource_ref. --- benchmarks/device_uvector/device_uvector_bench.cu | 6 +++--- include/rmm/mr/device/failure_callback_resource_adaptor.hpp | 2 +- tests/container_multidevice_tests.cu | 6 +++--- tests/mr/device/statistics_mr_tests.cpp | 4 ++-- tests/mr/device/tracking_mr_tests.cpp | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/benchmarks/device_uvector/device_uvector_bench.cu b/benchmarks/device_uvector/device_uvector_bench.cu index e56926827..0eddb1d92 100644 --- a/benchmarks/device_uvector/device_uvector_bench.cu +++ b/benchmarks/device_uvector/device_uvector_bench.cu @@ -40,7 +40,7 @@ void BM_UvectorSizeConstruction(benchmark::State& state) rmm::mr::cuda_memory_resource cuda_mr{}; rmm::mr::pool_memory_resource mr{ &cuda_mr, rmm::percent_of_free_device_memory(50)}; - rmm::mr::set_current_device_resource_ref(&mr); + rmm::mr::set_current_device_resource_ref(mr); for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) rmm::device_uvector vec(state.range(0), rmm::cuda_stream_view{}); @@ -62,7 +62,7 @@ void BM_ThrustVectorSizeConstruction(benchmark::State& state) rmm::mr::cuda_memory_resource cuda_mr{}; rmm::mr::pool_memory_resource mr{ &cuda_mr, rmm::percent_of_free_device_memory(50)}; - rmm::mr::set_current_device_resource_ref(&mr); + rmm::mr::set_current_device_resource_ref(mr); for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) rmm::device_vector vec(state.range(0)); @@ -140,7 +140,7 @@ template void BM_VectorWorkflow(benchmark::State& state) { rmm::mr::cuda_async_memory_resource cuda_async_mr{}; - rmm::mr::set_current_device_resource_ref(&cuda_async_mr); + rmm::mr::set_current_device_resource_ref(cuda_async_mr); rmm::cuda_stream input_stream; std::vector streams(4); diff --git a/include/rmm/mr/device/failure_callback_resource_adaptor.hpp b/include/rmm/mr/device/failure_callback_resource_adaptor.hpp index 264a5a532..a08352d18 100644 --- a/include/rmm/mr/device/failure_callback_resource_adaptor.hpp +++ b/include/rmm/mr/device/failure_callback_resource_adaptor.hpp @@ -84,7 +84,7 @@ using failure_callback_t = std::function; * failure_callback_adaptor mr{ * rmm::mr::get_current_device_resource_ref(), failure_handler, &retried * }; - * rmm::mr::set_current_device_resource_ref(&mr); + * rmm::mr::set_current_device_resource_ref(mr); * } * @endcode * diff 
--git a/tests/container_multidevice_tests.cu b/tests/container_multidevice_tests.cu index 4d00173ac..55432feb0 100644 --- a/tests/container_multidevice_tests.cu +++ b/tests/container_multidevice_tests.cu @@ -71,7 +71,7 @@ TYPED_TEST(ContainerMultiDeviceTest, CreateMoveDestroyDifferentActiveDevice) rmm::cuda_set_device_raii dev{rmm::cuda_device_id{0}}; auto orig_mr = rmm::mr::get_current_device_resource_ref(); auto check_mr = device_check_resource_adaptor{orig_mr}; - rmm::mr::set_current_device_resource_ref(&check_mr); + rmm::mr::set_current_device_resource_ref(check_mr); { auto buf_1 = []() { @@ -111,7 +111,7 @@ TYPED_TEST(ContainerMultiDeviceTest, ResizeDifferentActiveDevice) rmm::cuda_set_device_raii dev{rmm::cuda_device_id{0}}; auto orig_mr = rmm::mr::get_current_device_resource_ref(); auto check_mr = device_check_resource_adaptor{orig_mr}; - rmm::mr::set_current_device_resource_ref(&check_mr); + rmm::mr::set_current_device_resource_ref(check_mr); if constexpr (not std::is_same_v>) { auto buf = TypeParam(128, rmm::cuda_stream_view{}); @@ -134,7 +134,7 @@ TYPED_TEST(ContainerMultiDeviceTest, ShrinkDifferentActiveDevice) rmm::cuda_set_device_raii dev{rmm::cuda_device_id{0}}; auto orig_mr = rmm::mr::get_current_device_resource_ref(); auto check_mr = device_check_resource_adaptor{orig_mr}; - rmm::mr::set_current_device_resource_ref(&check_mr); + rmm::mr::set_current_device_resource_ref(check_mr); if constexpr (not std::is_same_v>) { auto buf = TypeParam(128, rmm::cuda_stream_view{}); diff --git a/tests/mr/device/statistics_mr_tests.cpp b/tests/mr/device/statistics_mr_tests.cpp index 1fdf01aa5..f796a4c00 100644 --- a/tests/mr/device/statistics_mr_tests.cpp +++ b/tests/mr/device/statistics_mr_tests.cpp @@ -129,7 +129,7 @@ TEST(StatisticsTest, MultiTracking) { auto orig_device_resource = rmm::mr::get_current_device_resource_ref(); statistics_adaptor mr{orig_device_resource}; - rmm::mr::set_current_device_resource_ref(&mr); + rmm::mr::set_current_device_resource_ref(mr); std::vector> allocations; for (std::size_t i = 0; i < num_allocations; ++i) { @@ -140,7 +140,7 @@ TEST(StatisticsTest, MultiTracking) EXPECT_EQ(mr.get_allocations_counter().value, 10); statistics_adaptor inner_mr{rmm::mr::get_current_device_resource_ref()}; - rmm::mr::set_current_device_resource_ref(&inner_mr); + rmm::mr::set_current_device_resource_ref(inner_mr); for (std::size_t i = 0; i < num_more_allocations; ++i) { allocations.emplace_back( diff --git a/tests/mr/device/tracking_mr_tests.cpp b/tests/mr/device/tracking_mr_tests.cpp index 6eeb6ccc3..acd540ae6 100644 --- a/tests/mr/device/tracking_mr_tests.cpp +++ b/tests/mr/device/tracking_mr_tests.cpp @@ -103,7 +103,7 @@ TEST(TrackingTest, MultiTracking) { auto orig_device_resource = rmm::mr::get_current_device_resource_ref(); tracking_adaptor mr{orig_device_resource, true}; - rmm::mr::set_current_device_resource_ref(&mr); + rmm::mr::set_current_device_resource_ref(mr); std::vector> allocations; for (std::size_t i = 0; i < num_allocations; ++i) { @@ -114,7 +114,7 @@ TEST(TrackingTest, MultiTracking) EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations); tracking_adaptor inner_mr{rmm::mr::get_current_device_resource_ref()}; - rmm::mr::set_current_device_resource_ref(&inner_mr); + rmm::mr::set_current_device_resource_ref(inner_mr); for (std::size_t i = 0; i < num_more_allocations; ++i) { allocations.emplace_back( From 4c113827371c4f1b79f7021609d5fcbaac5097dd Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 5 Sep 2024 02:36:24 +0000 Subject: [PATCH 22/24] Fix 
yoda code. --- include/rmm/resource_ref.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rmm/resource_ref.hpp b/include/rmm/resource_ref.hpp index 6a34a6a47..56049522f 100644 --- a/include/rmm/resource_ref.hpp +++ b/include/rmm/resource_ref.hpp @@ -77,7 +77,7 @@ using host_device_async_resource_ref = template device_async_resource_ref to_device_async_resource_ref_checked(Resource* res) { - RMM_EXPECTS(nullptr != res, "Unexpected null resource pointer."); + RMM_EXPECTS(res, "Unexpected null resource pointer."); return device_async_resource_ref{*res}; } From 6d2f09d862254840d726c1e932c25e28797077d2 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 5 Sep 2024 02:36:46 +0000 Subject: [PATCH 23/24] Use resource_ref upstreams in binning_mr --- .../rmm/mr/device/binning_memory_resource.hpp | 56 +++++++++++++++---- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/include/rmm/mr/device/binning_memory_resource.hpp b/include/rmm/mr/device/binning_memory_resource.hpp index 7daa77328..a5ef64665 100644 --- a/include/rmm/mr/device/binning_memory_resource.hpp +++ b/include/rmm/mr/device/binning_memory_resource.hpp @@ -27,6 +27,7 @@ #include #include #include +#include #include namespace RMM_NAMESPACE { @@ -52,7 +53,20 @@ class binning_memory_resource final : public device_memory_resource { * Initially has no bins, so simply uses the upstream_resource until bin resources are added * with `add_bin`. * - * @throws rmm::logic_error if size_base is not a power of two. + * @param upstream_resource The upstream memory resource used to allocate bin pools. + */ + explicit binning_memory_resource(device_async_resource_ref upstream_resource) + : upstream_mr_{upstream_resource} + { + } + + /** + * @brief Construct a new binning memory resource object. + * + * Initially has no bins, so simply uses the upstream_resource until bin resources are added + * with `add_bin`. + * + * @throws rmm::logic_error if upstream_resource is nullptr * * @param upstream_resource The upstream memory resource used to allocate bin pools. */ @@ -73,6 +87,30 @@ class binning_memory_resource final : public device_memory_resource { * @param min_size_exponent The minimum base-2 exponent bin size. * @param max_size_exponent The maximum base-2 exponent bin size. */ + binning_memory_resource(device_async_resource_ref upstream_resource, + int8_t min_size_exponent, // NOLINT(bugprone-easily-swappable-parameters) + int8_t max_size_exponent) + : upstream_mr_{upstream_resource} + { + for (auto i = min_size_exponent; i <= max_size_exponent; i++) { + add_bin(1 << i); + } + } + + /** + * @brief Construct a new binning memory resource object with a range of initial bins. + * + * Constructs a new binning memory resource and adds bins backed by `fixed_size_memory_resource` + * in the range [2^min_size_exponent, 2^max_size_exponent]. For example if `min_size_exponent==18` + * and `max_size_exponent==22`, creates bins of sizes 256KiB, 512KiB, 1024KiB, 2048KiB and + * 4096KiB. + * + * @throws rmm::logic_error if upstream_resource is nullptr + * + * @param upstream_resource The upstream memory resource used to allocate bin pools. + * @param min_size_exponent The minimum base-2 exponent bin size. + * @param max_size_exponent The maximum base-2 exponent bin size. 
+ */ binning_memory_resource(Upstream* upstream_resource, int8_t min_size_exponent, // NOLINT(bugprone-easily-swappable-parameters) int8_t max_size_exponent) @@ -106,8 +144,7 @@ class binning_memory_resource final : public device_memory_resource { /** * @brief Add a bin allocator to this resource * - * Adds `bin_resource` if it is not null; otherwise constructs and adds a - * fixed_size_memory_resource. + * Adds `bin_resource` if provided; otherwise constructs and adds a fixed_size_memory_resource. * * This bin will be used for any allocation smaller than `allocation_size` that is larger than * the next smaller bin's allocation size. @@ -119,14 +156,14 @@ class binning_memory_resource final : public device_memory_resource { * @param allocation_size The maximum size that this bin allocates * @param bin_resource The memory resource for the bin */ - void add_bin(std::size_t allocation_size, device_memory_resource* bin_resource = nullptr) + void add_bin(std::size_t allocation_size, + std::optional bin_resource = std::nullopt) { allocation_size = align_up(allocation_size, CUDA_ALLOCATION_ALIGNMENT); - if (nullptr != bin_resource) { - resource_bins_.insert({allocation_size, bin_resource}); + if (bin_resource.has_value()) { + resource_bins_.insert({allocation_size, bin_resource.value()}); } else if (resource_bins_.count(allocation_size) == 0) { // do nothing if bin already exists - owned_bin_resources_.push_back( std::make_unique>(upstream_mr_, allocation_size)); resource_bins_.insert({allocation_size, owned_bin_resources_.back().get()}); @@ -145,8 +182,7 @@ class binning_memory_resource final : public device_memory_resource { device_async_resource_ref get_resource_ref(std::size_t bytes) { auto iter = resource_bins_.lower_bound(bytes); - return (iter != resource_bins_.cend()) ? device_async_resource_ref{iter->second} - : get_upstream_resource(); + return (iter != resource_bins_.cend()) ? iter->second : get_upstream_resource(); } /** @@ -182,7 +218,7 @@ class binning_memory_resource final : public device_memory_resource { std::vector>> owned_bin_resources_; - std::map resource_bins_; + std::map resource_bins_; }; /** @} */ // end of group From a367af6954cbd7d025b8ab4ba4cef1abfa55a62e Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 5 Sep 2024 02:37:07 +0000 Subject: [PATCH 24/24] Remove incorrect @throws documentation --- include/rmm/mr/device/aligned_resource_adaptor.hpp | 3 +-- include/rmm/mr/device/detail/arena.hpp | 2 -- include/rmm/mr/device/failure_callback_resource_adaptor.hpp | 2 -- include/rmm/mr/device/limiting_resource_adaptor.hpp | 2 -- include/rmm/mr/device/pool_memory_resource.hpp | 1 - include/rmm/mr/device/prefetch_resource_adaptor.hpp | 2 -- 6 files changed, 1 insertion(+), 11 deletions(-) diff --git a/include/rmm/mr/device/aligned_resource_adaptor.hpp b/include/rmm/mr/device/aligned_resource_adaptor.hpp index f99348906..4df2c4d2d 100644 --- a/include/rmm/mr/device/aligned_resource_adaptor.hpp +++ b/include/rmm/mr/device/aligned_resource_adaptor.hpp @@ -60,7 +60,6 @@ class aligned_resource_adaptor final : public device_memory_resource { /** * @brief Construct an aligned resource adaptor using `upstream` to satisfy allocation requests. * - * @throws rmm::logic_error if `upstream == nullptr` * @throws rmm::logic_error if `allocation_alignment` is not a power of 2 * * @param upstream The resource used for allocating/deallocating device memory. 
@@ -81,7 +80,7 @@ class aligned_resource_adaptor final : public device_memory_resource {
   * @brief Construct an aligned resource adaptor using `upstream` to satisfy allocation requests.
   *
   * @throws rmm::logic_error if `upstream == nullptr`
-  * @throws rmm::logic_error if `allocation_alignment` is not a power of 2
+  * @throws rmm::logic_error if `alignment` is not a power of 2
   *
   * @param upstream The resource used for allocating/deallocating device memory.
   * @param alignment The size used for allocation alignment.
diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp
index f6049f441..6f8303c83 100644
--- a/include/rmm/mr/device/detail/arena.hpp
+++ b/include/rmm/mr/device/detail/arena.hpp
@@ -500,8 +500,6 @@ class global_arena final {
  /**
   * @brief Construct a global arena.
   *
-  * @throws rmm::logic_error if `upstream_mr == nullptr`.
-  *
   * @param upstream_mr The memory resource from which to allocate blocks for the pool
   * @param arena_size Size in bytes of the global arena. Defaults to half of the available memory
   * on the current device.
diff --git a/include/rmm/mr/device/failure_callback_resource_adaptor.hpp b/include/rmm/mr/device/failure_callback_resource_adaptor.hpp
index a08352d18..fdb385748 100644
--- a/include/rmm/mr/device/failure_callback_resource_adaptor.hpp
+++ b/include/rmm/mr/device/failure_callback_resource_adaptor.hpp
@@ -100,8 +100,6 @@ class failure_callback_resource_adaptor final : public device_memory_resource {
   * @brief Construct a new `failure_callback_resource_adaptor` using `upstream` to satisfy
   * allocation requests.
   *
-  * @throws rmm::logic_error if `upstream == nullptr`
-  *
   * @param upstream The resource used for allocating/deallocating device memory
   * @param callback Callback function @see failure_callback_t
   * @param callback_arg Extra argument passed to `callback`
diff --git a/include/rmm/mr/device/limiting_resource_adaptor.hpp b/include/rmm/mr/device/limiting_resource_adaptor.hpp
index eba352e78..d19fa3a0a 100644
--- a/include/rmm/mr/device/limiting_resource_adaptor.hpp
+++ b/include/rmm/mr/device/limiting_resource_adaptor.hpp
@@ -50,8 +50,6 @@ class limiting_resource_adaptor final : public device_memory_resource {
   * @brief Construct a new limiting resource adaptor using `upstream` to satisfy
   * allocation requests and limiting the total allocation amount possible.
   *
-  * @throws rmm::logic_error if `upstream == nullptr`
-  *
   * @param upstream The resource used for allocating/deallocating device memory
   * @param allocation_limit Maximum memory allowed for this allocator
   * @param alignment Alignment in bytes for the start of each allocated buffer
diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp
index 59a641f05..f63de21ff 100644
--- a/include/rmm/mr/device/pool_memory_resource.hpp
+++ b/include/rmm/mr/device/pool_memory_resource.hpp
@@ -119,7 +119,6 @@ class pool_memory_resource final
   * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using
   * `upstream_mr`.
   *
-  * @throws rmm::logic_error if `upstream_mr == nullptr`
   * @throws rmm::logic_error if `initial_pool_size` is not aligned to a multiple of
   * pool_memory_resource::allocation_alignment bytes.
  * @throws rmm::logic_error if `maximum_pool_size` is neither the default nor aligned to a
diff --git a/include/rmm/mr/device/prefetch_resource_adaptor.hpp b/include/rmm/mr/device/prefetch_resource_adaptor.hpp
index b38bd0316..d3a4c676a 100644
--- a/include/rmm/mr/device/prefetch_resource_adaptor.hpp
+++ b/include/rmm/mr/device/prefetch_resource_adaptor.hpp
@@ -45,8 +45,6 @@ class prefetch_resource_adaptor final : public device_memory_resource {
   * @brief Construct a new prefetch resource adaptor using `upstream` to satisfy
   * allocation requests.
   *
-  * @throws rmm::logic_error if `upstream == nullptr`
-  *
   * @param upstream The resource_ref used for allocating/deallocating device memory
   */
  prefetch_resource_adaptor(device_async_resource_ref upstream) : upstream_{upstream} {}
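To close out, here is an illustrative usage sketch (not part of any patch in this series) pulling together the `binning_memory_resource` changes from patch 23. It assumes the resource_ref-accepting constructor of `fixed_size_memory_resource` from earlier patches in the series and the current RMM header layout; names such as `big_bin` are invented for the example:

#include <rmm/mr/device/binning_memory_resource.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/fixed_size_memory_resource.hpp>
#include <rmm/resource_ref.hpp>

int main()
{
  rmm::mr::cuda_memory_resource cuda_mr;
  rmm::device_async_resource_ref upstream{cuda_mr};

  // The upstream is passed as a resource_ref, so no raw pointer (and no
  // nullptr check) is involved. Exponents 18..22 create bins of 256KiB..4MiB.
  rmm::mr::binning_memory_resource<rmm::mr::cuda_memory_resource> mr(upstream, 18, 22);

  // Omitting bin_resource lets the binning resource own a new
  // fixed_size_memory_resource bin built on the same upstream.
  mr.add_bin(1 << 23);

  // Alternatively, register an externally owned bin as a resource_ref.
  rmm::mr::fixed_size_memory_resource<rmm::mr::cuda_memory_resource> big_bin{upstream, 1 << 24};
  mr.add_bin(1 << 24, rmm::device_async_resource_ref{big_bin});

  return 0;
}

Because the upstream and the bins are held as `device_async_resource_ref`, the caller keeps ownership of `cuda_mr` and `big_bin` and must keep them alive for the lifetime of `mr`, matching the lifetime rules described for the per-device resource_ref functions earlier in the series.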