diff --git a/benchmarks/profiler_sample_loop_v2.rb b/benchmarks/profiler_sample_loop_v2.rb index 0afc5d6cc70..4722a6c050f 100644 --- a/benchmarks/profiler_sample_loop_v2.rb +++ b/benchmarks/profiler_sample_loop_v2.rb @@ -16,7 +16,11 @@ class ProfilerSampleLoopBenchmark PROFILER_OVERHEAD_STACK_THREAD = Thread.new { sleep } def create_profiler - @recorder = Datadog::Profiling::StackRecorder.new(cpu_time_enabled: true, alloc_samples_enabled: true) + @recorder = Datadog::Profiling::StackRecorder.new( + cpu_time_enabled: true, + alloc_samples_enabled: false, + heap_samples_enabled: false + ) @collector = Datadog::Profiling::Collectors::ThreadContext.new( recorder: @recorder, max_frames: 400, tracer: nil, endpoint_collection_enabled: false, timeline_enabled: false ) diff --git a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c index ea56ba2a593..af9b0127e18 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +++ b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c @@ -80,10 +80,10 @@ struct cpu_and_wall_time_worker_state { // These are immutable after initialization bool gc_profiling_enabled; - bool allocation_counting_enabled; bool no_signals_workaround_enabled; bool dynamic_sampling_rate_enabled; - int allocation_sample_every; // Temporarily used for development/testing of allocation profiling + int allocation_sample_every; + bool allocation_profiling_enabled; VALUE self_instance; VALUE thread_context_collector_instance; VALUE idle_sampling_helper_instance; @@ -149,11 +149,11 @@ static VALUE _native_initialize( VALUE thread_context_collector_instance, VALUE gc_profiling_enabled, VALUE idle_sampling_helper_instance, - VALUE allocation_counting_enabled, VALUE no_signals_workaround_enabled, VALUE dynamic_sampling_rate_enabled, VALUE dynamic_sampling_rate_overhead_target_percentage, - VALUE allocation_sample_every + VALUE allocation_sample_every, + VALUE allocation_profiling_enabled ); static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr); static VALUE _native_sampling_loop(VALUE self, VALUE instance); @@ -265,10 +265,10 @@ static VALUE _native_new(VALUE klass) { // being leaked. 
state->gc_profiling_enabled = false; - state->allocation_counting_enabled = false; state->no_signals_workaround_enabled = false; state->dynamic_sampling_rate_enabled = true; state->allocation_sample_every = 0; + state->allocation_profiling_enabled = false; state->thread_context_collector_instance = Qnil; state->idle_sampling_helper_instance = Qnil; state->owner_thread = Qnil; @@ -293,31 +293,31 @@ static VALUE _native_initialize( VALUE thread_context_collector_instance, VALUE gc_profiling_enabled, VALUE idle_sampling_helper_instance, - VALUE allocation_counting_enabled, VALUE no_signals_workaround_enabled, VALUE dynamic_sampling_rate_enabled, VALUE dynamic_sampling_rate_overhead_target_percentage, - VALUE allocation_sample_every + VALUE allocation_sample_every, + VALUE allocation_profiling_enabled ) { ENFORCE_BOOLEAN(gc_profiling_enabled); - ENFORCE_BOOLEAN(allocation_counting_enabled); ENFORCE_BOOLEAN(no_signals_workaround_enabled); ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled); ENFORCE_TYPE(allocation_sample_every, T_FIXNUM); ENFORCE_TYPE(dynamic_sampling_rate_overhead_target_percentage, T_FLOAT); + ENFORCE_BOOLEAN(allocation_profiling_enabled); struct cpu_and_wall_time_worker_state *state; TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state); state->gc_profiling_enabled = (gc_profiling_enabled == Qtrue); - state->allocation_counting_enabled = (allocation_counting_enabled == Qtrue); state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue); state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue); dynamic_sampling_rate_set_overhead_target_percentage(&state->dynamic_sampling_rate, NUM2DBL(dynamic_sampling_rate_overhead_target_percentage)); state->allocation_sample_every = NUM2INT(allocation_sample_every); + state->allocation_profiling_enabled = (allocation_profiling_enabled == Qtrue); - if (state->allocation_sample_every < 0) { - rb_raise(rb_eArgError, "Unexpected value for allocation_sample_every: %d. This value must be >= 0.", state->allocation_sample_every); + if (state->allocation_sample_every <= 0) { + rb_raise(rb_eArgError, "Unexpected value for allocation_sample_every: %d. This value must be > 0.", state->allocation_sample_every); } state->thread_context_collector_instance = enforce_thread_context_collector_instance(thread_context_collector_instance); @@ -636,7 +636,7 @@ static VALUE release_gvl_and_run_sampling_trigger_loop(VALUE instance) { // because they may raise exceptions. install_sigprof_signal_handler(handle_sampling_signal, "handle_sampling_signal"); if (state->gc_profiling_enabled) rb_tracepoint_enable(state->gc_tracepoint); - if (state->allocation_counting_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint); + if (state->allocation_profiling_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint); rb_thread_call_without_gvl(run_sampling_trigger_loop, state, interrupt_sampling_trigger_loop, state); @@ -892,9 +892,9 @@ static void sleep_for(uint64_t time_ns) { } static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self) { - bool is_profiler_running = active_sampler_instance_state != NULL; + bool are_allocations_being_tracked = active_sampler_instance_state != NULL && active_sampler_instance_state->allocation_profiling_enabled; - return is_profiler_running ? ULL2NUM(allocation_count) : Qnil; + return are_allocations_being_tracked ? ULL2NUM(allocation_count) : Qnil; } // Implements memory-related profiling events. 
This function is called by Ruby via the `object_allocation_tracepoint` @@ -928,7 +928,7 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) { // TODO: This is a placeholder sampling decision strategy. We plan to replace it with a better one soon (e.g. before // beta), and having something here allows us to test the rest of the feature, sampling decision aside. - if (state->allocation_sample_every > 0 && ((allocation_count % state->allocation_sample_every) == 0)) { + if (allocation_count % state->allocation_sample_every == 0) { // Rescue against any exceptions that happen during sampling safely_call(rescued_sample_allocation, tracepoint_data, state->self_instance); } diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c index 3b1c798260c..618cdb23f5d 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +++ b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c @@ -1210,6 +1210,8 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in } } + track_object(state->recorder_instance, new_object, sample_weight); + trigger_sample_for_thread( state, /* thread: */ current_thread, diff --git a/ext/ddtrace_profiling_native_extension/extconf.rb b/ext/ddtrace_profiling_native_extension/extconf.rb index 2151f6ddb50..10ceafdb4f0 100644 --- a/ext/ddtrace_profiling_native_extension/extconf.rb +++ b/ext/ddtrace_profiling_native_extension/extconf.rb @@ -114,6 +114,11 @@ def add_compiler_flag(flag) add_compiler_flag '-Wall' add_compiler_flag '-Wextra' +if ENV['DDTRACE_DEBUG'] + CONFIG['optflags'] = '-O0' + CONFIG['debugflags'] = '-ggdb3' +end + if RUBY_PLATFORM.include?('linux') # Supposedly, the correct way to do this is # ``` diff --git a/ext/ddtrace_profiling_native_extension/heap_recorder.c b/ext/ddtrace_profiling_native_extension/heap_recorder.c new file mode 100644 index 00000000000..627567c3880 --- /dev/null +++ b/ext/ddtrace_profiling_native_extension/heap_recorder.c @@ -0,0 +1,117 @@ +#include "heap_recorder.h" +#include <pthread.h> +#include "ruby/st.h" +#include "ruby/util.h" +#include "ruby_helpers.h" +#include <errno.h> + +// Allows storing data passed to ::start_heap_allocation_recording to make it accessible to +// ::end_heap_allocation_recording. +// +// obj != Qnil flags this struct as holding a valid partial heap recording. +typedef struct { + VALUE obj; + live_object_data object_data; +} partial_heap_recording; + +struct heap_recorder { + // Data for a heap recording that was started but not yet ended + partial_heap_recording active_recording; +}; + +// ========================== +// Heap Recorder External API +// +// WARN: All these APIs should support receiving a NULL heap_recorder, resulting in a noop. +// +// WARN: Except for ::heap_recorder_for_each_live_object, we always assume interaction with these APIs +// happens under the GVL.
+// +// ========================== +heap_recorder* heap_recorder_new(void) { + heap_recorder* recorder = ruby_xmalloc(sizeof(heap_recorder)); + + recorder->active_recording = (partial_heap_recording) { + .obj = Qnil, + .object_data = {0}, + }; + + return recorder; +} + +void heap_recorder_free(struct heap_recorder* recorder) { + if (recorder == NULL) { + return; + } + + ruby_xfree(recorder); +} + +// TODO: Remove when things get implemented +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" + +void heap_recorder_after_fork(heap_recorder *heap_recorder) { + if (heap_recorder == NULL) { + return; + } + + // TODO: Implement +} + +void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight) { + if (heap_recorder == NULL) { + return; + } + + heap_recorder->active_recording = (partial_heap_recording) { + .obj = new_obj, + .object_data = (live_object_data) { + .weight = weight, + }, + }; +} + +void end_heap_allocation_recording(struct heap_recorder *heap_recorder, ddog_prof_Slice_Location locations) { + if (heap_recorder == NULL) { + return; + } + + partial_heap_recording *active_recording = &heap_recorder->active_recording; + + VALUE new_obj = active_recording->obj; + if (new_obj == Qnil) { + // Recording ended without having been started? + rb_raise(rb_eRuntimeError, "Ended a heap recording that was not started"); + } + + // From now on, mark active recording as invalid so we can short-circuit at any point and + // not end up with a still active recording. new_obj still holds the object for this recording + active_recording->obj = Qnil; + + // TODO: Implement +} + +void heap_recorder_flush(heap_recorder *heap_recorder) { + if (heap_recorder == NULL) { + return; + } + + // TODO: Implement +} + +// WARN: If with_gvl = False, NO HEAP ALLOCATIONS, EXCEPTIONS or RUBY CALLS ARE ALLOWED. +void heap_recorder_for_each_live_object( + heap_recorder *heap_recorder, + bool (*for_each_callback)(heap_recorder_iteration_data stack_data, void *extra_arg), + void *for_each_callback_extra_arg, + bool with_gvl) { + if (heap_recorder == NULL) { + return; + } + + // TODO: Implement +} + +// TODO: Remove when things get implemented +#pragma GCC diagnostic pop diff --git a/ext/ddtrace_profiling_native_extension/heap_recorder.h b/ext/ddtrace_profiling_native_extension/heap_recorder.h new file mode 100644 index 00000000000..add47d46734 --- /dev/null +++ b/ext/ddtrace_profiling_native_extension/heap_recorder.h @@ -0,0 +1,91 @@ +#pragma once + +#include <datadog/profiling.h> +#include <ruby.h> + +// A heap recorder keeps track of a collection of live heap objects. +// +// All allocations observed by this recorder for which a corresponding free was +// not yet observed are deemed alive and can be iterated on to produce a +// live heap profile. +// +// NOTE: All public APIs of heap_recorder support receiving a NULL heap_recorder +// in which case the behaviour will be a noop. +// +// WARN: Unless otherwise stated the heap recorder APIs assume calls are done +// under the GVL. +typedef struct heap_recorder heap_recorder; + +// Extra data associated with each live object being tracked. +typedef struct live_object_data { + // The weight of this object from a sampling perspective. + // + // A notion of weight is preserved for each tracked object to allow for an approximate + // extrapolation to an unsampled view. + // + // Example: If we were sampling every 50 objects, then each sampled object + // could be seen as being representative of 50 objects.
+ unsigned int weight; +} live_object_data; + +// Data that is made available to iterators of heap recorder data for each live object +// tracked therein. +typedef struct { + ddog_prof_Slice_Location locations; + live_object_data object_data; +} heap_recorder_iteration_data; + +// Initialize a new heap recorder. +heap_recorder* heap_recorder_new(void); + +// Free a previously initialized heap recorder. +void heap_recorder_free(heap_recorder *heap_recorder); + +// Do any cleanup needed after forking. +void heap_recorder_after_fork(heap_recorder *heap_recorder); + +// Start a heap allocation recording on the heap recorder for a new object. +// +// This heap allocation recording needs to be ended via ::end_heap_allocation_recording +// before it will become fully committed and able to be iterated on. +// +// @param new_obj +// The newly allocated Ruby object/value. +// @param weight +// The sampling weight of this object. +// +// WARN: It needs to be paired with an ::end_heap_allocation_recording call. +void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight); + +// End a previously started heap allocation recording on the heap recorder. +// +// It is at this point that an allocated object will become fully tracked and able to be iterated on. +// +// @param locations The stacktrace representing the location of the allocation. +// +// WARN: It is illegal to call this without previously having called ::start_heap_allocation_recording. +void end_heap_allocation_recording(heap_recorder *heap_recorder, ddog_prof_Slice_Location locations); + +// Flush any intermediate state that might be queued inside the heap recorder. +// +// NOTE: This should usually be called before iteration to ensure the data is as up-to-date as possible. +void heap_recorder_flush(heap_recorder *heap_recorder); + +// Iterate over each live object being tracked by the heap recorder. +// +// @param for_each_callback +// A callback function that shall be called for each live object being tracked +// by the heap recorder. Alongside the iteration_data for each live object, +// a second argument will be forwarded with the contents of the optional +// for_each_callback_extra_arg. Iteration will continue until the callback +// returns false or we run out of objects. +// @param for_each_callback_extra_arg +// Optional (NULL if empty) extra data that should be passed to the +// callback function alongside the data for each live tracked object. +// @param with_gvl +// True if we're calling this while holding the GVL, false otherwise.
+void heap_recorder_for_each_live_object( + heap_recorder *heap_recorder, + bool (*for_each_callback)(heap_recorder_iteration_data data, void* extra_arg), + void *for_each_callback_extra_arg, + bool with_gvl); diff --git a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c index 43bd02c4a07..f6f8b69b1b9 100644 --- a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c +++ b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c @@ -40,3 +40,23 @@ ddog_CharSlice ruby_value_type_to_char_slice(enum ruby_value_type type) { default: return DDOG_CHARSLICE_C("BUG: Unknown value for ruby_value_type"); } } + +size_t read_ddogerr_string_and_drop(ddog_Error *error, char *string, size_t capacity) { + if (capacity == 0 || string == NULL) { + // short-circuit, we can't write anything + ddog_Error_drop(error); + return 0; + } + + ddog_CharSlice error_msg_slice = ddog_Error_message(error); + size_t error_msg_size = error_msg_slice.len; + // Account for extra null char for proper cstring + if (error_msg_size >= capacity) { + // Error message too big, let's truncate it to capacity - 1 to allow for the extra null at the end + error_msg_size = capacity - 1; + } + strncpy(string, error_msg_slice.ptr, error_msg_size); + string[error_msg_size] = '\0'; + ddog_Error_drop(error); + return error_msg_size; +} diff --git a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h index edd11e31954..7d4593fa550 100644 --- a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +++ b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h @@ -24,6 +24,11 @@ inline static VALUE get_error_details_and_drop(ddog_Error *error) { return result; } + +// Utility function to extract an error cstring from a ddog_Error. +// Returns the number of characters written to string (which are necessarily +// bounded by capacity - 1 since the string will be null-terminated). +size_t read_ddogerr_string_and_drop(ddog_Error *error, char *string, size_t capacity); + // Used for pretty printing this Ruby enum. Returns "T_UNKNOWN_OR_MISSING_RUBY_VALUE_TYPE_ENTRY" for unknown elements. // In practice, there are a few types that the profiler will probably never encounter, but I've added all entries of // ruby_value_type that Ruby uses so that we can also use this for debugging.
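Taken together, the heap_recorder API above forms a two-phase protocol: when the allocation tracepoint fires, the new object is flagged via start_heap_allocation_recording, and the recording is only committed via end_heap_allocation_recording once the (shared) sampling path has built a stacktrace; at serialization time the recorder is flushed and then iterated. A minimal caller sketch of that lifecycle follows — hypothetical code, not part of this change: example_lifecycle, count_live_objects, the weight of 50, and the empty locations slice are all illustrative, and since the recording/iteration bodies above are still TODO, iteration does not yet yield data.

#include <stdbool.h>
#include <ruby.h>
#include "heap_recorder.h"

// Hypothetical iteration callback: extrapolates a total live-object count by
// summing each sample's weight. extra_arg carries the accumulator across calls.
static bool count_live_objects(heap_recorder_iteration_data data, void *extra_arg) {
  unsigned long *total = (unsigned long *) extra_arg;
  *total += data.object_data.weight;
  return true; // returning false would cancel the iteration
}

static void example_lifecycle(void) {
  heap_recorder *recorder = heap_recorder_new();

  // Phase 1: the allocation tracepoint flags the newly-allocated object...
  VALUE new_obj = rb_str_new_cstr("example");
  start_heap_allocation_recording(recorder, new_obj, /* weight: */ 50);

  // Phase 2: ...and once the shared sampling path has assembled the stacktrace,
  // the recording is committed. An empty slice stands in for real locations here.
  end_heap_allocation_recording(recorder, (ddog_prof_Slice_Location) {.ptr = NULL, .len = 0});

  // At serialization time: flush while holding the GVL, then iterate live objects.
  heap_recorder_flush(recorder);
  unsigned long total_live = 0;
  heap_recorder_for_each_live_object(recorder, count_live_objects, &total_live, /* with_gvl: */ true);

  heap_recorder_free(recorder);
}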
diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.c b/ext/ddtrace_profiling_native_extension/stack_recorder.c index a192a8d61ca..4918f8caea8 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.c +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.c @@ -7,6 +7,7 @@ #include "libdatadog_helpers.h" #include "ruby_helpers.h" #include "time_helpers.h" +#include "heap_recorder.h" // Used to wrap a ddog_prof_Profile in a Ruby object and expose Ruby-level serialization APIs // This file implements the native bits of the Datadog::Profiling::StackRecorder class @@ -150,18 +151,23 @@ static VALUE error_symbol = Qnil; // :error in Ruby #define WALL_TIME_VALUE_ID 2 #define ALLOC_SAMPLES_VALUE {.type_ = VALUE_STRING("alloc-samples"), .unit = VALUE_STRING("count")} #define ALLOC_SAMPLES_VALUE_ID 3 +#define HEAP_SAMPLES_VALUE {.type_ = VALUE_STRING("heap-live-samples"), .unit = VALUE_STRING("count")} +#define HEAP_SAMPLES_VALUE_ID 4 -static const ddog_prof_ValueType all_value_types[] = {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE}; +static const ddog_prof_ValueType all_value_types[] = {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, HEAP_SAMPLES_VALUE}; // This array MUST be kept in sync with all_value_types above and is intended to act as a "hashmap" between VALUE_ID and the position it // occupies on the all_value_types array. // E.g. all_value_types_positions[CPU_TIME_VALUE_ID] => 0, means that CPU_TIME_VALUE was declared at position 0 of all_value_types. -static const uint8_t all_value_types_positions[] = {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID}; +static const uint8_t all_value_types_positions[] = {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, HEAP_SAMPLES_VALUE_ID}; #define ALL_VALUE_TYPES_COUNT (sizeof(all_value_types) / sizeof(ddog_prof_ValueType)) // Contains native state for each instance struct stack_recorder_state { + // Heap recorder instance + heap_recorder *heap_recorder; + pthread_mutex_t slot_one_mutex; ddog_prof_Profile slot_one_profile; @@ -197,11 +203,11 @@ static VALUE _native_new(VALUE klass); static void initialize_slot_concurrency_control(struct stack_recorder_state *state); static void initialize_profiles(struct stack_recorder_state *state, ddog_prof_Slice_ValueType sample_types); static void stack_recorder_typed_data_free(void *data); -static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE cpu_time_enabled, VALUE alloc_samples_enabled); +static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE cpu_time_enabled, VALUE alloc_samples_enabled, VALUE heap_samples_enabled); static VALUE _native_serialize(VALUE self, VALUE recorder_instance); static VALUE ruby_time_from(ddog_Timespec ddprof_time); static void *call_serialize_without_gvl(void *call_args); -static struct active_slot_pair sampler_lock_active_profile(); +static struct active_slot_pair sampler_lock_active_profile(struct stack_recorder_state *state); static void sampler_unlock_active_profile(struct active_slot_pair active_slot); static ddog_prof_Profile *serializer_flip_active_and_inactive_slots(struct stack_recorder_state *state); static VALUE _native_active_slot(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance); @@ -213,6 +219,8 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_ static void serializer_set_start_timestamp_for_next_profile(struct 
stack_recorder_state *state, ddog_Timespec start_time); static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE local_root_span_id, VALUE endpoint); static void reset_profile(ddog_prof_Profile *profile, ddog_Timespec *start_time /* Can be null */); +static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight); + void stack_recorder_init(VALUE profiling_module) { VALUE stack_recorder_class = rb_define_class_under(profiling_module, "StackRecorder", rb_cObject); @@ -229,13 +237,14 @@ void stack_recorder_init(VALUE profiling_module) { // https://bugs.ruby-lang.org/issues/18007 for a discussion around this. rb_define_alloc_func(stack_recorder_class, _native_new); - rb_define_singleton_method(stack_recorder_class, "_native_initialize", _native_initialize, 3); + rb_define_singleton_method(stack_recorder_class, "_native_initialize", _native_initialize, 4); rb_define_singleton_method(stack_recorder_class, "_native_serialize", _native_serialize, 1); rb_define_singleton_method(stack_recorder_class, "_native_reset_after_fork", _native_reset_after_fork, 1); rb_define_singleton_method(testing_module, "_native_active_slot", _native_active_slot, 1); rb_define_singleton_method(testing_module, "_native_slot_one_mutex_locked?", _native_is_slot_one_mutex_locked, 1); rb_define_singleton_method(testing_module, "_native_slot_two_mutex_locked?", _native_is_slot_two_mutex_locked, 1); rb_define_singleton_method(testing_module, "_native_record_endpoint", _native_record_endpoint, 3); + rb_define_singleton_method(testing_module, "_native_track_object", _native_track_object, 3); ok_symbol = ID2SYM(rb_intern_const("ok")); error_symbol = ID2SYM(rb_intern_const("error")); @@ -270,6 +279,12 @@ static VALUE _native_new(VALUE klass) { VALUE stack_recorder = TypedData_Wrap_Struct(klass, &stack_recorder_typed_data, state); + // NOTE: We initialize this because we want a new recorder to be operational even without initialization and our + // default is everything enabled. However, if during recorder initialization it turns out we don't want + // heap samples, we will free and reset heap_recorder to NULL, effectively disabling all behaviour specific + // to heap profiling (all calls to heap_recorder_* with a NULL heap recorder are noops). + state->heap_recorder = heap_recorder_new(); + // Note: Don't raise exceptions after this point, since it'll lead to libdatadog memory leaking!
initialize_profiles(state, sample_types); @@ -318,23 +333,29 @@ static void stack_recorder_typed_data_free(void *state_ptr) { pthread_mutex_destroy(&state->slot_two_mutex); ddog_prof_Profile_drop(&state->slot_two_profile); + heap_recorder_free(state->heap_recorder); + ruby_xfree(state); } -static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE cpu_time_enabled, VALUE alloc_samples_enabled) { +static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE cpu_time_enabled, VALUE alloc_samples_enabled, VALUE heap_samples_enabled) { ENFORCE_BOOLEAN(cpu_time_enabled); ENFORCE_BOOLEAN(alloc_samples_enabled); + ENFORCE_BOOLEAN(heap_samples_enabled); struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); - if (cpu_time_enabled == Qtrue && alloc_samples_enabled == Qtrue) return Qtrue; // Nothing to do, this is the default + if (cpu_time_enabled == Qtrue && alloc_samples_enabled == Qtrue && heap_samples_enabled == Qtrue) return Qtrue; // Nothing to do, this is the default // When some sample types are disabled, we need to reconfigure libdatadog to record less types, // as well as reconfigure the position_for array to push the disabled types to the end so they don't get recorded. // See record_sample for details on the use of position_for. - state->enabled_values_count = ALL_VALUE_TYPES_COUNT - (cpu_time_enabled == Qtrue ? 0 : 1) - (alloc_samples_enabled == Qtrue? 0 : 1); + state->enabled_values_count = ALL_VALUE_TYPES_COUNT - + (cpu_time_enabled == Qtrue ? 0 : 1) - + (alloc_samples_enabled == Qtrue? 0 : 1) - + (heap_samples_enabled == Qtrue ? 0 : 1); ddog_prof_ValueType enabled_value_types[ALL_VALUE_TYPES_COUNT]; uint8_t next_enabled_pos = 0; @@ -362,6 +383,18 @@ static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_insta state->position_for[ALLOC_SAMPLES_VALUE_ID] = next_disabled_pos++; } + if (heap_samples_enabled == Qtrue) { + enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) HEAP_SAMPLES_VALUE; + state->position_for[HEAP_SAMPLES_VALUE_ID] = next_enabled_pos++; + } else { + state->position_for[HEAP_SAMPLES_VALUE_ID] = next_disabled_pos++; + + // Turns out heap sampling is disabled but we initialized everything in _native_new + // assuming all samples were enabled. We need to deinitialize the heap recorder. + heap_recorder_free(state->heap_recorder); + state->heap_recorder = NULL; + } + ddog_prof_Profile_drop(&state->slot_one_profile); ddog_prof_Profile_drop(&state->slot_two_profile); @@ -379,6 +412,10 @@ static VALUE _native_serialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instan // Need to do this while still holding on to the Global VM Lock; see comments on method for why serializer_set_start_timestamp_for_next_profile(state, finish_timestamp); + // Flush any pending data in the heap recorder prior to doing the iteration during serialization. 
+ // This needs to happen while holding on to the GVL + heap_recorder_flush(state->heap_recorder); + // We'll release the Global VM Lock while we're calling serialize, so that the Ruby VM can continue to work while this // is pending struct call_serialize_without_gvl_arguments args = {.state = state, .finish_timestamp = finish_timestamp, .serialize_ran = false}; @@ -440,6 +477,14 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, metric_values[position_for[WALL_TIME_VALUE_ID]] = values.wall_time_ns; metric_values[position_for[ALLOC_SAMPLES_VALUE_ID]] = values.alloc_samples; + if (values.alloc_samples != 0) { + // If we got an allocation sample end the heap allocation recording to commit the heap sample. + // FIXME: Heap sampling currently has to be done in 2 parts because the construction of locations is happening + // very late in the allocation-sampling path (which is shared with the cpu sampling path). This can + // be fixed with some refactoring but for now this leads to a less impactful change. + end_heap_allocation_recording(state->heap_recorder, locations); + } + ddog_prof_Profile_Result result = ddog_prof_Profile_add( active_slot.profile, (ddog_prof_Sample) { @@ -457,6 +502,15 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, } } +void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight) { + struct stack_recorder_state *state; + TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); + // FIXME: Heap sampling currently has to be done in 2 parts because the construction of locations is happening + // very late in the allocation-sampling path (which is shared with the cpu sampling path). This can + // be fixed with some refactoring but for now this leads to a less impactful change. + start_heap_allocation_recording(state->heap_recorder, new_object, sample_weight); +} + void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); @@ -472,10 +526,73 @@ void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_ } } +#define MAX_LEN_HEAP_ITERATION_ERROR_MSG 256 + +// Heap recorder iteration context allows us access to stack recorder state and profile being serialized +// during iteration of heap recorder live objects. 
+typedef struct heap_recorder_iteration_context { + struct stack_recorder_state *state; + ddog_prof_Profile *profile; + bool error; + char error_msg[MAX_LEN_HEAP_ITERATION_ERROR_MSG]; +} heap_recorder_iteration_context; + +static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteration_data iteration_data, void *extra_arg) { + heap_recorder_iteration_context *context = (heap_recorder_iteration_context*) extra_arg; + + int64_t metric_values[ALL_VALUE_TYPES_COUNT] = {0}; + uint8_t *position_for = context->state->position_for; + + metric_values[position_for[HEAP_SAMPLES_VALUE_ID]] = iteration_data.object_data.weight; + + ddog_prof_Profile_Result result = ddog_prof_Profile_add( + context->profile, + (ddog_prof_Sample) { + .locations = iteration_data.locations, + .values = (ddog_Slice_I64) {.ptr = metric_values, .len = context->state->enabled_values_count}, + .labels = {0}, + }, + 0 + ); + + if (result.tag == DDOG_PROF_PROFILE_RESULT_ERR) { + read_ddogerr_string_and_drop(&result.err, context->error_msg, MAX_LEN_HEAP_ITERATION_ERROR_MSG); + context->error = true; + // By returning false we cancel the iteration + return false; + } + + // Keep on iterating to next item! + return true; +} + +static void build_heap_profile_without_gvl(struct stack_recorder_state *state, ddog_prof_Profile *profile) { + heap_recorder_iteration_context iteration_context = { + .state = state, + .profile = profile, + .error = false, + .error_msg = {0}, + }; + heap_recorder_for_each_live_object(state->heap_recorder, add_heap_sample_to_active_profile_without_gvl, (void*) &iteration_context, false); + if (iteration_context.error) { + // We wait until we're out of the iteration to grab the gvl and raise. This is important because during + // iteration we first grab the records_mutex and raising requires grabbing the GVL. When sampling, we are + // in the opposite situation: we have the GVL and may need to grab the records_mutex for mutation. This + // different ordering can lead to deadlocks. By delaying the raise here until after we no longer hold + // records_mutex, we prevent this different-lock-acquisition-order situation. 
+ grab_gvl_and_raise(rb_eRuntimeError, "Failure during heap profile building: %s", iteration_context.error_msg); + } +} + static void *call_serialize_without_gvl(void *call_args) { struct call_serialize_without_gvl_arguments *args = (struct call_serialize_without_gvl_arguments *) call_args; args->profile = serializer_flip_active_and_inactive_slots(args->state); + + // Now that we have the inactive profile with all but heap samples, let's fill it with heap data + // without needing to race with the active sampler + build_heap_profile_without_gvl(args->state, args->profile); + // Note: The profile gets reset by the serialize call args->result = ddog_prof_Profile_serialize(args->profile, &args->finish_timestamp, NULL /* duration_nanos is optional */, NULL /* start_time is optional */); args->serialize_ran = true; @@ -597,6 +714,8 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_ reset_profile(&state->slot_one_profile, /* start_time: */ NULL); reset_profile(&state->slot_two_profile, /* start_time: */ NULL); + heap_recorder_after_fork(state->heap_recorder); + return Qtrue; } @@ -614,6 +733,12 @@ static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_ return Qtrue; } +static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight) { + ENFORCE_TYPE(weight, T_FIXNUM); + track_object(recorder_instance, new_obj, NUM2UINT(weight)); + return Qtrue; +} + static void reset_profile(ddog_prof_Profile *profile, ddog_Timespec *start_time /* Can be null */) { ddog_prof_Profile_Result reset_result = ddog_prof_Profile_reset(profile, start_time); if (reset_result.tag == DDOG_PROF_PROFILE_RESULT_ERR) { diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.h b/ext/ddtrace_profiling_native_extension/stack_recorder.h index 2e871069019..9190cc89a25 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.h +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.h @@ -1,6 +1,7 @@ #pragma once #include <datadog/profiling.h> +#include <ruby.h> typedef struct { int64_t cpu_time_ns; @@ -21,4 +22,5 @@ typedef struct sample_labels { void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels); void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint); +void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight); VALUE enforce_recorder_instance(VALUE object); diff --git a/lib/datadog/core/configuration/settings.rb b/lib/datadog/core/configuration/settings.rb index 558e45cabef..43f6f259a69 100644 --- a/lib/datadog/core/configuration/settings.rb +++ b/lib/datadog/core/configuration/settings.rb @@ -314,32 +314,58 @@ def initialize(*_) # Can be used to enable/disable the Datadog::Profiling.allocation_count feature. # - # This feature is safe and enabled by default on Ruby 2.x, but has a few caveats on Ruby 3.x. + # This feature is now controlled via {:experimental_allocation_enabled} + option :allocation_counting_enabled do |o| + o.after_set do + Datadog.logger.warn( + 'The profiling.advanced.allocation_counting_enabled setting has been deprecated for removal and no ' \ + 'longer does anything. Please remove it from your Datadog.configure block. ' \ + 'Allocation counting is now controlled by the `experimental_allocation_enabled` setting instead.' + ) + end + end + + # Can be used to enable/disable collection of allocation profiles.
# - # Caveat 1 (severe): - # On Ruby versions 3.0 (all), 3.1.0 to 3.1.3, and 3.2.0 to 3.2.2 this is disabled by default because it - # can trigger a VM bug that causes a segmentation fault during garbage collection of Ractors - # (https://bugs.ruby-lang.org/issues/18464). We don't recommend using this feature on such Rubies. - # This bug is fixed on Ruby versions 3.1.4, 3.2.3 and 3.3.0. + # This feature is alpha and disabled by default. # - # Caveat 2 (annoyance): - # On all known versions of Ruby 3.x, due to https://bugs.ruby-lang.org/issues/19112, when a ractor gets - # garbage collected, Ruby will disable all active tracepoints, which this feature internally relies on. - # Thus this feature is only usable if you're not using Ractors. + # @warn This feature is not supported/safe in all Rubies. Details in {Datadog::Profiling::Component}, but + # in summary it should be supported on Ruby 2.x, 3.1.4+, 3.2.3+ and 3.3.0+. Enabling it on + # unsupported Rubies may result in unexpected behaviour, including crashes. # - # Caveat 3 (severe): - # Ruby 3.2.0 to 3.2.2 have a bug in the newobj tracepoint (https://bugs.ruby-lang.org/issues/19482, - # https://github.com/ruby/ruby/pull/7464) so that's an extra reason why it's not safe on those Rubies. - # This bug is fixed on Ruby versions 3.2.3 and 3.3.0. + # @note Allocation profiles are not yet GA in the Datadog UI; get in touch if you want to help us test them. # - # @default `true` on Ruby 2.x and 3.1.4+, 3.2.3+ and 3.3.0+; `false` for Ruby 3.0 and unpatched Rubies. - option :allocation_counting_enabled do |o| - o.default do - RUBY_VERSION.start_with?('2.') || - (RUBY_VERSION.start_with?('3.1.') && RUBY_VERSION >= '3.1.4') || - (RUBY_VERSION.start_with?('3.2.') && RUBY_VERSION >= '3.2.3') || - RUBY_VERSION >= '3.3.' - end + # @default `DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED` environment variable as a boolean, otherwise `false` + option :experimental_allocation_enabled do |o| + o.type :bool + o.env 'DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED' + o.default false + end + + # Can be used to enable/disable the collection of heap profiles. + # + # This feature is alpha and disabled by default. + # + # @warn To enable heap profiling you are required to also enable allocation profiling. + # @note Heap profiles are not yet GA in the Datadog UI; get in touch if you want to help us test them. + # + # @default `DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED` environment variable as a boolean, otherwise `false` + option :experimental_heap_enabled do |o| + o.type :bool + o.env 'DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED' + o.default false + end + + # Can be used to configure the allocation sampling rate: a sample will be collected every x allocations. + # + # The lower the value, the more accurate the allocation and heap tracking, but the bigger the overhead. In + # particular, a value of 1 will sample ALL allocations. + # + # @default `DD_PROFILING_EXPERIMENTAL_ALLOCATION_SAMPLE_RATE` environment variable, otherwise `50`. + option :experimental_allocation_sample_rate do |o| + o.type :int + o.env 'DD_PROFILING_EXPERIMENTAL_ALLOCATION_SAMPLE_RATE' + o.default 50 end # Can be used to disable checking which version of `libmysqlclient` is being used by the `mysql2` gem.
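Putting the three new settings together, opting in would look something like the sketch below. This is a hypothetical configuration: the option and environment variable names come from the diff above, while `c.profiling.enabled` is the pre-existing switch that turns the profiler itself on and is assumed here.

Datadog.configure do |c|
  c.profiling.enabled = true
  # Opt into allocation profiles (equivalent to DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED=true)...
  c.profiling.advanced.experimental_allocation_enabled = true
  # ...which is a prerequisite for heap profiles (DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED=true);
  # enabling heap profiling on its own raises an ArgumentError during component setup.
  c.profiling.advanced.experimental_heap_enabled = true
  # Optionally trade overhead for accuracy: sample every 10th allocation instead of the default 50th.
  c.profiling.advanced.experimental_allocation_sample_rate = 10
end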
diff --git a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb index d1a06a13964..3507a53dd56 100644 --- a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +++ b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb @@ -15,15 +15,15 @@ class CpuAndWallTimeWorker def initialize( gc_profiling_enabled:, - allocation_counting_enabled:, no_signals_workaround_enabled:, thread_context_collector:, dynamic_sampling_rate_overhead_target_percentage:, - idle_sampling_helper: IdleSamplingHelper.new, + allocation_sample_every:, + allocation_profiling_enabled:, # **NOTE**: This should only be used for testing; disabling the dynamic sampling rate will increase the # profiler overhead! dynamic_sampling_rate_enabled: true, - allocation_sample_every: 0 # Currently only for testing; Setting this to > 0 can add a lot of overhead! + idle_sampling_helper: IdleSamplingHelper.new ) unless dynamic_sampling_rate_enabled Datadog.logger.warn( @@ -31,23 +31,16 @@ def initialize( ) end - if allocation_counting_enabled && allocation_sample_every > 0 - Datadog.logger.warn( - "Enabled experimental allocation profiling: allocation_sample_every=#{allocation_sample_every}. This is " \ - 'experimental, not recommended, and will increase overhead!' - ) - end - self.class._native_initialize( self, thread_context_collector, gc_profiling_enabled, idle_sampling_helper, - allocation_counting_enabled, no_signals_workaround_enabled, dynamic_sampling_rate_enabled, dynamic_sampling_rate_overhead_target_percentage, allocation_sample_every, + allocation_profiling_enabled, ) @worker_thread = nil @failure_exception = nil diff --git a/lib/datadog/profiling/component.rb b/lib/datadog/profiling/component.rb index 5c50d602c94..228fd01f7a9 100644 --- a/lib/datadog/profiling/component.rb +++ b/lib/datadog/profiling/component.rb @@ -7,7 +7,7 @@ module Component # Passing in a `nil` tracer is supported and will disable the following profiling features: # * Code Hotspots panel in the trace viewer, as well as scoping a profile down to a span # * Endpoint aggregation in the profiler UX, including normalization (resource per endpoint call) - def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) # rubocop:disable Metrics/MethodLength + def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) require_relative '../profiling/diagnostics/environment_logger' Profiling::Diagnostics::EnvironmentLogger.collect_and_log! 
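Note that the constructor change above is breaking for direct callers: allocation_counting_enabled is gone, while allocation_sample_every and allocation_profiling_enabled are now required keyword arguments. A hypothetical direct construction (in practice only Datadog::Profiling::Component builds this, as the next hunk shows):

worker = Datadog::Profiling::Collectors::CpuAndWallTimeWorker.new(
  gc_profiling_enabled: true,
  no_signals_workaround_enabled: false,
  thread_context_collector: thread_context_collector, # a Collectors::ThreadContext built beforehand
  dynamic_sampling_rate_overhead_target_percentage: 2.0,
  allocation_sample_every: 50,
  allocation_profiling_enabled: true,
)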
@@ -41,32 +41,28 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) no_signals_workaround_enabled = no_signals_workaround_enabled?(settings) timeline_enabled = settings.profiling.advanced.experimental_timeline_enabled + allocation_sample_every = get_allocation_sample_every(settings) + allocation_profiling_enabled = enable_allocation_profiling?(settings, allocation_sample_every) + heap_profiling_enabled = enable_heap_profiling?(settings, allocation_profiling_enabled) + overhead_target_percentage = valid_overhead_target(settings.profiling.advanced.overhead_target_percentage) upload_period_seconds = [60, settings.profiling.advanced.upload_period_seconds].max - recorder = Datadog::Profiling::StackRecorder.new( - cpu_time_enabled: RUBY_PLATFORM.include?('linux'), # Only supported on Linux currently - alloc_samples_enabled: false, # Always disabled for now -- work in progress - ) - thread_context_collector = Datadog::Profiling::Collectors::ThreadContext.new( - recorder: recorder, - max_frames: settings.profiling.advanced.max_frames, - tracer: optional_tracer, - endpoint_collection_enabled: settings.profiling.advanced.endpoint.collection.enabled, - timeline_enabled: timeline_enabled, - ) + recorder = build_recorder(allocation_profiling_enabled, heap_profiling_enabled) + thread_context_collector = build_thread_context_collector(settings, recorder, optional_tracer, timeline_enabled) worker = Datadog::Profiling::Collectors::CpuAndWallTimeWorker.new( gc_profiling_enabled: enable_gc_profiling?(settings), - allocation_counting_enabled: settings.profiling.advanced.allocation_counting_enabled, no_signals_workaround_enabled: no_signals_workaround_enabled, thread_context_collector: thread_context_collector, dynamic_sampling_rate_overhead_target_percentage: overhead_target_percentage, - allocation_sample_every: 0, + allocation_sample_every: allocation_sample_every, + allocation_profiling_enabled: allocation_profiling_enabled, ) internal_metadata = { no_signals_workaround_enabled: no_signals_workaround_enabled, timeline_enabled: timeline_enabled, + allocation_sample_every: allocation_sample_every, }.freeze exporter = build_profiler_exporter(settings, recorder, internal_metadata: internal_metadata) @@ -76,6 +72,24 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) Profiling::Profiler.new(worker: worker, scheduler: scheduler) end + private_class_method def self.build_recorder(allocation_profiling_enabled, heap_profiling_enabled) + Datadog::Profiling::StackRecorder.new( + cpu_time_enabled: RUBY_PLATFORM.include?('linux'), # Only supported on Linux currently + alloc_samples_enabled: allocation_profiling_enabled, + heap_samples_enabled: heap_profiling_enabled, + ) + end + + private_class_method def self.build_thread_context_collector(settings, recorder, optional_tracer, timeline_enabled) + Datadog::Profiling::Collectors::ThreadContext.new( + recorder: recorder, + max_frames: settings.profiling.advanced.max_frames, + tracer: optional_tracer, + endpoint_collection_enabled: settings.profiling.advanced.endpoint.collection.enabled, + timeline_enabled: timeline_enabled, + ) + end + private_class_method def self.build_profiler_exporter(settings, recorder, internal_metadata:) code_provenance_collector = (Profiling::Collectors::CodeProvenance.new if settings.profiling.advanced.code_provenance_enabled) @@ -113,6 +127,85 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) end end + private_class_method def 
self.get_allocation_sample_every(settings) + allocation_sample_rate = settings.profiling.advanced.experimental_allocation_sample_rate + + if allocation_sample_rate <= 0 + raise ArgumentError, "Allocation sample rate must be a positive integer. Was #{allocation_sample_rate}" + end + + allocation_sample_rate + end + + private_class_method def self.enable_allocation_profiling?(settings, allocation_sample_every) + unless settings.profiling.advanced.experimental_allocation_enabled + # Allocation profiling disabled, short-circuit out + return false + end + + # Allocation sampling is safe and supported on Ruby 2.x, but has a few caveats on Ruby 3.x. + + # SEVERE - All configurations + # Ruby 3.2.0 to 3.2.2 have a bug in the newobj tracepoint (https://bugs.ruby-lang.org/issues/19482, + # https://github.com/ruby/ruby/pull/7464) that makes this crash in any configuration. This bug is + # fixed on Ruby versions 3.2.3 and 3.3.0. + if RUBY_VERSION.start_with?('3.2.') && RUBY_VERSION < '3.2.3' + Datadog.logger.warn( + 'Allocation profiling is not supported in Ruby versions 3.2.0, 3.2.1 and 3.2.2 and will be forcibly '\ 'disabled. This is due to a VM bug that can lead to crashes (https://bugs.ruby-lang.org/issues/19482). '\ 'Other Ruby versions do not suffer from this issue.' + ) + return false + end + + # SEVERE - Only with Ractors + # On Ruby versions 3.0 (all), 3.1.0 to 3.1.3, and 3.2.0 to 3.2.2 allocation profiling can trigger a VM bug + # that causes a segmentation fault during garbage collection of Ractors + # (https://bugs.ruby-lang.org/issues/18464). We don't recommend using this feature on such Rubies. + # This bug is fixed on Ruby versions 3.1.4, 3.2.3 and 3.3.0. + if RUBY_VERSION.start_with?('3.0.') || + (RUBY_VERSION.start_with?('3.1.') && RUBY_VERSION < '3.1.4') || + (RUBY_VERSION.start_with?('3.2.') && RUBY_VERSION < '3.2.3') + Datadog.logger.warn( + "Current Ruby version (#{RUBY_VERSION}) has a VM bug where enabling allocation profiling while using "\ 'Ractors may cause unexpected issues, including crashes (https://bugs.ruby-lang.org/issues/18464). '\ 'This does not happen if Ractors are not used.' + ) + # ANNOYANCE - Only with Ractors + # On all known versions of Ruby 3.x, due to https://bugs.ruby-lang.org/issues/19112, when a ractor gets + # garbage collected, Ruby will disable all active tracepoints, which this feature internally relies on. + elsif RUBY_VERSION.start_with?('3.') + Datadog.logger.warn( + 'In all known versions of Ruby 3.x, using Ractors may result in allocation profiling unexpectedly ' \ 'stopping (https://bugs.ruby-lang.org/issues/19112). Note that this stop has no impact on your ' \ 'application stability or performance. This does not happen if Ractors are not used.' + ) + end + + Datadog.logger.warn( + "Enabled experimental allocation profiling: allocation_sample_rate=#{allocation_sample_every}. This is " \ 'experimental, not recommended, and will increase overhead!' + ) + + true + end + + private_class_method def self.enable_heap_profiling?(settings, allocation_profiling_enabled) + heap_profiling_enabled = settings.profiling.advanced.experimental_heap_enabled + + if heap_profiling_enabled && !allocation_profiling_enabled + raise ArgumentError, 'Heap profiling requires allocation profiling to be enabled' + end + + if heap_profiling_enabled + Datadog.logger.warn( + 'Enabled experimental heap profiling. This is experimental, not recommended, and will increase overhead!'
+ ) + end + + heap_profiling_enabled + end + private_class_method def self.no_signals_workaround_enabled?(settings) # rubocop:disable Metrics/MethodLength setting_value = settings.profiling.advanced.no_signals_workaround_enabled legacy_ruby_that_should_use_workaround = RUBY_VERSION.start_with?('2.3.', '2.4.', '2.5.') diff --git a/lib/datadog/profiling/stack_recorder.rb b/lib/datadog/profiling/stack_recorder.rb index ccbe710dafd..3153a4982c5 100644 --- a/lib/datadog/profiling/stack_recorder.rb +++ b/lib/datadog/profiling/stack_recorder.rb @@ -4,7 +4,7 @@ module Profiling # Note that `record_sample` is only accessible from native code. # Methods prefixed with _native_ are implemented in `stack_recorder.c` class StackRecorder - def initialize(cpu_time_enabled:, alloc_samples_enabled:) + def initialize(cpu_time_enabled:, alloc_samples_enabled:, heap_samples_enabled:) # This mutex works in addition to the fancy C-level mutexes we have in the native side (see the docs there). # It prevents multiple Ruby threads calling serialize at the same time -- something like # `10.times { Thread.new { stack_recorder.serialize } }`. @@ -13,7 +13,7 @@ def initialize(cpu_time_enabled:, alloc_samples_enabled:) # accidentally happening. @no_concurrent_synchronize_mutex = Mutex.new - self.class._native_initialize(self, cpu_time_enabled, alloc_samples_enabled) + self.class._native_initialize(self, cpu_time_enabled, alloc_samples_enabled, heap_samples_enabled) end def serialize diff --git a/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs b/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs index f819fcdbdff..cc887d59ce8 100644 --- a/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs +++ b/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs @@ -9,13 +9,13 @@ module Datadog def initialize: ( gc_profiling_enabled: bool, - allocation_counting_enabled: bool, no_signals_workaround_enabled: bool, thread_context_collector: Datadog::Profiling::Collectors::ThreadContext, dynamic_sampling_rate_overhead_target_percentage: Float, ?idle_sampling_helper: Datadog::Profiling::Collectors::IdleSamplingHelper, ?dynamic_sampling_rate_enabled: bool, - ?allocation_sample_every: Integer, + allocation_sample_every: Integer, + allocation_profiling_enabled: bool, ) -> void def self._native_initialize: ( @@ -23,11 +23,11 @@ module Datadog ThreadContext thread_context_collector, bool gc_profiling_enabled, IdleSamplingHelper idle_sampling_helper, - bool allocation_counting_enabled, bool no_signals_workaround_enabled, bool dynamic_sampling_rate_enabled, Float dynamic_sampling_rate_overhead_target_percentage, ::Integer allocation_sample_every, + bool allocation_profiling_enabled, ) -> true def start: (?on_failure_proc: ::Proc?) -> bool? diff --git a/sig/datadog/profiling/component.rbs b/sig/datadog/profiling/component.rbs index 675143e397c..2b32dbe88fe 100644 --- a/sig/datadog/profiling/component.rbs +++ b/sig/datadog/profiling/component.rbs @@ -7,6 +7,18 @@ module Datadog optional_tracer: Datadog::Tracing::Tracer?, ) -> Datadog::Profiling::Profiler? + def self.build_recorder: ( + bool allocation_profiling_enabled, + bool heap_profiling_enabled, + ) -> Datadog::Profiling::StackRecorder + + def self.build_thread_context_collector: ( + untyped settings, + Datadog::Profiling::StackRecorder recorder, + Datadog::Tracing::Tracer? 
optional_tracer, + bool timeline_enabled, + ) -> Datadog::Profiling::Collectors::ThreadContext + def self.build_profiler_exporter: ( untyped settings, Datadog::Profiling::StackRecorder recorder, @@ -19,6 +31,9 @@ module Datadog ) -> untyped def self.enable_gc_profiling?: (untyped settings) -> bool + def self.get_allocation_sample_every: (untyped settings) -> ::Integer + def self.enable_allocation_profiling?: (untyped settings, ::Integer allocation_sample_every) -> bool + def self.enable_heap_profiling?: (untyped settings, bool allocation_profiling_enabled) -> bool def self.no_signals_workaround_enabled?: (untyped settings) -> bool diff --git a/sig/datadog/profiling/stack_recorder.rbs b/sig/datadog/profiling/stack_recorder.rbs index a3ff67b7e38..0f61f9c8ab9 100644 --- a/sig/datadog/profiling/stack_recorder.rbs +++ b/sig/datadog/profiling/stack_recorder.rbs @@ -3,12 +3,13 @@ module Datadog class StackRecorder @no_concurrent_synchronize_mutex: ::Thread::Mutex - def initialize: (cpu_time_enabled: bool, alloc_samples_enabled: bool) -> void + def initialize: (cpu_time_enabled: bool, alloc_samples_enabled: bool, heap_samples_enabled: bool) -> void def self._native_initialize: ( Datadog::Profiling::StackRecorder recorder_instance, bool cpu_time_enabled, bool alloc_samples_enabled, + bool heap_samples_enabled, ) -> true def serialize: () -> untyped diff --git a/spec/datadog/core/configuration/settings_spec.rb b/spec/datadog/core/configuration/settings_spec.rb index a1e180c83c0..743a5294241 100644 --- a/spec/datadog/core/configuration/settings_spec.rb +++ b/spec/datadog/core/configuration/settings_spec.rb @@ -497,39 +497,116 @@ end end - describe '#allocation_counting_enabled' do - subject(:allocation_counting_enabled) { settings.profiling.advanced.allocation_counting_enabled } - - before { stub_const('RUBY_VERSION', testing_version) } + describe '#allocation_counting_enabled=' do + it 'logs a warning informing customers this no longer does anything' do + expect(Datadog.logger).to receive(:warn).with(/no longer does anything/) - context 'on Ruby 2.x' do - let(:testing_version) { '2.3.0 ' } - it { is_expected.to be true } + settings.profiling.advanced.allocation_counting_enabled = false end + end + + describe '#experimental_allocation_enabled' do + subject(:experimental_allocation_enabled) { settings.profiling.advanced.experimental_allocation_enabled } + + context 'when DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED' do + around do |example| + ClimateControl.modify('DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED' => environment) do + example.run + end + end + + context 'is not defined' do + let(:environment) { nil } - ['3.0.0', '3.1.0', '3.1.3', '3.2.0', '3.2.2'].each do |broken_ruby| - context "on a Ruby 3 version affected by https://bugs.ruby-lang.org/issues/18464 (#{broken_ruby})" do - let(:testing_version) { broken_ruby } it { is_expected.to be false } end + + [true, false].each do |value| + context "is defined as #{value}" do + let(:environment) { value.to_s } + + it { is_expected.to be value } + end + end end + end - ['3.1.4', '3.2.3', '3.3.0'].each do |fixed_ruby| - context "on a Ruby 3 version where https://bugs.ruby-lang.org/issues/18464 is fixed (#{fixed_ruby})" do - let(:testing_version) { fixed_ruby } - it { is_expected.to be true } + describe '#experimental_allocation_enabled=' do + it 'updates the #experimental_allocation_enabled setting' do + expect { settings.profiling.advanced.experimental_allocation_enabled = true } + .to change { 
settings.profiling.advanced.experimental_allocation_enabled } + .from(false) + .to(true) + end + end + + describe '#experimental_heap_enabled' do + subject(:experimental_heap_enabled) { settings.profiling.advanced.experimental_heap_enabled } + + context 'when DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED' do + around do |example| + ClimateControl.modify('DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED' => environment) do + example.run + end + end + + context 'is not defined' do + let(:environment) { nil } + + it { is_expected.to be false } + end + + [true, false].each do |value| + context "is defined as #{value}" do + let(:environment) { value.to_s } + + it { is_expected.to be value } + end end end end - describe '#allocation_counting_enabled=' do - it 'updates the #allocation_counting_enabled setting' do - settings.profiling.advanced.allocation_counting_enabled = true + describe '#experimental_heap_enabled=' do + it 'updates the #experimental_heap_enabled setting' do + expect { settings.profiling.advanced.experimental_heap_enabled = true } + .to change { settings.profiling.advanced.experimental_heap_enabled } + .from(false) + .to(true) + end + end - expect { settings.profiling.advanced.allocation_counting_enabled = false } - .to change { settings.profiling.advanced.allocation_counting_enabled } - .from(true) - .to(false) + describe '#experimental_allocation_sample_rate' do + subject(:experimental_allocation_sample_rate) { settings.profiling.advanced.experimental_allocation_sample_rate } + + context 'when DD_PROFILING_EXPERIMENTAL_ALLOCATION_SAMPLE_RATE' do + around do |example| + ClimateControl.modify('DD_PROFILING_EXPERIMENTAL_ALLOCATION_SAMPLE_RATE' => environment) do + example.run + end + end + + context 'is not defined' do + let(:environment) { nil } + + it { is_expected.to be 50 } + end + + [100, 30.5].each do |value| + context "is defined as #{value}" do + let(:environment) { value.to_s } + + it { is_expected.to be value.to_i } + end + end + end + end + + describe '#experimental_allocation_sample_rate=' do + it 'updates the #experimental_allocation_sample_rate setting' do + expect { settings.profiling.advanced.experimental_allocation_sample_rate = 100 } + .to change { settings.profiling.advanced.experimental_allocation_sample_rate } + .from(50) + .to(100) end end diff --git a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb index 43c63adfc24..21888900e7c 100644 --- a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb +++ b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb @@ -7,20 +7,24 @@ # drops support for these Rubies globally let(:described_class) { Datadog::Profiling::Collectors::CpuAndWallTimeWorker } - let(:recorder) { build_stack_recorder } let(:endpoint_collection_enabled) { true } let(:gc_profiling_enabled) { true } - let(:allocation_counting_enabled) { true } + let(:allocation_sample_every) { 50 } + let(:allocation_profiling_enabled) { false } + let(:heap_profiling_enabled) { false } + let(:recorder) { build_stack_recorder(heap_samples_enabled: heap_profiling_enabled) } let(:no_signals_workaround_enabled) { false } let(:timeline_enabled) { false } let(:options) { {} } let(:worker_settings) do { gc_profiling_enabled: gc_profiling_enabled, - allocation_counting_enabled: allocation_counting_enabled, no_signals_workaround_enabled: no_signals_workaround_enabled, thread_context_collector: build_thread_context_collector(recorder), 
       dynamic_sampling_rate_overhead_target_percentage: 2.0,
+      allocation_sample_every: allocation_sample_every,
+      allocation_profiling_enabled: allocation_profiling_enabled,
+      **options
     }
   end
@@ -57,6 +61,8 @@
   end

   describe '#start' do
+    let(:expected_worker_initialization_error) { nil }
+
     subject(:start) do
       cpu_and_wall_time_worker.start
       wait_until_running
@@ -442,25 +448,22 @@
       end
     end

-    context 'when allocation sampling is enabled' do
-      let(:options) { { allocation_sample_every: 1 } }
+    context 'when allocation profiling is enabled' do
+      let(:allocation_profiling_enabled) { true }
+      let(:test_num_allocated_object) { 123 }
+      # Sample all allocations to get easy-to-reason-about numbers that are also not flaky in CI.
+      let(:allocation_sample_every) { 1 }

       before do
         allow(Datadog.logger).to receive(:warn)
       end

-      it 'logs a warning message mentioning this is experimental' do
-        expect(Datadog.logger).to receive(:warn).with(/Enabled experimental allocation profiling/)
-
-        start
-      end
-
       it 'records allocated objects' do
         stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo))

         start

-        123.times { CpuAndWallTimeWorkerSpec::TestStruct.new }
+        test_num_allocated_object.times { CpuAndWallTimeWorkerSpec::TestStruct.new }
         allocation_line = __LINE__ - 1

         cpu_and_wall_time_worker.stop
@@ -469,7 +472,7 @@
           samples_for_thread(samples_from_pprof(recorder.serialize!), Thread.current)
             .find { |s| s.labels[:'allocation class'] == 'CpuAndWallTimeWorkerSpec::TestStruct' }

-        expect(allocation_sample.values).to include(:'alloc-samples' => 123)
+        expect(allocation_sample.values).to include(:'alloc-samples' => test_num_allocated_object)
         expect(allocation_sample.locations.first.lineno).to eq allocation_line
       end

@@ -540,7 +543,7 @@
     end

     context 'when allocation sampling is disabled' do
-      let(:options) { { allocation_sample_every: 0 } }
+      let(:allocation_profiling_enabled) { false }

       it 'does not record allocations' do
         stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo))
@@ -555,6 +558,52 @@
       end
     end

+    context 'when heap profiling is enabled' do
+      let(:allocation_profiling_enabled) { true }
+      let(:heap_profiling_enabled) { true }
+      let(:test_num_allocated_object) { 123 }
+      # Sample all allocations to get easy-to-reason-about numbers that are also not flaky in CI.
+      let(:allocation_sample_every) { 1 }
+
+      before do
+        allow(Datadog.logger).to receive(:warn)
+      end
+
+      it 'records live heap objects' do
+        pending "heap profiling isn't actually implemented just yet"
+
+        stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo))
+
+        start
+
+        test_num_allocated_object.times { CpuAndWallTimeWorkerSpec::TestStruct.new }
+        allocation_line = __LINE__ - 1
+
+        cpu_and_wall_time_worker.stop
+
+        relevant_sample = samples_for_thread(samples_from_pprof(recorder.serialize!), Thread.current)
+          .find { |s| s.locations.first.lineno == allocation_line && s.locations.first.path == __FILE__ }
+
+        expect(relevant_sample.values[:'heap-live-samples']).to eq test_num_allocated_object
+      end
+    end
+
+    context 'when heap profiling is disabled' do
+      let(:heap_profiling_enabled) { false }
+
+      it 'does not record heap samples' do
+        stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo))
+
+        start
+
+        123.times { CpuAndWallTimeWorkerSpec::TestStruct.new }
+
+        cpu_and_wall_time_worker.stop
+
+        expect(samples_from_pprof(recorder.serialize!).select { |s| s.values.key?(:'heap-live-samples') }).to be_empty
+      end
+    end
+
     context 'Process::Waiter crash regression tests' do
       # On Ruby 2.3 to 2.6, there's a crash when accessing instance variables of the `process_waiter_thread`,
       # see https://bugs.ruby-lang.org/issues/17807 .
@@ -633,7 +682,7 @@
       # so this must be disabled when interacting with Ractors.
       let(:gc_profiling_enabled) { false }
       # ...same thing for the tracepoint for allocation counting/profiling :(
-      let(:allocation_counting_enabled) { false }
+      let(:allocation_profiling_enabled) { false }

       describe 'handle_sampling_signal' do
         include_examples 'does not trigger a sample',
@@ -815,51 +864,68 @@
       cpu_and_wall_time_worker.stop
     end

-    it 'returns the number of allocations between two calls of the method' do
-      # To get the exact expected number of allocations, we run this once before so that Ruby can create and cache all
-      # it needs to
-      new_object = proc { Object.new }
-      1.times(&new_object)
+    context 'when allocation profiling is enabled' do
+      let(:allocation_profiling_enabled) { true }

-      before_allocations = described_class._native_allocation_count
-      100.times(&new_object)
-      after_allocations = described_class._native_allocation_count
+      it 'always returns a >= 0 value' do
+        expect(described_class._native_allocation_count).to be >= 0
+      end

-      expect(after_allocations - before_allocations).to be 100
-    end
+      it 'returns the number of allocations between two calls of the method' do
+        # To get the exact expected number of allocations, we run this once before so that Ruby can create and cache all
+        # it needs to
+        new_object = proc { Object.new }
+        1.times(&new_object)

-    it 'returns different numbers of allocations for different threads' do
-      # To get the exact expected number of allocations, we run this once before so that Ruby can create and cache all
-      # it needs to
-      new_object = proc { Object.new }
-      1.times(&new_object)
+        before_allocations = described_class._native_allocation_count
+        100.times(&new_object)
+        after_allocations = described_class._native_allocation_count

-      t1_can_run = Queue.new
-      t1_has_run = Queue.new
-      before_t1 = nil
-      after_t1 = nil
+        expect(after_allocations - before_allocations).to be 100
+      end

-      background_t1 = Thread.new do
-        before_t1 = described_class._native_allocation_count
-        t1_can_run.pop
+      it 'returns different numbers of allocations for different threads' do
+        # To get the exact expected number of allocations, we run this once before so that Ruby can create and cache all
+        # it needs to
+        new_object = proc { Object.new }
+        1.times(&new_object)

-        100.times(&new_object)
-        after_t1 = described_class._native_allocation_count
-        t1_has_run << true
-      end
+        t1_can_run = Queue.new
+        t1_has_run = Queue.new
+        before_t1 = nil
+        after_t1 = nil
+
+        background_t1 = Thread.new do
+          before_t1 = described_class._native_allocation_count
+          t1_can_run.pop
+
+          100.times(&new_object)
+          after_t1 = described_class._native_allocation_count
+          t1_has_run << true
+        end
+
+        before_allocations = described_class._native_allocation_count
+        t1_can_run << true
+        t1_has_run.pop
+        after_allocations = described_class._native_allocation_count
+
+        background_t1.join

-      before_allocations = described_class._native_allocation_count
-      t1_can_run << true
-      t1_has_run.pop
-      after_allocations = described_class._native_allocation_count
+        # This test checks that even though we observed 100 allocations in a background thread t1, the counters for
+        # the current thread were not affected by this change

-      background_t1.join
+        expect(after_t1 - before_t1).to be 100
+        expect(after_allocations - before_allocations).to be < 10
+      end
+    end

-      # This test checks that even though we observed 100 allocations in a background thread t1, the counters for
-      # the current thread were not affected by this change
+    context 'when allocation profiling is disabled' do
+      let(:allocation_profiling_enabled) { false }

-      expect(after_t1 - before_t1).to be 100
-      expect(after_allocations - before_allocations).to be < 10
+      it 'always returns a nil value' do
+        100.times { Object.new }
+
+        expect(described_class._native_allocation_count).to be nil
+      end
     end
   end
 end
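The specs above pin down the _native_allocation_count contract: it returns nil unless allocation profiling is actively tracking, and otherwise a counter whose deltas are observed per thread. A small usage sketch follows; the class and method are the ones under test, while the surrounding script is illustrative:

    # Illustrative use of the contract tested above (requires a running profiler
    # with allocation profiling enabled to return non-nil values).
    worker_class = Datadog::Profiling::Collectors::CpuAndWallTimeWorker

    before = worker_class._native_allocation_count
    100.times { Object.new }
    after = worker_class._native_allocation_count

    if before.nil?
      puts 'allocation tracking inactive (profiler stopped or allocation profiling disabled)'
    else
      puts "#{after - before} allocations observed by this thread" # ~100 in this sketch
    end

diff --git a/spec/datadog/profiling/component_spec.rb b/spec/datadog/profiling/component_spec.rb
index 7d7a28d4931..77ac09732b4 100644
--- a/spec/datadog/profiling/component_spec.rb
+++ b/spec/datadog/profiling/component_spec.rb
@@ -80,11 +80,11 @@
       expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with(
         gc_profiling_enabled: anything,
-        allocation_counting_enabled: anything,
         no_signals_workaround_enabled: :no_signals_result,
         thread_context_collector: instance_of(Datadog::Profiling::Collectors::ThreadContext),
         dynamic_sampling_rate_overhead_target_percentage: :overhead_target_percentage_config,
-        allocation_sample_every: 0,
+        allocation_sample_every: kind_of(Integer),
+        allocation_profiling_enabled: false,
       )

       build_profiler_component
@@ -124,29 +124,153 @@
       end
     end

-    context 'when allocation_counting_enabled is enabled' do
+    context 'when allocation profiling is enabled' do
       before do
-        settings.profiling.advanced.allocation_counting_enabled = true
+        settings.profiling.advanced.experimental_allocation_enabled = true
+        stub_const('RUBY_VERSION', testing_version)
       end

-      it 'initializes a CpuAndWallTimeWorker collector with allocation_counting_enabled set to true' do
+      context 'on Ruby 2.x' do
+        let(:testing_version) { '2.3.0 ' }
+
+        it 'initializes CpuAndWallTimeWorker and StackRecorder with allocation sampling support and warns' do
+          expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including(
+            allocation_profiling_enabled: true,
+          )
+          expect(Datadog::Profiling::StackRecorder).to receive(:new)
+            .with(hash_including(alloc_samples_enabled: true))
+            .and_call_original
+
+          expect(Datadog.logger).to receive(:warn).with(/experimental allocation profiling/)
+          expect(Datadog.logger).to_not receive(:warn).with(/Ractor/)
+
+          build_profiler_component
+        end
+      end
+
+      ['3.2.0', '3.2.1', '3.2.2'].each do |broken_ruby|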
+        context "on a Ruby 3 version affected by https://bugs.ruby-lang.org/issues/19482 (#{broken_ruby})" do
+          let(:testing_version) { broken_ruby }
+
+          it 'initializes a CpuAndWallTimeWorker and StackRecorder with allocation sampling force-disabled and warns' do
+            expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including(
+              allocation_profiling_enabled: false,
+            )
+            expect(Datadog::Profiling::StackRecorder).to receive(:new)
+              .with(hash_including(alloc_samples_enabled: false))
+              .and_call_original
+
+            expect(Datadog.logger).to receive(:warn).with(/forcibly disabled/)
+            expect(Datadog.logger).to_not receive(:warn).with(/Ractor/)
+            expect(Datadog.logger).to_not receive(:warn).with(/experimental allocation profiling/)
+
+            build_profiler_component
+          end
+        end
+      end
+
+      ['3.0.0', '3.1.0', '3.1.3'].each do |broken_ractors_ruby|
+        context "on a Ruby 3 version affected by https://bugs.ruby-lang.org/issues/18464 (#{broken_ractors_ruby})" do
+          let(:testing_version) { broken_ractors_ruby }
+
+          it 'initializes CpuAndWallTimeWorker and StackRecorder with allocation sampling support and warns' do
+            expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including(
+              allocation_profiling_enabled: true,
+            )
+            expect(Datadog::Profiling::StackRecorder).to receive(:new)
+              .with(hash_including(alloc_samples_enabled: true))
+              .and_call_original
+
+            expect(Datadog.logger).to receive(:warn).with(/Ractors.+crashes/)
+            expect(Datadog.logger).to receive(:warn).with(/experimental allocation profiling/)
+
+            build_profiler_component
+          end
+        end
+      end
+
+      ['3.1.4', '3.2.3', '3.3.0'].each do |fixed_ruby|
+        context "on a Ruby 3 version where https://bugs.ruby-lang.org/issues/18464 is fixed (#{fixed_ruby})" do
+          let(:testing_version) { fixed_ruby }
+
+          it 'initializes CpuAndWallTimeWorker and StackRecorder with allocation sampling support and warns' do
+            expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including(
+              allocation_profiling_enabled: true,
+            )
+            expect(Datadog::Profiling::StackRecorder).to receive(:new)
+              .with(hash_including(alloc_samples_enabled: true))
+              .and_call_original
+
+            expect(Datadog.logger).to receive(:warn).with(/Ractors.+stopping/)
+            expect(Datadog.logger).to receive(:warn).with(/experimental allocation profiling/)
+
+            build_profiler_component
+          end
+        end
+      end
+    end
+
+    context 'when allocation profiling is disabled' do
+      before do
+        settings.profiling.advanced.experimental_allocation_enabled = false
+      end
+
+      it 'initializes CpuAndWallTimeWorker and StackRecorder without allocation sampling support' do
         expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including(
-          allocation_counting_enabled: true,
+          allocation_profiling_enabled: false,
         )
+        expect(Datadog::Profiling::StackRecorder).to receive(:new)
+          .with(hash_including(alloc_samples_enabled: false))
+          .and_call_original

         build_profiler_component
       end
     end

-    context 'when allocation_counting_enabled is disabled' do
+    context 'when heap profiling is enabled' do
       before do
-        settings.profiling.advanced.allocation_counting_enabled = false
+        settings.profiling.advanced.experimental_heap_enabled = true
+        # Universally supported Ruby version for allocation profiling; we don't want to test those
+        # edge cases here
+        stub_const('RUBY_VERSION', '2.7.2')
       end

-      it 'initializes a CpuAndWallTimeWorker collector with allocation_counting_enabled set to false' do
-        expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including(
-          allocation_counting_enabled: false,
-        )
+      context 'and allocation profiling disabled' do
+        before do
+          settings.profiling.advanced.experimental_allocation_enabled = false
+        end
+
+        it 'raises an ArgumentError during component initialization' do
+          expect { build_profiler_component }.to raise_error(ArgumentError, /requires allocation profiling/)
+        end
+      end
+
+      context 'and allocation profiling enabled and supported' do
+        before do
+          settings.profiling.advanced.experimental_allocation_enabled = true
+        end
+
+        it 'initializes StackRecorder with heap sampling support and warns' do
+          expect(Datadog::Profiling::StackRecorder).to receive(:new)
+            .with(hash_including(heap_samples_enabled: true))
+            .and_call_original
+
+          expect(Datadog.logger).to receive(:warn).with(/experimental allocation profiling/)
+          expect(Datadog.logger).to receive(:warn).with(/experimental heap profiling/)
+
+          build_profiler_component
+        end
+      end
+    end
+
+    context 'when heap profiling is disabled' do
+      before do
+        settings.profiling.advanced.experimental_heap_enabled = false
+      end
+
+      it 'initializes StackRecorder without heap sampling support' do
+        expect(Datadog::Profiling::StackRecorder).to receive(:new)
+          .with(hash_including(heap_samples_enabled: false))
+          .and_call_original

         build_profiler_component
       end
@@ -168,17 +292,20 @@
       build_profiler_component
     end

-    it 'sets up the Exporter internal_metadata with no_signals_workaround_enabled and timeline_enabled settings' do
+    it 'sets up the Exporter internal_metadata with relevant settings' do
       allow(Datadog::Profiling::Collectors::ThreadContext).to receive(:new)
       allow(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new)
       expect(described_class).to receive(:no_signals_workaround_enabled?).and_return(:no_signals_result)
       expect(settings.profiling.advanced).to receive(:experimental_timeline_enabled).and_return(:timeline_result)
+      expect(settings.profiling.advanced).to receive(:experimental_allocation_sample_rate)
+        .and_return(123)

       expect(Datadog::Profiling::Exporter).to receive(:new).with(
         hash_including(
           internal_metadata: {
             no_signals_workaround_enabled: :no_signals_result,
             timeline_enabled: :timeline_result,
+            allocation_sample_every: 123,
           }
         )
       )
@@ -186,13 +313,6 @@
       build_profiler_component
     end

-    it 'sets up the StackRecorder with alloc_samples_enabled: false' do
-      expect(Datadog::Profiling::StackRecorder)
-        .to receive(:new).with(hash_including(alloc_samples_enabled: false)).and_call_original
-
-      build_profiler_component
-    end
-
     context 'when on Linux' do
       before { stub_const('RUBY_PLATFORM', 'some-linux-based-platform') }
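The component specs above encode a Ruby-version decision table for allocation profiling. The following is a hedged summary sketch; the helper name is invented for illustration, and the real checks live in Datadog::Profiling::Component:

    require 'rubygems' # Gem::Version

    # Decision table as exercised by the specs above (sketch, not dd-trace-rb code):
    # * 3.2.0..3.2.2: allocation profiling forcibly disabled (https://bugs.ruby-lang.org/issues/19482)
    # * 3.0.0..3.1.3: enabled, but warns that using Ractors may crash the VM (https://bugs.ruby-lang.org/issues/18464)
    # * 3.1.4+/3.2.3+/3.3+: enabled, but warns that Ractors stop allocation tracking
    # * 2.x: enabled, with only the generic experimental warning
    def allocation_profiling_gate(ruby_version)
      version = Gem::Version.new(ruby_version.strip)
      if version >= Gem::Version.new('3.2.0') && version < Gem::Version.new('3.2.3')
        :force_disabled
      elsif version >= Gem::Version.new('3.0.0') && version < Gem::Version.new('3.1.4')
        :enabled_with_ractor_crash_warning
      elsif version >= Gem::Version.new('3.1.4')
        :enabled_with_ractor_stop_warning
      else
        :enabled
      end
    end

    allocation_profiling_gate('3.2.2') # => :force_disabled
    allocation_profiling_gate('2.7.2') # => :enabled

diff --git a/spec/datadog/profiling/spec_helper.rb b/spec/datadog/profiling/spec_helper.rb
index 6d15cf22170..e02c1af213b 100644
--- a/spec/datadog/profiling/spec_helper.rb
+++ b/spec/datadog/profiling/spec_helper.rb
@@ -72,8 +72,14 @@ def samples_for_thread(samples, thread)
     samples.select { |sample| object_id_from(sample.labels.fetch(:'thread id')) == thread.object_id }
   end

-  def build_stack_recorder
-    Datadog::Profiling::StackRecorder.new(cpu_time_enabled: true, alloc_samples_enabled: true)
+  # We disable heap_sample collection by default in tests since it requires some extra mocking/
+  # setup for it to properly work.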
+  def build_stack_recorder(heap_samples_enabled: false)
+    Datadog::Profiling::StackRecorder.new(
+      cpu_time_enabled: true,
+      alloc_samples_enabled: true,
+      heap_samples_enabled: heap_samples_enabled
+    )
   end
 end
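The stack_recorder_spec changes below drive heap sampling through testing-only native entry points. A minimal sketch of that two-step flow, mirroring the calls the spec itself makes (the values are illustrative, and per the spec's own pending notes the heap_recorder backing this is not implemented yet):

    # Two-step heap sampling flow, as exercised by the spec below (requires
    # dd-trace-rb's native test extensions).
    recorder = Datadog::Profiling::StackRecorder.new(
      cpu_time_enabled: true, alloc_samples_enabled: true, heap_samples_enabled: true
    )
    obj = Object.new
    sample_rate = 50 # each tracked object stands in for ~50 allocations

    # Step 1: register the allocated object and its sampling weight...
    Datadog::Profiling::StackRecorder::Testing._native_track_object(recorder, obj, sample_rate)

    # Step 2: ...then record the allocation stacktrace against the same recorder.
    metric_values = { 'cpu-time' => 0, 'cpu-samples' => 1, 'wall-time' => 0, 'alloc-samples' => sample_rate }
    labels = { 'state' => 'unknown' }.to_a
    Datadog::Profiling::Collectors::Stack::Testing
      ._native_sample(Thread.current, recorder, metric_values, labels, [], 400, false)

diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb
index 585d41f9d44..8652ceb5e05 100644
--- a/spec/datadog/profiling/stack_recorder_spec.rb
+++ b/spec/datadog/profiling/stack_recorder_spec.rb
@@ -7,9 +7,16 @@
   let(:numeric_labels) { [] }
   let(:cpu_time_enabled) { true }
   let(:alloc_samples_enabled) { true }
+  # Disabling these by default since they require some extra setup and produce separate samples.
+  # Enabling this is tested in a particular context below.
+  let(:heap_samples_enabled) { false }

   subject(:stack_recorder) do
-    described_class.new(cpu_time_enabled: cpu_time_enabled, alloc_samples_enabled: alloc_samples_enabled)
+    described_class.new(
+      cpu_time_enabled: cpu_time_enabled,
+      alloc_samples_enabled: alloc_samples_enabled,
+      heap_samples_enabled: heap_samples_enabled
+    )
   end

   # NOTE: A lot of libdatadog integration behaviors are tested in the Collectors::Stack specs, since we need actual
@@ -114,6 +121,7 @@ def slot_two_mutex_locked?
     context 'when all profile types are enabled' do
       let(:cpu_time_enabled) { true }
       let(:alloc_samples_enabled) { true }
+      let(:heap_samples_enabled) { true }

       it 'returns a pprof with the configured sample types' do
         expect(sample_types_from(decoded_profile)).to eq(
           'cpu-time' => 'nanoseconds',
           'cpu-samples' => 'count',
           'wall-time' => 'nanoseconds',
           'alloc-samples' => 'count',
+          'heap-live-samples' => 'count',
         )
       end
     end

@@ -128,12 +137,14 @@ def slot_two_mutex_locked?
     context 'when cpu-time is disabled' do
       let(:cpu_time_enabled) { false }
       let(:alloc_samples_enabled) { true }
+      let(:heap_samples_enabled) { true }

       it 'returns a pprof without the cpu-type type' do
         expect(sample_types_from(decoded_profile)).to eq(
           'cpu-samples' => 'count',
           'wall-time' => 'nanoseconds',
           'alloc-samples' => 'count',
+          'heap-live-samples' => 'count',
         )
       end
     end

@@ -141,12 +152,29 @@ def slot_two_mutex_locked?
     context 'when alloc-samples is disabled' do
       let(:cpu_time_enabled) { true }
       let(:alloc_samples_enabled) { false }
+      let(:heap_samples_enabled) { true }

       it 'returns a pprof without the alloc-samples type' do
         expect(sample_types_from(decoded_profile)).to eq(
           'cpu-time' => 'nanoseconds',
           'cpu-samples' => 'count',
           'wall-time' => 'nanoseconds',
+          'heap-live-samples' => 'count',
         )
       end
     end

+    context 'when heap-live-samples is disabled' do
+      let(:cpu_time_enabled) { true }
+      let(:alloc_samples_enabled) { true }
+      let(:heap_samples_enabled) { false }
+
+      it 'returns a pprof without the heap-live-samples type' do
+        expect(sample_types_from(decoded_profile)).to eq(
+          'cpu-time' => 'nanoseconds',
+          'cpu-samples' => 'count',
+          'wall-time' => 'nanoseconds',
+          'alloc-samples' => 'count',
        )
      end
    end
+
@@ -154,8 +182,9 @@ def slot_two_mutex_locked?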
     context 'when all optional types are disabled' do
       let(:cpu_time_enabled) { false }
       let(:alloc_samples_enabled) { false }
+      let(:heap_samples_enabled) { false }

-      it 'returns a pprof with without the optional types' do
+      it 'returns a pprof without the optional types' do
         expect(sample_types_from(decoded_profile)).to eq(
           'cpu-samples' => 'count',
           'wall-time' => 'nanoseconds',
@@ -198,7 +227,12 @@ def sample_types_from(decoded_profile)

     it 'encodes the sample with the metrics provided' do
       expect(samples.first.values)
-        .to eq(:'cpu-time' => 123, :'cpu-samples' => 456, :'wall-time' => 789, :'alloc-samples' => 4242)
+        .to eq(
+          :'cpu-time' => 123,
+          :'cpu-samples' => 456,
+          :'wall-time' => 789,
+          :'alloc-samples' => 4242,
+        )
     end

     context 'when disabling an optional profile sample type' do
@@ -302,6 +336,94 @@ def sample_types_from(decoded_profile)
     end
   end

+  describe 'heap samples' do
+    let(:sample_rate) { 50 }
+    let(:metric_values) { { 'cpu-time' => 101, 'cpu-samples' => 1, 'wall-time' => 789, 'alloc-samples' => sample_rate } }
+    let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b', 'state' => 'unknown' }.to_a }
+
+    let(:a_string) { 'a beautiful string' }
+    let(:an_array) { [1..10] }
+    let(:a_hash) { { 'a' => 1, 'b' => '2', 'c' => true } }
+
+    let(:samples) { samples_from_pprof(encoded_pprof) }
+
+    before do
+      allocations = [a_string, an_array, 'a fearsome string', [-10..-1], a_hash, { 'z' => -1, 'y' => '-2', 'x' => false }]
+      @num_allocations = 0
+      allocations.each_with_index do |obj, i|
+        # Heap sampling currently requires this 2-step process to first pass data about the allocated object...
+        described_class::Testing._native_track_object(stack_recorder, obj, sample_rate)
+        # ...and then pass data about the allocation stacktrace (with 2 distinct stacktraces)
+        if i.even?
+          Datadog::Profiling::Collectors::Stack::Testing
+            ._native_sample(Thread.current, stack_recorder, metric_values, labels, numeric_labels, 400, false)
+        else
+          # 401 used instead of 400 here just to make the branches different and appease Rubocop
+          Datadog::Profiling::Collectors::Stack::Testing
+            ._native_sample(Thread.current, stack_recorder, metric_values, labels, numeric_labels, 401, false)
+        end
+        @num_allocations += 1
+      end
+
+      allocations.clear # The literals in the previous array are now dangling
+      GC.start # And this will clear them, leaving only the non-literals which are still pointed to by the lets
+    end
+
+    context 'when disabled' do
+      let(:heap_samples_enabled) { false }
+
+      it 'are omitted from the profile' do
+        # We sample from 2 distinct locations
+        expect(samples.size).to eq(2)
+        expect(samples.select { |s| s.values.key?(:'heap-live-samples') }).to be_empty
+      end
+    end
+
+    context 'when enabled' do
+      let(:heap_samples_enabled) { true }
+
+      let(:heap_samples) do
+        samples.select { |s| s.values[:'heap-live-samples'] > 0 }
+      end
+
+      let(:non_heap_samples) do
+        samples.select { |s| s.values[:'heap-live-samples'] == 0 }
+      end
+
+      it 'include the stack and sample counts for the objects still left alive' do
+        pending 'heap_recorder implementation is currently missing'
+
+        # We sample from 2 distinct locations
+        expect(heap_samples.size).to eq(2)
+
+        sum_heap_samples = 0
+        heap_samples.each { |s| sum_heap_samples += s.values[:'heap-live-samples'] }
+        expect(sum_heap_samples).to eq([a_string, an_array, a_hash].size * sample_rate)
+      end
+
+      it 'keeps on reporting accurate samples for other profile types' do
+        expect(non_heap_samples.size).to eq(2)
+
+        summed_values = {}
+        non_heap_samples.each do |s|
+          s.values.each_pair do |k, v|
+            summed_values[k] = (summed_values[k] || 0) + v
+          end
+        end
+
+        # We use the same metric_values in all sample calls in the before block. So we'd expect
+        # the summed values to match `@num_allocations * metric_values[profile-type]`
+        # for each profile-type therein.
+        expected_summed_values = { :'heap-live-samples' => 0 }
+        metric_values.each_pair do |k, v|
+          expected_summed_values[k.to_sym] = v * @num_allocations
+        end
+
+        expect(summed_values).to eq(expected_summed_values)
+      end
+    end
+  end
+
   context 'when there is a failure during serialization' do
     before do
       allow(Datadog.logger).to receive(:error)