[PROF-8667] Heap Profiling - Part 5 - Size
AlexJF committed Dec 21, 2023
1 parent 581b912 commit e73b8d7
Showing 7 changed files with 93 additions and 15 deletions.
15 changes: 10 additions & 5 deletions ext/ddtrace_profiling_native_extension/heap_recorder.c
@@ -126,7 +126,7 @@ static heap_record* get_or_create_heap_record(heap_recorder*, ddog_prof_Slice_Lo
static void cleanup_heap_record_if_unused(heap_recorder*, heap_record*);
static int st_heap_record_entry_free(st_data_t, st_data_t, st_data_t);
static int st_object_record_entry_free(st_data_t, st_data_t, st_data_t);
static int st_object_record_entry_free_if_invalid(st_data_t, st_data_t, st_data_t);
static int st_object_record_update(st_data_t, st_data_t, st_data_t);
static int st_object_records_iterate(st_data_t, st_data_t, st_data_t);
static int st_object_records_debug(st_data_t key, st_data_t value, st_data_t extra);
static int update_object_record_entry(st_data_t*, st_data_t*, st_data_t, int);
@@ -257,7 +257,7 @@ void heap_recorder_prepare_iteration(heap_recorder *heap_recorder) {
rb_raise(rb_eRuntimeError, "New heap recorder iteration prepared without the previous one having been finished.");
}

st_foreach(heap_recorder->object_records, st_object_record_entry_free_if_invalid, (st_data_t) heap_recorder);
st_foreach(heap_recorder->object_records, st_object_record_update, (st_data_t) heap_recorder);

heap_recorder->object_records_snapshot = st_copy(heap_recorder->object_records);
if (heap_recorder->object_records_snapshot == NULL) {
@@ -358,13 +358,15 @@ static int st_object_record_entry_free(DDTRACE_UNUSED st_data_t key, st_data_t v
return ST_DELETE;
}

static int st_object_record_entry_free_if_invalid(st_data_t key, st_data_t value, st_data_t extra_arg) {
static int st_object_record_update(st_data_t key, st_data_t value, st_data_t extra_arg) {
long obj_id = (long) key;
object_record *record = (object_record*) value;
heap_recorder *recorder = (heap_recorder*) extra_arg;

if (!ruby_ref_from_id(LONG2NUM(obj_id), NULL)) {
// Id no longer associated with a valid ref. Need to clean things up!
VALUE ref;

if (!ruby_ref_from_id(LONG2NUM(obj_id), &ref)) {
// Id no longer associated with a valid ref. Need to delete this object record!

// Starting with the associated heap record. There will now be one less tracked object pointing to it
heap_record *heap_record = record->heap_record;
@@ -377,6 +379,9 @@ static int st_object_record_entry_free_if_invalid(st_data_t key, st_data_t value
return ST_DELETE;
}

// If we got this far, the entry is still valid, so let's update its size
record->object_data.size = ruby_obj_memsize_of(ref);

return ST_CONTINUE;
}

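For intuition, a minimal Ruby sketch of what the renamed update pass does conceptually: stale object ids are dropped, and surviving records get their size refreshed. The names here are hypothetical and this is illustrative only; the real logic is the C code above, built on ruby_ref_from_id and ruby_obj_memsize_of.

require 'objspace'

# `records` maps object_id => { size:, weight: }, loosely mirroring the native
# object_records table.
def update_records(records)
  records.keys.each do |obj_id|
    begin
      obj = ObjectSpace._id2ref(obj_id)                    # ~ ruby_ref_from_id
      records[obj_id][:size] = ObjectSpace.memsize_of(obj) # ~ ruby_obj_memsize_of
    rescue RangeError
      records.delete(obj_id) # id no longer refers to a live object
    end
  end
end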
3 changes: 3 additions & 0 deletions ext/ddtrace_profiling_native_extension/heap_recorder.h
@@ -27,6 +27,9 @@ typedef struct live_object_data {
// could be seen as being representative of 50 objects.
unsigned int weight;

// Size of this object on last flush/update.
size_t size;

// The class of the object that we're tracking.
// NOTE: This is optional and will be set to NULL if not set.
char* class;
36 changes: 36 additions & 0 deletions ext/ddtrace_profiling_native_extension/ruby_helpers.c
@@ -166,3 +166,39 @@ bool ruby_ref_from_id(VALUE obj_id, VALUE *value) {

return true;
}

// Not part of the public Ruby headers, but exported by the Ruby VM
size_t rb_obj_memsize_of(VALUE obj);

// Wrapper around rb_obj_memsize_of to avoid hitting crashing paths.
size_t ruby_obj_memsize_of(VALUE obj) {
switch (BUILTIN_TYPE(obj)) {
case T_OBJECT:
case T_MODULE:
case T_CLASS:
case T_ICLASS:
case T_STRING:
case T_ARRAY:
case T_HASH:
case T_REGEXP:
case T_DATA:
case T_MATCH:
case T_FILE:
case T_RATIONAL:
case T_COMPLEX:
case T_IMEMO:
case T_FLOAT:
case T_SYMBOL:
case T_BIGNUM:
// case T_NODE: -> Throws exception in rb_obj_memsize_of
case T_STRUCT:
case T_ZOMBIE:
#ifndef NO_T_MOVED
case T_MOVED:
#endif
return rb_obj_memsize_of(obj);
default:
// Unsupported type: return 0 instead of raising like rb_obj_memsize_of would
return 0;
}
}
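ObjectSpace.memsize_of is the Ruby-level counterpart of rb_obj_memsize_of and gives a feel for the numbers this wrapper returns; exact values vary by Ruby version, platform and object layout, so treat these as approximate:

require 'objspace'

ObjectSpace.memsize_of(Object.new)    # => typically 40 on 64-bit CRuby (one basic heap slot)
ObjectSpace.memsize_of('a' * 1000)    # slot plus the out-of-line string buffer
ObjectSpace.memsize_of((1..100).to_a) # slot plus the array's element buffer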
5 changes: 5 additions & 0 deletions ext/ddtrace_profiling_native_extension/ruby_helpers.h
@@ -106,3 +106,8 @@ char* ruby_strndup(const char *str, size_t size);
// writes the ref to the value pointer parameter if !NULL. False if id doesn't
// reference a valid object (in which case value is not changed).
bool ruby_ref_from_id(size_t id, VALUE *value);

// Native wrapper to get the approximate/estimated current size of the passed
// object.
size_t ruby_obj_memsize_of(VALUE obj);

14 changes: 10 additions & 4 deletions ext/ddtrace_profiling_native_extension/stack_recorder.c
@@ -153,17 +153,19 @@ static VALUE error_symbol = Qnil; // :error in Ruby
#define ALLOC_SAMPLES_VALUE_ID 3
#define HEAP_SAMPLES_VALUE {.type_ = VALUE_STRING("heap-live-samples"), .unit = VALUE_STRING("count")}
#define HEAP_SAMPLES_VALUE_ID 4
#define HEAP_SIZE_VALUE {.type_ = VALUE_STRING("heap-live-size"), .unit = VALUE_STRING("bytes")}
#define HEAP_SIZE_VALUE_ID 5
#define TIMELINE_VALUE {.type_ = VALUE_STRING("timeline"), .unit = VALUE_STRING("nanoseconds")}
#define TIMELINE_VALUE_ID 5
#define TIMELINE_VALUE_ID 6

static const ddog_prof_ValueType all_value_types[] =
{CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, HEAP_SAMPLES_VALUE, TIMELINE_VALUE};
{CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, HEAP_SAMPLES_VALUE, HEAP_SIZE_VALUE, TIMELINE_VALUE};

// This array MUST be kept in sync with all_value_types above and is intended to act as a "hashmap" between VALUE_ID and the position it
// occupies on the all_value_types array.
// E.g. all_value_types_positions[CPU_TIME_VALUE_ID] => 0, means that CPU_TIME_VALUE was declared at position 0 of all_value_types.
static const uint8_t all_value_types_positions[] =
{CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, HEAP_SAMPLES_VALUE_ID, TIMELINE_VALUE_ID};
{CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, HEAP_SAMPLES_VALUE_ID, HEAP_SIZE_VALUE_ID, TIMELINE_VALUE_ID};

#define ALL_VALUE_TYPES_COUNT (sizeof(all_value_types) / sizeof(ddog_prof_ValueType))

@@ -379,7 +381,7 @@ static VALUE _native_initialize(
uint8_t requested_values_count = ALL_VALUE_TYPES_COUNT -
(cpu_time_enabled == Qtrue ? 0 : 1) -
(alloc_samples_enabled == Qtrue? 0 : 1) -
(heap_samples_enabled == Qtrue ? 0 : 1) -
(heap_samples_enabled == Qtrue ? 0 : 2) -
(timeline_enabled == Qtrue ? 0 : 1);

if (requested_values_count == ALL_VALUE_TYPES_COUNT) return Qtrue; // Nothing to do, this is the default
@@ -419,8 +421,11 @@ static VALUE _native_initialize(
if (heap_samples_enabled == Qtrue) {
enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) HEAP_SAMPLES_VALUE;
state->position_for[HEAP_SAMPLES_VALUE_ID] = next_enabled_pos++;
enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) HEAP_SIZE_VALUE;
state->position_for[HEAP_SIZE_VALUE_ID] = next_enabled_pos++;
} else {
state->position_for[HEAP_SAMPLES_VALUE_ID] = next_disabled_pos++;
state->position_for[HEAP_SIZE_VALUE_ID] = next_disabled_pos++;

// Turns out heap sampling is disabled but we initialized everything in _native_new
// assuming all samples were enabled. We need to deinitialize the heap recorder.
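Because heap-live-samples and heap-live-size are enabled and disabled together, turning heap sampling off now subtracts 2 from requested_values_count and parks both ids in the disabled region of position_for. A hypothetical Ruby sketch of that bookkeeping (names illustrative, not part of the change):

all_ids  = ['cpu-time', 'cpu-samples', 'wall-time', 'alloc-samples',
            'heap-live-samples', 'heap-live-size', 'timeline']
disabled = ['heap-live-samples', 'heap-live-size'] # heap sampling turned off
enabled  = all_ids - disabled

position_for = {}
enabled.each_with_index  { |id, i| position_for[id] = i }                # packed at the front
disabled.each_with_index { |id, i| position_for[id] = enabled.size + i } # parked after the enabled ones
# => {"cpu-time"=>0, ..., "timeline"=>4, "heap-live-samples"=>5, "heap-live-size"=>6}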
@@ -593,6 +598,7 @@ static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteratio
uint8_t *position_for = context->state->position_for;

metric_values[position_for[HEAP_SAMPLES_VALUE_ID]] = object_data->weight;
metric_values[position_for[HEAP_SIZE_VALUE_ID]] = object_data->size * object_data->weight;

ddog_prof_Label labels[2];
size_t label_offset = 0;
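The new heap-live-size value scales each tracked object's last measured size by its sampling weight, since one sampled object statistically stands in for weight real allocations. A worked example with illustrative numbers:

weight = 50     # this sample represents roughly 50 live objects
size   = 40     # bytes, measured by ruby_obj_memsize_of at flush time
heap_live_size = size * weight
# => 2000 bytes of estimated live heap attributed to this allocation's stack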
@@ -594,6 +594,8 @@
.find(&test_struct_heap_sample)

expect(relevant_sample.values[:'heap-live-samples']).to eq test_num_allocated_object
# 40 bytes is the size of a basic object and we have test_num_allocated_object of them
expect(relevant_sample.values[:'heap-live-size']).to eq test_num_allocated_object * 40
end
end

33 changes: 27 additions & 6 deletions spec/datadog/profiling/stack_recorder_spec.rb
@@ -132,12 +132,13 @@ def slot_two_mutex_locked?
'wall-time' => 'nanoseconds',
'alloc-samples' => 'count',
'heap-live-samples' => 'count',
'heap-live-size' => 'bytes',
'timeline' => 'nanoseconds',
}
end

def profile_types_without(type)
all_profile_types.dup.tap { |it| it.delete(type) { raise 'Missing key' } }
def profile_types_without(*types)
all_profile_types.dup.tap { |it| it.delete_if { |k| types.include?(k) } }
end

context 'when all profile types are enabled' do
@@ -166,7 +167,7 @@ def profile_types_without(type)
let(:heap_samples_enabled) { false }

it 'returns a pprof without the heap-live-samples type' do
expect(sample_types_from(decoded_profile)).to eq(profile_types_without('heap-live-samples'))
expect(sample_types_from(decoded_profile)).to eq(profile_types_without('heap-live-samples', 'heap-live-size'))
end
end

@@ -348,8 +349,8 @@ def sample_types_from(decoded_profile)
let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b', 'state' => 'unknown' }.to_a }

let(:a_string) { 'a beautiful string' }
let(:an_array) { (1..10).to_a }
let(:a_hash) { { 'a' => 1, 'b' => '2', 'c' => true } }
let(:an_array) { (1..100).to_a }
let(:a_hash) { { 'a' => 1, 'b' => '2', 'c' => true, 'd' => Object.new } }

let(:samples) { samples_from_pprof(encoded_pprof) }

@@ -404,6 +405,7 @@ def sample_allocation(obj)
# We sample from 2 distinct locations
expect(samples.size).to eq(2)
expect(samples.select { |s| s.values.key?('heap-live-samples') }).to be_empty
expect(samples.select { |s| s.values.key?('heap-live-size') }).to be_empty
end
end

@@ -426,6 +428,12 @@ def sample_allocation(obj)
expect(string_sample.values).to match(
hash_including(
:"heap-live-samples" => sample_rate,
:"heap-live-size" => be_between(
# Basic object + 18 UTF-8 characters at minimum.
(40 + 18 * 2) * sample_rate,
# Allow some extra padding for additional data (the stored length, for instance).
(40 + 18 * 2 + 10) * sample_rate
),
)
)
expect(string_sample.labels).to match(
@@ -439,6 +447,13 @@ def sample_allocation(obj)
expect(array_sample.values).to match(
hash_including(
:"heap-live-samples" => sample_rate,
# Basic object + 100 FIXNUMs (32 or 64 bits)
:"heap-live-size" => be_between(
# Basic object + 100 FIXNUMs (32 bits)
(40 + 100 * 4) * sample_rate,
# Basic object + 128 FIXNUMs (64 bits and round to nearest power of 2) and eventual extra data
(40 + 128 * 8 + 10) * sample_rate,
)
)
)
expect(array_sample.labels).to match(
@@ -452,6 +467,12 @@ def sample_allocation(obj)
expect(hash_sample.values).to match(
hash_including(
:"heap-live-samples" => sample_rate,
:"heap-live-size" => be_between(
# Basic object + 4 table entries + no bins
(40 + 4 * 16) * sample_rate,
# Add extra padding to hash itself as well as each entry and 8 bins
(80 + 4 * 32 + 8 * 4) * sample_rate,
)
)
)
expect(hash_sample.labels).to match(
@@ -500,7 +521,7 @@ def sample_allocation(obj)
# We use the same metric_values in all sample calls in before. So we'd expect
# the summed values to match `@num_allocations * metric_values[profile-type]`
# for each profile-type therein.
expected_summed_values = { :'heap-live-samples' => 0 }
expected_summed_values = { :'heap-live-samples' => 0, :'heap-live-size' => 0, }
metric_values.each_pair do |k, v|
expected_summed_values[k.to_sym] = v * @num_allocations
end
