From e73b8d751ad8c119eb174dbef3265e1e3d332417 Mon Sep 17 00:00:00 2001 From: Alexandre Fonseca Date: Fri, 15 Dec 2023 18:09:27 +0000 Subject: [PATCH] [PROF-8667] Heap Profiling - Part 5 - Size --- .../heap_recorder.c | 15 +++++--- .../heap_recorder.h | 3 ++ .../ruby_helpers.c | 36 +++++++++++++++++++ .../ruby_helpers.h | 5 +++ .../stack_recorder.c | 14 +++++--- .../cpu_and_wall_time_worker_spec.rb | 2 ++ spec/datadog/profiling/stack_recorder_spec.rb | 33 +++++++++++++---- 7 files changed, 93 insertions(+), 15 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/heap_recorder.c b/ext/ddtrace_profiling_native_extension/heap_recorder.c index 1545f165200..75fee4b9b8c 100644 --- a/ext/ddtrace_profiling_native_extension/heap_recorder.c +++ b/ext/ddtrace_profiling_native_extension/heap_recorder.c @@ -126,7 +126,7 @@ static heap_record* get_or_create_heap_record(heap_recorder*, ddog_prof_Slice_Lo static void cleanup_heap_record_if_unused(heap_recorder*, heap_record*); static int st_heap_record_entry_free(st_data_t, st_data_t, st_data_t); static int st_object_record_entry_free(st_data_t, st_data_t, st_data_t); -static int st_object_record_entry_free_if_invalid(st_data_t, st_data_t, st_data_t); +static int st_object_record_update(st_data_t, st_data_t, st_data_t); static int st_object_records_iterate(st_data_t, st_data_t, st_data_t); static int st_object_records_debug(st_data_t key, st_data_t value, st_data_t extra); static int update_object_record_entry(st_data_t*, st_data_t*, st_data_t, int); @@ -257,7 +257,7 @@ void heap_recorder_prepare_iteration(heap_recorder *heap_recorder) { rb_raise(rb_eRuntimeError, "New heap recorder iteration prepared without the previous one having been finished."); } - st_foreach(heap_recorder->object_records, st_object_record_entry_free_if_invalid, (st_data_t) heap_recorder); + st_foreach(heap_recorder->object_records, st_object_record_update, (st_data_t) heap_recorder); heap_recorder->object_records_snapshot = st_copy(heap_recorder->object_records); if (heap_recorder->object_records_snapshot == NULL) { @@ -358,13 +358,15 @@ static int st_object_record_entry_free(DDTRACE_UNUSED st_data_t key, st_data_t v return ST_DELETE; } -static int st_object_record_entry_free_if_invalid(st_data_t key, st_data_t value, st_data_t extra_arg) { +static int st_object_record_update(st_data_t key, st_data_t value, st_data_t extra_arg) { long obj_id = (long) key; object_record *record = (object_record*) value; heap_recorder *recorder = (heap_recorder*) extra_arg; - if (!ruby_ref_from_id(LONG2NUM(obj_id), NULL)) { - // Id no longer associated with a valid ref. Need to clean things up! + VALUE ref; + + if (!ruby_ref_from_id(LONG2NUM(obj_id), &ref)) { + // Id no longer associated with a valid ref. Need to delete this object record! // Starting with the associated heap record. There will now be one less tracked object pointing to it heap_record *heap_record = record->heap_record; @@ -377,6 +379,9 @@ static int st_object_record_entry_free_if_invalid(st_data_t key, st_data_t value return ST_DELETE; } + // If we got this far, entry is still valid so lets update its size + record->object_data.size = ruby_obj_memsize_of(ref); + return ST_CONTINUE; } diff --git a/ext/ddtrace_profiling_native_extension/heap_recorder.h b/ext/ddtrace_profiling_native_extension/heap_recorder.h index 768b24522fd..787c668e2dd 100644 --- a/ext/ddtrace_profiling_native_extension/heap_recorder.h +++ b/ext/ddtrace_profiling_native_extension/heap_recorder.h @@ -27,6 +27,9 @@ typedef struct live_object_data { // could be seen as being representative of 50 objects. unsigned int weight; + // Size of this object on last flush/update. + size_t size; + // The class of the object that we're tracking. // NOTE: This is optional and will be set to NULL if not set. char* class; diff --git a/ext/ddtrace_profiling_native_extension/ruby_helpers.c b/ext/ddtrace_profiling_native_extension/ruby_helpers.c index 99dc6097d62..883297472c1 100644 --- a/ext/ddtrace_profiling_native_extension/ruby_helpers.c +++ b/ext/ddtrace_profiling_native_extension/ruby_helpers.c @@ -166,3 +166,39 @@ bool ruby_ref_from_id(VALUE obj_id, VALUE *value) { return true; } + +// Not part of public headers but is externed from Ruby +size_t rb_obj_memsize_of(VALUE obj); + +// Wrapper around rb_obj_memsize_of to avoid hitting crashing paths. +size_t ruby_obj_memsize_of(VALUE obj) { + switch (BUILTIN_TYPE(obj)) { + case T_OBJECT: + case T_MODULE: + case T_CLASS: + case T_ICLASS: + case T_STRING: + case T_ARRAY: + case T_HASH: + case T_REGEXP: + case T_DATA: + case T_MATCH: + case T_FILE: + case T_RATIONAL: + case T_COMPLEX: + case T_IMEMO: + case T_FLOAT: + case T_SYMBOL: + case T_BIGNUM: + // case T_NODE: -> Throws exception in rb_obj_memsize_of + case T_STRUCT: + case T_ZOMBIE: + #ifndef NO_T_MOVED + case T_MOVED: + #endif + return rb_obj_memsize_of(obj); + default: + // Unsupported, return 0 instead of erroring like rb_obj_memsize_of likes doing + return 0; + } +} diff --git a/ext/ddtrace_profiling_native_extension/ruby_helpers.h b/ext/ddtrace_profiling_native_extension/ruby_helpers.h index d0bf3cfcb0e..1ca01652b4a 100644 --- a/ext/ddtrace_profiling_native_extension/ruby_helpers.h +++ b/ext/ddtrace_profiling_native_extension/ruby_helpers.h @@ -106,3 +106,8 @@ char* ruby_strndup(const char *str, size_t size); // writes the ref to the value pointer parameter if !NULL. False if id doesn't // reference a valid object (in which case value is not changed). bool ruby_ref_from_id(size_t id, VALUE *value); + +// Native wrapper to get the approximate/estimated current size of the passed +// object. +size_t ruby_obj_memsize_of(VALUE obj); + diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.c b/ext/ddtrace_profiling_native_extension/stack_recorder.c index c77b93ebce9..5d69fb172ae 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.c +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.c @@ -153,17 +153,19 @@ static VALUE error_symbol = Qnil; // :error in Ruby #define ALLOC_SAMPLES_VALUE_ID 3 #define HEAP_SAMPLES_VALUE {.type_ = VALUE_STRING("heap-live-samples"), .unit = VALUE_STRING("count")} #define HEAP_SAMPLES_VALUE_ID 4 +#define HEAP_SIZE_VALUE {.type_ = VALUE_STRING("heap-live-size"), .unit = VALUE_STRING("bytes")} +#define HEAP_SIZE_VALUE_ID 5 #define TIMELINE_VALUE {.type_ = VALUE_STRING("timeline"), .unit = VALUE_STRING("nanoseconds")} -#define TIMELINE_VALUE_ID 5 +#define TIMELINE_VALUE_ID 6 static const ddog_prof_ValueType all_value_types[] = - {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, HEAP_SAMPLES_VALUE, TIMELINE_VALUE}; + {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, HEAP_SAMPLES_VALUE, HEAP_SIZE_VALUE, TIMELINE_VALUE}; // This array MUST be kept in sync with all_value_types above and is intended to act as a "hashmap" between VALUE_ID and the position it // occupies on the all_value_types array. // E.g. all_value_types_positions[CPU_TIME_VALUE_ID] => 0, means that CPU_TIME_VALUE was declared at position 0 of all_value_types. static const uint8_t all_value_types_positions[] = - {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, HEAP_SAMPLES_VALUE_ID, TIMELINE_VALUE_ID}; + {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, HEAP_SAMPLES_VALUE_ID, HEAP_SIZE_VALUE_ID, TIMELINE_VALUE_ID}; #define ALL_VALUE_TYPES_COUNT (sizeof(all_value_types) / sizeof(ddog_prof_ValueType)) @@ -379,7 +381,7 @@ static VALUE _native_initialize( uint8_t requested_values_count = ALL_VALUE_TYPES_COUNT - (cpu_time_enabled == Qtrue ? 0 : 1) - (alloc_samples_enabled == Qtrue? 0 : 1) - - (heap_samples_enabled == Qtrue ? 0 : 1) - + (heap_samples_enabled == Qtrue ? 0 : 2) - (timeline_enabled == Qtrue ? 0 : 1); if (requested_values_count == ALL_VALUE_TYPES_COUNT) return Qtrue; // Nothing to do, this is the default @@ -419,8 +421,11 @@ static VALUE _native_initialize( if (heap_samples_enabled == Qtrue) { enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) HEAP_SAMPLES_VALUE; state->position_for[HEAP_SAMPLES_VALUE_ID] = next_enabled_pos++; + enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) HEAP_SIZE_VALUE; + state->position_for[HEAP_SIZE_VALUE_ID] = next_enabled_pos++; } else { state->position_for[HEAP_SAMPLES_VALUE_ID] = next_disabled_pos++; + state->position_for[HEAP_SIZE_VALUE_ID] = next_disabled_pos++; // Turns out heap sampling is disabled but we initialized everything in _native_new // assuming all samples were enabled. We need to deinitialize the heap recorder. @@ -593,6 +598,7 @@ static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteratio uint8_t *position_for = context->state->position_for; metric_values[position_for[HEAP_SAMPLES_VALUE_ID]] = object_data->weight; + metric_values[position_for[HEAP_SIZE_VALUE_ID]] = object_data->size * object_data->weight; ddog_prof_Label labels[2]; size_t label_offset = 0; diff --git a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb index b4e18d9fa98..3e303a97d5b 100644 --- a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb +++ b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb @@ -594,6 +594,8 @@ .find(&test_struct_heap_sample) expect(relevant_sample.values[:'heap-live-samples']).to eq test_num_allocated_object + # 40 is the size of a basic object and we have test_num_allocated_object of them + expect(relevant_sample.values[:'heap-live-size']).to eq test_num_allocated_object * 40 end end diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb index 8458d176f0b..ac546da83de 100644 --- a/spec/datadog/profiling/stack_recorder_spec.rb +++ b/spec/datadog/profiling/stack_recorder_spec.rb @@ -132,12 +132,13 @@ def slot_two_mutex_locked? 'wall-time' => 'nanoseconds', 'alloc-samples' => 'count', 'heap-live-samples' => 'count', + 'heap-live-size' => 'bytes', 'timeline' => 'nanoseconds', } end - def profile_types_without(type) - all_profile_types.dup.tap { |it| it.delete(type) { raise 'Missing key' } } + def profile_types_without(*types) + all_profile_types.dup.tap { |it| it.delete_if { |k| types.include?(k) }} end context 'when all profile types are enabled' do @@ -166,7 +167,7 @@ def profile_types_without(type) let(:heap_samples_enabled) { false } it 'returns a pprof without the heap-live-samples type' do - expect(sample_types_from(decoded_profile)).to eq(profile_types_without('heap-live-samples')) + expect(sample_types_from(decoded_profile)).to eq(profile_types_without('heap-live-samples', 'heap-live-size')) end end @@ -348,8 +349,8 @@ def sample_types_from(decoded_profile) let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b', 'state' => 'unknown' }.to_a } let(:a_string) { 'a beautiful string' } - let(:an_array) { (1..10).to_a } - let(:a_hash) { { 'a' => 1, 'b' => '2', 'c' => true } } + let(:an_array) { (1..100).to_a } + let(:a_hash) { { 'a' => 1, 'b' => '2', 'c' => true, 'd' => Object.new } } let(:samples) { samples_from_pprof(encoded_pprof) } @@ -404,6 +405,7 @@ def sample_allocation(obj) # We sample from 2 distinct locations expect(samples.size).to eq(2) expect(samples.select { |s| s.values.key?('heap-live-samples') }).to be_empty + expect(samples.select { |s| s.values.key?('heap-live-size') }).to be_empty end end @@ -426,6 +428,12 @@ def sample_allocation(obj) expect(string_sample.values).to match( hash_including( :"heap-live-samples" => sample_rate, + :"heap-live-size" => be_between( + # Basic object + 18 UTF-8 characters at minimum. + (40 + 18 * 2) * sample_rate, + # Add some extra padding for extra data (length for instance?). + (40 + 18 * 2 + 10) * sample_rate + ), ) ) expect(string_sample.labels).to match( @@ -439,6 +447,13 @@ def sample_allocation(obj) expect(array_sample.values).to match( hash_including( :"heap-live-samples" => sample_rate, + # Basic object + 100 FIXNUMs (32 or 64 bits) + :"heap-live-size" => be_between( + # Basic object + 100 FIXNUMs (32 bits) + (40 + 100 * 4) * sample_rate, + # Basic object + 128 FIXNUMs (64 bits and round to nearest power of 2) and eventual extra data + (40 + 128 * 8 + 10) * sample_rate, + ) ) ) expect(array_sample.labels).to match( @@ -452,6 +467,12 @@ def sample_allocation(obj) expect(hash_sample.values).to match( hash_including( :"heap-live-samples" => sample_rate, + :"heap-live-size" => be_between( + # Basic object + 4 table entries + no bins + (40 + 4 * 16) * sample_rate, + # Add extra padding to hash itself as well as each entry and 8 bins + (80 + 4 * 32 + 8 * 4) * sample_rate, + ) ) ) expect(hash_sample.labels).to match( @@ -500,7 +521,7 @@ def sample_allocation(obj) # We use the same metric_values in all sample calls in before. So we'd expect # the summed values to match `@num_allocations * metric_values[profile-type]` # for each profile-type there in. - expected_summed_values = { :'heap-live-samples' => 0 } + expected_summed_values = { :'heap-live-samples' => 0, :'heap-live-size' => 0, } metric_values.each_pair do |k, v| expected_summed_values[k.to_sym] = v * @num_allocations end