[PROF-8667] Heap Profiling - Part 5 - Size
AlexJF committed Dec 21, 2023
1 parent 581b912 commit e73b8d7
Showing 7 changed files with 93 additions and 15 deletions.
15 changes: 10 additions & 5 deletions ext/ddtrace_profiling_native_extension/heap_recorder.c
@@ -126,7 +126,7 @@ static heap_record* get_or_create_heap_record(heap_recorder*, ddog_prof_Slice_Lo
static void cleanup_heap_record_if_unused(heap_recorder*, heap_record*);
static int st_heap_record_entry_free(st_data_t, st_data_t, st_data_t);
static int st_object_record_entry_free(st_data_t, st_data_t, st_data_t);
static int st_object_record_entry_free_if_invalid(st_data_t, st_data_t, st_data_t);
static int st_object_record_update(st_data_t, st_data_t, st_data_t);
static int st_object_records_iterate(st_data_t, st_data_t, st_data_t);
static int st_object_records_debug(st_data_t key, st_data_t value, st_data_t extra);
static int update_object_record_entry(st_data_t*, st_data_t*, st_data_t, int);
@@ -257,7 +257,7 @@ void heap_recorder_prepare_iteration(heap_recorder *heap_recorder) {
rb_raise(rb_eRuntimeError, "New heap recorder iteration prepared without the previous one having been finished.");
}

st_foreach(heap_recorder->object_records, st_object_record_entry_free_if_invalid, (st_data_t) heap_recorder);
st_foreach(heap_recorder->object_records, st_object_record_update, (st_data_t) heap_recorder);

heap_recorder->object_records_snapshot = st_copy(heap_recorder->object_records);
if (heap_recorder->object_records_snapshot == NULL) {
@@ -358,13 +358,15 @@ static int st_object_record_entry_free(DDTRACE_UNUSED st_data_t key, st_data_t v
return ST_DELETE;
}

static int st_object_record_entry_free_if_invalid(st_data_t key, st_data_t value, st_data_t extra_arg) {
static int st_object_record_update(st_data_t key, st_data_t value, st_data_t extra_arg) {
long obj_id = (long) key;
object_record *record = (object_record*) value;
heap_recorder *recorder = (heap_recorder*) extra_arg;

if (!ruby_ref_from_id(LONG2NUM(obj_id), NULL)) {
// Id no longer associated with a valid ref. Need to clean things up!
VALUE ref;

if (!ruby_ref_from_id(LONG2NUM(obj_id), &ref)) {
// Id no longer associated with a valid ref. Need to delete this object record!

// Starting with the associated heap record. There will now be one less tracked object pointing to it
heap_record *heap_record = record->heap_record;
@@ -377,6 +379,9 @@ static int st_object_record_entry_free_if_invalid(st_data_t key, st_data_t value
return ST_DELETE;
}

// If we got this far, the entry is still valid, so let's update its size
record->object_data.size = ruby_obj_memsize_of(ref);

return ST_CONTINUE;
}

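For intuition, a minimal Ruby sketch of what the renamed update pass does conceptually: stale object ids are dropped, and surviving records get their size refreshed. The names here are hypothetical and this is illustrative only; the real logic is the C code above, built on ruby_ref_from_id and ruby_obj_memsize_of.

require 'objspace'

# `records` maps object_id => { size:, weight: }, loosely mirroring the native
# object_records table.
def update_records(records)
  records.keys.each do |obj_id|
    begin
      obj = ObjectSpace._id2ref(obj_id)                    # ~ ruby_ref_from_id
      records[obj_id][:size] = ObjectSpace.memsize_of(obj) # ~ ruby_obj_memsize_of
    rescue RangeError
      records.delete(obj_id) # id no longer refers to a live object
    end
  end
end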
3 changes: 3 additions & 0 deletions ext/ddtrace_profiling_native_extension/heap_recorder.h
@@ -27,6 +27,9 @@ typedef struct live_object_data {
// could be seen as being representative of 50 objects.
unsigned int weight;

// Size of this object on last flush/update.
size_t size;

// The class of the object that we're tracking.
// NOTE: This is optional and will be set to NULL if not set.
char* class;
36 changes: 36 additions & 0 deletions ext/ddtrace_profiling_native_extension/ruby_helpers.c
@@ -166,3 +166,39 @@ bool ruby_ref_from_id(VALUE obj_id, VALUE *value) {

return true;
}

// Not part of the public Ruby headers, but exported by the Ruby VM
size_t rb_obj_memsize_of(VALUE obj);

// Wrapper around rb_obj_memsize_of to avoid hitting crashing paths.
size_t ruby_obj_memsize_of(VALUE obj) {
switch (BUILTIN_TYPE(obj)) {
case T_OBJECT:
case T_MODULE:
case T_CLASS:
case T_ICLASS:
case T_STRING:
case T_ARRAY:
case T_HASH:
case T_REGEXP:
case T_DATA:
case T_MATCH:
case T_FILE:
case T_RATIONAL:
case T_COMPLEX:
case T_IMEMO:
case T_FLOAT:
case T_SYMBOL:
case T_BIGNUM:
// case T_NODE: -> Throws exception in rb_obj_memsize_of
case T_STRUCT:
case T_ZOMBIE:
#ifndef NO_T_MOVED
case T_MOVED:
#endif
return rb_obj_memsize_of(obj);
default:
// Unsupported type: return 0 instead of raising like rb_obj_memsize_of would
return 0;
}
}
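ObjectSpace.memsize_of is the Ruby-level counterpart of rb_obj_memsize_of and gives a feel for the numbers this wrapper returns; exact values vary by Ruby version, platform and object layout, so treat these as approximate:

require 'objspace'

ObjectSpace.memsize_of(Object.new)    # => typically 40 on 64-bit CRuby (one basic heap slot)
ObjectSpace.memsize_of('a' * 1000)    # slot plus the out-of-line string buffer
ObjectSpace.memsize_of((1..100).to_a) # slot plus the array's element buffer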
5 changes: 5 additions & 0 deletions ext/ddtrace_profiling_native_extension/ruby_helpers.h
@@ -106,3 +106,8 @@ char* ruby_strndup(const char *str, size_t size);
// writes the ref to the value pointer parameter if !NULL. False if id doesn't
// reference a valid object (in which case value is not changed).
bool ruby_ref_from_id(size_t id, VALUE *value);

// Native wrapper to get the approximate/estimated current size of the passed
// object.
size_t ruby_obj_memsize_of(VALUE obj);

14 changes: 10 additions & 4 deletions ext/ddtrace_profiling_native_extension/stack_recorder.c
@@ -153,17 +153,19 @@ static VALUE error_symbol = Qnil; // :error in Ruby
#define ALLOC_SAMPLES_VALUE_ID 3
#define HEAP_SAMPLES_VALUE {.type_ = VALUE_STRING("heap-live-samples"), .unit = VALUE_STRING("count")}
#define HEAP_SAMPLES_VALUE_ID 4
#define HEAP_SIZE_VALUE {.type_ = VALUE_STRING("heap-live-size"), .unit = VALUE_STRING("bytes")}
#define HEAP_SIZE_VALUE_ID 5
#define TIMELINE_VALUE {.type_ = VALUE_STRING("timeline"), .unit = VALUE_STRING("nanoseconds")}
#define TIMELINE_VALUE_ID 5
#define TIMELINE_VALUE_ID 6

static const ddog_prof_ValueType all_value_types[] =
{CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, HEAP_SAMPLES_VALUE, TIMELINE_VALUE};
{CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, HEAP_SAMPLES_VALUE, HEAP_SIZE_VALUE, TIMELINE_VALUE};

// This array MUST be kept in sync with all_value_types above and is intended to act as a "hashmap" between VALUE_ID and the position it
// occupies on the all_value_types array.
// E.g. all_value_types_positions[CPU_TIME_VALUE_ID] => 0, means that CPU_TIME_VALUE was declared at position 0 of all_value_types.
static const uint8_t all_value_types_positions[] =
{CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, HEAP_SAMPLES_VALUE_ID, TIMELINE_VALUE_ID};
{CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, HEAP_SAMPLES_VALUE_ID, HEAP_SIZE_VALUE_ID, TIMELINE_VALUE_ID};

#define ALL_VALUE_TYPES_COUNT (sizeof(all_value_types) / sizeof(ddog_prof_ValueType))

@@ -379,7 +381,7 @@ static VALUE _native_initialize(
uint8_t requested_values_count = ALL_VALUE_TYPES_COUNT -
(cpu_time_enabled == Qtrue ? 0 : 1) -
(alloc_samples_enabled == Qtrue? 0 : 1) -
(heap_samples_enabled == Qtrue ? 0 : 1) -
(heap_samples_enabled == Qtrue ? 0 : 2) -
(timeline_enabled == Qtrue ? 0 : 1);

if (requested_values_count == ALL_VALUE_TYPES_COUNT) return Qtrue; // Nothing to do, this is the default
@@ -419,8 +421,11 @@ static VALUE _native_initialize(
if (heap_samples_enabled == Qtrue) {
enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) HEAP_SAMPLES_VALUE;
state->position_for[HEAP_SAMPLES_VALUE_ID] = next_enabled_pos++;
enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) HEAP_SIZE_VALUE;
state->position_for[HEAP_SIZE_VALUE_ID] = next_enabled_pos++;
} else {
state->position_for[HEAP_SAMPLES_VALUE_ID] = next_disabled_pos++;
state->position_for[HEAP_SIZE_VALUE_ID] = next_disabled_pos++;

// Turns out heap sampling is disabled but we initialized everything in _native_new
// assuming all samples were enabled. We need to deinitialize the heap recorder.
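Because heap-live-samples and heap-live-size are enabled and disabled together, turning heap sampling off now subtracts 2 from requested_values_count and parks both ids in the disabled region of position_for. A hypothetical Ruby sketch of that bookkeeping (names illustrative, not part of the change):

all_ids  = ['cpu-time', 'cpu-samples', 'wall-time', 'alloc-samples',
            'heap-live-samples', 'heap-live-size', 'timeline']
disabled = ['heap-live-samples', 'heap-live-size'] # heap sampling turned off
enabled  = all_ids - disabled

position_for = {}
enabled.each_with_index  { |id, i| position_for[id] = i }                # packed at the front
disabled.each_with_index { |id, i| position_for[id] = enabled.size + i } # parked after the enabled ones
# => {"cpu-time"=>0, ..., "timeline"=>4, "heap-live-samples"=>5, "heap-live-size"=>6}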
@@ -593,6 +598,7 @@ static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteratio
uint8_t *position_for = context->state->position_for;

metric_values[position_for[HEAP_SAMPLES_VALUE_ID]] = object_data->weight;
metric_values[position_for[HEAP_SIZE_VALUE_ID]] = object_data->size * object_data->weight;

ddog_prof_Label labels[2];
size_t label_offset = 0;
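The new heap-live-size value scales each tracked object's last measured size by its sampling weight, since one sampled object statistically stands in for weight real allocations. A worked example with illustrative numbers:

weight = 50     # this sample represents roughly 50 live objects
size   = 40     # bytes, measured by ruby_obj_memsize_of at flush time
heap_live_size = size * weight
# => 2000 bytes of estimated live heap attributed to this allocation's stack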
@@ -594,6 +594,8 @@
.find(&test_struct_heap_sample)

expect(relevant_sample.values[:'heap-live-samples']).to eq test_num_allocated_object
# 40 bytes is the size of a basic object and we have test_num_allocated_object of them
expect(relevant_sample.values[:'heap-live-size']).to eq test_num_allocated_object * 40
end
end

33 changes: 27 additions & 6 deletions spec/datadog/profiling/stack_recorder_spec.rb
@@ -132,12 +132,13 @@ def slot_two_mutex_locked?
'wall-time' => 'nanoseconds',
'alloc-samples' => 'count',
'heap-live-samples' => 'count',
'heap-live-size' => 'bytes',
'timeline' => 'nanoseconds',
}
end

def profile_types_without(type)
all_profile_types.dup.tap { |it| it.delete(type) { raise 'Missing key' } }
def profile_types_without(*types)
all_profile_types.dup.tap { |it| it.delete_if { |k| types.include?(k) } }
end

context 'when all profile types are enabled' do
@@ -166,7 +167,7 @@ def profile_types_without(type)
let(:heap_samples_enabled) { false }

it 'returns a pprof without the heap-live-samples type' do
expect(sample_types_from(decoded_profile)).to eq(profile_types_without('heap-live-samples'))
expect(sample_types_from(decoded_profile)).to eq(profile_types_without('heap-live-samples', 'heap-live-size'))
end
end

@@ -348,8 +349,8 @@ def sample_types_from(decoded_profile)
let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b', 'state' => 'unknown' }.to_a }

let(:a_string) { 'a beautiful string' }
let(:an_array) { (1..10).to_a }
let(:a_hash) { { 'a' => 1, 'b' => '2', 'c' => true } }
let(:an_array) { (1..100).to_a }
let(:a_hash) { { 'a' => 1, 'b' => '2', 'c' => true, 'd' => Object.new } }

let(:samples) { samples_from_pprof(encoded_pprof) }

@@ -404,6 +405,7 @@ def sample_allocation(obj)
# We sample from 2 distinct locations
expect(samples.size).to eq(2)
expect(samples.select { |s| s.values.key?('heap-live-samples') }).to be_empty
expect(samples.select { |s| s.values.key?('heap-live-size') }).to be_empty
end
end

@@ -426,6 +428,12 @@ def sample_allocation(obj)
expect(string_sample.values).to match(
hash_including(
:"heap-live-samples" => sample_rate,
:"heap-live-size" => be_between(
# Basic object + 18 UTF-8 characters at minimum.
(40 + 18 * 2) * sample_rate,
# Allow some extra padding for additional data (the stored length, for instance).
(40 + 18 * 2 + 10) * sample_rate
),
)
)
expect(string_sample.labels).to match(
@@ -439,6 +447,13 @@ def sample_allocation(obj)
expect(array_sample.values).to match(
hash_including(
:"heap-live-samples" => sample_rate,
# Basic object + 100 FIXNUMs (32 or 64 bits)
:"heap-live-size" => be_between(
# Basic object + 100 FIXNUMs (32 bits)
(40 + 100 * 4) * sample_rate,
# Basic object + 128 FIXNUMs (64 bits and round to nearest power of 2) and eventual extra data
(40 + 128 * 8 + 10) * sample_rate,
)
)
)
expect(array_sample.labels).to match(
@@ -452,6 +467,12 @@ def sample_allocation(obj)
expect(hash_sample.values).to match(
hash_including(
:"heap-live-samples" => sample_rate,
:"heap-live-size" => be_between(
# Basic object + 4 table entries + no bins
(40 + 4 * 16) * sample_rate,
# Add extra padding to hash itself as well as each entry and 8 bins
(80 + 4 * 32 + 8 * 4) * sample_rate,
)
)
)
expect(hash_sample.labels).to match(
@@ -500,7 +521,7 @@ def sample_allocation(obj)
# We use the same metric_values in all sample calls in before. So we'd expect
# the summed values to match `@num_allocations * metric_values[profile-type]`
# for each profile-type therein.
expected_summed_values = { :'heap-live-samples' => 0 }
expected_summed_values = { :'heap-live-samples' => 0, :'heap-live-size' => 0, }
metric_values.each_pair do |k, v|
expected_summed_values[k.to_sym] = v * @num_allocations
end
