[PROF-8667] Heap Profiling - Part 1 - Setup (#3281)
This commit paves the way for the introduction of heap profiling functionality by:
* Introducing a setting for controlling heap profiling (`DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED`, false by default).
* Refactoring the settings related to allocation profiling (`DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED` and `DD_PROFILING_EXPERIMENTAL_ALLOCATION_SAMPLE_RATE`) and improving the warnings emitted on broken Ruby versions.
  * As a result of this refactoring, allocation counting is now tied to allocation profiling and can no longer be enabled on its own.
* Introducing a heap recorder component (with a noop implementation for now) in the native profiling extension and plugging it in on top of the existing allocation profiling functionality.
* Interacting with the heap recorder to collect the new `heap-live-samples` data at profile serialization time.
* Adding the necessary tests to cover this functionality (some marked as pending until the proper implementation is added).

Future commits will gradually build on the heap recorder implementation to actually enable heap data collection.
AlexJF authored Dec 12, 2023
1 parent 0fa4a67 commit dfbc324
Showing 22 changed files with 1,063 additions and 173 deletions.
6 changes: 5 additions & 1 deletion benchmarks/profiler_sample_loop_v2.rb
@@ -16,7 +16,11 @@ class ProfilerSampleLoopBenchmark
PROFILER_OVERHEAD_STACK_THREAD = Thread.new { sleep }

def create_profiler
@recorder = Datadog::Profiling::StackRecorder.new(cpu_time_enabled: true, alloc_samples_enabled: true)
@recorder = Datadog::Profiling::StackRecorder.new(
cpu_time_enabled: true,
alloc_samples_enabled: false,
heap_samples_enabled: false
)
@collector = Datadog::Profiling::Collectors::ThreadContext.new(
recorder: @recorder, max_frames: 400, tracer: nil, endpoint_collection_enabled: false, timeline_enabled: false
)
ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c
@@ -80,10 +80,10 @@ struct cpu_and_wall_time_worker_state {
// These are immutable after initialization

bool gc_profiling_enabled;
bool allocation_counting_enabled;
bool no_signals_workaround_enabled;
bool dynamic_sampling_rate_enabled;
int allocation_sample_every; // Temporarily used for development/testing of allocation profiling
int allocation_sample_every;
bool allocation_profiling_enabled;
VALUE self_instance;
VALUE thread_context_collector_instance;
VALUE idle_sampling_helper_instance;
@@ -149,11 +149,11 @@ static VALUE _native_initialize(
VALUE thread_context_collector_instance,
VALUE gc_profiling_enabled,
VALUE idle_sampling_helper_instance,
VALUE allocation_counting_enabled,
VALUE no_signals_workaround_enabled,
VALUE dynamic_sampling_rate_enabled,
VALUE dynamic_sampling_rate_overhead_target_percentage,
VALUE allocation_sample_every
VALUE allocation_sample_every,
VALUE allocation_profiling_enabled
);
static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr);
static VALUE _native_sampling_loop(VALUE self, VALUE instance);
@@ -265,10 +265,10 @@ static VALUE _native_new(VALUE klass) {
// being leaked.

state->gc_profiling_enabled = false;
state->allocation_counting_enabled = false;
state->no_signals_workaround_enabled = false;
state->dynamic_sampling_rate_enabled = true;
state->allocation_sample_every = 0;
state->allocation_profiling_enabled = false;
state->thread_context_collector_instance = Qnil;
state->idle_sampling_helper_instance = Qnil;
state->owner_thread = Qnil;
@@ -293,31 +293,31 @@ static VALUE _native_initialize(
VALUE thread_context_collector_instance,
VALUE gc_profiling_enabled,
VALUE idle_sampling_helper_instance,
VALUE allocation_counting_enabled,
VALUE no_signals_workaround_enabled,
VALUE dynamic_sampling_rate_enabled,
VALUE dynamic_sampling_rate_overhead_target_percentage,
VALUE allocation_sample_every
VALUE allocation_sample_every,
VALUE allocation_profiling_enabled
) {
ENFORCE_BOOLEAN(gc_profiling_enabled);
ENFORCE_BOOLEAN(allocation_counting_enabled);
ENFORCE_BOOLEAN(no_signals_workaround_enabled);
ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled);
ENFORCE_TYPE(allocation_sample_every, T_FIXNUM);
ENFORCE_TYPE(dynamic_sampling_rate_overhead_target_percentage, T_FLOAT);
ENFORCE_BOOLEAN(allocation_profiling_enabled);

struct cpu_and_wall_time_worker_state *state;
TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);

state->gc_profiling_enabled = (gc_profiling_enabled == Qtrue);
state->allocation_counting_enabled = (allocation_counting_enabled == Qtrue);
state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue);
state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue);
dynamic_sampling_rate_set_overhead_target_percentage(&state->dynamic_sampling_rate, NUM2DBL(dynamic_sampling_rate_overhead_target_percentage));
state->allocation_sample_every = NUM2INT(allocation_sample_every);
state->allocation_profiling_enabled = (allocation_profiling_enabled == Qtrue);

if (state->allocation_sample_every < 0) {
rb_raise(rb_eArgError, "Unexpected value for allocation_sample_every: %d. This value must be >= 0.", state->allocation_sample_every);
if (state->allocation_sample_every <= 0) {
rb_raise(rb_eArgError, "Unexpected value for allocation_sample_every: %d. This value must be > 0.", state->allocation_sample_every);
}

state->thread_context_collector_instance = enforce_thread_context_collector_instance(thread_context_collector_instance);
@@ -636,7 +636,7 @@ static VALUE release_gvl_and_run_sampling_trigger_loop(VALUE instance) {
// because they may raise exceptions.
install_sigprof_signal_handler(handle_sampling_signal, "handle_sampling_signal");
if (state->gc_profiling_enabled) rb_tracepoint_enable(state->gc_tracepoint);
if (state->allocation_counting_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint);
if (state->allocation_profiling_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint);

rb_thread_call_without_gvl(run_sampling_trigger_loop, state, interrupt_sampling_trigger_loop, state);

@@ -892,9 +892,9 @@ static void sleep_for(uint64_t time_ns) {
}

static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self) {
bool is_profiler_running = active_sampler_instance_state != NULL;
bool are_allocations_being_tracked = active_sampler_instance_state != NULL && active_sampler_instance_state->allocation_profiling_enabled;

return is_profiler_running ? ULL2NUM(allocation_count) : Qnil;
return are_allocations_being_tracked ? ULL2NUM(allocation_count) : Qnil;
}

// Implements memory-related profiling events. This function is called by Ruby via the `object_allocation_tracepoint`
@@ -928,7 +928,7 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused)

// TODO: This is a placeholder sampling decision strategy. We plan to replace it with a better one soon (e.g. before
// beta), and having something here allows us to test the rest of the feature, sampling decision aside.
if (state->allocation_sample_every > 0 && ((allocation_count % state->allocation_sample_every) == 0)) {
if (allocation_count % state->allocation_sample_every == 0) {
// Rescue against any exceptions that happen during sampling
safely_call(rescued_sample_allocation, tracepoint_data, state->self_instance);
}
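The placeholder decision above samples every Nth allocation, where N is `allocation_sample_every`. Each sampled allocation can then carry a weight of N, so that aggregated results extrapolate back to an unsampled view (see the `weight` documentation in `heap_recorder.h` further down). A minimal standalone sketch of this arithmetic, with illustrative names and a hypothetical rate of 50:

```c
#include <stdbool.h>
#include <stdio.h>

// Illustrative stand-ins for the worker state; not the actual extension code.
static unsigned long long allocation_count = 0;
static const int allocation_sample_every = 50; // hypothetical sample rate

// Placeholder strategy: sample every Nth allocation.
static bool should_sample_allocation(void) {
  allocation_count++;
  return allocation_count % allocation_sample_every == 0;
}

int main(void) {
  unsigned long long sampled = 0;
  for (int i = 0; i < 10000; i++) {
    if (should_sample_allocation()) sampled++;
  }
  // Each sample stands in for `allocation_sample_every` allocations, so the
  // extrapolated total approximates the real 10000 allocations.
  printf("sampled=%llu, extrapolated=%llu\n",
    sampled, sampled * (unsigned long long) allocation_sample_every);
  return 0;
}
```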
ext/ddtrace_profiling_native_extension/collectors_thread_context.c
@@ -1210,6 +1210,8 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in
}
}

track_object(state->recorder_instance, new_object, sample_weight);

trigger_sample_for_thread(
state,
/* thread: */ current_thread,
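The new `track_object` call hands the sampled object and its weight to the stack recorder before the stack sample is triggered. The `stack_recorder.c` side of this change is not visible in this excerpt, but given the `heap_recorder.h` API introduced below, a plausible sketch of that wiring is the following; the `stack_recorder_state` layout and accessor are assumptions for illustration, not the actual implementation:

```c
// Hypothetical sketch of the stack_recorder side of track_object; the real
// stack_recorder.c changes are not part of the excerpt above.
#include <ruby.h>
#include "heap_recorder.h"

struct stack_recorder_state {
  heap_recorder *heap_recorder; // assumed field, for illustration only
  // ... other recorder state ...
};

// Assumed accessor for the state behind the recorder's TypedData object.
extern struct stack_recorder_state *get_stack_recorder_state(VALUE recorder_instance);

void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight) {
  struct stack_recorder_state *state = get_stack_recorder_state(recorder_instance);
  // Phase 1 of the two-phase heap recording: flag the object and its weight.
  // The recording is committed via end_heap_allocation_recording once the
  // sample's stacktrace locations have been collected.
  start_heap_allocation_recording(state->heap_recorder, new_object, sample_weight);
}
```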
5 changes: 5 additions & 0 deletions ext/ddtrace_profiling_native_extension/extconf.rb
@@ -114,6 +114,11 @@ def add_compiler_flag(flag)
add_compiler_flag '-Wall'
add_compiler_flag '-Wextra'

if ENV['DDTRACE_DEBUG']
CONFIG['optflags'] = '-O0'
CONFIG['debugflags'] = '-ggdb3'
end

if RUBY_PLATFORM.include?('linux')
# Supposedly, the correct way to do this is
# ```
117 changes: 117 additions & 0 deletions ext/ddtrace_profiling_native_extension/heap_recorder.c
@@ -0,0 +1,117 @@
#include "heap_recorder.h"
#include <pthread.h>
#include "ruby/st.h"
#include "ruby/util.h"
#include "ruby_helpers.h"
#include <errno.h>

// Allows storing data passed to ::start_heap_allocation_recording to make it accessible to
// ::end_heap_allocation_recording.
//
// obj != Qnil flags this struct as holding a valid partial heap recording.
typedef struct {
VALUE obj;
live_object_data object_data;
} partial_heap_recording;

struct heap_recorder {
// Data for a heap recording that was started but not yet ended
partial_heap_recording active_recording;
};

// ==========================
// Heap Recorder External API
//
// WARN: All these APIs should support receiving a NULL heap_recorder, resulting in a noop.
//
// WARN: Except for ::heap_recorder_for_each_live_object, we always assume interaction with these APIs
// happens under the GVL.
//
// ==========================
heap_recorder* heap_recorder_new(void) {
heap_recorder* recorder = ruby_xmalloc(sizeof(heap_recorder));

recorder->active_recording = (partial_heap_recording) {
.obj = Qnil,
.object_data = {0},
};

return recorder;
}

void heap_recorder_free(struct heap_recorder* recorder) {
if (recorder == NULL) {
return;
}

ruby_xfree(recorder);
}

// TODO: Remove when things get implemented
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"

void heap_recorder_after_fork(heap_recorder *heap_recorder) {
if (heap_recorder == NULL) {
return;
}

// TODO: Implement
}

void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight) {
if (heap_recorder == NULL) {
return;
}

heap_recorder->active_recording = (partial_heap_recording) {
.obj = new_obj,
.object_data = (live_object_data) {
.weight = weight,
},
};
}

void end_heap_allocation_recording(struct heap_recorder *heap_recorder, ddog_prof_Slice_Location locations) {
if (heap_recorder == NULL) {
return;
}

partial_heap_recording *active_recording = &heap_recorder->active_recording;

VALUE new_obj = active_recording->obj;
if (new_obj == Qnil) {
// Recording ended without having been started?
rb_raise(rb_eRuntimeError, "Ended a heap recording that was not started");
}

// From now on, mark the active recording as invalid so we can short-circuit at any point and
// not end up with a still-active recording. new_obj still holds the object for this recording.
active_recording->obj = Qnil;

// TODO: Implement
}

void heap_recorder_flush(heap_recorder *heap_recorder) {
if (heap_recorder == NULL) {
return;
}

// TODO: Implement
}

// WARN: If with_gvl = False, NO HEAP ALLOCATIONS, EXCEPTIONS or RUBY CALLS ARE ALLOWED.
void heap_recorder_for_each_live_object(
heap_recorder *heap_recorder,
bool (*for_each_callback)(heap_recorder_iteration_data stack_data, void *extra_arg),
void *for_each_callback_extra_arg,
bool with_gvl) {
if (heap_recorder == NULL) {
return;
}

// TODO: Implement
}

// TODO: Remove when things get implemented
#pragma GCC diagnostic pop
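Although iteration is still a noop here, the shape of a consumer is already fixed by the API: a callback receives each live object's data plus an opaque extra argument. A hedged sketch of a callback that extrapolates a live-object count by summing weights — the aggregation itself is illustrative, not the serializer's actual logic:

```c
#include "heap_recorder.h"

// Illustrative consumer of the iteration API; the aggregation is hypothetical.
static bool count_live_objects_callback(heap_recorder_iteration_data data, void *extra_arg) {
  unsigned long *total = (unsigned long *) extra_arg;
  // Summing weights extrapolates from the sampled objects to an estimate of
  // all live objects (see the `weight` documentation in heap_recorder.h).
  *total += data.object_data.weight;
  return true; // returning false would stop iteration early
}

static unsigned long count_live_objects(heap_recorder *recorder) {
  unsigned long total = 0;
  heap_recorder_flush(recorder); // commit queued state before iterating
  heap_recorder_for_each_live_object(recorder, count_live_objects_callback, &total, /* with_gvl: */ true);
  return total;
}
```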
91 changes: 91 additions & 0 deletions ext/ddtrace_profiling_native_extension/heap_recorder.h
@@ -0,0 +1,91 @@
#pragma once

#include <datadog/profiling.h>
#include <ruby.h>

// A heap recorder keeps track of a collection of live heap objects.
//
// All allocations observed by this recorder for which a corresponding free was
// not yet observed are deemed as alive and can be iterated on to produce a
// live heap profile.
//
// NOTE: All public APIs of heap_recorder support receiving a NULL heap_recorder
// in which case the behaviour will be a noop.
//
// WARN: Unless otherwise stated the heap recorder APIs assume calls are done
// under the GVL.
typedef struct heap_recorder heap_recorder;

// Extra data associated with each live object being tracked.
typedef struct live_object_data {
// The weight of this object from a sampling perspective.
//
// A notion of weight is preserved for each tracked object to allow for an approximate
// extrapolation to an unsampled view.
//
// Example: If we were sampling every 50 objects, then each sampled object
// could be seen as being representative of 50 objects.
unsigned int weight;
} live_object_data;

// Data that is made available to iterators of heap recorder data for each live object
// tracked therein.
typedef struct {
ddog_prof_Slice_Location locations;
live_object_data object_data;
} heap_recorder_iteration_data;

// Initialize a new heap recorder.
heap_recorder* heap_recorder_new(void);

// Free a previously initialized heap recorder.
void heap_recorder_free(heap_recorder *heap_recorder);

// Do any cleanup needed after forking.
void heap_recorder_after_fork(heap_recorder *heap_recorder);

// Start a heap allocation recording on the heap recorder for a new object.
//
// This heap allocation recording needs to be ended via ::end_heap_allocation_recording
// before it will become fully committed and able to be iterated on.
//
// @param new_obj
// The newly allocated Ruby object/value.
// @param weight
// The sampling weight of this object.
//
// WARN: It needs to be paired with a ::end_heap_allocation_recording call.
void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight);

// End a previously started heap allocation recording on the heap recorder.
//
// It is at this point that an allocated object will become fully tracked and able to be iterated on.
//
// @param locations The stacktrace representing the location of the allocation.
//
// WARN: It is illegal to call this without previously having called ::start_heap_allocation_recording.
void end_heap_allocation_recording(heap_recorder *heap_recorder, ddog_prof_Slice_Location locations);

// Flush any intermediate state that might be queued inside the heap recorder.
//
// NOTE: This should usually be called before iteration to ensure the data is as up-to-date as possible.
void heap_recorder_flush(heap_recorder *heap_recorder);

// Iterate over each live object being tracked by the heap recorder.
//
// @param for_each_callback
// A callback function that shall be called for each live object being tracked
// by the heap recorder. Alongside the iteration_data for each live object,
// a second argument will be forwarded with the contents of the optional
// for_each_callback_extra_arg. Iteration will continue until the callback
// returns false or we run out of objects.
// @param for_each_callback_extra_arg
// Optional (NULL if empty) extra data that should be passed to the
// callback function alongside the data for each live tracked object.
// @param with_gvl
// True if we're calling this while holding the GVL, false otherwise.
void heap_recorder_for_each_live_object(
heap_recorder *heap_recorder,
bool (*for_each_callback)(heap_recorder_iteration_data data, void* extra_arg),
void *for_each_callback_extra_arg,
bool with_gvl);
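Taken together, the header defines a two-phase protocol per sampled allocation: flag the object when it is allocated, then commit it together with its stacktrace once the locations have been collected. A sketch of the intended call pattern, where the `collect_allocation_locations` helper is hypothetical:

```c
#include "heap_recorder.h"

// Hypothetical helper standing in for however the sampler collects the
// allocation's stacktrace.
extern ddog_prof_Slice_Location collect_allocation_locations(void);

void record_sampled_allocation(heap_recorder *recorder, VALUE new_obj, unsigned int weight) {
  // Phase 1: at allocation time, flag the object and its sampling weight.
  start_heap_allocation_recording(recorder, new_obj, weight);

  // ... stack sampling happens in between ...
  ddog_prof_Slice_Location locations = collect_allocation_locations();

  // Phase 2: commit the recording; only now does the object become fully
  // tracked and eligible for iteration.
  end_heap_allocation_recording(recorder, locations);
}
```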
20 changes: 20 additions & 0 deletions ext/ddtrace_profiling_native_extension/libdatadog_helpers.c
@@ -40,3 +40,23 @@ ddog_CharSlice ruby_value_type_to_char_slice(enum ruby_value_type type) {
default: return DDOG_CHARSLICE_C("BUG: Unknown value for ruby_value_type");
}
}

size_t read_ddogerr_string_and_drop(ddog_Error *error, char *string, size_t capacity) {
if (capacity == 0 || string == NULL) {
// short-circuit, we can't write anything
ddog_Error_drop(error);
return 0;
}

ddog_CharSlice error_msg_slice = ddog_Error_message(error);
size_t error_msg_size = error_msg_slice.len;
// Account for extra null char for proper cstring
if (error_msg_size >= capacity) {
// Error message too big, let's truncate it to capacity - 1 to allow for the extra null at the end
error_msg_size = capacity - 1;
}
strncpy(string, error_msg_slice.ptr, error_msg_size);
string[error_msg_size] = '\0';
ddog_Error_drop(error);
return error_msg_size;
}
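A typical use of this helper is copying a libdatadog error message into a fixed-size buffer, for example to embed it in a larger message without allocating. A small hedged sketch, where `some_libdatadog_call` stands in for any API returning a `ddog_Error`:

```c
#include <stdio.h>
#include "libdatadog_helpers.h"

// Hypothetical stand-in for any libdatadog API that returns a ddog_Error.
extern ddog_Error some_libdatadog_call(void);

void report_failure(void) {
  char error_msg[256];
  ddog_Error error = some_libdatadog_call();
  size_t written = read_ddogerr_string_and_drop(&error, error_msg, sizeof(error_msg));
  // error_msg is now a null-terminated cstring (at most 255 characters), and
  // the ddog_Error has already been dropped, so it must not be used again.
  fprintf(stderr, "libdatadog call failed: %s (%zu chars)\n", error_msg, written);
}
```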
5 changes: 5 additions & 0 deletions ext/ddtrace_profiling_native_extension/libdatadog_helpers.h
@@ -24,6 +24,11 @@ inline static VALUE get_error_details_and_drop(ddog_Error *error) {
return result;
}

// Utility function to extract an error cstring from a ddog_Error.
// Returns the number of characters written to string (necessarily bounded
// by capacity - 1, since the string will be null-terminated).
size_t read_ddogerr_string_and_drop(ddog_Error *error, char *string, size_t capacity);

// Used for pretty printing this Ruby enum. Returns "T_UNKNOWN_OR_MISSING_RUBY_VALUE_TYPE_ENTRY" for unknown elements.
// In practice, there are a few types that the profiler will probably never encounter, but I've added all entries of
// ruby_value_type that Ruby uses so that we can also use this for debugging.
(Diff truncated: the remaining changed files are not shown.)