From dfbc324528bb33c0ffd18cf6f7de1fbdacc88e7c Mon Sep 17 00:00:00 2001 From: Alexandre Fonseca Date: Tue, 12 Dec 2023 12:27:58 +0000 Subject: [PATCH] [PROF-8667] Heap Profiling - Part 1 - Setup (#3281) This commit paves the way for the introduction of heap profiling functionality by: * Introduction of a setting for controlling heap profiling (`DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED`, false by default). * Refactoring of settings related to allocation profiling (`DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED` and `DD_PROFILING_EXPERIMENTAL_ALLOCATION_SAMPLE_RATE`) and improving the warnings on broken rubies. * As a result of this refactoring, allocation counting is now tied with allocation profiling and can no longer be enabled stand-alone. * Introduction of a heap recorder component (with noop implementation for now) in the native profiling extension and plugging it in on top of the existing allocation profiling functionality. * Interaction with the heap recorder component to collect new `heap-live-samples` data at profile serialization time. * The necessary tests to have coverage for this functionality (some marked as pending until the proper implementation is added) Future commits will gradually build on the heap recorder implementation to actually enable heap data collection. 
--- benchmarks/profiler_sample_loop_v2.rb | 6 +- .../collectors_cpu_and_wall_time_worker.c | 30 ++-- .../collectors_thread_context.c | 2 + .../extconf.rb | 5 + .../heap_recorder.c | 117 ++++++++++++ .../heap_recorder.h | 91 ++++++++++ .../libdatadog_helpers.c | 20 +++ .../libdatadog_helpers.h | 5 + .../stack_recorder.c | 141 ++++++++++++++- .../stack_recorder.h | 2 + lib/datadog/core/configuration/settings.rb | 70 +++++--- .../collectors/cpu_and_wall_time_worker.rb | 15 +- lib/datadog/profiling/component.rb | 121 +++++++++++-- lib/datadog/profiling/stack_recorder.rb | 4 +- .../collectors/cpu_and_wall_time_worker.rbs | 6 +- sig/datadog/profiling/component.rbs | 15 ++ sig/datadog/profiling/stack_recorder.rbs | 3 +- .../core/configuration/settings_spec.rb | 119 ++++++++++--- .../cpu_and_wall_time_worker_spec.rb | 166 ++++++++++++------ spec/datadog/profiling/component_spec.rb | 160 ++++++++++++++--- spec/datadog/profiling/spec_helper.rb | 10 +- spec/datadog/profiling/stack_recorder_spec.rb | 128 +++++++++++++- 22 files changed, 1063 insertions(+), 173 deletions(-) create mode 100644 ext/ddtrace_profiling_native_extension/heap_recorder.c create mode 100644 ext/ddtrace_profiling_native_extension/heap_recorder.h diff --git a/benchmarks/profiler_sample_loop_v2.rb b/benchmarks/profiler_sample_loop_v2.rb index 0afc5d6cc70..4722a6c050f 100644 --- a/benchmarks/profiler_sample_loop_v2.rb +++ b/benchmarks/profiler_sample_loop_v2.rb @@ -16,7 +16,11 @@ class ProfilerSampleLoopBenchmark PROFILER_OVERHEAD_STACK_THREAD = Thread.new { sleep } def create_profiler - @recorder = Datadog::Profiling::StackRecorder.new(cpu_time_enabled: true, alloc_samples_enabled: true) + @recorder = Datadog::Profiling::StackRecorder.new( + cpu_time_enabled: true, + alloc_samples_enabled: false, + heap_samples_enabled: false + ) @collector = Datadog::Profiling::Collectors::ThreadContext.new( recorder: @recorder, max_frames: 400, tracer: nil, endpoint_collection_enabled: false, timeline_enabled: false ) 
diff --git a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c index ea56ba2a593..af9b0127e18 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +++ b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c @@ -80,10 +80,10 @@ struct cpu_and_wall_time_worker_state { // These are immutable after initialization bool gc_profiling_enabled; - bool allocation_counting_enabled; bool no_signals_workaround_enabled; bool dynamic_sampling_rate_enabled; - int allocation_sample_every; // Temporarily used for development/testing of allocation profiling + int allocation_sample_every; + bool allocation_profiling_enabled; VALUE self_instance; VALUE thread_context_collector_instance; VALUE idle_sampling_helper_instance; @@ -149,11 +149,11 @@ static VALUE _native_initialize( VALUE thread_context_collector_instance, VALUE gc_profiling_enabled, VALUE idle_sampling_helper_instance, - VALUE allocation_counting_enabled, VALUE no_signals_workaround_enabled, VALUE dynamic_sampling_rate_enabled, VALUE dynamic_sampling_rate_overhead_target_percentage, - VALUE allocation_sample_every + VALUE allocation_sample_every, + VALUE allocation_profiling_enabled ); static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr); static VALUE _native_sampling_loop(VALUE self, VALUE instance); @@ -265,10 +265,10 @@ static VALUE _native_new(VALUE klass) { // being leaked. 
state->gc_profiling_enabled = false; - state->allocation_counting_enabled = false; state->no_signals_workaround_enabled = false; state->dynamic_sampling_rate_enabled = true; state->allocation_sample_every = 0; + state->allocation_profiling_enabled = false; state->thread_context_collector_instance = Qnil; state->idle_sampling_helper_instance = Qnil; state->owner_thread = Qnil; @@ -293,31 +293,31 @@ static VALUE _native_initialize( VALUE thread_context_collector_instance, VALUE gc_profiling_enabled, VALUE idle_sampling_helper_instance, - VALUE allocation_counting_enabled, VALUE no_signals_workaround_enabled, VALUE dynamic_sampling_rate_enabled, VALUE dynamic_sampling_rate_overhead_target_percentage, - VALUE allocation_sample_every + VALUE allocation_sample_every, + VALUE allocation_profiling_enabled ) { ENFORCE_BOOLEAN(gc_profiling_enabled); - ENFORCE_BOOLEAN(allocation_counting_enabled); ENFORCE_BOOLEAN(no_signals_workaround_enabled); ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled); ENFORCE_TYPE(allocation_sample_every, T_FIXNUM); ENFORCE_TYPE(dynamic_sampling_rate_overhead_target_percentage, T_FLOAT); + ENFORCE_BOOLEAN(allocation_profiling_enabled); struct cpu_and_wall_time_worker_state *state; TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state); state->gc_profiling_enabled = (gc_profiling_enabled == Qtrue); - state->allocation_counting_enabled = (allocation_counting_enabled == Qtrue); state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue); state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue); dynamic_sampling_rate_set_overhead_target_percentage(&state->dynamic_sampling_rate, NUM2DBL(dynamic_sampling_rate_overhead_target_percentage)); state->allocation_sample_every = NUM2INT(allocation_sample_every); + state->allocation_profiling_enabled = (allocation_profiling_enabled == Qtrue); - if (state->allocation_sample_every < 0) { - rb_raise(rb_eArgError, 
"Unexpected value for allocation_sample_every: %d. This value must be >= 0.", state->allocation_sample_every); + if (state->allocation_sample_every <= 0) { + rb_raise(rb_eArgError, "Unexpected value for allocation_sample_every: %d. This value must be > 0.", state->allocation_sample_every); } state->thread_context_collector_instance = enforce_thread_context_collector_instance(thread_context_collector_instance); @@ -636,7 +636,7 @@ static VALUE release_gvl_and_run_sampling_trigger_loop(VALUE instance) { // because they may raise exceptions. install_sigprof_signal_handler(handle_sampling_signal, "handle_sampling_signal"); if (state->gc_profiling_enabled) rb_tracepoint_enable(state->gc_tracepoint); - if (state->allocation_counting_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint); + if (state->allocation_profiling_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint); rb_thread_call_without_gvl(run_sampling_trigger_loop, state, interrupt_sampling_trigger_loop, state); @@ -892,9 +892,9 @@ static void sleep_for(uint64_t time_ns) { } static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self) { - bool is_profiler_running = active_sampler_instance_state != NULL; + bool are_allocations_being_tracked = active_sampler_instance_state != NULL && active_sampler_instance_state->allocation_profiling_enabled; - return is_profiler_running ? ULL2NUM(allocation_count) : Qnil; + return are_allocations_being_tracked ? ULL2NUM(allocation_count) : Qnil; } // Implements memory-related profiling events. This function is called by Ruby via the `object_allocation_tracepoint` @@ -928,7 +928,7 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) // TODO: This is a placeholder sampling decision strategy. We plan to replace it with a better one soon (e.g. before // beta), and having something here allows us to test the rest of feature, sampling decision aside. 
- if (state->allocation_sample_every > 0 && ((allocation_count % state->allocation_sample_every) == 0)) { + if (allocation_count % state->allocation_sample_every == 0) { // Rescue against any exceptions that happen during sampling safely_call(rescued_sample_allocation, tracepoint_data, state->self_instance); } diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c index 3b1c798260c..618cdb23f5d 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +++ b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c @@ -1210,6 +1210,8 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in } } + track_object(state->recorder_instance, new_object, sample_weight); + trigger_sample_for_thread( state, /* thread: */ current_thread, diff --git a/ext/ddtrace_profiling_native_extension/extconf.rb b/ext/ddtrace_profiling_native_extension/extconf.rb index 2151f6ddb50..10ceafdb4f0 100644 --- a/ext/ddtrace_profiling_native_extension/extconf.rb +++ b/ext/ddtrace_profiling_native_extension/extconf.rb @@ -114,6 +114,11 @@ def add_compiler_flag(flag) add_compiler_flag '-Wall' add_compiler_flag '-Wextra' +if ENV['DDTRACE_DEBUG'] + CONFIG['optflags'] = '-O0' + CONFIG['debugflags'] = '-ggdb3' +end + if RUBY_PLATFORM.include?('linux') # Supposedly, the correct way to do this is # ``` diff --git a/ext/ddtrace_profiling_native_extension/heap_recorder.c b/ext/ddtrace_profiling_native_extension/heap_recorder.c new file mode 100644 index 00000000000..627567c3880 --- /dev/null +++ b/ext/ddtrace_profiling_native_extension/heap_recorder.c @@ -0,0 +1,117 @@ +#include "heap_recorder.h" +#include +#include "ruby/st.h" +#include "ruby/util.h" +#include "ruby_helpers.h" +#include + +// Allows storing data passed to ::start_heap_allocation_recording to make it accessible to +// ::end_heap_allocation_recording. 
+// +// obj != Qnil flags this struct as holding a valid partial heap recording. +typedef struct { + VALUE obj; + live_object_data object_data; +} partial_heap_recording; + +struct heap_recorder { + // Data for a heap recording that was started but not yet ended + partial_heap_recording active_recording; +}; + +// ========================== +// Heap Recorder External API +// +// WARN: All these APIs should support receiving a NULL heap_recorder, resulting in a noop. +// +// WARN: Except for ::heap_recorder_for_each_live_object, we always assume interaction with these APIs +// happens under the GVL. +// +// ========================== +heap_recorder* heap_recorder_new(void) { + heap_recorder* recorder = ruby_xmalloc(sizeof(heap_recorder)); + + recorder->active_recording = (partial_heap_recording) { + .obj = Qnil, + .object_data = {0}, + }; + + return recorder; +} + +void heap_recorder_free(struct heap_recorder* recorder) { + if (recorder == NULL) { + return; + } + + ruby_xfree(recorder); +} + +// TODO: Remove when things get implemented +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" + +void heap_recorder_after_fork(heap_recorder *heap_recorder) { + if (heap_recorder == NULL) { + return; + } + + // TODO: Implement +} + +void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight) { + if (heap_recorder == NULL) { + return; + } + + heap_recorder->active_recording = (partial_heap_recording) { + .obj = new_obj, + .object_data = (live_object_data) { + .weight = weight, + }, + }; +} + +void end_heap_allocation_recording(struct heap_recorder *heap_recorder, ddog_prof_Slice_Location locations) { + if (heap_recorder == NULL) { + return; + } + + partial_heap_recording *active_recording = &heap_recorder->active_recording; + + VALUE new_obj = active_recording->obj; + if (new_obj == Qnil) { + // Recording ended without having been started? 
+ rb_raise(rb_eRuntimeError, "Ended a heap recording that was not started"); + } + + // From now on, mark active recording as invalid so we can short-circuit at any point and + // not end up with a still active recording. new_obj still holds the object for this recording + active_recording->obj = Qnil; + + // TODO: Implement +} + +void heap_recorder_flush(heap_recorder *heap_recorder) { + if (heap_recorder == NULL) { + return; + } + + // TODO: Implement +} + +// WARN: If with_gvl = False, NO HEAP ALLOCATIONS, EXCEPTIONS or RUBY CALLS ARE ALLOWED. +void heap_recorder_for_each_live_object( + heap_recorder *heap_recorder, + bool (*for_each_callback)(heap_recorder_iteration_data stack_data, void *extra_arg), + void *for_each_callback_extra_arg, + bool with_gvl) { + if (heap_recorder == NULL) { + return; + } + + // TODO: Implement +} + +// TODO: Remove when things get implemented +#pragma GCC diagnostic pop diff --git a/ext/ddtrace_profiling_native_extension/heap_recorder.h b/ext/ddtrace_profiling_native_extension/heap_recorder.h new file mode 100644 index 00000000000..add47d46734 --- /dev/null +++ b/ext/ddtrace_profiling_native_extension/heap_recorder.h @@ -0,0 +1,91 @@ +#pragma once + +#include +#include + +// A heap recorder keeps track of a collection of live heap objects. +// +// All allocations observed by this recorder for which a corresponding free was +// not yet observed are deemed as alive and can be iterated on to produce a +// live heap profile. +// +// NOTE: All public APIs of heap_recorder support receiving a NULL heap_recorder +// in which case the behaviour will be a noop. +// +// WARN: Unless otherwise stated the heap recorder APIs assume calls are done +// under the GVL. +typedef struct heap_recorder heap_recorder; + +// Extra data associated with each live object being tracked. +typedef struct live_object_data { + // The weight of this object from a sampling perspective. 
+ // + // A notion of weight is preserved for each tracked object to allow for an approximate + // extrapolation to an unsampled view. + // + // Example: If we were sampling every 50 objects, then each sampled object + // could be seen as being representative of 50 objects. + unsigned int weight; +} live_object_data; + +// Data that is made available to iterators of heap recorder data for each live object +// tracked therein. +typedef struct { + ddog_prof_Slice_Location locations; + live_object_data object_data; +} heap_recorder_iteration_data; + +// Initialize a new heap recorder. +heap_recorder* heap_recorder_new(void); + +// Free a previously initialized heap recorder. +void heap_recorder_free(heap_recorder *heap_recorder); + +// Do any cleanup needed after forking. +void heap_recorder_after_fork(heap_recorder *heap_recorder); + +// Start a heap allocation recording on the heap recorder for a new object. +// +// This heap allocation recording needs to be ended via ::end_heap_allocation_recording +// before it will become fully committed and able to be iterated on. +// +// @param new_obj +// The newly allocated Ruby object/value. +// @param weight +// The sampling weight of this object. +// +// WARN: It needs to be paired with a ::end_heap_allocation_recording call. +void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight); + +// End a previously started heap allocation recording on the heap recorder. +// +// It is at this point that an allocated object will become fully tracked and able to be iterated on. +// +// @param locations The stacktrace representing the location of the allocation. +// +// WARN: It is illegal to call this without previously having called ::start_heap_allocation_recording. +void end_heap_allocation_recording(heap_recorder *heap_recorder, ddog_prof_Slice_Location locations); + +// Flush any intermediate state that might be queued inside the heap recorder. 
+// +// NOTE: This should usually be called before iteration to ensure data is as little stale as possible. +void heap_recorder_flush(heap_recorder *heap_recorder); + +// Iterate over each live object being tracked by the heap recorder. +// +// @param for_each_callback +// A callback function that shall be called for each live object being tracked +// by the heap recorder. Alongside the iteration_data for each live object, +// a second argument will be forwarded with the contents of the optional +// for_each_callback_extra_arg. Iteration will continue until the callback +// returns false or we run out of objects. +// @param for_each_callback_extra_arg +// Optional (NULL if empty) extra data that should be passed to the +// callback function alongside the data for each live tracked object. +// @param with_gvl +// True if we're calling this while holding the GVL, false otherwise. +void heap_recorder_for_each_live_object( + heap_recorder *heap_recorder, + bool (*for_each_callback)(heap_recorder_iteration_data data, void* extra_arg), + void *for_each_callback_extra_arg, + bool with_gvl); diff --git a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c index 43bd02c4a07..f6f8b69b1b9 100644 --- a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c +++ b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c @@ -40,3 +40,23 @@ ddog_CharSlice ruby_value_type_to_char_slice(enum ruby_value_type type) { default: return DDOG_CHARSLICE_C("BUG: Unknown value for ruby_value_type"); } } + +size_t read_ddogerr_string_and_drop(ddog_Error *error, char *string, size_t capacity) { + if (capacity == 0 || string == NULL) { + // short-circuit, we can't write anything + ddog_Error_drop(error); + return 0; + } + + ddog_CharSlice error_msg_slice = ddog_Error_message(error); + size_t error_msg_size = error_msg_slice.len; + // Account for extra null char for proper cstring + if (error_msg_size >= capacity) { + 
// Error message too big, lets truncate it to capacity - 1 to allow for extra null at end + error_msg_size = capacity - 1; + } + strncpy(string, error_msg_slice.ptr, error_msg_size); + string[error_msg_size] = '\0'; + ddog_Error_drop(error); + return error_msg_size; +} diff --git a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h index edd11e31954..7d4593fa550 100644 --- a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +++ b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h @@ -24,6 +24,11 @@ inline static VALUE get_error_details_and_drop(ddog_Error *error) { return result; } +// Utility function to be able to extract an error cstring from a ddog_Error. +// Returns the amount of characters written to string (which are necessarily +// bounded by capacity - 1 since the string will be null-terminated). +size_t read_ddogerr_string_and_drop(ddog_Error *error, char *string, size_t capacity); + // Used for pretty printing this Ruby enum. Returns "T_UNKNOWN_OR_MISSING_RUBY_VALUE_TYPE_ENTRY" for unknown elements. // In practice, there's a few types that the profiler will probably never encounter, but I've added all entries of // ruby_value_type that Ruby uses so that we can also use this for debugging. 
diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.c b/ext/ddtrace_profiling_native_extension/stack_recorder.c index a192a8d61ca..4918f8caea8 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.c +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.c @@ -7,6 +7,7 @@ #include "libdatadog_helpers.h" #include "ruby_helpers.h" #include "time_helpers.h" +#include "heap_recorder.h" // Used to wrap a ddog_prof_Profile in a Ruby object and expose Ruby-level serialization APIs // This file implements the native bits of the Datadog::Profiling::StackRecorder class @@ -150,18 +151,23 @@ static VALUE error_symbol = Qnil; // :error in Ruby #define WALL_TIME_VALUE_ID 2 #define ALLOC_SAMPLES_VALUE {.type_ = VALUE_STRING("alloc-samples"), .unit = VALUE_STRING("count")} #define ALLOC_SAMPLES_VALUE_ID 3 +#define HEAP_SAMPLES_VALUE {.type_ = VALUE_STRING("heap-live-samples"), .unit = VALUE_STRING("count")} +#define HEAP_SAMPLES_VALUE_ID 4 -static const ddog_prof_ValueType all_value_types[] = {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE}; +static const ddog_prof_ValueType all_value_types[] = {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, HEAP_SAMPLES_VALUE}; // This array MUST be kept in sync with all_value_types above and is intended to act as a "hashmap" between VALUE_ID and the position it // occupies on the all_value_types array. // E.g. all_value_types_positions[CPU_TIME_VALUE_ID] => 0, means that CPU_TIME_VALUE was declared at position 0 of all_value_types. 
-static const uint8_t all_value_types_positions[] = {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID}; +static const uint8_t all_value_types_positions[] = {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, HEAP_SAMPLES_VALUE_ID}; #define ALL_VALUE_TYPES_COUNT (sizeof(all_value_types) / sizeof(ddog_prof_ValueType)) // Contains native state for each instance struct stack_recorder_state { + // Heap recorder instance + heap_recorder *heap_recorder; + pthread_mutex_t slot_one_mutex; ddog_prof_Profile slot_one_profile; @@ -197,11 +203,11 @@ static VALUE _native_new(VALUE klass); static void initialize_slot_concurrency_control(struct stack_recorder_state *state); static void initialize_profiles(struct stack_recorder_state *state, ddog_prof_Slice_ValueType sample_types); static void stack_recorder_typed_data_free(void *data); -static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE cpu_time_enabled, VALUE alloc_samples_enabled); +static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE cpu_time_enabled, VALUE alloc_samples_enabled, VALUE heap_samples_enabled); static VALUE _native_serialize(VALUE self, VALUE recorder_instance); static VALUE ruby_time_from(ddog_Timespec ddprof_time); static void *call_serialize_without_gvl(void *call_args); -static struct active_slot_pair sampler_lock_active_profile(); +static struct active_slot_pair sampler_lock_active_profile(struct stack_recorder_state *state); static void sampler_unlock_active_profile(struct active_slot_pair active_slot); static ddog_prof_Profile *serializer_flip_active_and_inactive_slots(struct stack_recorder_state *state); static VALUE _native_active_slot(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance); @@ -213,6 +219,8 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_ static void serializer_set_start_timestamp_for_next_profile(struct 
stack_recorder_state *state, ddog_Timespec start_time); static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE local_root_span_id, VALUE endpoint); static void reset_profile(ddog_prof_Profile *profile, ddog_Timespec *start_time /* Can be null */); +static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight); + void stack_recorder_init(VALUE profiling_module) { VALUE stack_recorder_class = rb_define_class_under(profiling_module, "StackRecorder", rb_cObject); @@ -229,13 +237,14 @@ void stack_recorder_init(VALUE profiling_module) { // https://bugs.ruby-lang.org/issues/18007 for a discussion around this. rb_define_alloc_func(stack_recorder_class, _native_new); - rb_define_singleton_method(stack_recorder_class, "_native_initialize", _native_initialize, 3); + rb_define_singleton_method(stack_recorder_class, "_native_initialize", _native_initialize, 4); rb_define_singleton_method(stack_recorder_class, "_native_serialize", _native_serialize, 1); rb_define_singleton_method(stack_recorder_class, "_native_reset_after_fork", _native_reset_after_fork, 1); rb_define_singleton_method(testing_module, "_native_active_slot", _native_active_slot, 1); rb_define_singleton_method(testing_module, "_native_slot_one_mutex_locked?", _native_is_slot_one_mutex_locked, 1); rb_define_singleton_method(testing_module, "_native_slot_two_mutex_locked?", _native_is_slot_two_mutex_locked, 1); rb_define_singleton_method(testing_module, "_native_record_endpoint", _native_record_endpoint, 3); + rb_define_singleton_method(testing_module, "_native_track_object", _native_track_object, 3); ok_symbol = ID2SYM(rb_intern_const("ok")); error_symbol = ID2SYM(rb_intern_const("error")); @@ -270,6 +279,12 @@ static VALUE _native_new(VALUE klass) { VALUE stack_recorder = TypedData_Wrap_Struct(klass, &stack_recorder_typed_data, state); + // NOTE: We initialize this because we want a new recorder to be operational even 
without initialization and our + // default is everything enabled. However, if during recording initialization it turns out we don't want + // heap samples, we will free and reset heap_recorder to NULL, effectively disabling all behaviour specific + // to heap profiling (all calls to heap_recorder_* with a NULL heap recorder are noops). + state->heap_recorder = heap_recorder_new(); + // Note: Don't raise exceptions after this point, since it'll lead to libdatadog memory leaking! initialize_profiles(state, sample_types); @@ -318,23 +333,29 @@ static void stack_recorder_typed_data_free(void *state_ptr) { pthread_mutex_destroy(&state->slot_two_mutex); ddog_prof_Profile_drop(&state->slot_two_profile); + heap_recorder_free(state->heap_recorder); + ruby_xfree(state); } -static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE cpu_time_enabled, VALUE alloc_samples_enabled) { +static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE cpu_time_enabled, VALUE alloc_samples_enabled, VALUE heap_samples_enabled) { ENFORCE_BOOLEAN(cpu_time_enabled); ENFORCE_BOOLEAN(alloc_samples_enabled); + ENFORCE_BOOLEAN(heap_samples_enabled); struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); - if (cpu_time_enabled == Qtrue && alloc_samples_enabled == Qtrue) return Qtrue; // Nothing to do, this is the default + if (cpu_time_enabled == Qtrue && alloc_samples_enabled == Qtrue && heap_samples_enabled == Qtrue) return Qtrue; // Nothing to do, this is the default // When some sample types are disabled, we need to reconfigure libdatadog to record less types, // as well as reconfigure the position_for array to push the disabled types to the end so they don't get recorded. // See record_sample for details on the use of position_for. - state->enabled_values_count = ALL_VALUE_TYPES_COUNT - (cpu_time_enabled == Qtrue ? 
0 : 1) - (alloc_samples_enabled == Qtrue? 0 : 1); + state->enabled_values_count = ALL_VALUE_TYPES_COUNT - + (cpu_time_enabled == Qtrue ? 0 : 1) - + (alloc_samples_enabled == Qtrue? 0 : 1) - + (heap_samples_enabled == Qtrue ? 0 : 1); ddog_prof_ValueType enabled_value_types[ALL_VALUE_TYPES_COUNT]; uint8_t next_enabled_pos = 0; @@ -362,6 +383,18 @@ static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_insta state->position_for[ALLOC_SAMPLES_VALUE_ID] = next_disabled_pos++; } + if (heap_samples_enabled == Qtrue) { + enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) HEAP_SAMPLES_VALUE; + state->position_for[HEAP_SAMPLES_VALUE_ID] = next_enabled_pos++; + } else { + state->position_for[HEAP_SAMPLES_VALUE_ID] = next_disabled_pos++; + + // Turns out heap sampling is disabled but we initialized everything in _native_new + // assuming all samples were enabled. We need to deinitialize the heap recorder. + heap_recorder_free(state->heap_recorder); + state->heap_recorder = NULL; + } + ddog_prof_Profile_drop(&state->slot_one_profile); ddog_prof_Profile_drop(&state->slot_two_profile); @@ -379,6 +412,10 @@ static VALUE _native_serialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instan // Need to do this while still holding on to the Global VM Lock; see comments on method for why serializer_set_start_timestamp_for_next_profile(state, finish_timestamp); + // Flush any pending data in the heap recorder prior to doing the iteration during serialization. 
+ // This needs to happen while holding on to the GVL + heap_recorder_flush(state->heap_recorder); + // We'll release the Global VM Lock while we're calling serialize, so that the Ruby VM can continue to work while this // is pending struct call_serialize_without_gvl_arguments args = {.state = state, .finish_timestamp = finish_timestamp, .serialize_ran = false}; @@ -440,6 +477,14 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, metric_values[position_for[WALL_TIME_VALUE_ID]] = values.wall_time_ns; metric_values[position_for[ALLOC_SAMPLES_VALUE_ID]] = values.alloc_samples; + if (values.alloc_samples != 0) { + // If we got an allocation sample end the heap allocation recording to commit the heap sample. + // FIXME: Heap sampling currently has to be done in 2 parts because the construction of locations is happening + // very late in the allocation-sampling path (which is shared with the cpu sampling path). This can + // be fixed with some refactoring but for now this leads to a less impactful change. + end_heap_allocation_recording(state->heap_recorder, locations); + } + ddog_prof_Profile_Result result = ddog_prof_Profile_add( active_slot.profile, (ddog_prof_Sample) { @@ -457,6 +502,15 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, } } +void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight) { + struct stack_recorder_state *state; + TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); + // FIXME: Heap sampling currently has to be done in 2 parts because the construction of locations is happening + // very late in the allocation-sampling path (which is shared with the cpu sampling path). This can + // be fixed with some refactoring but for now this leads to a less impactful change. 
+ start_heap_allocation_recording(state->heap_recorder, new_object, sample_weight); +} + void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); @@ -472,10 +526,73 @@ void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_ } } +#define MAX_LEN_HEAP_ITERATION_ERROR_MSG 256 + +// Heap recorder iteration context allows us access to stack recorder state and profile being serialized +// during iteration of heap recorder live objects. +typedef struct heap_recorder_iteration_context { + struct stack_recorder_state *state; + ddog_prof_Profile *profile; + bool error; + char error_msg[MAX_LEN_HEAP_ITERATION_ERROR_MSG]; +} heap_recorder_iteration_context; + +static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteration_data iteration_data, void *extra_arg) { + heap_recorder_iteration_context *context = (heap_recorder_iteration_context*) extra_arg; + + int64_t metric_values[ALL_VALUE_TYPES_COUNT] = {0}; + uint8_t *position_for = context->state->position_for; + + metric_values[position_for[HEAP_SAMPLES_VALUE_ID]] = iteration_data.object_data.weight; + + ddog_prof_Profile_Result result = ddog_prof_Profile_add( + context->profile, + (ddog_prof_Sample) { + .locations = iteration_data.locations, + .values = (ddog_Slice_I64) {.ptr = metric_values, .len = context->state->enabled_values_count}, + .labels = {0}, + }, + 0 + ); + + if (result.tag == DDOG_PROF_PROFILE_RESULT_ERR) { + read_ddogerr_string_and_drop(&result.err, context->error_msg, MAX_LEN_HEAP_ITERATION_ERROR_MSG); + context->error = true; + // By returning false we cancel the iteration + return false; + } + + // Keep on iterating to next item! 
+ return true; +} + +static void build_heap_profile_without_gvl(struct stack_recorder_state *state, ddog_prof_Profile *profile) { + heap_recorder_iteration_context iteration_context = { + .state = state, + .profile = profile, + .error = false, + .error_msg = {0}, + }; + heap_recorder_for_each_live_object(state->heap_recorder, add_heap_sample_to_active_profile_without_gvl, (void*) &iteration_context, false); + if (iteration_context.error) { + // We wait until we're out of the iteration to grab the gvl and raise. This is important because during + // iteration we first grab the records_mutex and raising requires grabbing the GVL. When sampling, we are + // in the opposite situation: we have the GVL and may need to grab the records_mutex for mutation. This + // different ordering can lead to deadlocks. By delaying the raise here until after we no longer hold + // records_mutex, we prevent this different-lock-acquisition-order situation. + grab_gvl_and_raise(rb_eRuntimeError, "Failure during heap profile building: %s", iteration_context.error_msg); + } +} + static void *call_serialize_without_gvl(void *call_args) { struct call_serialize_without_gvl_arguments *args = (struct call_serialize_without_gvl_arguments *) call_args; args->profile = serializer_flip_active_and_inactive_slots(args->state); + + // Now that we have the inactive profile with all but heap samples, lets fill it with heap data + // without needing to race with the active sampler + build_heap_profile_without_gvl(args->state, args->profile); + // Note: The profile gets reset by the serialize call args->result = ddog_prof_Profile_serialize(args->profile, &args->finish_timestamp, NULL /* duration_nanos is optional */, NULL /* start_time is optional */); args->serialize_ran = true; @@ -597,6 +714,8 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_ reset_profile(&state->slot_one_profile, /* start_time: */ NULL); reset_profile(&state->slot_two_profile, /* start_time: */ NULL); 
+ heap_recorder_after_fork(state->heap_recorder); + return Qtrue; } @@ -614,6 +733,12 @@ static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_ return Qtrue; } +static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight) { + ENFORCE_TYPE(weight, T_FIXNUM); + track_object(recorder_instance, new_obj, NUM2UINT(weight)); + return Qtrue; +} + static void reset_profile(ddog_prof_Profile *profile, ddog_Timespec *start_time /* Can be null */) { ddog_prof_Profile_Result reset_result = ddog_prof_Profile_reset(profile, start_time); if (reset_result.tag == DDOG_PROF_PROFILE_RESULT_ERR) { diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.h b/ext/ddtrace_profiling_native_extension/stack_recorder.h index 2e871069019..9190cc89a25 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.h +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.h @@ -1,6 +1,7 @@ #pragma once #include +#include typedef struct { int64_t cpu_time_ns; @@ -21,4 +22,5 @@ typedef struct sample_labels { void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels); void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint); +void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight); VALUE enforce_recorder_instance(VALUE object); diff --git a/lib/datadog/core/configuration/settings.rb b/lib/datadog/core/configuration/settings.rb index 558e45cabef..43f6f259a69 100644 --- a/lib/datadog/core/configuration/settings.rb +++ b/lib/datadog/core/configuration/settings.rb @@ -314,32 +314,58 @@ def initialize(*_) # Can be used to enable/disable the Datadog::Profiling.allocation_count feature. # - # This feature is safe and enabled by default on Ruby 2.x, but has a few caveats on Ruby 3.x. 
+ # This feature is now controlled via {:experimental_allocation_enabled} + option :allocation_counting_enabled do |o| + o.after_set do + Datadog.logger.warn( + 'The profiling.advanced.allocation_counting_enabled setting has been deprecated for removal and no ' \ + 'longer does anything. Please remove it from your Datadog.configure block. ' \ + 'Allocation counting is now controlled by the `experimental_allocation_enabled` setting instead.' + ) + end + end + + # Can be used to enable/disable collection of allocation profiles. # - # Caveat 1 (severe): - # On Ruby versions 3.0 (all), 3.1.0 to 3.1.3, and 3.2.0 to 3.2.2 this is disabled by default because it - # can trigger a VM bug that causes a segmentation fault during garbage collection of Ractors - # (https://bugs.ruby-lang.org/issues/18464). We don't recommend using this feature on such Rubies. - # This bug is fixed on Ruby versions 3.1.4, 3.2.3 and 3.3.0. + # This feature is alpha and disabled by default # - # Caveat 2 (annoyance): - # On all known versions of Ruby 3.x, due to https://bugs.ruby-lang.org/issues/19112, when a ractor gets - # garbage collected, Ruby will disable all active tracepoints, which this feature internally relies on. - # Thus this feature is only usable if you're not using Ractors. + # @warn This feature is not supported/safe in all Rubies. Details in {Datadog::Profiling::Component} but + # in summary, this should be supported on Ruby 2.x, 3.1.4+, 3.2.3+ and 3.3.0+. Enabling it on + # unsupported Rubies may result in unexpected behaviour, including crashes. # - # Caveat 3 (severe): - # Ruby 3.2.0 to 3.2.2 have a bug in the newobj tracepoint (https://bugs.ruby-lang.org/issues/19482, - # https://github.com/ruby/ruby/pull/7464) so that's an extra reason why it's not safe on those Rubies. - # This bug is fixed on Ruby versions 3.2.3 and 3.3.0. + # @note Allocation profiles are not yet GA in the Datadog UI, get in touch if you want to help us test it. 
# - # @default `true` on Ruby 2.x and 3.1.4+, 3.2.3+ and 3.3.0+; `false` for Ruby 3.0 and unpatched Rubies. - option :allocation_counting_enabled do |o| - o.default do - RUBY_VERSION.start_with?('2.') || - (RUBY_VERSION.start_with?('3.1.') && RUBY_VERSION >= '3.1.4') || - (RUBY_VERSION.start_with?('3.2.') && RUBY_VERSION >= '3.2.3') || - RUBY_VERSION >= '3.3.' - end + # @default `DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED` environment variable as a boolean, otherwise `false` + option :experimental_allocation_enabled do |o| + o.type :bool + o.env 'DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED' + o.default false + end + + # Can be used to enable/disable the collection of heap profiles. + # + # This feature is alpha and disabled by default + # + # @warn To enable heap profiling you are required to also enable allocation profiling. + # @note Heap profiles are not yet GA in the Datadog UI, get in touch if you want to help us test it. + # + # @default `DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED` environment variable as a boolean, otherwise `false` + option :experimental_heap_enabled do |o| + o.type :bool + o.env 'DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED' + o.default false + end + + # Can be used to configure the allocation sampling rate: a sample will be collected every x allocations. + # + # The lower the value, the more accuracy in allocation and heap tracking but the bigger the overhead. In + # particular, a value of 1 will sample ALL allocations. + # + # @default `DD_PROFILING_EXPERIMENTAL_ALLOCATION_SAMPLE_RATE` environment variable, otherwise `50`. + option :experimental_allocation_sample_rate do |o| + o.type :int + o.env 'DD_PROFILING_EXPERIMENTAL_ALLOCATION_SAMPLE_RATE' + o.default 50 end # Can be used to disable checking which version of `libmysqlclient` is being used by the `mysql2` gem. 
diff --git a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb index d1a06a13964..3507a53dd56 100644 --- a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +++ b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb @@ -15,15 +15,15 @@ class CpuAndWallTimeWorker def initialize( gc_profiling_enabled:, - allocation_counting_enabled:, no_signals_workaround_enabled:, thread_context_collector:, dynamic_sampling_rate_overhead_target_percentage:, - idle_sampling_helper: IdleSamplingHelper.new, + allocation_sample_every:, + allocation_profiling_enabled:, # **NOTE**: This should only be used for testing; disabling the dynamic sampling rate will increase the # profiler overhead! dynamic_sampling_rate_enabled: true, - allocation_sample_every: 0 # Currently only for testing; Setting this to > 0 can add a lot of overhead! + idle_sampling_helper: IdleSamplingHelper.new ) unless dynamic_sampling_rate_enabled Datadog.logger.warn( @@ -31,23 +31,16 @@ def initialize( ) end - if allocation_counting_enabled && allocation_sample_every > 0 - Datadog.logger.warn( - "Enabled experimental allocation profiling: allocation_sample_every=#{allocation_sample_every}. This is " \ - 'experimental, not recommended, and will increase overhead!' 
- ) - end - self.class._native_initialize( self, thread_context_collector, gc_profiling_enabled, idle_sampling_helper, - allocation_counting_enabled, no_signals_workaround_enabled, dynamic_sampling_rate_enabled, dynamic_sampling_rate_overhead_target_percentage, allocation_sample_every, + allocation_profiling_enabled, ) @worker_thread = nil @failure_exception = nil diff --git a/lib/datadog/profiling/component.rb b/lib/datadog/profiling/component.rb index 5c50d602c94..228fd01f7a9 100644 --- a/lib/datadog/profiling/component.rb +++ b/lib/datadog/profiling/component.rb @@ -7,7 +7,7 @@ module Component # Passing in a `nil` tracer is supported and will disable the following profiling features: # * Code Hotspots panel in the trace viewer, as well as scoping a profile down to a span # * Endpoint aggregation in the profiler UX, including normalization (resource per endpoint call) - def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) # rubocop:disable Metrics/MethodLength + def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) require_relative '../profiling/diagnostics/environment_logger' Profiling::Diagnostics::EnvironmentLogger.collect_and_log! 
@@ -41,32 +41,28 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) no_signals_workaround_enabled = no_signals_workaround_enabled?(settings) timeline_enabled = settings.profiling.advanced.experimental_timeline_enabled + allocation_sample_every = get_allocation_sample_every(settings) + allocation_profiling_enabled = enable_allocation_profiling?(settings, allocation_sample_every) + heap_profiling_enabled = enable_heap_profiling?(settings, allocation_profiling_enabled) + overhead_target_percentage = valid_overhead_target(settings.profiling.advanced.overhead_target_percentage) upload_period_seconds = [60, settings.profiling.advanced.upload_period_seconds].max - recorder = Datadog::Profiling::StackRecorder.new( - cpu_time_enabled: RUBY_PLATFORM.include?('linux'), # Only supported on Linux currently - alloc_samples_enabled: false, # Always disabled for now -- work in progress - ) - thread_context_collector = Datadog::Profiling::Collectors::ThreadContext.new( - recorder: recorder, - max_frames: settings.profiling.advanced.max_frames, - tracer: optional_tracer, - endpoint_collection_enabled: settings.profiling.advanced.endpoint.collection.enabled, - timeline_enabled: timeline_enabled, - ) + recorder = build_recorder(allocation_profiling_enabled, heap_profiling_enabled) + thread_context_collector = build_thread_context_collector(settings, recorder, optional_tracer, timeline_enabled) worker = Datadog::Profiling::Collectors::CpuAndWallTimeWorker.new( gc_profiling_enabled: enable_gc_profiling?(settings), - allocation_counting_enabled: settings.profiling.advanced.allocation_counting_enabled, no_signals_workaround_enabled: no_signals_workaround_enabled, thread_context_collector: thread_context_collector, dynamic_sampling_rate_overhead_target_percentage: overhead_target_percentage, - allocation_sample_every: 0, + allocation_sample_every: allocation_sample_every, + allocation_profiling_enabled: allocation_profiling_enabled, ) internal_metadata = { 
no_signals_workaround_enabled: no_signals_workaround_enabled, timeline_enabled: timeline_enabled, + allocation_sample_every: allocation_sample_every, }.freeze exporter = build_profiler_exporter(settings, recorder, internal_metadata: internal_metadata) @@ -76,6 +72,24 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) Profiling::Profiler.new(worker: worker, scheduler: scheduler) end + private_class_method def self.build_recorder(allocation_profiling_enabled, heap_profiling_enabled) + Datadog::Profiling::StackRecorder.new( + cpu_time_enabled: RUBY_PLATFORM.include?('linux'), # Only supported on Linux currently + alloc_samples_enabled: allocation_profiling_enabled, + heap_samples_enabled: heap_profiling_enabled, + ) + end + + private_class_method def self.build_thread_context_collector(settings, recorder, optional_tracer, timeline_enabled) + Datadog::Profiling::Collectors::ThreadContext.new( + recorder: recorder, + max_frames: settings.profiling.advanced.max_frames, + tracer: optional_tracer, + endpoint_collection_enabled: settings.profiling.advanced.endpoint.collection.enabled, + timeline_enabled: timeline_enabled, + ) + end + private_class_method def self.build_profiler_exporter(settings, recorder, internal_metadata:) code_provenance_collector = (Profiling::Collectors::CodeProvenance.new if settings.profiling.advanced.code_provenance_enabled) @@ -113,6 +127,85 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) end end + private_class_method def self.get_allocation_sample_every(settings) + allocation_sample_rate = settings.profiling.advanced.experimental_allocation_sample_rate + + if allocation_sample_rate <= 0 + raise ArgumentError, "Allocation sample rate must be a positive integer. 
Was #{allocation_sample_rate}" + end + + allocation_sample_rate + end + + private_class_method def self.enable_allocation_profiling?(settings, allocation_sample_every) + unless settings.profiling.advanced.experimental_allocation_enabled + # Allocation profiling disabled, short-circuit out + return false + end + + # Allocation sampling is safe and supported on Ruby 2.x, but has a few caveats on Ruby 3.x. + + # SEVERE - All configurations + # Ruby 3.2.0 to 3.2.2 have a bug in the newobj tracepoint (https://bugs.ruby-lang.org/issues/19482, + # https://github.com/ruby/ruby/pull/7464) that makes this crash in any configuration. This bug is + # fixed on Ruby versions 3.2.3 and 3.3.0. + if RUBY_VERSION.start_with?('3.2.') && RUBY_VERSION < '3.2.3' + Datadog.logger.warn( + 'Allocation profiling is not supported in Ruby versions 3.2.0, 3.2.1 and 3.2.2 and will be forcibly '\ + 'disabled. This is due to a VM bug that can lead to crashes (https://bugs.ruby-lang.org/issues/19482). '\ + 'Other Ruby versions do not suffer from this issue.' + ) + return false + end + + # SEVERE - Only with Ractors + # On Ruby versions 3.0 (all), 3.1.0 to 3.1.3, and 3.2.0 to 3.2.2 allocation profiling can trigger a VM bug + # that causes a segmentation fault during garbage collection of Ractors + # (https://bugs.ruby-lang.org/issues/18464). We don't recommend using this feature on such Rubies. + # This bug is fixed on Ruby versions 3.1.4, 3.2.3 and 3.3.0. + if RUBY_VERSION.start_with?('3.0.') || + (RUBY_VERSION.start_with?('3.1.') && RUBY_VERSION < '3.1.4') || + (RUBY_VERSION.start_with?('3.2.') && RUBY_VERSION < '3.2.3') + Datadog.logger.warn( + "Current Ruby version (#{RUBY_VERSION}) has a VM bug where enabling allocation profiling while using "\ + 'Ractors may cause unexpected issues, including crashes (https://bugs.ruby-lang.org/issues/18464). '\ + 'This does not happen if Ractors are not used.' 
+ ) + # ANNOYANCE - Only with Ractors + # On all known versions of Ruby 3.x, due to https://bugs.ruby-lang.org/issues/19112, when a ractor gets + # garbage collected, Ruby will disable all active tracepoints, which this feature internally relies on. + elsif RUBY_VERSION.start_with?('3.') + Datadog.logger.warn( + 'In all known versions of Ruby 3.x, using Ractors may result in allocation profiling unexpectedly ' \ + 'stopping (https://bugs.ruby-lang.org/issues/19112). Note that this stop has no impact in your ' \ + 'application stability or performance. This does not happen if Ractors are not used.' + ) + end + + Datadog.logger.warn( + "Enabled experimental allocation profiling: allocation_sample_rate=#{allocation_sample_every}. This is " \ + 'experimental, not recommended, and will increase overhead!' + ) + + true + end + + private_class_method def self.enable_heap_profiling?(settings, allocation_profiling_enabled) + heap_profiling_enabled = settings.profiling.advanced.experimental_heap_enabled + + if heap_profiling_enabled && !allocation_profiling_enabled + raise ArgumentError, 'Heap profiling requires allocation profiling to be enabled' + end + + if heap_profiling_enabled + Datadog.logger.warn( + 'Enabled experimental heap profiling. This is experimental, not recommended, and will increase overhead!' + ) + end + + heap_profiling_enabled + end + private_class_method def self.no_signals_workaround_enabled?(settings) # rubocop:disable Metrics/MethodLength setting_value = settings.profiling.advanced.no_signals_workaround_enabled legacy_ruby_that_should_use_workaround = RUBY_VERSION.start_with?('2.3.', '2.4.', '2.5.') diff --git a/lib/datadog/profiling/stack_recorder.rb b/lib/datadog/profiling/stack_recorder.rb index ccbe710dafd..3153a4982c5 100644 --- a/lib/datadog/profiling/stack_recorder.rb +++ b/lib/datadog/profiling/stack_recorder.rb @@ -4,7 +4,7 @@ module Profiling # Note that `record_sample` is only accessible from native code. 
# Methods prefixed with _native_ are implemented in `stack_recorder.c` class StackRecorder - def initialize(cpu_time_enabled:, alloc_samples_enabled:) + def initialize(cpu_time_enabled:, alloc_samples_enabled:, heap_samples_enabled:) # This mutex works in addition to the fancy C-level mutexes we have in the native side (see the docs there). # It prevents multiple Ruby threads calling serialize at the same time -- something like # `10.times { Thread.new { stack_recorder.serialize } }`. @@ -13,7 +13,7 @@ def initialize(cpu_time_enabled:, alloc_samples_enabled:) # accidentally happening. @no_concurrent_synchronize_mutex = Mutex.new - self.class._native_initialize(self, cpu_time_enabled, alloc_samples_enabled) + self.class._native_initialize(self, cpu_time_enabled, alloc_samples_enabled, heap_samples_enabled) end def serialize diff --git a/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs b/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs index f819fcdbdff..cc887d59ce8 100644 --- a/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs +++ b/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs @@ -9,13 +9,13 @@ module Datadog def initialize: ( gc_profiling_enabled: bool, - allocation_counting_enabled: bool, no_signals_workaround_enabled: bool, thread_context_collector: Datadog::Profiling::Collectors::ThreadContext, dynamic_sampling_rate_overhead_target_percentage: Float, ?idle_sampling_helper: Datadog::Profiling::Collectors::IdleSamplingHelper, ?dynamic_sampling_rate_enabled: bool, - ?allocation_sample_every: Integer, + allocation_sample_every: Integer, + allocation_profiling_enabled: bool, ) -> void def self._native_initialize: ( @@ -23,11 +23,11 @@ module Datadog ThreadContext thread_context_collector, bool gc_profiling_enabled, IdleSamplingHelper idle_sampling_helper, - bool allocation_counting_enabled, bool no_signals_workaround_enabled, bool dynamic_sampling_rate_enabled, Float 
dynamic_sampling_rate_overhead_target_percentage, ::Integer allocation_sample_every, + bool allocation_profiling_enabled, ) -> true def start: (?on_failure_proc: ::Proc?) -> bool? diff --git a/sig/datadog/profiling/component.rbs b/sig/datadog/profiling/component.rbs index 675143e397c..2b32dbe88fe 100644 --- a/sig/datadog/profiling/component.rbs +++ b/sig/datadog/profiling/component.rbs @@ -7,6 +7,18 @@ module Datadog optional_tracer: Datadog::Tracing::Tracer?, ) -> Datadog::Profiling::Profiler? + def self.build_recorder: ( + bool allocation_profiling_enabled, + bool heap_profiling_enabled, + ) -> Datadog::Profiling::StackRecorder + + def self.build_thread_context_collector: ( + untyped settings, + Datadog::Profiling::StackRecorder recorder, + Datadog::Tracing::Tracer? optional_tracer, + bool timeline_enabled, + ) -> Datadog::Profiling::Collectors::ThreadContext + def self.build_profiler_exporter: ( untyped settings, Datadog::Profiling::StackRecorder recorder, @@ -19,6 +31,9 @@ module Datadog ) -> untyped def self.enable_gc_profiling?: (untyped settings) -> bool + def self.get_allocation_sample_every: (untyped settings) -> ::Integer + def self.enable_allocation_profiling?: (untyped settings, ::Integer allocation_sample_every) -> bool + def self.enable_heap_profiling?: (untyped settings, bool allocation_profiling_enabled) -> bool def self.no_signals_workaround_enabled?: (untyped settings) -> bool diff --git a/sig/datadog/profiling/stack_recorder.rbs b/sig/datadog/profiling/stack_recorder.rbs index a3ff67b7e38..0f61f9c8ab9 100644 --- a/sig/datadog/profiling/stack_recorder.rbs +++ b/sig/datadog/profiling/stack_recorder.rbs @@ -3,12 +3,13 @@ module Datadog class StackRecorder @no_concurrent_synchronize_mutex: ::Thread::Mutex - def initialize: (cpu_time_enabled: bool, alloc_samples_enabled: bool) -> void + def initialize: (cpu_time_enabled: bool, alloc_samples_enabled: bool, heap_samples_enabled: bool) -> void def self._native_initialize: ( 
Datadog::Profiling::StackRecorder recorder_instance, bool cpu_time_enabled, bool alloc_samples_enabled, + bool heap_samples_enabled, ) -> true def serialize: () -> untyped diff --git a/spec/datadog/core/configuration/settings_spec.rb b/spec/datadog/core/configuration/settings_spec.rb index a1e180c83c0..743a5294241 100644 --- a/spec/datadog/core/configuration/settings_spec.rb +++ b/spec/datadog/core/configuration/settings_spec.rb @@ -497,39 +497,116 @@ end end - describe '#allocation_counting_enabled' do - subject(:allocation_counting_enabled) { settings.profiling.advanced.allocation_counting_enabled } - - before { stub_const('RUBY_VERSION', testing_version) } + describe '#allocation_counting_enabled=' do + it 'logs a warning informing customers this no longer does anything' do + expect(Datadog.logger).to receive(:warn).with(/no longer does anything/) - context 'on Ruby 2.x' do - let(:testing_version) { '2.3.0 ' } - it { is_expected.to be true } + settings.profiling.advanced.allocation_counting_enabled = false end + end + + describe '#experimental_allocation_enabled' do + subject(:experimental_allocation_enabled) { settings.profiling.advanced.experimental_allocation_enabled } + + context 'when DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED' do + around do |example| + ClimateControl.modify('DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED' => environment) do + example.run + end + end + + context 'is not defined' do + let(:environment) { nil } - ['3.0.0', '3.1.0', '3.1.3', '3.2.0', '3.2.2'].each do |broken_ruby| - context "on a Ruby 3 version affected by https://bugs.ruby-lang.org/issues/18464 (#{broken_ruby})" do - let(:testing_version) { broken_ruby } it { is_expected.to be false } end + + [true, false].each do |value| + context "is defined as #{value}" do + let(:environment) { value.to_s } + + it { is_expected.to be value } + end + end end + end - ['3.1.4', '3.2.3', '3.3.0'].each do |fixed_ruby| - context "on a Ruby 3 version where https://bugs.ruby-lang.org/issues/18464 
is fixed (#{fixed_ruby})" do - let(:testing_version) { fixed_ruby } - it { is_expected.to be true } + describe '#experimental_allocation_enabled=' do + it 'updates the #experimental_allocation_enabled setting' do + expect { settings.profiling.advanced.experimental_allocation_enabled = true } + .to change { settings.profiling.advanced.experimental_allocation_enabled } + .from(false) + .to(true) + end + end + + describe '#experimental_heap_enabled' do + subject(:experimental_heap_enabled) { settings.profiling.advanced.experimental_heap_enabled } + + context 'when DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED' do + around do |example| + ClimateControl.modify('DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED' => environment) do + example.run + end + end + + context 'is not defined' do + let(:environment) { nil } + + it { is_expected.to be false } + end + + [true, false].each do |value| + context "is defined as #{value}" do + let(:environment) { value.to_s } + + it { is_expected.to be value } + end end end end - describe '#allocation_counting_enabled=' do - it 'updates the #allocation_counting_enabled setting' do - settings.profiling.advanced.allocation_counting_enabled = true + describe '#experimental_heap_enabled=' do + it 'updates the #experimental_heap_enabled setting' do + expect { settings.profiling.advanced.experimental_heap_enabled = true } + .to change { settings.profiling.advanced.experimental_heap_enabled } + .from(false) + .to(true) + end + end - expect { settings.profiling.advanced.allocation_counting_enabled = false } - .to change { settings.profiling.advanced.allocation_counting_enabled } - .from(true) - .to(false) + describe '#experimental_allocation_sample_rate' do + subject(:experimental_allocation_sample_rate) { settings.profiling.advanced.experimental_allocation_sample_rate } + + context 'when DD_PROFILING_EXPERIMENTAL_ALLOCATION_SAMPLE_RATE' do + around do |example| + ClimateControl.modify('DD_PROFILING_EXPERIMENTAL_ALLOCATION_SAMPLE_RATE' => environment) do + 
example.run + end + end + + context 'is not defined' do + let(:environment) { nil } + + it { is_expected.to be 50 } + end + + [100, 30.5].each do |value| + context "is defined as #{value}" do + let(:environment) { value.to_s } + + it { is_expected.to be value.to_i } + end + end + end + end + + describe '#experimental_allocation_sample_rate=' do + it 'updates the #experimental_allocation_sample_rate setting' do + expect { settings.profiling.advanced.experimental_allocation_sample_rate = 100 } + .to change { settings.profiling.advanced.experimental_allocation_sample_rate } + .from(50) + .to(100) end end diff --git a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb index 43c63adfc24..21888900e7c 100644 --- a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb +++ b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb @@ -7,20 +7,24 @@ # drops support for these Rubies globally let(:described_class) { Datadog::Profiling::Collectors::CpuAndWallTimeWorker } - let(:recorder) { build_stack_recorder } let(:endpoint_collection_enabled) { true } let(:gc_profiling_enabled) { true } - let(:allocation_counting_enabled) { true } + let(:allocation_sample_every) { 50 } + let(:allocation_profiling_enabled) { false } + let(:heap_profiling_enabled) { false } + let(:recorder) { build_stack_recorder(heap_samples_enabled: heap_profiling_enabled) } let(:no_signals_workaround_enabled) { false } let(:timeline_enabled) { false } let(:options) { {} } let(:worker_settings) do { gc_profiling_enabled: gc_profiling_enabled, - allocation_counting_enabled: allocation_counting_enabled, no_signals_workaround_enabled: no_signals_workaround_enabled, thread_context_collector: build_thread_context_collector(recorder), dynamic_sampling_rate_overhead_target_percentage: 2.0, + allocation_sample_every: allocation_sample_every, + allocation_profiling_enabled: allocation_profiling_enabled, + 
**options } end @@ -57,6 +61,8 @@ end describe '#start' do + let(:expected_worker_initialization_error) { nil } + subject(:start) do cpu_and_wall_time_worker.start wait_until_running @@ -442,25 +448,22 @@ end end - context 'when allocation sampling is enabled' do - let(:options) { { allocation_sample_every: 1 } } + context 'when allocation profiling is enabled' do + let(:allocation_profiling_enabled) { true } + let(:test_num_allocated_object) { 123 } + # Sample all allocations to get easy-to-reason about numbers that are also not flaky in CI. + let(:allocation_sample_every) { 1 } before do allow(Datadog.logger).to receive(:warn) end - it 'logs a warning message mentioning this is experimental' do - expect(Datadog.logger).to receive(:warn).with(/Enabled experimental allocation profiling/) - - start - end - it 'records allocated objects' do stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo)) start - 123.times { CpuAndWallTimeWorkerSpec::TestStruct.new } + test_num_allocated_object.times { CpuAndWallTimeWorkerSpec::TestStruct.new } allocation_line = __LINE__ - 1 cpu_and_wall_time_worker.stop @@ -469,7 +472,7 @@ samples_for_thread(samples_from_pprof(recorder.serialize!), Thread.current) .find { |s| s.labels[:'allocation class'] == 'CpuAndWallTimeWorkerSpec::TestStruct' } - expect(allocation_sample.values).to include(:'alloc-samples' => 123) + expect(allocation_sample.values).to include(:'alloc-samples' => test_num_allocated_object) expect(allocation_sample.locations.first.lineno).to eq allocation_line end @@ -540,7 +543,7 @@ end context 'when allocation sampling is disabled' do - let(:options) { { allocation_sample_every: 0 } } + let(:allocation_profiling_enabled) { false } it 'does not record allocations' do stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo)) @@ -555,6 +558,52 @@ end end + context 'when heap profiling is enabled' do + let(:allocation_profiling_enabled) { true } + let(:heap_profiling_enabled) { true } + 
let(:test_num_allocated_object) { 123 }
+    # Sample all allocations to get easy-to-reason about numbers that are also not flaky in CI.
+    let(:allocation_sample_every) { 1 }
+
+    before do
+      allow(Datadog.logger).to receive(:warn)
+    end
+
+    it 'records live heap objects' do
+      pending "heap profiling isn't actually implemented just yet"
+
+      stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo))
+
+      start
+
+      test_num_allocated_object.times { CpuAndWallTimeWorkerSpec::TestStruct.new }
+      allocation_line = __LINE__ - 1
+
+      cpu_and_wall_time_worker.stop
+
+      relevant_sample = samples_for_thread(samples_from_pprof(recorder.serialize!), Thread.current)
+        .find { |s| s.locations.first.lineno == allocation_line && s.locations.first.path == __FILE__ }
+
+      # NOTE: values are keyed by *symbols* (matching `s.values.key?(:'heap-live-samples')` below),
+      # so the lookup must use :'heap-live-samples', not the string ':heap-live-samples'.
+      expect(relevant_sample.values[:'heap-live-samples']).to eq test_num_allocated_object
+    end
+  end
+
+  context 'when heap profiling is disabled' do
+    let(:heap_profiling_enabled) { false }
+
+    it 'does not record heap samples' do
+      stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo))
+
+      start
+
+      123.times { CpuAndWallTimeWorkerSpec::TestStruct.new }
+
+      cpu_and_wall_time_worker.stop
+
+      expect(samples_from_pprof(recorder.serialize!).select { |s| s.values.key?(:'heap-live-samples') }).to be_empty
+    end
+  end
+
   context 'Process::Waiter crash regression tests' do
     # On Ruby 2.3 to 2.6, there's a crash when accessing instance variables of the `process_waiter_thread`,
     # see https://bugs.ruby-lang.org/issues/17807 .
@@ -633,7 +682,7 @@
         # so this must be disabled when interacting with Ractors.
let(:gc_profiling_enabled) { false } # ...same thing for the tracepoint for allocation counting/profiling :( - let(:allocation_counting_enabled) { false } + let(:allocation_profiling_enabled) { false } describe 'handle_sampling_signal' do include_examples 'does not trigger a sample', @@ -815,51 +864,68 @@ cpu_and_wall_time_worker.stop end - it 'returns the number of allocations between two calls of the method' do - # To get the exact expected number of allocations, we run this once before so that Ruby can create and cache all - # it needs to - new_object = proc { Object.new } - 1.times(&new_object) + context 'when allocation profiling is enabled' do + let(:allocation_profiling_enabled) { true } - before_allocations = described_class._native_allocation_count - 100.times(&new_object) - after_allocations = described_class._native_allocation_count + it 'always returns a >= 0 value' do + expect(described_class._native_allocation_count).to be >= 0 + end - expect(after_allocations - before_allocations).to be 100 - end + it 'returns the number of allocations between two calls of the method' do + # To get the exact expected number of allocations, we run this once before so that Ruby can create and cache all + # it needs to + new_object = proc { Object.new } + 1.times(&new_object) - it 'returns different numbers of allocations for different threads' do - # To get the exact expected number of allocations, we run this once before so that Ruby can create and cache all - # it needs to - new_object = proc { Object.new } - 1.times(&new_object) + before_allocations = described_class._native_allocation_count + 100.times(&new_object) + after_allocations = described_class._native_allocation_count - t1_can_run = Queue.new - t1_has_run = Queue.new - before_t1 = nil - after_t1 = nil + expect(after_allocations - before_allocations).to be 100 + end - background_t1 = Thread.new do - before_t1 = described_class._native_allocation_count - t1_can_run.pop + it 'returns different numbers of 
allocations for different threads' do + # To get the exact expected number of allocations, we run this once before so that Ruby can create and cache all + # it needs to + new_object = proc { Object.new } + 1.times(&new_object) - 100.times(&new_object) - after_t1 = described_class._native_allocation_count - t1_has_run << true - end + t1_can_run = Queue.new + t1_has_run = Queue.new + before_t1 = nil + after_t1 = nil + + background_t1 = Thread.new do + before_t1 = described_class._native_allocation_count + t1_can_run.pop + + 100.times(&new_object) + after_t1 = described_class._native_allocation_count + t1_has_run << true + end + + before_allocations = described_class._native_allocation_count + t1_can_run << true + t1_has_run.pop + after_allocations = described_class._native_allocation_count + + background_t1.join - before_allocations = described_class._native_allocation_count - t1_can_run << true - t1_has_run.pop - after_allocations = described_class._native_allocation_count + # This test checks that even though we observed 100 allocations in a background thread t1, the counters for + # the current thread were not affected by this change - background_t1.join + expect(after_t1 - before_t1).to be 100 + expect(after_allocations - before_allocations).to be < 10 + end + end - # This test checks that even though we observed 100 allocations in a background thread t1, the counters for - # the current thread were not affected by this change + context 'when allocation profiling is disabled' do + let(:allocation_profiling_enabled) { false } + it 'always returns a nil value' do + 100.times { Object.new } - expect(after_t1 - before_t1).to be 100 - expect(after_allocations - before_allocations).to be < 10 + expect(described_class._native_allocation_count).to be nil + end end end end diff --git a/spec/datadog/profiling/component_spec.rb b/spec/datadog/profiling/component_spec.rb index 7d7a28d4931..77ac09732b4 100644 --- a/spec/datadog/profiling/component_spec.rb +++ 
b/spec/datadog/profiling/component_spec.rb @@ -80,11 +80,11 @@ expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with( gc_profiling_enabled: anything, - allocation_counting_enabled: anything, no_signals_workaround_enabled: :no_signals_result, thread_context_collector: instance_of(Datadog::Profiling::Collectors::ThreadContext), dynamic_sampling_rate_overhead_target_percentage: :overhead_target_percentage_config, - allocation_sample_every: 0, + allocation_sample_every: kind_of(Integer), + allocation_profiling_enabled: false, ) build_profiler_component @@ -124,29 +124,153 @@ end end - context 'when allocation_counting_enabled is enabled' do + context 'when allocation profiling is enabled' do before do - settings.profiling.advanced.allocation_counting_enabled = true + settings.profiling.advanced.experimental_allocation_enabled = true + stub_const('RUBY_VERSION', testing_version) end - it 'initializes a CpuAndWallTimeWorker collector with allocation_counting_enabled set to true' do + context 'on Ruby 2.x' do + let(:testing_version) { '2.3.0 ' } + + it 'initializes CpuAndWallTimeWorker and StackRecorder with allocation sampling support and warns' do + expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including( + allocation_profiling_enabled: true, + ) + expect(Datadog::Profiling::StackRecorder).to receive(:new) + .with(hash_including(alloc_samples_enabled: true)) + .and_call_original + + expect(Datadog.logger).to receive(:warn).with(/experimental allocation profiling/) + expect(Datadog.logger).to_not receive(:warn).with(/Ractor/) + + build_profiler_component + end + end + + ['3.2.0', '3.2.1', '3.2.2'].each do |broken_ruby| + context "on a Ruby 3 version affected by https://bugs.ruby-lang.org/issues/19482 (#{broken_ruby})" do + let(:testing_version) { broken_ruby } + + it 'initializes a CpuAndWallTimeWorker and StackRecorder with allocation sampling force-disabled and warns' do + 
expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including( + allocation_profiling_enabled: false, + ) + expect(Datadog::Profiling::StackRecorder).to receive(:new) + .with(hash_including(alloc_samples_enabled: false)) + .and_call_original + + expect(Datadog.logger).to receive(:warn).with(/forcibly disabled/) + expect(Datadog.logger).to_not receive(:warn).with(/Ractor/) + expect(Datadog.logger).to_not receive(:warn).with(/experimental allocation profiling/) + + build_profiler_component + end + end + end + + ['3.0.0', '3.1.0', '3.1.3'].each do |broken_ractors_ruby| + context "on a Ruby 3 version affected by https://bugs.ruby-lang.org/issues/18464 (#{broken_ractors_ruby})" do + let(:testing_version) { broken_ractors_ruby } + + it 'initializes CpuAndWallTimeWorker and StackRecorder with allocation sampling support and warns' do + expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including( + allocation_profiling_enabled: true, + ) + expect(Datadog::Profiling::StackRecorder).to receive(:new) + .with(hash_including(alloc_samples_enabled: true)) + .and_call_original + + expect(Datadog.logger).to receive(:warn).with(/Ractors.+crashes/) + expect(Datadog.logger).to receive(:warn).with(/experimental allocation profiling/) + + build_profiler_component + end + end + end + + ['3.1.4', '3.2.3', '3.3.0'].each do |fixed_ruby| + context "on a Ruby 3 version where https://bugs.ruby-lang.org/issues/18464 is fixed (#{fixed_ruby})" do + let(:testing_version) { fixed_ruby } + it 'initializes CpuAndWallTimeWorker and StackRecorder with allocation sampling support and warns' do + expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including( + allocation_profiling_enabled: true, + ) + expect(Datadog::Profiling::StackRecorder).to receive(:new) + .with(hash_including(alloc_samples_enabled: true)) + .and_call_original + + expect(Datadog.logger).to 
receive(:warn).with(/Ractors.+stopping/) + expect(Datadog.logger).to receive(:warn).with(/experimental allocation profiling/) + + build_profiler_component + end + end + end + end + + context 'when allocation profiling is disabled' do + before do + settings.profiling.advanced.experimental_allocation_enabled = false + end + + it 'initializes CpuAndWallTimeWorker and StackRecorder without allocation sampling support' do expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including( - allocation_counting_enabled: true, + allocation_profiling_enabled: false, ) + expect(Datadog::Profiling::StackRecorder).to receive(:new) + .with(hash_including(alloc_samples_enabled: false)) + .and_call_original build_profiler_component end end - context 'when allocation_counting_enabled is disabled' do + context 'when heap profiling is enabled' do before do - settings.profiling.advanced.allocation_counting_enabled = false + settings.profiling.advanced.experimental_heap_enabled = true + # Universally supported ruby version for allocation profiling, we don't want to test those + # edge cases here + stub_const('RUBY_VERSION', '2.7.2') end - it 'initializes a CpuAndWallTimeWorker collector with allocation_counting_enabled set to false' do - expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including( - allocation_counting_enabled: false, - ) + context 'and allocation profiling disabled' do + before do + settings.profiling.advanced.experimental_allocation_enabled = false + end + + it 'raises an ArgumentError during component initialization' do + expect { build_profiler_component }.to raise_error(ArgumentError, /requires allocation profiling/) + end + end + + context 'and allocation profiling enabled and supported' do + before do + settings.profiling.advanced.experimental_allocation_enabled = true + end + + it 'initializes StackRecorder with heap sampling support and warns' do + 
expect(Datadog::Profiling::StackRecorder).to receive(:new) + .with(hash_including(heap_samples_enabled: true)) + .and_call_original + + expect(Datadog.logger).to receive(:warn).with(/experimental allocation profiling/) + expect(Datadog.logger).to receive(:warn).with(/experimental heap profiling/) + + build_profiler_component + end + end + end + + context 'when heap profiling is disabled' do + before do + settings.profiling.advanced.experimental_heap_enabled = false + end + + it 'initializes StackRecorder without heap sampling support' do + expect(Datadog::Profiling::StackRecorder).to receive(:new) + .with(hash_including(heap_samples_enabled: false)) + .and_call_original build_profiler_component end @@ -168,17 +292,20 @@ build_profiler_component end - it 'sets up the Exporter internal_metadata with no_signals_workaround_enabled and timeline_enabled settings' do + it 'sets up the Exporter internal_metadata with relevant settings' do allow(Datadog::Profiling::Collectors::ThreadContext).to receive(:new) allow(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new) expect(described_class).to receive(:no_signals_workaround_enabled?).and_return(:no_signals_result) expect(settings.profiling.advanced).to receive(:experimental_timeline_enabled).and_return(:timeline_result) + expect(settings.profiling.advanced).to receive(:experimental_allocation_sample_rate) + .and_return(123) expect(Datadog::Profiling::Exporter).to receive(:new).with( hash_including( internal_metadata: { no_signals_workaround_enabled: :no_signals_result, timeline_enabled: :timeline_result, + allocation_sample_every: 123, } ) ) @@ -186,13 +313,6 @@ build_profiler_component end - it 'sets up the StackRecorder with alloc_samples_enabled: false' do - expect(Datadog::Profiling::StackRecorder) - .to receive(:new).with(hash_including(alloc_samples_enabled: false)).and_call_original - - build_profiler_component - end - context 'when on Linux' do before { stub_const('RUBY_PLATFORM', 
'some-linux-based-platform') } diff --git a/spec/datadog/profiling/spec_helper.rb b/spec/datadog/profiling/spec_helper.rb index 6d15cf22170..e02c1af213b 100644 --- a/spec/datadog/profiling/spec_helper.rb +++ b/spec/datadog/profiling/spec_helper.rb @@ -72,8 +72,14 @@ def samples_for_thread(samples, thread) samples.select { |sample| object_id_from(sample.labels.fetch(:'thread id')) == thread.object_id } end - def build_stack_recorder - Datadog::Profiling::StackRecorder.new(cpu_time_enabled: true, alloc_samples_enabled: true) + # We disable heap_sample collection by default in tests since it requires some extra mocking/ + # setup for it to properly work. + def build_stack_recorder(heap_samples_enabled: false) + Datadog::Profiling::StackRecorder.new( + cpu_time_enabled: true, + alloc_samples_enabled: true, + heap_samples_enabled: heap_samples_enabled + ) end end diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb index 585d41f9d44..8652ceb5e05 100644 --- a/spec/datadog/profiling/stack_recorder_spec.rb +++ b/spec/datadog/profiling/stack_recorder_spec.rb @@ -7,9 +7,16 @@ let(:numeric_labels) { [] } let(:cpu_time_enabled) { true } let(:alloc_samples_enabled) { true } + # Disabling these by default since they require some extra setup and produce separate samples. + # Enabling this is tested in a particular context below. + let(:heap_samples_enabled) { false } subject(:stack_recorder) do - described_class.new(cpu_time_enabled: cpu_time_enabled, alloc_samples_enabled: alloc_samples_enabled) + described_class.new( + cpu_time_enabled: cpu_time_enabled, + alloc_samples_enabled: alloc_samples_enabled, + heap_samples_enabled: heap_samples_enabled + ) end # NOTE: A lot of libdatadog integration behaviors are tested in the Collectors::Stack specs, since we need actual @@ -114,6 +121,7 @@ def slot_two_mutex_locked? 
context 'when all profile types are enabled' do let(:cpu_time_enabled) { true } let(:alloc_samples_enabled) { true } + let(:heap_samples_enabled) { true } it 'returns a pprof with the configured sample types' do expect(sample_types_from(decoded_profile)).to eq( @@ -121,6 +129,7 @@ def slot_two_mutex_locked? 'cpu-samples' => 'count', 'wall-time' => 'nanoseconds', 'alloc-samples' => 'count', + 'heap-live-samples' => 'count', ) end end @@ -128,12 +137,14 @@ def slot_two_mutex_locked? context 'when cpu-time is disabled' do let(:cpu_time_enabled) { false } let(:alloc_samples_enabled) { true } + let(:heap_samples_enabled) { true } it 'returns a pprof without the cpu-type type' do expect(sample_types_from(decoded_profile)).to eq( 'cpu-samples' => 'count', 'wall-time' => 'nanoseconds', 'alloc-samples' => 'count', + 'heap-live-samples' => 'count', ) end end @@ -141,12 +152,29 @@ def slot_two_mutex_locked? context 'when alloc-samples is disabled' do let(:cpu_time_enabled) { true } let(:alloc_samples_enabled) { false } + let(:heap_samples_enabled) { true } it 'returns a pprof without the alloc-samples type' do expect(sample_types_from(decoded_profile)).to eq( 'cpu-time' => 'nanoseconds', 'cpu-samples' => 'count', 'wall-time' => 'nanoseconds', + 'heap-live-samples' => 'count', + ) + end + end + + context 'when heap-live-samples is disabled' do + let(:cpu_time_enabled) { true } + let(:alloc_samples_enabled) { true } + let(:heap_samples_enabled) { false } + + it 'returns a pprof without the heap-live-samples type' do + expect(sample_types_from(decoded_profile)).to eq( + 'cpu-time' => 'nanoseconds', + 'cpu-samples' => 'count', + 'wall-time' => 'nanoseconds', + 'alloc-samples' => 'count', ) end end @@ -154,8 +182,9 @@ def slot_two_mutex_locked? 
context 'when all optional types are disabled' do let(:cpu_time_enabled) { false } let(:alloc_samples_enabled) { false } + let(:heap_samples_enabled) { false } - it 'returns a pprof with without the optional types' do + it 'returns a pprof without the optional types' do expect(sample_types_from(decoded_profile)).to eq( 'cpu-samples' => 'count', 'wall-time' => 'nanoseconds', @@ -198,7 +227,12 @@ def sample_types_from(decoded_profile) it 'encodes the sample with the metrics provided' do expect(samples.first.values) - .to eq(:'cpu-time' => 123, :'cpu-samples' => 456, :'wall-time' => 789, :'alloc-samples' => 4242) + .to eq( + :'cpu-time' => 123, + :'cpu-samples' => 456, + :'wall-time' => 789, + :'alloc-samples' => 4242, + ) end context 'when disabling an optional profile sample type' do @@ -302,6 +336,94 @@ def sample_types_from(decoded_profile) end end + describe 'heap samples' do + let(:sample_rate) { 50 } + let(:metric_values) { { 'cpu-time' => 101, 'cpu-samples' => 1, 'wall-time' => 789, 'alloc-samples' => sample_rate } } + let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b', 'state' => 'unknown' }.to_a } + + let(:a_string) { 'a beautiful string' } + let(:an_array) { [1..10] } + let(:a_hash) { { 'a' => 1, 'b' => '2', 'c' => true } } + + let(:samples) { samples_from_pprof(encoded_pprof) } + + before do + allocations = [a_string, an_array, 'a fearsome string', [-10..-1], a_hash, { 'z' => -1, 'y' => '-2', 'x' => false }] + @num_allocations = 0 + allocations.each_with_index do |obj, i| + # Heap sampling currently requires this 2-step process to first pass data about the allocated object... + described_class::Testing._native_track_object(stack_recorder, obj, sample_rate) + # ...and then pass data about the allocation stacktrace (with 2 distinct stacktraces) + if i.even? 
+ Datadog::Profiling::Collectors::Stack::Testing + ._native_sample(Thread.current, stack_recorder, metric_values, labels, numeric_labels, 400, false) + else + # 401 used instead of 400 here just to make the branches different and appease Rubocop + Datadog::Profiling::Collectors::Stack::Testing + ._native_sample(Thread.current, stack_recorder, metric_values, labels, numeric_labels, 401, false) + end + @num_allocations += 1 + end + + allocations.clear # The literals in the previous array are now dangling + GC.start # And this will clear them, leaving only the non-literals which are still pointed to by the lets + end + + context 'when disabled' do + let(:heap_samples_enabled) { false } + + it 'are omitted from the profile' do + # We sample from 2 distinct locations + expect(samples.size).to eq(2) + expect(samples.select { |s| s.values.key?(:'heap-live-samples') }).to be_empty + end + end + + context 'when enabled' do + let(:heap_samples_enabled) { true } + + let(:heap_samples) do + samples.select { |s| s.values[:'heap-live-samples'] > 0 } + end + + let(:non_heap_samples) do + samples.select { |s| s.values[:'heap-live-samples'] == 0 } + end + + it 'include the stack and sample counts for the objects still left alive' do + pending 'heap_recorder implementation is currently missing' + + # We sample from 2 distinct locations + expect(heap_samples.size).to eq(2) + + sum_heap_samples = 0 + heap_samples.each { |s| sum_heap_samples += s.values[:'heap-live-samples'] } + expect(sum_heap_samples).to eq([a_string, an_array, a_hash].size * sample_rate) + end + + it 'keeps on reporting accurate samples for other profile types' do + expect(non_heap_samples.size).to eq(2) + + summed_values = {} + non_heap_samples.each do |s| + s.values.each_pair do |k, v| + summed_values[k] = (summed_values[k] || 0) + v + end + end + + # We use the same metric_values in all sample calls in before.
So we'd expect + # the summed values to match `@num_allocations * metric_values[profile-type]` + # for each profile-type therein. + expected_summed_values = { :'heap-live-samples' => 0 } + metric_values.each_pair do |k, v| + expected_summed_values[k.to_sym] = v * @num_allocations + end + + expect(summed_values).to eq(expected_summed_values) + end + end + end + context 'when there is a failure during serialization' do before do allow(Datadog.logger).to receive(:error)