diff --git a/benchmarks/profiler_sample_loop_v2.rb b/benchmarks/profiler_sample_loop_v2.rb index 0afc5d6cc70..4722a6c050f 100644 --- a/benchmarks/profiler_sample_loop_v2.rb +++ b/benchmarks/profiler_sample_loop_v2.rb @@ -16,7 +16,11 @@ class ProfilerSampleLoopBenchmark PROFILER_OVERHEAD_STACK_THREAD = Thread.new { sleep } def create_profiler - @recorder = Datadog::Profiling::StackRecorder.new(cpu_time_enabled: true, alloc_samples_enabled: true) + @recorder = Datadog::Profiling::StackRecorder.new( + cpu_time_enabled: true, + alloc_samples_enabled: false, + heap_samples_enabled: false + ) @collector = Datadog::Profiling::Collectors::ThreadContext.new( recorder: @recorder, max_frames: 400, tracer: nil, endpoint_collection_enabled: false, timeline_enabled: false ) diff --git a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c index ea56ba2a593..af9b0127e18 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +++ b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c @@ -80,10 +80,10 @@ struct cpu_and_wall_time_worker_state { // These are immutable after initialization bool gc_profiling_enabled; - bool allocation_counting_enabled; bool no_signals_workaround_enabled; bool dynamic_sampling_rate_enabled; - int allocation_sample_every; // Temporarily used for development/testing of allocation profiling + int allocation_sample_every; + bool allocation_profiling_enabled; VALUE self_instance; VALUE thread_context_collector_instance; VALUE idle_sampling_helper_instance; @@ -149,11 +149,11 @@ static VALUE _native_initialize( VALUE thread_context_collector_instance, VALUE gc_profiling_enabled, VALUE idle_sampling_helper_instance, - VALUE allocation_counting_enabled, VALUE no_signals_workaround_enabled, VALUE dynamic_sampling_rate_enabled, VALUE dynamic_sampling_rate_overhead_target_percentage, - VALUE allocation_sample_every + VALUE allocation_sample_every, + VALUE allocation_profiling_enabled ); static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr); static VALUE _native_sampling_loop(VALUE self, VALUE instance); @@ -265,10 +265,10 @@ static VALUE _native_new(VALUE klass) { // being leaked. 
state->gc_profiling_enabled = false; - state->allocation_counting_enabled = false; state->no_signals_workaround_enabled = false; state->dynamic_sampling_rate_enabled = true; state->allocation_sample_every = 0; + state->allocation_profiling_enabled = false; state->thread_context_collector_instance = Qnil; state->idle_sampling_helper_instance = Qnil; state->owner_thread = Qnil; @@ -293,31 +293,31 @@ static VALUE _native_initialize( VALUE thread_context_collector_instance, VALUE gc_profiling_enabled, VALUE idle_sampling_helper_instance, - VALUE allocation_counting_enabled, VALUE no_signals_workaround_enabled, VALUE dynamic_sampling_rate_enabled, VALUE dynamic_sampling_rate_overhead_target_percentage, - VALUE allocation_sample_every + VALUE allocation_sample_every, + VALUE allocation_profiling_enabled ) { ENFORCE_BOOLEAN(gc_profiling_enabled); - ENFORCE_BOOLEAN(allocation_counting_enabled); ENFORCE_BOOLEAN(no_signals_workaround_enabled); ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled); ENFORCE_TYPE(allocation_sample_every, T_FIXNUM); ENFORCE_TYPE(dynamic_sampling_rate_overhead_target_percentage, T_FLOAT); + ENFORCE_BOOLEAN(allocation_profiling_enabled); struct cpu_and_wall_time_worker_state *state; TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state); state->gc_profiling_enabled = (gc_profiling_enabled == Qtrue); - state->allocation_counting_enabled = (allocation_counting_enabled == Qtrue); state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue); state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue); dynamic_sampling_rate_set_overhead_target_percentage(&state->dynamic_sampling_rate, NUM2DBL(dynamic_sampling_rate_overhead_target_percentage)); state->allocation_sample_every = NUM2INT(allocation_sample_every); + state->allocation_profiling_enabled = (allocation_profiling_enabled == Qtrue); - if (state->allocation_sample_every < 0) { - rb_raise(rb_eArgError, "Unexpected value for allocation_sample_every: %d. This value must be >= 0.", state->allocation_sample_every); + if (state->allocation_sample_every <= 0) { + rb_raise(rb_eArgError, "Unexpected value for allocation_sample_every: %d. This value must be > 0.", state->allocation_sample_every); } state->thread_context_collector_instance = enforce_thread_context_collector_instance(thread_context_collector_instance); @@ -636,7 +636,7 @@ static VALUE release_gvl_and_run_sampling_trigger_loop(VALUE instance) { // because they may raise exceptions. install_sigprof_signal_handler(handle_sampling_signal, "handle_sampling_signal"); if (state->gc_profiling_enabled) rb_tracepoint_enable(state->gc_tracepoint); - if (state->allocation_counting_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint); + if (state->allocation_profiling_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint); rb_thread_call_without_gvl(run_sampling_trigger_loop, state, interrupt_sampling_trigger_loop, state); @@ -892,9 +892,9 @@ static void sleep_for(uint64_t time_ns) { } static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self) { - bool is_profiler_running = active_sampler_instance_state != NULL; + bool are_allocations_being_tracked = active_sampler_instance_state != NULL && active_sampler_instance_state->allocation_profiling_enabled; - return is_profiler_running ? ULL2NUM(allocation_count) : Qnil; + return are_allocations_being_tracked ? ULL2NUM(allocation_count) : Qnil; } // Implements memory-related profiling events. 
This function is called by Ruby via the `object_allocation_tracepoint` @@ -928,7 +928,7 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) { // TODO: This is a placeholder sampling decision strategy. We plan to replace it with a better one soon (e.g. before // beta), and having something here allows us to test the rest of the feature, sampling decision aside. - if (state->allocation_sample_every > 0 && ((allocation_count % state->allocation_sample_every) == 0)) { + if (allocation_count % state->allocation_sample_every == 0) { // Rescue against any exceptions that happen during sampling safely_call(rescued_sample_allocation, tracepoint_data, state->self_instance); } diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c index 3b1c798260c..618cdb23f5d 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +++ b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c @@ -1210,6 +1210,8 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in } } + track_object(state->recorder_instance, new_object, sample_weight); + trigger_sample_for_thread( state, /* thread: */ current_thread, diff --git a/ext/ddtrace_profiling_native_extension/extconf.rb b/ext/ddtrace_profiling_native_extension/extconf.rb index 2151f6ddb50..10ceafdb4f0 100644 --- a/ext/ddtrace_profiling_native_extension/extconf.rb +++ b/ext/ddtrace_profiling_native_extension/extconf.rb @@ -114,6 +114,11 @@ def add_compiler_flag(flag) add_compiler_flag '-Wall' add_compiler_flag '-Wextra' +if ENV['DDTRACE_DEBUG'] + CONFIG['optflags'] = '-O0' + CONFIG['debugflags'] = '-ggdb3' +end + if RUBY_PLATFORM.include?('linux') # Supposedly, the correct way to do this is # ``` diff --git a/ext/ddtrace_profiling_native_extension/heap_recorder.c b/ext/ddtrace_profiling_native_extension/heap_recorder.c new file mode 100644 index 00000000000..627567c3880 --- /dev/null +++ b/ext/ddtrace_profiling_native_extension/heap_recorder.c @@ -0,0 +1,117 @@ +#include "heap_recorder.h" +#include <pthread.h> +#include "ruby/st.h" +#include "ruby/util.h" +#include "ruby_helpers.h" +#include <errno.h> + +// Allows storing data passed to ::start_heap_allocation_recording to make it accessible to +// ::end_heap_allocation_recording. +// +// obj != Qnil flags this struct as holding a valid partial heap recording. +typedef struct { + VALUE obj; + live_object_data object_data; +} partial_heap_recording; + +struct heap_recorder { + // Data for a heap recording that was started but not yet ended + partial_heap_recording active_recording; +}; + +// ========================== +// Heap Recorder External API +// +// WARN: All these APIs should support receiving a NULL heap_recorder, resulting in a noop. +// +// WARN: Except for ::heap_recorder_for_each_live_object, we always assume interaction with these APIs +// happens under the GVL.
+// +// ========================== +heap_recorder* heap_recorder_new(void) { + heap_recorder* recorder = ruby_xmalloc(sizeof(heap_recorder)); + + recorder->active_recording = (partial_heap_recording) { + .obj = Qnil, + .object_data = {0}, + }; + + return recorder; +} + +void heap_recorder_free(struct heap_recorder* recorder) { + if (recorder == NULL) { + return; + } + + ruby_xfree(recorder); +} + +// TODO: Remove when things get implemented +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" + +void heap_recorder_after_fork(heap_recorder *heap_recorder) { + if (heap_recorder == NULL) { + return; + } + + // TODO: Implement +} + +void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight) { + if (heap_recorder == NULL) { + return; + } + + heap_recorder->active_recording = (partial_heap_recording) { + .obj = new_obj, + .object_data = (live_object_data) { + .weight = weight, + }, + }; +} + +void end_heap_allocation_recording(struct heap_recorder *heap_recorder, ddog_prof_Slice_Location locations) { + if (heap_recorder == NULL) { + return; + } + + partial_heap_recording *active_recording = &heap_recorder->active_recording; + + VALUE new_obj = active_recording->obj; + if (new_obj == Qnil) { + // Recording ended without having been started? + rb_raise(rb_eRuntimeError, "Ended a heap recording that was not started"); + } + + // From now on, mark active recording as invalid so we can short-circuit at any point and + // not end up with a still active recording. new_obj still holds the object for this recording + active_recording->obj = Qnil; + + // TODO: Implement +} + +void heap_recorder_flush(heap_recorder *heap_recorder) { + if (heap_recorder == NULL) { + return; + } + + // TODO: Implement +} + +// WARN: If with_gvl = False, NO HEAP ALLOCATIONS, EXCEPTIONS or RUBY CALLS ARE ALLOWED. +void heap_recorder_for_each_live_object( + heap_recorder *heap_recorder, + bool (*for_each_callback)(heap_recorder_iteration_data stack_data, void *extra_arg), + void *for_each_callback_extra_arg, + bool with_gvl) { + if (heap_recorder == NULL) { + return; + } + + // TODO: Implement +} + +// TODO: Remove when things get implemented +#pragma GCC diagnostic pop diff --git a/ext/ddtrace_profiling_native_extension/heap_recorder.h b/ext/ddtrace_profiling_native_extension/heap_recorder.h new file mode 100644 index 00000000000..add47d46734 --- /dev/null +++ b/ext/ddtrace_profiling_native_extension/heap_recorder.h @@ -0,0 +1,91 @@ +#pragma once + +#include <datadog/profiling.h> +#include <ruby.h> + +// A heap recorder keeps track of a collection of live heap objects. +// +// All allocations observed by this recorder for which a corresponding free was +// not yet observed are deemed alive and can be iterated on to produce a +// live heap profile. +// +// NOTE: All public APIs of heap_recorder support receiving a NULL heap_recorder +// in which case the behaviour will be a noop. +// +// WARN: Unless otherwise stated the heap recorder APIs assume calls are done +// under the GVL. +typedef struct heap_recorder heap_recorder; + +// Extra data associated with each live object being tracked. +typedef struct live_object_data { + // The weight of this object from a sampling perspective. + // + // A notion of weight is preserved for each tracked object to allow for an approximate + // extrapolation to an unsampled view. + // + // Example: If we were sampling every 50 objects, then each sampled object + // could be seen as being representative of 50 objects.
+ unsigned int weight; +} live_object_data; + +// Data that is made available to iterators of heap recorder data for each live object +// tracked therein. +typedef struct { + ddog_prof_Slice_Location locations; + live_object_data object_data; +} heap_recorder_iteration_data; + +// Initialize a new heap recorder. +heap_recorder* heap_recorder_new(void); + +// Free a previously initialized heap recorder. +void heap_recorder_free(heap_recorder *heap_recorder); + +// Do any cleanup needed after forking. +void heap_recorder_after_fork(heap_recorder *heap_recorder); + +// Start a heap allocation recording on the heap recorder for a new object. +// +// This heap allocation recording needs to be ended via ::end_heap_allocation_recording +// before it will become fully committed and able to be iterated on. +// +// @param new_obj +// The newly allocated Ruby object/value. +// @param weight +// The sampling weight of this object. +// +// WARN: It needs to be paired with an ::end_heap_allocation_recording call. +void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight); + +// End a previously started heap allocation recording on the heap recorder. +// +// It is at this point that an allocated object will become fully tracked and able to be iterated on. +// +// @param locations The stacktrace representing the location of the allocation. +// +// WARN: It is illegal to call this without previously having called ::start_heap_allocation_recording. +void end_heap_allocation_recording(heap_recorder *heap_recorder, ddog_prof_Slice_Location locations); + +// Flush any intermediate state that might be queued inside the heap recorder. +// +// NOTE: This should usually be called before iteration to ensure the data is as up-to-date as possible. +void heap_recorder_flush(heap_recorder *heap_recorder); + +// Iterate over each live object being tracked by the heap recorder. +// +// @param for_each_callback +// A callback function that shall be called for each live object being tracked +// by the heap recorder. Alongside the iteration_data for each live object, +// a second argument will be forwarded with the contents of the optional +// for_each_callback_extra_arg. Iteration will continue until the callback +// returns false or we run out of objects. +// @param for_each_callback_extra_arg +// Optional (NULL if empty) extra data that should be passed to the +// callback function alongside the data for each live tracked object. +// @param with_gvl +// True if we're calling this while holding the GVL, false otherwise.
+void heap_recorder_for_each_live_object( + heap_recorder *heap_recorder, + bool (*for_each_callback)(heap_recorder_iteration_data data, void* extra_arg), + void *for_each_callback_extra_arg, + bool with_gvl); diff --git a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c index 43bd02c4a07..f6f8b69b1b9 100644 --- a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c +++ b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c @@ -40,3 +40,23 @@ ddog_CharSlice ruby_value_type_to_char_slice(enum ruby_value_type type) { default: return DDOG_CHARSLICE_C("BUG: Unknown value for ruby_value_type"); } } + +size_t read_ddogerr_string_and_drop(ddog_Error *error, char *string, size_t capacity) { + if (capacity == 0 || string == NULL) { + // short-circuit, we can't write anything + ddog_Error_drop(error); + return 0; + } + + ddog_CharSlice error_msg_slice = ddog_Error_message(error); + size_t error_msg_size = error_msg_slice.len; + // Account for extra null char for proper cstring + if (error_msg_size >= capacity) { + // Error message too big, let's truncate it to capacity - 1 to allow for the extra null at the end + error_msg_size = capacity - 1; + } + strncpy(string, error_msg_slice.ptr, error_msg_size); + string[error_msg_size] = '\0'; + ddog_Error_drop(error); + return error_msg_size; +} diff --git a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h index edd11e31954..7d4593fa550 100644 --- a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +++ b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h @@ -24,6 +24,11 @@ inline static VALUE get_error_details_and_drop(ddog_Error *error) { return result; } + +// Utility function to extract an error cstring from a ddog_Error. +// Returns the number of characters written to string (which are necessarily +// bounded by capacity - 1 since the string will be null-terminated). +size_t read_ddogerr_string_and_drop(ddog_Error *error, char *string, size_t capacity); + // Used for pretty printing this Ruby enum. Returns "T_UNKNOWN_OR_MISSING_RUBY_VALUE_TYPE_ENTRY" for unknown elements. // In practice, there are a few types that the profiler will probably never encounter, but I've added all entries of // ruby_value_type that Ruby uses so that we can also use this for debugging.
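Taken together, the heap_recorder API above forms a two-phase protocol: when the allocation tracepoint fires, the new object is flagged via start_heap_allocation_recording, and the recording is only committed via end_heap_allocation_recording once the (shared) sampling path has built a stacktrace; at serialization time the recorder is flushed and then iterated. A minimal caller sketch of that lifecycle follows — hypothetical code, not part of this change: example_lifecycle, count_live_objects, the weight of 50, and the empty locations slice are all illustrative, and since the recording/iteration bodies above are still TODO, iteration does not yet yield data.

#include <stdbool.h>
#include <ruby.h>
#include "heap_recorder.h"

// Hypothetical iteration callback: extrapolates a total live-object count by
// summing each sample's weight. extra_arg carries the accumulator across calls.
static bool count_live_objects(heap_recorder_iteration_data data, void *extra_arg) {
  unsigned long *total = (unsigned long *) extra_arg;
  *total += data.object_data.weight;
  return true; // returning false would cancel the iteration
}

static void example_lifecycle(void) {
  heap_recorder *recorder = heap_recorder_new();

  // Phase 1: the allocation tracepoint flags the newly-allocated object...
  VALUE new_obj = rb_str_new_cstr("example");
  start_heap_allocation_recording(recorder, new_obj, /* weight: */ 50);

  // Phase 2: ...and once the shared sampling path has assembled the stacktrace,
  // the recording is committed. An empty slice stands in for real locations here.
  end_heap_allocation_recording(recorder, (ddog_prof_Slice_Location) {.ptr = NULL, .len = 0});

  // At serialization time: flush while holding the GVL, then iterate live objects.
  heap_recorder_flush(recorder);
  unsigned long total_live = 0;
  heap_recorder_for_each_live_object(recorder, count_live_objects, &total_live, /* with_gvl: */ true);

  heap_recorder_free(recorder);
}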
diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.c b/ext/ddtrace_profiling_native_extension/stack_recorder.c index a192a8d61ca..4918f8caea8 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.c +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.c @@ -7,6 +7,7 @@ #include "libdatadog_helpers.h" #include "ruby_helpers.h" #include "time_helpers.h" +#include "heap_recorder.h" // Used to wrap a ddog_prof_Profile in a Ruby object and expose Ruby-level serialization APIs // This file implements the native bits of the Datadog::Profiling::StackRecorder class @@ -150,18 +151,23 @@ static VALUE error_symbol = Qnil; // :error in Ruby #define WALL_TIME_VALUE_ID 2 #define ALLOC_SAMPLES_VALUE {.type_ = VALUE_STRING("alloc-samples"), .unit = VALUE_STRING("count")} #define ALLOC_SAMPLES_VALUE_ID 3 +#define HEAP_SAMPLES_VALUE {.type_ = VALUE_STRING("heap-live-samples"), .unit = VALUE_STRING("count")} +#define HEAP_SAMPLES_VALUE_ID 4 -static const ddog_prof_ValueType all_value_types[] = {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE}; +static const ddog_prof_ValueType all_value_types[] = {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, HEAP_SAMPLES_VALUE}; // This array MUST be kept in sync with all_value_types above and is intended to act as a "hashmap" between VALUE_ID and the position it // occupies on the all_value_types array. // E.g. all_value_types_positions[CPU_TIME_VALUE_ID] => 0, means that CPU_TIME_VALUE was declared at position 0 of all_value_types. -static const uint8_t all_value_types_positions[] = {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID}; +static const uint8_t all_value_types_positions[] = {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, HEAP_SAMPLES_VALUE_ID}; #define ALL_VALUE_TYPES_COUNT (sizeof(all_value_types) / sizeof(ddog_prof_ValueType)) // Contains native state for each instance struct stack_recorder_state { + // Heap recorder instance + heap_recorder *heap_recorder; + pthread_mutex_t slot_one_mutex; ddog_prof_Profile slot_one_profile; @@ -197,11 +203,11 @@ static VALUE _native_new(VALUE klass); static void initialize_slot_concurrency_control(struct stack_recorder_state *state); static void initialize_profiles(struct stack_recorder_state *state, ddog_prof_Slice_ValueType sample_types); static void stack_recorder_typed_data_free(void *data); -static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE cpu_time_enabled, VALUE alloc_samples_enabled); +static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE cpu_time_enabled, VALUE alloc_samples_enabled, VALUE heap_samples_enabled); static VALUE _native_serialize(VALUE self, VALUE recorder_instance); static VALUE ruby_time_from(ddog_Timespec ddprof_time); static void *call_serialize_without_gvl(void *call_args); -static struct active_slot_pair sampler_lock_active_profile(); +static struct active_slot_pair sampler_lock_active_profile(struct stack_recorder_state *state); static void sampler_unlock_active_profile(struct active_slot_pair active_slot); static ddog_prof_Profile *serializer_flip_active_and_inactive_slots(struct stack_recorder_state *state); static VALUE _native_active_slot(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance); @@ -213,6 +219,8 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_ static void serializer_set_start_timestamp_for_next_profile(struct 
stack_recorder_state *state, ddog_Timespec start_time); static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE local_root_span_id, VALUE endpoint); static void reset_profile(ddog_prof_Profile *profile, ddog_Timespec *start_time /* Can be null */); +static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight); + void stack_recorder_init(VALUE profiling_module) { VALUE stack_recorder_class = rb_define_class_under(profiling_module, "StackRecorder", rb_cObject); @@ -229,13 +237,14 @@ void stack_recorder_init(VALUE profiling_module) { // https://bugs.ruby-lang.org/issues/18007 for a discussion around this. rb_define_alloc_func(stack_recorder_class, _native_new); - rb_define_singleton_method(stack_recorder_class, "_native_initialize", _native_initialize, 3); + rb_define_singleton_method(stack_recorder_class, "_native_initialize", _native_initialize, 4); rb_define_singleton_method(stack_recorder_class, "_native_serialize", _native_serialize, 1); rb_define_singleton_method(stack_recorder_class, "_native_reset_after_fork", _native_reset_after_fork, 1); rb_define_singleton_method(testing_module, "_native_active_slot", _native_active_slot, 1); rb_define_singleton_method(testing_module, "_native_slot_one_mutex_locked?", _native_is_slot_one_mutex_locked, 1); rb_define_singleton_method(testing_module, "_native_slot_two_mutex_locked?", _native_is_slot_two_mutex_locked, 1); rb_define_singleton_method(testing_module, "_native_record_endpoint", _native_record_endpoint, 3); + rb_define_singleton_method(testing_module, "_native_track_object", _native_track_object, 3); ok_symbol = ID2SYM(rb_intern_const("ok")); error_symbol = ID2SYM(rb_intern_const("error")); @@ -270,6 +279,12 @@ static VALUE _native_new(VALUE klass) { VALUE stack_recorder = TypedData_Wrap_Struct(klass, &stack_recorder_typed_data, state); + // NOTE: We initialize this because we want a new recorder to be operational even without initialization and our + // default is everything enabled. However, if during recorder initialization it turns out we don't want + // heap samples, we will free and reset heap_recorder to NULL, effectively disabling all behaviour specific + // to heap profiling (all calls to heap_recorder_* with a NULL heap recorder are noops). + state->heap_recorder = heap_recorder_new(); + // Note: Don't raise exceptions after this point, since it'll lead to libdatadog memory leaking!
initialize_profiles(state, sample_types); @@ -318,23 +333,29 @@ static void stack_recorder_typed_data_free(void *state_ptr) { pthread_mutex_destroy(&state->slot_two_mutex); ddog_prof_Profile_drop(&state->slot_two_profile); + heap_recorder_free(state->heap_recorder); + ruby_xfree(state); } -static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE cpu_time_enabled, VALUE alloc_samples_enabled) { +static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE cpu_time_enabled, VALUE alloc_samples_enabled, VALUE heap_samples_enabled) { ENFORCE_BOOLEAN(cpu_time_enabled); ENFORCE_BOOLEAN(alloc_samples_enabled); + ENFORCE_BOOLEAN(heap_samples_enabled); struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); - if (cpu_time_enabled == Qtrue && alloc_samples_enabled == Qtrue) return Qtrue; // Nothing to do, this is the default + if (cpu_time_enabled == Qtrue && alloc_samples_enabled == Qtrue && heap_samples_enabled == Qtrue) return Qtrue; // Nothing to do, this is the default // When some sample types are disabled, we need to reconfigure libdatadog to record less types, // as well as reconfigure the position_for array to push the disabled types to the end so they don't get recorded. // See record_sample for details on the use of position_for. - state->enabled_values_count = ALL_VALUE_TYPES_COUNT - (cpu_time_enabled == Qtrue ? 0 : 1) - (alloc_samples_enabled == Qtrue? 0 : 1); + state->enabled_values_count = ALL_VALUE_TYPES_COUNT - + (cpu_time_enabled == Qtrue ? 0 : 1) - + (alloc_samples_enabled == Qtrue? 0 : 1) - + (heap_samples_enabled == Qtrue ? 0 : 1); ddog_prof_ValueType enabled_value_types[ALL_VALUE_TYPES_COUNT]; uint8_t next_enabled_pos = 0; @@ -362,6 +383,18 @@ static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_insta state->position_for[ALLOC_SAMPLES_VALUE_ID] = next_disabled_pos++; } + if (heap_samples_enabled == Qtrue) { + enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) HEAP_SAMPLES_VALUE; + state->position_for[HEAP_SAMPLES_VALUE_ID] = next_enabled_pos++; + } else { + state->position_for[HEAP_SAMPLES_VALUE_ID] = next_disabled_pos++; + + // Turns out heap sampling is disabled but we initialized everything in _native_new + // assuming all samples were enabled. We need to deinitialize the heap recorder. + heap_recorder_free(state->heap_recorder); + state->heap_recorder = NULL; + } + ddog_prof_Profile_drop(&state->slot_one_profile); ddog_prof_Profile_drop(&state->slot_two_profile); @@ -379,6 +412,10 @@ static VALUE _native_serialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instan // Need to do this while still holding on to the Global VM Lock; see comments on method for why serializer_set_start_timestamp_for_next_profile(state, finish_timestamp); + // Flush any pending data in the heap recorder prior to doing the iteration during serialization. 
+ // This needs to happen while holding on to the GVL + heap_recorder_flush(state->heap_recorder); + // We'll release the Global VM Lock while we're calling serialize, so that the Ruby VM can continue to work while this // is pending struct call_serialize_without_gvl_arguments args = {.state = state, .finish_timestamp = finish_timestamp, .serialize_ran = false}; @@ -440,6 +477,14 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, metric_values[position_for[WALL_TIME_VALUE_ID]] = values.wall_time_ns; metric_values[position_for[ALLOC_SAMPLES_VALUE_ID]] = values.alloc_samples; + if (values.alloc_samples != 0) { + // If we got an allocation sample end the heap allocation recording to commit the heap sample. + // FIXME: Heap sampling currently has to be done in 2 parts because the construction of locations is happening + // very late in the allocation-sampling path (which is shared with the cpu sampling path). This can + // be fixed with some refactoring but for now this leads to a less impactful change. + end_heap_allocation_recording(state->heap_recorder, locations); + } + ddog_prof_Profile_Result result = ddog_prof_Profile_add( active_slot.profile, (ddog_prof_Sample) { @@ -457,6 +502,15 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, } } +void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight) { + struct stack_recorder_state *state; + TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); + // FIXME: Heap sampling currently has to be done in 2 parts because the construction of locations is happening + // very late in the allocation-sampling path (which is shared with the cpu sampling path). This can + // be fixed with some refactoring but for now this leads to a less impactful change. + start_heap_allocation_recording(state->heap_recorder, new_object, sample_weight); +} + void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); @@ -472,10 +526,73 @@ void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_ } } +#define MAX_LEN_HEAP_ITERATION_ERROR_MSG 256 + +// Heap recorder iteration context allows us access to stack recorder state and profile being serialized +// during iteration of heap recorder live objects. 
+typedef struct heap_recorder_iteration_context { + struct stack_recorder_state *state; + ddog_prof_Profile *profile; + bool error; + char error_msg[MAX_LEN_HEAP_ITERATION_ERROR_MSG]; +} heap_recorder_iteration_context; + +static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteration_data iteration_data, void *extra_arg) { + heap_recorder_iteration_context *context = (heap_recorder_iteration_context*) extra_arg; + + int64_t metric_values[ALL_VALUE_TYPES_COUNT] = {0}; + uint8_t *position_for = context->state->position_for; + + metric_values[position_for[HEAP_SAMPLES_VALUE_ID]] = iteration_data.object_data.weight; + + ddog_prof_Profile_Result result = ddog_prof_Profile_add( + context->profile, + (ddog_prof_Sample) { + .locations = iteration_data.locations, + .values = (ddog_Slice_I64) {.ptr = metric_values, .len = context->state->enabled_values_count}, + .labels = {0}, + }, + 0 + ); + + if (result.tag == DDOG_PROF_PROFILE_RESULT_ERR) { + read_ddogerr_string_and_drop(&result.err, context->error_msg, MAX_LEN_HEAP_ITERATION_ERROR_MSG); + context->error = true; + // By returning false we cancel the iteration + return false; + } + + // Keep on iterating to next item! + return true; +} + +static void build_heap_profile_without_gvl(struct stack_recorder_state *state, ddog_prof_Profile *profile) { + heap_recorder_iteration_context iteration_context = { + .state = state, + .profile = profile, + .error = false, + .error_msg = {0}, + }; + heap_recorder_for_each_live_object(state->heap_recorder, add_heap_sample_to_active_profile_without_gvl, (void*) &iteration_context, false); + if (iteration_context.error) { + // We wait until we're out of the iteration to grab the gvl and raise. This is important because during + // iteration we first grab the records_mutex and raising requires grabbing the GVL. When sampling, we are + // in the opposite situation: we have the GVL and may need to grab the records_mutex for mutation. This + // different ordering can lead to deadlocks. By delaying the raise here until after we no longer hold + // records_mutex, we prevent this different-lock-acquisition-order situation. 
+ grab_gvl_and_raise(rb_eRuntimeError, "Failure during heap profile building: %s", iteration_context.error_msg); + } +} + static void *call_serialize_without_gvl(void *call_args) { struct call_serialize_without_gvl_arguments *args = (struct call_serialize_without_gvl_arguments *) call_args; args->profile = serializer_flip_active_and_inactive_slots(args->state); + + // Now that we have the inactive profile with all but heap samples, let's fill it with heap data + // without needing to race with the active sampler + build_heap_profile_without_gvl(args->state, args->profile); + // Note: The profile gets reset by the serialize call args->result = ddog_prof_Profile_serialize(args->profile, &args->finish_timestamp, NULL /* duration_nanos is optional */, NULL /* start_time is optional */); args->serialize_ran = true; @@ -597,6 +714,8 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_ reset_profile(&state->slot_one_profile, /* start_time: */ NULL); reset_profile(&state->slot_two_profile, /* start_time: */ NULL); + heap_recorder_after_fork(state->heap_recorder); + return Qtrue; } @@ -614,6 +733,12 @@ static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_ return Qtrue; } +static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight) { + ENFORCE_TYPE(weight, T_FIXNUM); + track_object(recorder_instance, new_obj, NUM2UINT(weight)); + return Qtrue; +} + static void reset_profile(ddog_prof_Profile *profile, ddog_Timespec *start_time /* Can be null */) { ddog_prof_Profile_Result reset_result = ddog_prof_Profile_reset(profile, start_time); if (reset_result.tag == DDOG_PROF_PROFILE_RESULT_ERR) { diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.h b/ext/ddtrace_profiling_native_extension/stack_recorder.h index 2e871069019..9190cc89a25 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.h +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.h @@ -1,6 +1,7 @@ #pragma once #include <datadog/profiling.h> +#include <ruby.h> typedef struct { int64_t cpu_time_ns; @@ -21,4 +22,5 @@ typedef struct sample_labels { void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels); void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint); +void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight); VALUE enforce_recorder_instance(VALUE object); diff --git a/lib/datadog/core/configuration/settings.rb b/lib/datadog/core/configuration/settings.rb index 558e45cabef..43f6f259a69 100644 --- a/lib/datadog/core/configuration/settings.rb +++ b/lib/datadog/core/configuration/settings.rb @@ -314,32 +314,58 @@ def initialize(*_) # Can be used to enable/disable the Datadog::Profiling.allocation_count feature. # - # This feature is safe and enabled by default on Ruby 2.x, but has a few caveats on Ruby 3.x. + # This feature is now controlled via {:experimental_allocation_enabled} + option :allocation_counting_enabled do |o| + o.after_set do + Datadog.logger.warn( + 'The profiling.advanced.allocation_counting_enabled setting has been deprecated for removal and no ' \ + 'longer does anything. Please remove it from your Datadog.configure block. ' \ + 'Allocation counting is now controlled by the `experimental_allocation_enabled` setting instead.' + ) + end + end + + # Can be used to enable/disable collection of allocation profiles.
# - # Caveat 1 (severe): - # On Ruby versions 3.0 (all), 3.1.0 to 3.1.3, and 3.2.0 to 3.2.2 this is disabled by default because it - # can trigger a VM bug that causes a segmentation fault during garbage collection of Ractors - # (https://bugs.ruby-lang.org/issues/18464). We don't recommend using this feature on such Rubies. - # This bug is fixed on Ruby versions 3.1.4, 3.2.3 and 3.3.0. + # This feature is alpha and disabled by default. # - # Caveat 2 (annoyance): - # On all known versions of Ruby 3.x, due to https://bugs.ruby-lang.org/issues/19112, when a ractor gets - # garbage collected, Ruby will disable all active tracepoints, which this feature internally relies on. - # Thus this feature is only usable if you're not using Ractors. + # @warn This feature is not supported/safe in all Rubies. Details in {Datadog::Profiling::Component}, but + # in summary it should be supported on Ruby 2.x, 3.1.4+, 3.2.3+ and 3.3.0+. Enabling it on + # unsupported Rubies may result in unexpected behaviour, including crashes. # - # Caveat 3 (severe): - # Ruby 3.2.0 to 3.2.2 have a bug in the newobj tracepoint (https://bugs.ruby-lang.org/issues/19482, - # https://github.com/ruby/ruby/pull/7464) so that's an extra reason why it's not safe on those Rubies. - # This bug is fixed on Ruby versions 3.2.3 and 3.3.0. + # @note Allocation profiles are not yet GA in the Datadog UI; get in touch if you want to help us test them. # - # @default `true` on Ruby 2.x and 3.1.4+, 3.2.3+ and 3.3.0+; `false` for Ruby 3.0 and unpatched Rubies. - option :allocation_counting_enabled do |o| - o.default do - RUBY_VERSION.start_with?('2.') || - (RUBY_VERSION.start_with?('3.1.') && RUBY_VERSION >= '3.1.4') || - (RUBY_VERSION.start_with?('3.2.') && RUBY_VERSION >= '3.2.3') || - RUBY_VERSION >= '3.3.' - end + # @default `DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED` environment variable as a boolean, otherwise `false` + option :experimental_allocation_enabled do |o| + o.type :bool + o.env 'DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED' + o.default false + end + + # Can be used to enable/disable the collection of heap profiles. + # + # This feature is alpha and disabled by default. + # + # @warn To enable heap profiling you are required to also enable allocation profiling. + # @note Heap profiles are not yet GA in the Datadog UI; get in touch if you want to help us test them. + # + # @default `DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED` environment variable as a boolean, otherwise `false` + option :experimental_heap_enabled do |o| + o.type :bool + o.env 'DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED' + o.default false + end + + # Can be used to configure the allocation sampling rate: a sample will be collected every x allocations. + # + # The lower the value, the more accurate the allocation and heap tracking, but the bigger the overhead. In + # particular, a value of 1 will sample ALL allocations. + # + # @default `DD_PROFILING_EXPERIMENTAL_ALLOCATION_SAMPLE_RATE` environment variable, otherwise `50`. + option :experimental_allocation_sample_rate do |o| + o.type :int + o.env 'DD_PROFILING_EXPERIMENTAL_ALLOCATION_SAMPLE_RATE' + o.default 50 end # Can be used to disable checking which version of `libmysqlclient` is being used by the `mysql2` gem.
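Putting the three new settings together, opting in would look something like the sketch below. This is a hypothetical configuration: the option and environment variable names come from the diff above, while `c.profiling.enabled` is the pre-existing switch that turns the profiler itself on and is assumed here.

Datadog.configure do |c|
  c.profiling.enabled = true
  # Opt into allocation profiles (equivalent to DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED=true)...
  c.profiling.advanced.experimental_allocation_enabled = true
  # ...which is a prerequisite for heap profiles (DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED=true);
  # enabling heap profiling on its own raises an ArgumentError during component setup.
  c.profiling.advanced.experimental_heap_enabled = true
  # Optionally trade overhead for accuracy: sample every 10th allocation instead of the default 50th.
  c.profiling.advanced.experimental_allocation_sample_rate = 10
end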
diff --git a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb index d1a06a13964..3507a53dd56 100644 --- a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +++ b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb @@ -15,15 +15,15 @@ class CpuAndWallTimeWorker def initialize( gc_profiling_enabled:, - allocation_counting_enabled:, no_signals_workaround_enabled:, thread_context_collector:, dynamic_sampling_rate_overhead_target_percentage:, - idle_sampling_helper: IdleSamplingHelper.new, + allocation_sample_every:, + allocation_profiling_enabled:, # **NOTE**: This should only be used for testing; disabling the dynamic sampling rate will increase the # profiler overhead! dynamic_sampling_rate_enabled: true, - allocation_sample_every: 0 # Currently only for testing; Setting this to > 0 can add a lot of overhead! + idle_sampling_helper: IdleSamplingHelper.new ) unless dynamic_sampling_rate_enabled Datadog.logger.warn( @@ -31,23 +31,16 @@ def initialize( ) end - if allocation_counting_enabled && allocation_sample_every > 0 - Datadog.logger.warn( - "Enabled experimental allocation profiling: allocation_sample_every=#{allocation_sample_every}. This is " \ - 'experimental, not recommended, and will increase overhead!' - ) - end - self.class._native_initialize( self, thread_context_collector, gc_profiling_enabled, idle_sampling_helper, - allocation_counting_enabled, no_signals_workaround_enabled, dynamic_sampling_rate_enabled, dynamic_sampling_rate_overhead_target_percentage, allocation_sample_every, + allocation_profiling_enabled, ) @worker_thread = nil @failure_exception = nil diff --git a/lib/datadog/profiling/component.rb b/lib/datadog/profiling/component.rb index 5c50d602c94..228fd01f7a9 100644 --- a/lib/datadog/profiling/component.rb +++ b/lib/datadog/profiling/component.rb @@ -7,7 +7,7 @@ module Component # Passing in a `nil` tracer is supported and will disable the following profiling features: # * Code Hotspots panel in the trace viewer, as well as scoping a profile down to a span # * Endpoint aggregation in the profiler UX, including normalization (resource per endpoint call) - def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) # rubocop:disable Metrics/MethodLength + def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) require_relative '../profiling/diagnostics/environment_logger' Profiling::Diagnostics::EnvironmentLogger.collect_and_log! 
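Note that the constructor change above is breaking for direct callers: allocation_counting_enabled is gone, while allocation_sample_every and allocation_profiling_enabled are now required keyword arguments. A hypothetical direct construction (in practice only Datadog::Profiling::Component builds this, as the next hunk shows):

worker = Datadog::Profiling::Collectors::CpuAndWallTimeWorker.new(
  gc_profiling_enabled: true,
  no_signals_workaround_enabled: false,
  thread_context_collector: thread_context_collector, # a Collectors::ThreadContext built beforehand
  dynamic_sampling_rate_overhead_target_percentage: 2.0,
  allocation_sample_every: 50,
  allocation_profiling_enabled: true,
)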
@@ -41,32 +41,28 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) no_signals_workaround_enabled = no_signals_workaround_enabled?(settings) timeline_enabled = settings.profiling.advanced.experimental_timeline_enabled + allocation_sample_every = get_allocation_sample_every(settings) + allocation_profiling_enabled = enable_allocation_profiling?(settings, allocation_sample_every) + heap_profiling_enabled = enable_heap_profiling?(settings, allocation_profiling_enabled) + overhead_target_percentage = valid_overhead_target(settings.profiling.advanced.overhead_target_percentage) upload_period_seconds = [60, settings.profiling.advanced.upload_period_seconds].max - recorder = Datadog::Profiling::StackRecorder.new( - cpu_time_enabled: RUBY_PLATFORM.include?('linux'), # Only supported on Linux currently - alloc_samples_enabled: false, # Always disabled for now -- work in progress - ) - thread_context_collector = Datadog::Profiling::Collectors::ThreadContext.new( - recorder: recorder, - max_frames: settings.profiling.advanced.max_frames, - tracer: optional_tracer, - endpoint_collection_enabled: settings.profiling.advanced.endpoint.collection.enabled, - timeline_enabled: timeline_enabled, - ) + recorder = build_recorder(allocation_profiling_enabled, heap_profiling_enabled) + thread_context_collector = build_thread_context_collector(settings, recorder, optional_tracer, timeline_enabled) worker = Datadog::Profiling::Collectors::CpuAndWallTimeWorker.new( gc_profiling_enabled: enable_gc_profiling?(settings), - allocation_counting_enabled: settings.profiling.advanced.allocation_counting_enabled, no_signals_workaround_enabled: no_signals_workaround_enabled, thread_context_collector: thread_context_collector, dynamic_sampling_rate_overhead_target_percentage: overhead_target_percentage, - allocation_sample_every: 0, + allocation_sample_every: allocation_sample_every, + allocation_profiling_enabled: allocation_profiling_enabled, ) internal_metadata = { no_signals_workaround_enabled: no_signals_workaround_enabled, timeline_enabled: timeline_enabled, + allocation_sample_every: allocation_sample_every, }.freeze exporter = build_profiler_exporter(settings, recorder, internal_metadata: internal_metadata) @@ -76,6 +72,24 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) Profiling::Profiler.new(worker: worker, scheduler: scheduler) end + private_class_method def self.build_recorder(allocation_profiling_enabled, heap_profiling_enabled) + Datadog::Profiling::StackRecorder.new( + cpu_time_enabled: RUBY_PLATFORM.include?('linux'), # Only supported on Linux currently + alloc_samples_enabled: allocation_profiling_enabled, + heap_samples_enabled: heap_profiling_enabled, + ) + end + + private_class_method def self.build_thread_context_collector(settings, recorder, optional_tracer, timeline_enabled) + Datadog::Profiling::Collectors::ThreadContext.new( + recorder: recorder, + max_frames: settings.profiling.advanced.max_frames, + tracer: optional_tracer, + endpoint_collection_enabled: settings.profiling.advanced.endpoint.collection.enabled, + timeline_enabled: timeline_enabled, + ) + end + private_class_method def self.build_profiler_exporter(settings, recorder, internal_metadata:) code_provenance_collector = (Profiling::Collectors::CodeProvenance.new if settings.profiling.advanced.code_provenance_enabled) @@ -113,6 +127,85 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) end end + private_class_method def 
self.get_allocation_sample_every(settings) + allocation_sample_rate = settings.profiling.advanced.experimental_allocation_sample_rate + + if allocation_sample_rate <= 0 + raise ArgumentError, "Allocation sample rate must be a positive integer. Was #{allocation_sample_rate}" + end + + allocation_sample_rate + end + + private_class_method def self.enable_allocation_profiling?(settings, allocation_sample_every) + unless settings.profiling.advanced.experimental_allocation_enabled + # Allocation profiling disabled, short-circuit out + return false + end + + # Allocation sampling is safe and supported on Ruby 2.x, but has a few caveats on Ruby 3.x. + + # SEVERE - All configurations + # Ruby 3.2.0 to 3.2.2 have a bug in the newobj tracepoint (https://bugs.ruby-lang.org/issues/19482, + # https://github.com/ruby/ruby/pull/7464) that makes this crash in any configuration. This bug is + # fixed on Ruby versions 3.2.3 and 3.3.0. + if RUBY_VERSION.start_with?('3.2.') && RUBY_VERSION < '3.2.3' + Datadog.logger.warn( + 'Allocation profiling is not supported in Ruby versions 3.2.0, 3.2.1 and 3.2.2 and will be forcibly '\ 'disabled. This is due to a VM bug that can lead to crashes (https://bugs.ruby-lang.org/issues/19482). '\ 'Other Ruby versions do not suffer from this issue.' + ) + return false + end + + # SEVERE - Only with Ractors + # On Ruby versions 3.0 (all), 3.1.0 to 3.1.3, and 3.2.0 to 3.2.2 allocation profiling can trigger a VM bug + # that causes a segmentation fault during garbage collection of Ractors + # (https://bugs.ruby-lang.org/issues/18464). We don't recommend using this feature on such Rubies. + # This bug is fixed on Ruby versions 3.1.4, 3.2.3 and 3.3.0. + if RUBY_VERSION.start_with?('3.0.') || + (RUBY_VERSION.start_with?('3.1.') && RUBY_VERSION < '3.1.4') || + (RUBY_VERSION.start_with?('3.2.') && RUBY_VERSION < '3.2.3') + Datadog.logger.warn( + "Current Ruby version (#{RUBY_VERSION}) has a VM bug where enabling allocation profiling while using "\ 'Ractors may cause unexpected issues, including crashes (https://bugs.ruby-lang.org/issues/18464). '\ 'This does not happen if Ractors are not used.' + ) + # ANNOYANCE - Only with Ractors + # On all known versions of Ruby 3.x, due to https://bugs.ruby-lang.org/issues/19112, when a ractor gets + # garbage collected, Ruby will disable all active tracepoints, which this feature internally relies on. + elsif RUBY_VERSION.start_with?('3.') + Datadog.logger.warn( + 'In all known versions of Ruby 3.x, using Ractors may result in allocation profiling unexpectedly ' \ 'stopping (https://bugs.ruby-lang.org/issues/19112). Note that this stop has no impact on your ' \ 'application stability or performance. This does not happen if Ractors are not used.' + ) + end + + Datadog.logger.warn( + "Enabled experimental allocation profiling: allocation_sample_rate=#{allocation_sample_every}. This is " \ 'experimental, not recommended, and will increase overhead!' + ) + + true + end + + private_class_method def self.enable_heap_profiling?(settings, allocation_profiling_enabled) + heap_profiling_enabled = settings.profiling.advanced.experimental_heap_enabled + + if heap_profiling_enabled && !allocation_profiling_enabled + raise ArgumentError, 'Heap profiling requires allocation profiling to be enabled' + end + + if heap_profiling_enabled + Datadog.logger.warn( + 'Enabled experimental heap profiling. This is experimental, not recommended, and will increase overhead!'
+ ) + end + + heap_profiling_enabled + end + private_class_method def self.no_signals_workaround_enabled?(settings) # rubocop:disable Metrics/MethodLength setting_value = settings.profiling.advanced.no_signals_workaround_enabled legacy_ruby_that_should_use_workaround = RUBY_VERSION.start_with?('2.3.', '2.4.', '2.5.') diff --git a/lib/datadog/profiling/stack_recorder.rb b/lib/datadog/profiling/stack_recorder.rb index ccbe710dafd..3153a4982c5 100644 --- a/lib/datadog/profiling/stack_recorder.rb +++ b/lib/datadog/profiling/stack_recorder.rb @@ -4,7 +4,7 @@ module Profiling # Note that `record_sample` is only accessible from native code. # Methods prefixed with _native_ are implemented in `stack_recorder.c` class StackRecorder - def initialize(cpu_time_enabled:, alloc_samples_enabled:) + def initialize(cpu_time_enabled:, alloc_samples_enabled:, heap_samples_enabled:) # This mutex works in addition to the fancy C-level mutexes we have in the native side (see the docs there). # It prevents multiple Ruby threads calling serialize at the same time -- something like # `10.times { Thread.new { stack_recorder.serialize } }`. @@ -13,7 +13,7 @@ def initialize(cpu_time_enabled:, alloc_samples_enabled:) # accidentally happening. @no_concurrent_synchronize_mutex = Mutex.new - self.class._native_initialize(self, cpu_time_enabled, alloc_samples_enabled) + self.class._native_initialize(self, cpu_time_enabled, alloc_samples_enabled, heap_samples_enabled) end def serialize diff --git a/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs b/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs index f819fcdbdff..cc887d59ce8 100644 --- a/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs +++ b/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs @@ -9,13 +9,13 @@ module Datadog def initialize: ( gc_profiling_enabled: bool, - allocation_counting_enabled: bool, no_signals_workaround_enabled: bool, thread_context_collector: Datadog::Profiling::Collectors::ThreadContext, dynamic_sampling_rate_overhead_target_percentage: Float, ?idle_sampling_helper: Datadog::Profiling::Collectors::IdleSamplingHelper, ?dynamic_sampling_rate_enabled: bool, - ?allocation_sample_every: Integer, + allocation_sample_every: Integer, + allocation_profiling_enabled: bool, ) -> void def self._native_initialize: ( @@ -23,11 +23,11 @@ module Datadog ThreadContext thread_context_collector, bool gc_profiling_enabled, IdleSamplingHelper idle_sampling_helper, - bool allocation_counting_enabled, bool no_signals_workaround_enabled, bool dynamic_sampling_rate_enabled, Float dynamic_sampling_rate_overhead_target_percentage, ::Integer allocation_sample_every, + bool allocation_profiling_enabled, ) -> true def start: (?on_failure_proc: ::Proc?) -> bool? diff --git a/sig/datadog/profiling/component.rbs b/sig/datadog/profiling/component.rbs index 675143e397c..2b32dbe88fe 100644 --- a/sig/datadog/profiling/component.rbs +++ b/sig/datadog/profiling/component.rbs @@ -7,6 +7,18 @@ module Datadog optional_tracer: Datadog::Tracing::Tracer?, ) -> Datadog::Profiling::Profiler? + def self.build_recorder: ( + bool allocation_profiling_enabled, + bool heap_profiling_enabled, + ) -> Datadog::Profiling::StackRecorder + + def self.build_thread_context_collector: ( + untyped settings, + Datadog::Profiling::StackRecorder recorder, + Datadog::Tracing::Tracer? 
optional_tracer, + bool timeline_enabled, + ) -> Datadog::Profiling::Collectors::ThreadContext + def self.build_profiler_exporter: ( untyped settings, Datadog::Profiling::StackRecorder recorder, @@ -19,6 +31,9 @@ module Datadog ) -> untyped def self.enable_gc_profiling?: (untyped settings) -> bool + def self.get_allocation_sample_every: (untyped settings) -> ::Integer + def self.enable_allocation_profiling?: (untyped settings, ::Integer allocation_sample_every) -> bool + def self.enable_heap_profiling?: (untyped settings, bool allocation_profiling_enabled) -> bool def self.no_signals_workaround_enabled?: (untyped settings) -> bool diff --git a/sig/datadog/profiling/stack_recorder.rbs b/sig/datadog/profiling/stack_recorder.rbs index a3ff67b7e38..0f61f9c8ab9 100644 --- a/sig/datadog/profiling/stack_recorder.rbs +++ b/sig/datadog/profiling/stack_recorder.rbs @@ -3,12 +3,13 @@ module Datadog class StackRecorder @no_concurrent_synchronize_mutex: ::Thread::Mutex - def initialize: (cpu_time_enabled: bool, alloc_samples_enabled: bool) -> void + def initialize: (cpu_time_enabled: bool, alloc_samples_enabled: bool, heap_samples_enabled: bool) -> void def self._native_initialize: ( Datadog::Profiling::StackRecorder recorder_instance, bool cpu_time_enabled, bool alloc_samples_enabled, + bool heap_samples_enabled, ) -> true def serialize: () -> untyped diff --git a/spec/datadog/core/configuration/settings_spec.rb b/spec/datadog/core/configuration/settings_spec.rb index a1e180c83c0..743a5294241 100644 --- a/spec/datadog/core/configuration/settings_spec.rb +++ b/spec/datadog/core/configuration/settings_spec.rb @@ -497,39 +497,116 @@ end end - describe '#allocation_counting_enabled' do - subject(:allocation_counting_enabled) { settings.profiling.advanced.allocation_counting_enabled } - - before { stub_const('RUBY_VERSION', testing_version) } + describe '#allocation_counting_enabled=' do + it 'logs a warning informing customers this no longer does anything' do + expect(Datadog.logger).to receive(:warn).with(/no longer does anything/) - context 'on Ruby 2.x' do - let(:testing_version) { '2.3.0 ' } - it { is_expected.to be true } + settings.profiling.advanced.allocation_counting_enabled = false end + end + + describe '#experimental_allocation_enabled' do + subject(:experimental_allocation_enabled) { settings.profiling.advanced.experimental_allocation_enabled } + + context 'when DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED' do + around do |example| + ClimateControl.modify('DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED' => environment) do + example.run + end + end + + context 'is not defined' do + let(:environment) { nil } - ['3.0.0', '3.1.0', '3.1.3', '3.2.0', '3.2.2'].each do |broken_ruby| - context "on a Ruby 3 version affected by https://bugs.ruby-lang.org/issues/18464 (#{broken_ruby})" do - let(:testing_version) { broken_ruby } it { is_expected.to be false } end + + [true, false].each do |value| + context "is defined as #{value}" do + let(:environment) { value.to_s } + + it { is_expected.to be value } + end + end end + end - ['3.1.4', '3.2.3', '3.3.0'].each do |fixed_ruby| - context "on a Ruby 3 version where https://bugs.ruby-lang.org/issues/18464 is fixed (#{fixed_ruby})" do - let(:testing_version) { fixed_ruby } - it { is_expected.to be true } + describe '#experimental_allocation_enabled=' do + it 'updates the #experimental_allocation_enabled setting' do + expect { settings.profiling.advanced.experimental_allocation_enabled = true } + .to change { 
settings.profiling.advanced.experimental_allocation_enabled } + .from(false) + .to(true) + end + end + + describe '#experimental_heap_enabled' do + subject(:experimental_heap_enabled) { settings.profiling.advanced.experimental_heap_enabled } + + context 'when DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED' do + around do |example| + ClimateControl.modify('DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED' => environment) do + example.run + end + end + + context 'is not defined' do + let(:environment) { nil } + + it { is_expected.to be false } + end + + [true, false].each do |value| + context "is defined as #{value}" do + let(:environment) { value.to_s } + + it { is_expected.to be value } + end end end end - describe '#allocation_counting_enabled=' do - it 'updates the #allocation_counting_enabled setting' do - settings.profiling.advanced.allocation_counting_enabled = true + describe '#experimental_heap_enabled=' do + it 'updates the #experimental_heap_enabled setting' do + expect { settings.profiling.advanced.experimental_heap_enabled = true } + .to change { settings.profiling.advanced.experimental_heap_enabled } + .from(false) + .to(true) + end + end - expect { settings.profiling.advanced.allocation_counting_enabled = false } - .to change { settings.profiling.advanced.allocation_counting_enabled } - .from(true) - .to(false) + describe '#experimental_allocation_sample_rate' do + subject(:experimental_allocation_sample_rate) { settings.profiling.advanced.experimental_allocation_sample_rate } + + context 'when DD_PROFILING_EXPERIMENTAL_ALLOCATION_SAMPLE_RATE' do + around do |example| + ClimateControl.modify('DD_PROFILING_EXPERIMENTAL_ALLOCATION_SAMPLE_RATE' => environment) do + example.run + end + end + + context 'is not defined' do + let(:environment) { nil } + + it { is_expected.to be 50 } + end + + [100, 30.5].each do |value| + context "is defined as #{value}" do + let(:environment) { value.to_s } + + it { is_expected.to be value.to_i } + end + end + end + end + + describe '#experimental_allocation_sample_rate=' do + it 'updates the #experimental_allocation_sample_rate setting' do + expect { settings.profiling.advanced.experimental_allocation_sample_rate = 100 } + .to change { settings.profiling.advanced.experimental_allocation_sample_rate } + .from(50) + .to(100) end end diff --git a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb index 43c63adfc24..21888900e7c 100644 --- a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb +++ b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb @@ -7,20 +7,24 @@ # drops support for these Rubies globally let(:described_class) { Datadog::Profiling::Collectors::CpuAndWallTimeWorker } - let(:recorder) { build_stack_recorder } let(:endpoint_collection_enabled) { true } let(:gc_profiling_enabled) { true } - let(:allocation_counting_enabled) { true } + let(:allocation_sample_every) { 50 } + let(:allocation_profiling_enabled) { false } + let(:heap_profiling_enabled) { false } + let(:recorder) { build_stack_recorder(heap_samples_enabled: heap_profiling_enabled) } let(:no_signals_workaround_enabled) { false } let(:timeline_enabled) { false } let(:options) { {} } let(:worker_settings) do { gc_profiling_enabled: gc_profiling_enabled, - allocation_counting_enabled: allocation_counting_enabled, no_signals_workaround_enabled: no_signals_workaround_enabled, thread_context_collector: build_thread_context_collector(recorder), 
       dynamic_sampling_rate_overhead_target_percentage: 2.0,
+      allocation_sample_every: allocation_sample_every,
+      allocation_profiling_enabled: allocation_profiling_enabled,
+      **options
     }
   end
@@ -57,6 +61,8 @@
   end

   describe '#start' do
+    let(:expected_worker_initialization_error) { nil }
+
     subject(:start) do
       cpu_and_wall_time_worker.start
       wait_until_running
@@ -442,25 +448,22 @@
       end
     end

-    context 'when allocation sampling is enabled' do
-      let(:options) { { allocation_sample_every: 1 } }
+    context 'when allocation profiling is enabled' do
+      let(:allocation_profiling_enabled) { true }
+      let(:test_num_allocated_object) { 123 }
+      # Sample all allocations to get easy-to-reason-about numbers that are also not flaky in CI.
+      let(:allocation_sample_every) { 1 }

       before do
         allow(Datadog.logger).to receive(:warn)
       end

-      it 'logs a warning message mentioning this is experimental' do
-        expect(Datadog.logger).to receive(:warn).with(/Enabled experimental allocation profiling/)
-
-        start
-      end
-
       it 'records allocated objects' do
         stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo))

         start

-        123.times { CpuAndWallTimeWorkerSpec::TestStruct.new }
+        test_num_allocated_object.times { CpuAndWallTimeWorkerSpec::TestStruct.new }
         allocation_line = __LINE__ - 1

         cpu_and_wall_time_worker.stop
@@ -469,7 +472,7 @@
           samples_for_thread(samples_from_pprof(recorder.serialize!), Thread.current)
             .find { |s| s.labels[:'allocation class'] == 'CpuAndWallTimeWorkerSpec::TestStruct' }

-        expect(allocation_sample.values).to include(:'alloc-samples' => 123)
+        expect(allocation_sample.values).to include(:'alloc-samples' => test_num_allocated_object)
         expect(allocation_sample.locations.first.lineno).to eq allocation_line
       end

@@ -540,7 +543,7 @@
     end

     context 'when allocation sampling is disabled' do
-      let(:options) { { allocation_sample_every: 0 } }
+      let(:allocation_profiling_enabled) { false }

       it 'does not record allocations' do
         stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo))
@@ -555,6 +558,52 @@
       end
     end

+    context 'when heap profiling is enabled' do
+      let(:allocation_profiling_enabled) { true }
+      let(:heap_profiling_enabled) { true }
+      let(:test_num_allocated_object) { 123 }
+      # Sample all allocations to get easy-to-reason-about numbers that are also not flaky in CI.
+      let(:allocation_sample_every) { 1 }
+
+      before do
+        allow(Datadog.logger).to receive(:warn)
+      end
+
+      it 'records live heap objects' do
+        pending "heap profiling isn't actually implemented just yet"
+
+        stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo))
+
+        start
+
+        test_num_allocated_object.times { CpuAndWallTimeWorkerSpec::TestStruct.new }
+        allocation_line = __LINE__ - 1
+
+        cpu_and_wall_time_worker.stop
+
+        relevant_sample = samples_for_thread(samples_from_pprof(recorder.serialize!), Thread.current)
+          .find { |s| s.locations.first.lineno == allocation_line && s.locations.first.path == __FILE__ }
+
+        expect(relevant_sample.values[:'heap-live-samples']).to eq test_num_allocated_object
+      end
+    end
+
+    context 'when heap profiling is disabled' do
+      let(:heap_profiling_enabled) { false }
+
+      it 'does not record heap samples' do
+        stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo))
+
+        start
+
+        123.times { CpuAndWallTimeWorkerSpec::TestStruct.new }
+
+        cpu_and_wall_time_worker.stop
+
+        expect(samples_from_pprof(recorder.serialize!).select { |s| s.values.key?(:'heap-live-samples') }).to be_empty
+      end
+    end
+
     context 'Process::Waiter crash regression tests' do
       # On Ruby 2.3 to 2.6, there's a crash when accessing instance variables of the `process_waiter_thread`,
       # see https://bugs.ruby-lang.org/issues/17807 .
@@ -633,7 +682,7 @@
       # so this must be disabled when interacting with Ractors.
       let(:gc_profiling_enabled) { false }
       # ...same thing for the tracepoint for allocation counting/profiling :(
-      let(:allocation_counting_enabled) { false }
+      let(:allocation_profiling_enabled) { false }

       describe 'handle_sampling_signal' do
         include_examples 'does not trigger a sample',
@@ -815,51 +864,68 @@
       cpu_and_wall_time_worker.stop
     end

-    it 'returns the number of allocations between two calls of the method' do
-      # To get the exact expected number of allocations, we run this once before so that Ruby can create and cache all
-      # it needs to
-      new_object = proc { Object.new }
-      1.times(&new_object)
+    context 'when allocation profiling is enabled' do
+      let(:allocation_profiling_enabled) { true }

-      before_allocations = described_class._native_allocation_count
-      100.times(&new_object)
-      after_allocations = described_class._native_allocation_count
+      it 'always returns a >= 0 value' do
+        expect(described_class._native_allocation_count).to be >= 0
+      end

-      expect(after_allocations - before_allocations).to be 100
-    end
+      it 'returns the number of allocations between two calls of the method' do
+        # To get the exact expected number of allocations, we run this once before so that Ruby can create and cache all
+        # it needs to
+        new_object = proc { Object.new }
+        1.times(&new_object)

-    it 'returns different numbers of allocations for different threads' do
-      # To get the exact expected number of allocations, we run this once before so that Ruby can create and cache all
-      # it needs to
-      new_object = proc { Object.new }
-      1.times(&new_object)
+        before_allocations = described_class._native_allocation_count
+        100.times(&new_object)
+        after_allocations = described_class._native_allocation_count

-      t1_can_run = Queue.new
-      t1_has_run = Queue.new
-      before_t1 = nil
-      after_t1 = nil
+        expect(after_allocations - before_allocations).to be 100
+      end

-      background_t1 = Thread.new do
-        before_t1 = described_class._native_allocation_count
-        t1_can_run.pop
+      it 'returns different numbers of allocations for different threads' do
+        # To get the exact expected number of allocations, we run this once before so that Ruby can create and cache all
+        # it needs to
+        new_object = proc { Object.new }
+        1.times(&new_object)

-        100.times(&new_object)
-        after_t1 = described_class._native_allocation_count
-        t1_has_run << true
-      end
+        t1_can_run = Queue.new
+        t1_has_run = Queue.new
+        before_t1 = nil
+        after_t1 = nil
+
+        background_t1 = Thread.new do
+          before_t1 = described_class._native_allocation_count
+          t1_can_run.pop
+
+          100.times(&new_object)
+          after_t1 = described_class._native_allocation_count
+          t1_has_run << true
+        end
+
+        before_allocations = described_class._native_allocation_count
+        t1_can_run << true
+        t1_has_run.pop
+        after_allocations = described_class._native_allocation_count
+
+        background_t1.join

-      before_allocations = described_class._native_allocation_count
-      t1_can_run << true
-      t1_has_run.pop
-      after_allocations = described_class._native_allocation_count
+        # This test checks that even though we observed 100 allocations in a background thread t1, the counters for
+        # the current thread were not affected by this change

-      background_t1.join
+        expect(after_t1 - before_t1).to be 100
+        expect(after_allocations - before_allocations).to be < 10
+      end
+    end

-      # This test checks that even though we observed 100 allocations in a background thread t1, the counters for
-      # the current thread were not affected by this change
+    context 'when allocation profiling is disabled' do
+      let(:allocation_profiling_enabled) { false }

-      expect(after_t1 - before_t1).to be 100
-      expect(after_allocations - before_allocations).to be < 10
+      it 'always returns a nil value' do
+        100.times { Object.new }
+
+        expect(described_class._native_allocation_count).to be nil
+      end
     end
   end
 end
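The specs above pin down the _native_allocation_count contract: it returns nil unless allocation profiling is actively tracking, and otherwise a counter whose deltas are observed per thread. A small usage sketch follows; the class and method are the ones under test, while the surrounding script is illustrative:

    # Illustrative use of the contract tested above (requires a running profiler
    # with allocation profiling enabled to return non-nil values).
    worker_class = Datadog::Profiling::Collectors::CpuAndWallTimeWorker

    before = worker_class._native_allocation_count
    100.times { Object.new }
    after = worker_class._native_allocation_count

    if before.nil?
      puts 'allocation tracking inactive (profiler stopped or allocation profiling disabled)'
    else
      puts "#{after - before} allocations observed by this thread" # ~100 in this sketch
    end

diff --git a/spec/datadog/profiling/component_spec.rb b/spec/datadog/profiling/component_spec.rb
index 7d7a28d4931..77ac09732b4 100644
--- a/spec/datadog/profiling/component_spec.rb
+++ b/spec/datadog/profiling/component_spec.rb
@@ -80,11 +80,11 @@
       expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with(
         gc_profiling_enabled: anything,
-        allocation_counting_enabled: anything,
         no_signals_workaround_enabled: :no_signals_result,
         thread_context_collector: instance_of(Datadog::Profiling::Collectors::ThreadContext),
         dynamic_sampling_rate_overhead_target_percentage: :overhead_target_percentage_config,
-        allocation_sample_every: 0,
+        allocation_sample_every: kind_of(Integer),
+        allocation_profiling_enabled: false,
       )

       build_profiler_component
@@ -124,29 +124,153 @@
       end
     end

-    context 'when allocation_counting_enabled is enabled' do
+    context 'when allocation profiling is enabled' do
       before do
-        settings.profiling.advanced.allocation_counting_enabled = true
+        settings.profiling.advanced.experimental_allocation_enabled = true
+        stub_const('RUBY_VERSION', testing_version)
       end

-      it 'initializes a CpuAndWallTimeWorker collector with allocation_counting_enabled set to true' do
+      context 'on Ruby 2.x' do
+        let(:testing_version) { '2.3.0 ' }
+
+        it 'initializes CpuAndWallTimeWorker and StackRecorder with allocation sampling support and warns' do
+          expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including(
+            allocation_profiling_enabled: true,
+          )
+          expect(Datadog::Profiling::StackRecorder).to receive(:new)
+            .with(hash_including(alloc_samples_enabled: true))
+            .and_call_original
+
+          expect(Datadog.logger).to receive(:warn).with(/experimental allocation profiling/)
+          expect(Datadog.logger).to_not receive(:warn).with(/Ractor/)
+
+          build_profiler_component
+        end
+      end
+
+      ['3.2.0', '3.2.1', '3.2.2'].each do |broken_ruby|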
+        context "on a Ruby 3 version affected by https://bugs.ruby-lang.org/issues/19482 (#{broken_ruby})" do
+          let(:testing_version) { broken_ruby }
+
+          it 'initializes a CpuAndWallTimeWorker and StackRecorder with allocation sampling force-disabled and warns' do
+            expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including(
+              allocation_profiling_enabled: false,
+            )
+            expect(Datadog::Profiling::StackRecorder).to receive(:new)
+              .with(hash_including(alloc_samples_enabled: false))
+              .and_call_original
+
+            expect(Datadog.logger).to receive(:warn).with(/forcibly disabled/)
+            expect(Datadog.logger).to_not receive(:warn).with(/Ractor/)
+            expect(Datadog.logger).to_not receive(:warn).with(/experimental allocation profiling/)
+
+            build_profiler_component
+          end
+        end
+      end
+
+      ['3.0.0', '3.1.0', '3.1.3'].each do |broken_ractors_ruby|
+        context "on a Ruby 3 version affected by https://bugs.ruby-lang.org/issues/18464 (#{broken_ractors_ruby})" do
+          let(:testing_version) { broken_ractors_ruby }
+
+          it 'initializes CpuAndWallTimeWorker and StackRecorder with allocation sampling support and warns' do
+            expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including(
+              allocation_profiling_enabled: true,
+            )
+            expect(Datadog::Profiling::StackRecorder).to receive(:new)
+              .with(hash_including(alloc_samples_enabled: true))
+              .and_call_original
+
+            expect(Datadog.logger).to receive(:warn).with(/Ractors.+crashes/)
+            expect(Datadog.logger).to receive(:warn).with(/experimental allocation profiling/)
+
+            build_profiler_component
+          end
+        end
+      end
+
+      ['3.1.4', '3.2.3', '3.3.0'].each do |fixed_ruby|
+        context "on a Ruby 3 version where https://bugs.ruby-lang.org/issues/18464 is fixed (#{fixed_ruby})" do
+          let(:testing_version) { fixed_ruby }
+
+          it 'initializes CpuAndWallTimeWorker and StackRecorder with allocation sampling support and warns' do
+            expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including(
+              allocation_profiling_enabled: true,
+            )
+            expect(Datadog::Profiling::StackRecorder).to receive(:new)
+              .with(hash_including(alloc_samples_enabled: true))
+              .and_call_original
+
+            expect(Datadog.logger).to receive(:warn).with(/Ractors.+stopping/)
+            expect(Datadog.logger).to receive(:warn).with(/experimental allocation profiling/)
+
+            build_profiler_component
+          end
+        end
+      end
+    end
+
+    context 'when allocation profiling is disabled' do
+      before do
+        settings.profiling.advanced.experimental_allocation_enabled = false
+      end
+
+      it 'initializes CpuAndWallTimeWorker and StackRecorder without allocation sampling support' do
         expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including(
-          allocation_counting_enabled: true,
+          allocation_profiling_enabled: false,
         )
+        expect(Datadog::Profiling::StackRecorder).to receive(:new)
+          .with(hash_including(alloc_samples_enabled: false))
+          .and_call_original

         build_profiler_component
       end
     end

-    context 'when allocation_counting_enabled is disabled' do
+    context 'when heap profiling is enabled' do
       before do
-        settings.profiling.advanced.allocation_counting_enabled = false
+        settings.profiling.advanced.experimental_heap_enabled = true
+        # Universally supported Ruby version for allocation profiling; we don't want to test those
+        # edge cases here
+        stub_const('RUBY_VERSION', '2.7.2')
       end

-      it 'initializes a CpuAndWallTimeWorker collector with allocation_counting_enabled set to false' do
-        expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with hash_including(
-          allocation_counting_enabled: false,
-        )
+      context 'and allocation profiling disabled' do
+        before do
+          settings.profiling.advanced.experimental_allocation_enabled = false
+        end
+
+        it 'raises an ArgumentError during component initialization' do
+          expect { build_profiler_component }.to raise_error(ArgumentError, /requires allocation profiling/)
+        end
+      end
+
+      context 'and allocation profiling enabled and supported' do
+        before do
+          settings.profiling.advanced.experimental_allocation_enabled = true
+        end
+
+        it 'initializes StackRecorder with heap sampling support and warns' do
+          expect(Datadog::Profiling::StackRecorder).to receive(:new)
+            .with(hash_including(heap_samples_enabled: true))
+            .and_call_original
+
+          expect(Datadog.logger).to receive(:warn).with(/experimental allocation profiling/)
+          expect(Datadog.logger).to receive(:warn).with(/experimental heap profiling/)
+
+          build_profiler_component
+        end
+      end
+    end
+
+    context 'when heap profiling is disabled' do
+      before do
+        settings.profiling.advanced.experimental_heap_enabled = false
+      end
+
+      it 'initializes StackRecorder without heap sampling support' do
+        expect(Datadog::Profiling::StackRecorder).to receive(:new)
+          .with(hash_including(heap_samples_enabled: false))
+          .and_call_original

         build_profiler_component
       end
@@ -168,17 +292,20 @@
       build_profiler_component
     end

-    it 'sets up the Exporter internal_metadata with no_signals_workaround_enabled and timeline_enabled settings' do
+    it 'sets up the Exporter internal_metadata with relevant settings' do
       allow(Datadog::Profiling::Collectors::ThreadContext).to receive(:new)
       allow(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new)
       expect(described_class).to receive(:no_signals_workaround_enabled?).and_return(:no_signals_result)
       expect(settings.profiling.advanced).to receive(:experimental_timeline_enabled).and_return(:timeline_result)
+      expect(settings.profiling.advanced).to receive(:experimental_allocation_sample_rate)
+        .and_return(123)

       expect(Datadog::Profiling::Exporter).to receive(:new).with(
         hash_including(
           internal_metadata: {
             no_signals_workaround_enabled: :no_signals_result,
             timeline_enabled: :timeline_result,
+            allocation_sample_every: 123,
           }
         )
       )
@@ -186,13 +313,6 @@
       build_profiler_component
     end

-    it 'sets up the StackRecorder with alloc_samples_enabled: false' do
-      expect(Datadog::Profiling::StackRecorder)
-        .to receive(:new).with(hash_including(alloc_samples_enabled: false)).and_call_original
-
-      build_profiler_component
-    end
-
     context 'when on Linux' do
       before { stub_const('RUBY_PLATFORM', 'some-linux-based-platform') }
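The component specs above encode a Ruby-version decision table for allocation profiling. The following is a hedged summary sketch; the helper name is invented for illustration, and the real checks live in Datadog::Profiling::Component:

    require 'rubygems' # Gem::Version

    # Decision table as exercised by the specs above (sketch, not dd-trace-rb code):
    # * 3.2.0..3.2.2: allocation profiling forcibly disabled (https://bugs.ruby-lang.org/issues/19482)
    # * 3.0.0..3.1.3: enabled, but warns that using Ractors may crash the VM (https://bugs.ruby-lang.org/issues/18464)
    # * 3.1.4+/3.2.3+/3.3+: enabled, but warns that Ractors stop allocation tracking
    # * 2.x: enabled, with only the generic experimental warning
    def allocation_profiling_gate(ruby_version)
      version = Gem::Version.new(ruby_version.strip)
      if version >= Gem::Version.new('3.2.0') && version < Gem::Version.new('3.2.3')
        :force_disabled
      elsif version >= Gem::Version.new('3.0.0') && version < Gem::Version.new('3.1.4')
        :enabled_with_ractor_crash_warning
      elsif version >= Gem::Version.new('3.1.4')
        :enabled_with_ractor_stop_warning
      else
        :enabled
      end
    end

    allocation_profiling_gate('3.2.2') # => :force_disabled
    allocation_profiling_gate('2.7.2') # => :enabled

diff --git a/spec/datadog/profiling/spec_helper.rb b/spec/datadog/profiling/spec_helper.rb
index 6d15cf22170..e02c1af213b 100644
--- a/spec/datadog/profiling/spec_helper.rb
+++ b/spec/datadog/profiling/spec_helper.rb
@@ -72,8 +72,14 @@ def samples_for_thread(samples, thread)
     samples.select { |sample| object_id_from(sample.labels.fetch(:'thread id')) == thread.object_id }
   end

-  def build_stack_recorder
-    Datadog::Profiling::StackRecorder.new(cpu_time_enabled: true, alloc_samples_enabled: true)
+  # We disable heap_sample collection by default in tests since it requires some extra mocking/
+  # setup for it to properly work.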
+  def build_stack_recorder(heap_samples_enabled: false)
+    Datadog::Profiling::StackRecorder.new(
+      cpu_time_enabled: true,
+      alloc_samples_enabled: true,
+      heap_samples_enabled: heap_samples_enabled
+    )
   end
 end
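The stack_recorder_spec changes below drive heap sampling through testing-only native entry points. A minimal sketch of that two-step flow, mirroring the calls the spec itself makes (the values are illustrative, and per the spec's own pending notes the heap_recorder backing this is not implemented yet):

    # Two-step heap sampling flow, as exercised by the spec below (requires
    # dd-trace-rb's native test extensions).
    recorder = Datadog::Profiling::StackRecorder.new(
      cpu_time_enabled: true, alloc_samples_enabled: true, heap_samples_enabled: true
    )
    obj = Object.new
    sample_rate = 50 # each tracked object stands in for ~50 allocations

    # Step 1: register the allocated object and its sampling weight...
    Datadog::Profiling::StackRecorder::Testing._native_track_object(recorder, obj, sample_rate)

    # Step 2: ...then record the allocation stacktrace against the same recorder.
    metric_values = { 'cpu-time' => 0, 'cpu-samples' => 1, 'wall-time' => 0, 'alloc-samples' => sample_rate }
    labels = { 'state' => 'unknown' }.to_a
    Datadog::Profiling::Collectors::Stack::Testing
      ._native_sample(Thread.current, recorder, metric_values, labels, [], 400, false)

diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb
index 585d41f9d44..8652ceb5e05 100644
--- a/spec/datadog/profiling/stack_recorder_spec.rb
+++ b/spec/datadog/profiling/stack_recorder_spec.rb
@@ -7,9 +7,16 @@
   let(:numeric_labels) { [] }
   let(:cpu_time_enabled) { true }
   let(:alloc_samples_enabled) { true }
+  # Disabling these by default since they require some extra setup and produce separate samples.
+  # Enabling this is tested in a particular context below.
+  let(:heap_samples_enabled) { false }

   subject(:stack_recorder) do
-    described_class.new(cpu_time_enabled: cpu_time_enabled, alloc_samples_enabled: alloc_samples_enabled)
+    described_class.new(
+      cpu_time_enabled: cpu_time_enabled,
+      alloc_samples_enabled: alloc_samples_enabled,
+      heap_samples_enabled: heap_samples_enabled
+    )
   end

   # NOTE: A lot of libdatadog integration behaviors are tested in the Collectors::Stack specs, since we need actual
@@ -114,6 +121,7 @@ def slot_two_mutex_locked?
     context 'when all profile types are enabled' do
       let(:cpu_time_enabled) { true }
       let(:alloc_samples_enabled) { true }
+      let(:heap_samples_enabled) { true }

       it 'returns a pprof with the configured sample types' do
         expect(sample_types_from(decoded_profile)).to eq(
           'cpu-time' => 'nanoseconds',
           'cpu-samples' => 'count',
           'wall-time' => 'nanoseconds',
           'alloc-samples' => 'count',
+          'heap-live-samples' => 'count',
         )
       end
     end

@@ -128,12 +137,14 @@ def slot_two_mutex_locked?
     context 'when cpu-time is disabled' do
       let(:cpu_time_enabled) { false }
       let(:alloc_samples_enabled) { true }
+      let(:heap_samples_enabled) { true }

       it 'returns a pprof without the cpu-type type' do
         expect(sample_types_from(decoded_profile)).to eq(
           'cpu-samples' => 'count',
           'wall-time' => 'nanoseconds',
           'alloc-samples' => 'count',
+          'heap-live-samples' => 'count',
         )
       end
     end

@@ -141,12 +152,29 @@ def slot_two_mutex_locked?
     context 'when alloc-samples is disabled' do
       let(:cpu_time_enabled) { true }
       let(:alloc_samples_enabled) { false }
+      let(:heap_samples_enabled) { true }

       it 'returns a pprof without the alloc-samples type' do
         expect(sample_types_from(decoded_profile)).to eq(
           'cpu-time' => 'nanoseconds',
           'cpu-samples' => 'count',
           'wall-time' => 'nanoseconds',
+          'heap-live-samples' => 'count',
         )
       end
     end

+    context 'when heap-live-samples is disabled' do
+      let(:cpu_time_enabled) { true }
+      let(:alloc_samples_enabled) { true }
+      let(:heap_samples_enabled) { false }
+
+      it 'returns a pprof without the heap-live-samples type' do
+        expect(sample_types_from(decoded_profile)).to eq(
+          'cpu-time' => 'nanoseconds',
+          'cpu-samples' => 'count',
+          'wall-time' => 'nanoseconds',
+          'alloc-samples' => 'count',
        )
      end
    end
+
@@ -154,8 +182,9 @@ def slot_two_mutex_locked?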
     context 'when all optional types are disabled' do
       let(:cpu_time_enabled) { false }
       let(:alloc_samples_enabled) { false }
+      let(:heap_samples_enabled) { false }

-      it 'returns a pprof with without the optional types' do
+      it 'returns a pprof without the optional types' do
         expect(sample_types_from(decoded_profile)).to eq(
           'cpu-samples' => 'count',
           'wall-time' => 'nanoseconds',
@@ -198,7 +227,12 @@ def sample_types_from(decoded_profile)

     it 'encodes the sample with the metrics provided' do
       expect(samples.first.values)
-        .to eq(:'cpu-time' => 123, :'cpu-samples' => 456, :'wall-time' => 789, :'alloc-samples' => 4242)
+        .to eq(
+          :'cpu-time' => 123,
+          :'cpu-samples' => 456,
+          :'wall-time' => 789,
+          :'alloc-samples' => 4242,
+        )
     end

     context 'when disabling an optional profile sample type' do
@@ -302,6 +336,94 @@ def sample_types_from(decoded_profile)
     end
   end

+  describe 'heap samples' do
+    let(:sample_rate) { 50 }
+    let(:metric_values) { { 'cpu-time' => 101, 'cpu-samples' => 1, 'wall-time' => 789, 'alloc-samples' => sample_rate } }
+    let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b', 'state' => 'unknown' }.to_a }
+
+    let(:a_string) { 'a beautiful string' }
+    let(:an_array) { [1..10] }
+    let(:a_hash) { { 'a' => 1, 'b' => '2', 'c' => true } }
+
+    let(:samples) { samples_from_pprof(encoded_pprof) }
+
+    before do
+      allocations = [a_string, an_array, 'a fearsome string', [-10..-1], a_hash, { 'z' => -1, 'y' => '-2', 'x' => false }]
+      @num_allocations = 0
+      allocations.each_with_index do |obj, i|
+        # Heap sampling currently requires this 2-step process to first pass data about the allocated object...
+        described_class::Testing._native_track_object(stack_recorder, obj, sample_rate)
+        # ...and then pass data about the allocation stacktrace (with 2 distinct stacktraces)
+        if i.even?
+          Datadog::Profiling::Collectors::Stack::Testing
+            ._native_sample(Thread.current, stack_recorder, metric_values, labels, numeric_labels, 400, false)
+        else
+          # 401 used instead of 400 here just to make the branches different and appease Rubocop
+          Datadog::Profiling::Collectors::Stack::Testing
+            ._native_sample(Thread.current, stack_recorder, metric_values, labels, numeric_labels, 401, false)
+        end
+        @num_allocations += 1
+      end
+
+      allocations.clear # The literals in the previous array are now dangling
+      GC.start # And this will clear them, leaving only the non-literals which are still pointed to by the lets
+    end
+
+    context 'when disabled' do
+      let(:heap_samples_enabled) { false }
+
+      it 'are omitted from the profile' do
+        # We sample from 2 distinct locations
+        expect(samples.size).to eq(2)
+        expect(samples.select { |s| s.values.key?(:'heap-live-samples') }).to be_empty
+      end
+    end
+
+    context 'when enabled' do
+      let(:heap_samples_enabled) { true }
+
+      let(:heap_samples) do
+        samples.select { |s| s.values[:'heap-live-samples'] > 0 }
+      end
+
+      let(:non_heap_samples) do
+        samples.select { |s| s.values[:'heap-live-samples'] == 0 }
+      end
+
+      it 'include the stack and sample counts for the objects still left alive' do
+        pending 'heap_recorder implementation is currently missing'
+
+        # We sample from 2 distinct locations
+        expect(heap_samples.size).to eq(2)
+
+        sum_heap_samples = 0
+        heap_samples.each { |s| sum_heap_samples += s.values[:'heap-live-samples'] }
+        expect(sum_heap_samples).to eq([a_string, an_array, a_hash].size * sample_rate)
+      end
+
+      it 'keeps on reporting accurate samples for other profile types' do
+        expect(non_heap_samples.size).to eq(2)
+
+        summed_values = {}
+        non_heap_samples.each do |s|
+          s.values.each_pair do |k, v|
+            summed_values[k] = (summed_values[k] || 0) + v
+          end
+        end
+
+        # We use the same metric_values in all sample calls in the before block. So we'd expect
+        # the summed values to match `@num_allocations * metric_values[profile-type]`
+        # for each profile-type therein.
+        expected_summed_values = { :'heap-live-samples' => 0 }
+        metric_values.each_pair do |k, v|
+          expected_summed_values[k.to_sym] = v * @num_allocations
+        end
+
+        expect(summed_values).to eq(expected_summed_values)
+      end
+    end
+  end
+
   context 'when there is a failure during serialization' do
     before do
       allow(Datadog.logger).to receive(:error)