Skip to content

Commit

Permalink
Merge pull request #3993 from DataDog/ivoanjo/prof-10589-gvl-profilin…
Browse files Browse the repository at this point in the history
…g-internal-metrics

[PROF-10589] Add internal metrics for GVL profiling performance
  • Loading branch information
ivoanjo authored Oct 11, 2024
2 parents 6399568 + b2b331b commit cdfc685
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 30 deletions.
6 changes: 3 additions & 3 deletions docs/ProfilingDevelopment.md
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,9 @@ from the VM callbacks and also messing with cpu/wall-time accounting for threads
The `ThreadContext` collector exposes three APIs for GVL profiling:
* `void thread_context_collector_on_gvl_waiting(VALUE thread)`
* `bool thread_context_collector_on_gvl_running(VALUE thread)`
* `VALUE thread_context_collector_sample_after_gvl_running(VALUE self_instance)`
* `thread_context_collector_on_gvl_waiting`
* `thread_context_collector_on_gvl_running`
* `thread_context_collector_sample_after_gvl_running`
The intuition here is that:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,12 @@ struct cpu_and_wall_time_worker_state {
// # GVL profiling stats
// How many times we triggered the after_gvl_running sampling
unsigned int after_gvl_running;
// How many times we skipped the after_gvl_running sampling
unsigned int gvl_dont_sample;
// Min/max/total wall-time spent on gvl sampling
uint64_t gvl_sampling_time_ns_min;
uint64_t gvl_sampling_time_ns_max;
uint64_t gvl_sampling_time_ns_total;
} stats;
};

Expand Down Expand Up @@ -230,6 +236,7 @@ static VALUE _native_resume_signals(DDTRACE_UNUSED VALUE self);
static void on_gvl_event(rb_event_flag_t event_id, const rb_internal_thread_event_data_t *event_data, DDTRACE_UNUSED void *_unused);
static void after_gvl_running_from_postponed_job(DDTRACE_UNUSED void *_unused);
#endif
static VALUE rescued_after_gvl_running_from_postponed_job(VALUE self_instance);
static VALUE _native_gvl_profiling_hook_active(DDTRACE_UNUSED VALUE self, VALUE instance);

// We're using `on_newobj_event` function with `rb_add_event_hook2`, which requires in its public signature a function
Expand Down Expand Up @@ -1040,7 +1047,12 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance) {
ID2SYM(rb_intern("allocations_during_sample")), /* => */ state->allocation_profiling_enabled ? UINT2NUM(state->stats.allocations_during_sample) : Qnil,

// GVL profiling stats
ID2SYM(rb_intern("after_gvl_running")), /* => */ UINT2NUM(state->stats.after_gvl_running),
ID2SYM(rb_intern("after_gvl_running")), /* => */ UINT2NUM(state->stats.after_gvl_running),
ID2SYM(rb_intern("gvl_dont_sample")), /* => */ UINT2NUM(state->stats.gvl_dont_sample),
ID2SYM(rb_intern("gvl_sampling_time_ns_min")), /* => */ RUBY_NUM_OR_NIL(state->stats.gvl_sampling_time_ns_min, != UINT64_MAX, ULL2NUM),
ID2SYM(rb_intern("gvl_sampling_time_ns_max")), /* => */ RUBY_NUM_OR_NIL(state->stats.gvl_sampling_time_ns_max, > 0, ULL2NUM),
ID2SYM(rb_intern("gvl_sampling_time_ns_total")), /* => */ RUBY_NUM_OR_NIL(state->stats.gvl_sampling_time_ns_total, > 0, ULL2NUM),
ID2SYM(rb_intern("gvl_sampling_time_ns_avg")), /* => */ RUBY_AVG_OR_NIL(state->stats.gvl_sampling_time_ns_total, state->stats.after_gvl_running),
};
for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(stats_as_hash, arguments[i], arguments[i+1]);
return stats_as_hash;
Expand Down Expand Up @@ -1078,8 +1090,10 @@ static void reset_stats_not_thread_safe(struct cpu_and_wall_time_worker_state *s
// Given the expected infrequency of resetting (~once per 60s profile) and the auxiliary/non-critical nature of these stats
// this momentary loss of accuracy is deemed acceptable to keep overhead to a minimum.
state->stats = (struct stats) {
.cpu_sampling_time_ns_min = UINT64_MAX, // Since we always take the min between existing and latest sample
.allocation_sampling_time_ns_min = UINT64_MAX, // Since we always take the min between existing and latest sample
// All these values are initialized to their highest value possible since we always take the min between existing and latest sample
.cpu_sampling_time_ns_min = UINT64_MAX,
.allocation_sampling_time_ns_min = UINT64_MAX,
.gvl_sampling_time_ns_min = UINT64_MAX,
};
}

Expand Down Expand Up @@ -1313,23 +1327,29 @@ static VALUE _native_resume_signals(DDTRACE_UNUSED VALUE self) {
} else if (event_id == RUBY_INTERNAL_THREAD_EVENT_RESUMED) { /* running/runnable */
// Interesting note: A RUBY_INTERNAL_THREAD_EVENT_RESUMED is guaranteed to be called with the GVL being acquired.
// (And... I think target_thread will be == rb_thread_current()?)
//
// But we're not sure if we're on the main Ractor yet. The thread context collector actually can actually help here:
// it tags threads it's tracking, so if a thread is tagged then by definition we know that thread belongs to the main
// Ractor. Thus, if we really really wanted to access the state, we could do it after making sure we're on the correct Ractor.
// Ractor. Thus, if we get a ON_GVL_RUNNING_UNKNOWN result we shouldn't touch any state, but otherwise we're good to go.

#ifdef USE_GVL_PROFILING_3_2_WORKAROUNDS
target_thread = gvl_profiling_state_maybe_initialize();
#endif

bool should_sample = thread_context_collector_on_gvl_running(target_thread);
on_gvl_running_result result = thread_context_collector_on_gvl_running(target_thread);

if (should_sample) {
// should_sample is only true if a thread belongs to the main Ractor, so we're good to go
if (result == ON_GVL_RUNNING_SAMPLE) {
#ifndef NO_POSTPONED_TRIGGER
rb_postponed_job_trigger(after_gvl_running_from_postponed_job_handle);
#else
rb_postponed_job_register_one(0, after_gvl_running_from_postponed_job, NULL);
#endif
} else if (result == ON_GVL_RUNNING_DONT_SAMPLE) {
struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above

if (state == NULL) return; // This should not happen, but just in case...

state->stats.gvl_dont_sample++;
}
} else {
// This is a very delicate time and it's hard for us to raise an exception so let's at least complain to stderr
Expand All @@ -1345,11 +1365,32 @@ static VALUE _native_resume_signals(DDTRACE_UNUSED VALUE self) {

state->during_sample = true;

safely_call(thread_context_collector_sample_after_gvl_running, state->thread_context_collector_instance, state->self_instance);
// Rescue against any exceptions that happen during sampling
safely_call(rescued_after_gvl_running_from_postponed_job, state->self_instance, state->self_instance);

state->during_sample = false;
}

static VALUE rescued_after_gvl_running_from_postponed_job(VALUE self_instance) {
struct cpu_and_wall_time_worker_state *state;
TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);

long wall_time_ns_before_sample = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
thread_context_collector_sample_after_gvl_running(state->thread_context_collector_instance, rb_thread_current(), wall_time_ns_before_sample);
long wall_time_ns_after_sample = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);

long delta_ns = wall_time_ns_after_sample - wall_time_ns_before_sample;

// Guard against wall-time going backwards, see https://github.com/DataDog/dd-trace-rb/pull/2336 for discussion.
uint64_t sampling_time_ns = delta_ns < 0 ? 0 : delta_ns;

state->stats.gvl_sampling_time_ns_min = uint64_min_of(sampling_time_ns, state->stats.gvl_sampling_time_ns_min);
state->stats.gvl_sampling_time_ns_max = uint64_max_of(sampling_time_ns, state->stats.gvl_sampling_time_ns_max);
state->stats.gvl_sampling_time_ns_total += sampling_time_ns;

state->stats.after_gvl_running++;

state->during_sample = false;
return Qnil;
}

static VALUE _native_gvl_profiling_hook_active(DDTRACE_UNUSED VALUE self, VALUE instance) {
Expand Down
28 changes: 14 additions & 14 deletions ext/datadog_profiling_native_extension/collectors_thread_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -1746,14 +1746,14 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {

// This function can get called from outside the GVL and even on non-main Ractors
__attribute__((warn_unused_result))
bool thread_context_collector_on_gvl_running_with_threshold(gvl_profiling_thread thread, uint32_t waiting_for_gvl_threshold_ns) {
on_gvl_running_result thread_context_collector_on_gvl_running_with_threshold(gvl_profiling_thread thread, uint32_t waiting_for_gvl_threshold_ns) {
intptr_t gvl_waiting_at = gvl_profiling_state_get(thread);

// Thread was not being profiled / not waiting on gvl
if (gvl_waiting_at == 0 || gvl_waiting_at == GVL_WAITING_ENABLED_EMPTY) return false;
if (gvl_waiting_at == 0 || gvl_waiting_at == GVL_WAITING_ENABLED_EMPTY) return ON_GVL_RUNNING_UNKNOWN;

// @ivoanjo: I'm not sure if this can happen -- It means we should've sampled already but haven't gotten the chance yet?
if (gvl_waiting_at < 0) return true;
if (gvl_waiting_at < 0) return ON_GVL_RUNNING_SAMPLE;

long waiting_for_gvl_duration_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE) - gvl_waiting_at;

Expand All @@ -1769,11 +1769,11 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
gvl_profiling_state_set(thread, GVL_WAITING_ENABLED_EMPTY);
}

return should_sample;
return should_sample ? ON_GVL_RUNNING_SAMPLE : ON_GVL_RUNNING_DONT_SAMPLE;
}

__attribute__((warn_unused_result))
bool thread_context_collector_on_gvl_running(gvl_profiling_thread thread) {
on_gvl_running_result thread_context_collector_on_gvl_running(gvl_profiling_thread thread) {
return thread_context_collector_on_gvl_running_with_threshold(thread, global_waiting_for_gvl_threshold_ns);
}

Expand Down Expand Up @@ -1805,7 +1805,7 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
//
// NOTE: In normal use, current_thread is expected to be == rb_thread_current(); the `current_thread` parameter only
// exists to enable testing.
VALUE thread_context_collector_sample_after_gvl_running_with_thread(VALUE self_instance, VALUE current_thread) {
VALUE thread_context_collector_sample_after_gvl_running(VALUE self_instance, VALUE current_thread, long current_monotonic_wall_time_ns) {
struct thread_context_collector_state *state;
TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);

Expand Down Expand Up @@ -1837,14 +1837,10 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
thread_context,
thread_context->sampling_buffer,
cpu_time_for_thread,
monotonic_wall_time_now_ns(RAISE_ON_FAILURE)
current_monotonic_wall_time_ns
);

return Qtrue; // To allow this to be called from rb_rescue2
}

VALUE thread_context_collector_sample_after_gvl_running(VALUE self_instance) {
return thread_context_collector_sample_after_gvl_running_with_thread(self_instance, rb_thread_current());
return Qtrue;
}

// This method is intended to be called from update_metrics_and_sample. It exists to handle extra sampling steps we
Expand Down Expand Up @@ -1963,13 +1959,17 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
static VALUE _native_on_gvl_running(DDTRACE_UNUSED VALUE self, VALUE thread) {
ENFORCE_THREAD(thread);

return thread_context_collector_on_gvl_running(thread_from_thread_object(thread)) ? Qtrue : Qfalse;
return thread_context_collector_on_gvl_running(thread_from_thread_object(thread)) == ON_GVL_RUNNING_SAMPLE ? Qtrue : Qfalse;
}

static VALUE _native_sample_after_gvl_running(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread) {
ENFORCE_THREAD(thread);

return thread_context_collector_sample_after_gvl_running_with_thread(collector_instance, thread);
return thread_context_collector_sample_after_gvl_running(
collector_instance,
thread,
monotonic_wall_time_now_ns(RAISE_ON_FAILURE)
);
}

static VALUE _native_apply_delta_to_cpu_time_at_previous_sample_ns(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread, VALUE delta_ns) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,13 @@ __attribute__((warn_unused_result)) bool thread_context_collector_on_gc_finish(V
VALUE enforce_thread_context_collector_instance(VALUE object);

#ifndef NO_GVL_INSTRUMENTATION
typedef enum {
ON_GVL_RUNNING_UNKNOWN, // Thread is not known, it may not even be from the current Ractor
ON_GVL_RUNNING_DONT_SAMPLE, // Thread is known, but "Waiting for GVL" period was too small to be sampled
ON_GVL_RUNNING_SAMPLE, // Thread is known, and "Waiting for GVL" period should be sampled
} on_gvl_running_result;

void thread_context_collector_on_gvl_waiting(gvl_profiling_thread thread);
__attribute__((warn_unused_result)) bool thread_context_collector_on_gvl_running(gvl_profiling_thread thread);
VALUE thread_context_collector_sample_after_gvl_running(VALUE self_instance);
__attribute__((warn_unused_result)) on_gvl_running_result thread_context_collector_on_gvl_running(gvl_profiling_thread thread);
VALUE thread_context_collector_sample_after_gvl_running(VALUE self_instance, VALUE current_thread, long current_monotonic_wall_time_ns);
#endif
55 changes: 53 additions & 2 deletions spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,52 @@
expect(waiting_for_gvl_time).to be_within(5).percent_of(total_time), \
"Expected waiting_for_gvl_time to be close to total_time, debug_failures: #{debug_failures}"

expect(cpu_and_wall_time_worker.stats.fetch(:after_gvl_running)).to be > 0
expect(cpu_and_wall_time_worker.stats).to match(
hash_including(
after_gvl_running: be > 0,
gvl_sampling_time_ns_min: be > 0,
gvl_sampling_time_ns_max: be > 0,
gvl_sampling_time_ns_total: be > 0,
gvl_sampling_time_ns_avg: be > 0,
)
)
end

context "when 'Waiting for GVL' periods are below waiting_for_gvl_threshold_ns" do
let(:options) do
ten_seconds_as_ns = 1_000_000_000
collector = build_thread_context_collector(recorder, waiting_for_gvl_threshold_ns: ten_seconds_as_ns)

{thread_context_collector: collector}
end

it "does not trigger extra samples" do
background_thread_affected_by_gvl_contention
ready_queue_2.pop

start
wait_until_running

sleep 0.1
background_thread_affected_by_gvl_contention.kill

cpu_and_wall_time_worker.stop

# Note: There may still be "Waiting for GVL" samples in the output, but these samples will come from the
# periodic cpu/wall-sampling, not samples directly triggered by the end of a "Waiting for GVL" period.

expect(cpu_and_wall_time_worker.stats.fetch(:gvl_dont_sample)).to be > 0

expect(cpu_and_wall_time_worker.stats).to match(
hash_including(
after_gvl_running: 0,
gvl_sampling_time_ns_min: nil,
gvl_sampling_time_ns_max: nil,
gvl_sampling_time_ns_total: nil,
gvl_sampling_time_ns_avg: nil,
)
)
end
end
end
end
Expand Down Expand Up @@ -1137,6 +1182,11 @@
allocation_sampler_snapshot: nil,
allocations_during_sample: nil,
after_gvl_running: 0,
gvl_dont_sample: 0,
gvl_sampling_time_ns_min: nil,
gvl_sampling_time_ns_max: nil,
gvl_sampling_time_ns_total: nil,
gvl_sampling_time_ns_avg: nil,
}
)
end
Expand Down Expand Up @@ -1417,11 +1467,12 @@ def build_another_instance
described_class.new(**worker_settings)
end

def build_thread_context_collector(recorder)
def build_thread_context_collector(recorder, **options)
Datadog::Profiling::Collectors::ThreadContext.for_testing(
recorder: recorder,
endpoint_collection_enabled: endpoint_collection_enabled,
timeline_enabled: timeline_enabled,
**options,
)
end

Expand Down

0 comments on commit cdfc685

Please sign in to comment.