From b03696ef5ea475f8a60c0c69f91e7ffe59a6e4f2 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Thu, 11 Apr 2024 16:58:57 +0100 Subject: [PATCH 01/21] Expose methods for holding/resuming interruptions These will be needed to implement the dir interruption workaround. --- .../collectors_cpu_and_wall_time_worker.c | 20 ++++++++++++++++++- sig/datadog/profiling.rbs | 3 +++ .../collectors/cpu_and_wall_time_worker.rbs | 2 ++ .../cpu_and_wall_time_worker_spec.rb | 14 +++++++++++++ 4 files changed, 38 insertions(+), 1 deletion(-) diff --git a/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c index 749b416c9fd..80637899340 100644 --- a/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +++ b/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c @@ -222,6 +222,8 @@ static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self); static VALUE rescued_sample_allocation(VALUE tracepoint_data); static void delayed_error(struct cpu_and_wall_time_worker_state *state, const char *error); static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VALUE error_msg); +static VALUE _native_hold_interruptions(DDTRACE_UNUSED VALUE self); +static VALUE _native_resume_interruptions(DDTRACE_UNUSED VALUE self); // Note on sampler global state safety: // @@ -285,7 +287,9 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) { rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_allocation_count", _native_allocation_count, 0); rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_is_running?", _native_is_running, 1); rb_define_singleton_method(testing_module, "_native_current_sigprof_signal_handler", _native_current_sigprof_signal_handler, 0); - // TODO: Remove `_native_is_running` from `testing_module` once `prof-correctness` has been updated to not need it + rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_hold_interruptions", _native_hold_interruptions, 0); + rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_resume_interruptions", _native_resume_interruptions, 0); + // TODO: Remove `_native_is_running` from `testing_module` (should be in class) once `prof-correctness` has been updated to not need it rb_define_singleton_method(testing_module, "_native_is_running?", _native_is_running, 1); rb_define_singleton_method(testing_module, "_native_install_testing_signal_handler", _native_install_testing_signal_handler, 0); rb_define_singleton_method(testing_module, "_native_remove_testing_signal_handler", _native_remove_testing_signal_handler, 0); @@ -1159,3 +1163,17 @@ static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VA return Qnil; } + +// Masks SIGPROF interruptions for the current thread. Please don't use this -- you may end up with incomplete +// profiling data. +static VALUE _native_hold_interruptions(DDTRACE_UNUSED VALUE self) { + block_sigprof_signal_handler_from_running_in_current_thread(); + return Qtrue; +} + +// Unmasks SIGPROF interruptions for the current thread. If there's a pending sample, it'll be triggered inside this +// method. +static VALUE _native_resume_interruptions(DDTRACE_UNUSED VALUE self) { + unblock_sigprof_signal_handler_from_running_in_current_thread(); + return Qtrue; +} diff --git a/sig/datadog/profiling.rbs b/sig/datadog/profiling.rbs index 35902c8d523..386bfc7aa5c 100644 --- a/sig/datadog/profiling.rbs +++ b/sig/datadog/profiling.rbs @@ -6,6 +6,9 @@ module Datadog def self.allocation_count: () -> ::Integer? def self.enabled?: () -> bool def self.wait_until_running: () -> true + + private + def self.replace_noop_allocation_count: () -> void def self.native_library_compilation_skipped?: () -> ::String? def self.try_reading_skipped_reason_file: (?untyped file_api) -> ::String? diff --git a/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs b/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs index 917eab40ff1..31d4992f579 100644 --- a/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs +++ b/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs @@ -44,6 +44,8 @@ module Datadog def self._native_is_running?: (CpuAndWallTimeWorker self_instance) -> bool def self._native_allocation_count: () -> ::Integer? def self._native_sampling_loop: (CpuAndWallTimeWorker self_instance) -> void + def self._native_hold_interruptions: () -> void + def self._native_resume_interruptions: () -> void def wait_until_running: (timeout_seconds: ::Integer?) -> true end diff --git a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb index 7660468be15..121f9c9b128 100644 --- a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb +++ b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb @@ -1176,6 +1176,20 @@ end end + describe '._native_hold_interruptions and _native_resume_interruptions' do + it 'blocks/unblocks interruptions for the current thread' do + expect(described_class::Testing._native_is_sigprof_blocked_in_current_thread).to be false + + described_class._native_hold_interruptions + + expect(described_class::Testing._native_is_sigprof_blocked_in_current_thread).to be true + + described_class._native_resume_interruptions + + expect(described_class::Testing._native_is_sigprof_blocked_in_current_thread).to be false + end + end + def wait_until_running cpu_and_wall_time_worker.wait_until_running end From aeb2db63a48398d7ed0b68f47e428acea718cd83 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Wed, 24 Apr 2024 10:04:44 +0100 Subject: [PATCH 02/21] Bootstrap setting for controlling dir interruption workaround Not yet wired up. --- lib/datadog/core/configuration/settings.rb | 10 ++++++ .../core/configuration/settings_spec.rb | 35 +++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/lib/datadog/core/configuration/settings.rb b/lib/datadog/core/configuration/settings.rb index dd91ed4651a..6b0d3917c6c 100644 --- a/lib/datadog/core/configuration/settings.rb +++ b/lib/datadog/core/configuration/settings.rb @@ -397,6 +397,16 @@ def initialize(*_) end end + # TODO: description + # + # @default `DD_PROFILING_DIR_INTERRUPTION_WORKAROUND_ENABLED` environment variable as a boolean, + # otherwise `true` + option :dir_interruption_workaround_enabled do |o| + o.env 'DD_PROFILING_DIR_INTERRUPTION_WORKAROUND_ENABLED' + o.type :bool + o.default true + end + # Configures how much wall-time overhead the profiler targets. The profiler will dynamically adjust the # interval between samples it takes so as to try and maintain the property that it spends no longer than # this amount of wall-clock time profiling. For example, with the default value of 2%, the profiler will diff --git a/spec/datadog/core/configuration/settings_spec.rb b/spec/datadog/core/configuration/settings_spec.rb index 4cc46cad087..33fe290f315 100644 --- a/spec/datadog/core/configuration/settings_spec.rb +++ b/spec/datadog/core/configuration/settings_spec.rb @@ -834,6 +834,41 @@ .to(true) end end + + describe '#dir_interruption_workaround_enabled' do + subject(:dir_interruption_workaround_enabled) { settings.profiling.advanced.dir_interruption_workaround_enabled } + + context 'when DD_PROFILING_DIR_INTERRUPTION_WORKAROUND_ENABLED' do + around do |example| + ClimateControl.modify('DD_PROFILING_DIR_INTERRUPTION_WORKAROUND_ENABLED' => environment) do + example.run + end + end + + context 'is not defined' do + let(:environment) { nil } + + it { is_expected.to be true } + end + + [true, false].each do |value| + context "is defined as #{value}" do + let(:environment) { value.to_s } + + it { is_expected.to be value } + end + end + end + end + + describe '#dir_interruption_workaround_enabled=' do + it 'updates the #dir_interruption_workaround_enabled setting from its default of true' do + expect { settings.profiling.advanced.dir_interruption_workaround_enabled = false } + .to change { settings.profiling.advanced.dir_interruption_workaround_enabled } + .from(true) + .to(false) + end + end end describe '#upload' do From 88f116473148226f378da0495cc94dabec650f5d Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Fri, 10 May 2024 15:45:28 +0100 Subject: [PATCH 03/21] Add monkey patches for affected dir class APIs I've separately tested these with ruby-spec, but haven't integrated it yet for automated testing. Will do it a follow-up commit. --- .../profiling/ext/dir_monkey_patches.rb | 210 ++++++++++++++++++ 1 file changed, 210 insertions(+) create mode 100644 lib/datadog/profiling/ext/dir_monkey_patches.rb diff --git a/lib/datadog/profiling/ext/dir_monkey_patches.rb b/lib/datadog/profiling/ext/dir_monkey_patches.rb new file mode 100644 index 00000000000..1a976bb89de --- /dev/null +++ b/lib/datadog/profiling/ext/dir_monkey_patches.rb @@ -0,0 +1,210 @@ +# frozen_string_literal: true + +module Datadog + module Profiling + module Ext + # All Ruby versions as of this writing have bugs in the dir class implementation, causing issues such as + # https://github.com/DataDog/dd-trace-rb/issues/3450 . + # + # This monkey patch for the Ruby `Dir` class works around these bugs for affected Ruby versions by temporarily + # blocking the profiler from interrupting system calls. + # + # A lot of these APIs do very similar things -- they're provided by Ruby as helpers so users don't need to keep + # reimplementing them but share the same underlying buggy code. And so our monkey patches are a bit repetitive + # as well. + # We don't DRY out this file to have minimal overhead. + module DirMonkeyPatches + def [](*args, &block) + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + end + ruby2_keywords :[] if respond_to?(:ruby2_keywords, true) + + def children(*args, &block) + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + end + ruby2_keywords :children if respond_to?(:ruby2_keywords, true) + + def each_child(*args, &block) + if block + begin + # <-- Begin critical region + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + super do |entry_name| + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + # <-- We're safe now while running customer code + yield entry_name + # <-- We'll go back to the Dir internals, critical region again + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + end + ensure + # <-- End critical region + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + end + else + # This returns an enumerator. We don't want/need to intercede here, the enumerator will eventually call the + # other branch once it gets going. + super + end + end + ruby2_keywords :each_child if respond_to?(:ruby2_keywords, true) + + def empty?(*args, &block) + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + end + ruby2_keywords :empty? if respond_to?(:ruby2_keywords, true) + + def entries(*args, &block) + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + end + ruby2_keywords :entries if respond_to?(:ruby2_keywords, true) + + def foreach(*args, &block) + if block + begin + # <-- Begin critical region + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + super do |entry_name| + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + # <-- We're safe now while running customer code + yield entry_name + # <-- We'll go back to the Dir internals, critical region again + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + end + ensure + # <-- End critical region + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + end + else + # This returns an enumerator. We don't want/need to intercede here, the enumerator will eventually call the + # other branch once it gets going. + super + end + end + ruby2_keywords :foreach if respond_to?(:ruby2_keywords, true) + + def glob(*args, &block) + if block + begin + # <-- Begin critical region + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + super do |entry_name| + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + # <-- We're safe now while running customer code + yield entry_name + # <-- We'll go back to the Dir internals, critical region again + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + end + ensure + # <-- End critical region + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + end + else + begin + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + end + end + end + ruby2_keywords :glob if respond_to?(:ruby2_keywords, true) + + def home(*args, &block) + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + end + ruby2_keywords :home if respond_to?(:ruby2_keywords, true) + end + + # Same as DirMonkeyPatches, but instance methods on Dir. + module DirInstanceMonkeyPatches + def each(*args, &block) + if block + begin + # <-- Begin critical region + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + super do |entry_name| + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + # <-- We're safe now while running customer code + yield entry_name + # <-- We'll go back to the Dir internals, critical region again + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + end + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions # <-- End critical region + end + else + # This returns an enumerator. We don't want/need to intercede here, the enumerator will eventually call the + # other branch once it gets going. + super + end + end + ruby2_keywords :each if respond_to?(:ruby2_keywords, true) + + unless RUBY_VERSION.start_with?('2.5.') # This is Ruby 2.6+ + def each_child(*args, &block) + if block + begin + # <-- Begin critical region + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + super do |entry_name| + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + # <-- We're safe now while running customer code + yield entry_name + # <-- We'll go back to the Dir internals, critical region again + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + end + ensure + # <-- End critical region + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + end + else + # This returns an enumerator. We don't want/need to intercede here, the enumerator will eventually call the + # other branch once it gets going. + super + end + end + ruby2_keywords :each_child if respond_to?(:ruby2_keywords, true) + + def children(*args, &block) + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + end + ruby2_keywords :children if respond_to?(:ruby2_keywords, true) + end + + def tell(*args, &block) + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + end + ruby2_keywords :tell if respond_to?(:ruby2_keywords, true) + + def pos(*args, &block) + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + end + ruby2_keywords :pos if respond_to?(:ruby2_keywords, true) + end + end + end +end From 3c51234abaf2b55ee19d8889b150f122759360d9 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Wed, 12 Jun 2024 15:21:25 +0100 Subject: [PATCH 04/21] Add inline suggestion to function --- ext/datadog_profiling_native_extension/setup_signal_handler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/datadog_profiling_native_extension/setup_signal_handler.c b/ext/datadog_profiling_native_extension/setup_signal_handler.c index 93047b9bfee..fa4bb27e95c 100644 --- a/ext/datadog_profiling_native_extension/setup_signal_handler.c +++ b/ext/datadog_profiling_native_extension/setup_signal_handler.c @@ -91,7 +91,7 @@ void remove_sigprof_signal_handler(void) { if (sigaction(SIGPROF, &signal_handler_config, NULL) != 0) rb_sys_fail("Failure while removing the signal handler"); } -static void toggle_sigprof_signal_handler_for_current_thread(int action) { +static inline void toggle_sigprof_signal_handler_for_current_thread(int action) { sigset_t signals_to_toggle; sigemptyset(&signals_to_toggle); sigaddset(&signals_to_toggle, SIGPROF); From 1721270f9a543bad992d2eb9c600fc588e7904a4 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Wed, 12 Jun 2024 15:10:47 +0100 Subject: [PATCH 05/21] Add benchmark for hold/resume interruptions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For future reference, here's what I get on my local machine: ``` ruby 3.1.4p223 (2023-03-30 revision 957bb7cb81) [x86_64-linux] Warming up -------------------------------------- hold / resume 391.856k i/100ms Calculating ------------------------------------- hold / resume 3.829M (± 1.9%) i/s - 38.402M in 10.033530s ``` --- .../profiler_hold_resume_interruptions.rb | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 benchmarks/profiler_hold_resume_interruptions.rb diff --git a/benchmarks/profiler_hold_resume_interruptions.rb b/benchmarks/profiler_hold_resume_interruptions.rb new file mode 100644 index 00000000000..657fbe4262c --- /dev/null +++ b/benchmarks/profiler_hold_resume_interruptions.rb @@ -0,0 +1,44 @@ +# Used to quickly run benchmark under RSpec as part of the usual test suite, to validate it didn't bitrot +VALIDATE_BENCHMARK_MODE = ENV['VALIDATE_BENCHMARK'] == 'true' + +return unless __FILE__ == $PROGRAM_NAME || VALIDATE_BENCHMARK_MODE + +require 'benchmark/ips' +require 'datadog' +require 'pry' +require_relative 'dogstatsd_reporter' + +# This benchmark measures the performance of the hold/resume interruptions used by the DirMonkeyPatches +class ProfilerHoldResumeInterruptions + def create_profiler + Datadog.configure do |c| + c.profiling.enabled = true + end + Datadog::Profiling.wait_until_running + end + + def run_benchmark + Benchmark.ips do |x| + benchmark_time = VALIDATE_BENCHMARK_MODE ? { time: 0.01, warmup: 0 } : { time: 10, warmup: 2 } + x.config( + **benchmark_time, + suite: report_to_dogstatsd_if_enabled_via_environment_variable(benchmark_name: 'profiler_hold_resume_interruptions') + ) + + x.report("hold / resume") do + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + end + + x.save! 'profiler_hold_resume_interruptions-results.json' unless VALIDATE_BENCHMARK_MODE + x.compare! + end + end +end + +puts "Current pid is #{Process.pid}" + +ProfilerHoldResumeInterruptions.new.instance_exec do + create_profiler + run_benchmark +end From 42c40b2581994cdd44fa0c54586042365df8d81c Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Thu, 13 Jun 2024 11:47:30 +0100 Subject: [PATCH 06/21] Add helper to load monkey patches (similar to our Kernel monkey patches) --- lib/datadog/profiling/ext/dir_monkey_patches.rb | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lib/datadog/profiling/ext/dir_monkey_patches.rb b/lib/datadog/profiling/ext/dir_monkey_patches.rb index 1a976bb89de..3eba6bc1477 100644 --- a/lib/datadog/profiling/ext/dir_monkey_patches.rb +++ b/lib/datadog/profiling/ext/dir_monkey_patches.rb @@ -13,7 +13,19 @@ module Ext # reimplementing them but share the same underlying buggy code. And so our monkey patches are a bit repetitive # as well. # We don't DRY out this file to have minimal overhead. + # + # These monkey patches are applied by the profiler when the "dir_interruption_workaround_enabled" setting is + # enabled. See the profiling settings for more detail. module DirMonkeyPatches + def self.apply! + ::Dir.singleton_class.prepend(Datadog::Profiling::Ext::DirClassMonkeyPatches) + ::Dir.prepend(Datadog::Profiling::Ext::DirInstanceMonkeyPatches) + + true + end + end + + module DirClassMonkeyPatches def [](*args, &block) Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions super From 25a96a1e8f9bd83d516ea1506f767e82d459d239 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Thu, 13 Jun 2024 11:47:49 +0100 Subject: [PATCH 07/21] Add type signatures for dir monkey patches --- .../profiling/ext/dir_monkey_patches.rbs | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 sig/datadog/profiling/ext/dir_monkey_patches.rbs diff --git a/sig/datadog/profiling/ext/dir_monkey_patches.rbs b/sig/datadog/profiling/ext/dir_monkey_patches.rbs new file mode 100644 index 00000000000..81e98dc8822 --- /dev/null +++ b/sig/datadog/profiling/ext/dir_monkey_patches.rbs @@ -0,0 +1,39 @@ +# TODO: @ivoanjo As of June 2024 upstream rbs signatures do not include this method on module and even one of their +# tests adds this directly +# ( https://github.com/ruby/rbs/blob/28849d4522ba18f45d4f6edfca89a515a4816373/sig/unit_test/spy.rbs#L26C1-L28C4 ) +# so I'm taking the same approach. +class Module + def ruby2_keywords: (*Symbol) -> void +end + +module Datadog + module Profiling + module Ext + module DirMonkeyPatches + def self.apply!: () -> true + end + + # The signatures below are somehow inspired on + # https://github.com/ruby/rbs/blob/28849d4522ba18f45d4f6edfca89a515a4816373/core/dir.rbs + + module DirClassMonkeyPatches + def []: (*untyped) -> Array[String] + def children: (*untyped) -> Array[String] + def each_child: (*untyped) -> Enumerator[String, nil] | (*untyped) { (String filename) -> void } -> nil + def empty?: (path path_name) -> bool + def entries: (*untyped) -> Array[String] + def foreach: (*untyped) -> Enumerator[String, nil] | (*untyped) { (String filename) -> void } -> nil + def glob: (*untyped) -> Array[String] | (*untyped) { (String pathname) -> void } -> nil + def home: (?string? user) -> String + end + + module DirInstanceMonkeyPatches + def each: () { (String) -> void } -> self | () -> Enumerator[String, self] + def each_child: () { (String) -> void } -> self | () -> Enumerator[String, self] + def children: () -> Array[String] + def tell: () -> Integer + def pos: () -> Integer + end + end + end +end From 57e64bf6b2b715f7d7e50cc7047e4ab37c2d3185 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Thu, 13 Jun 2024 11:48:13 +0100 Subject: [PATCH 08/21] Load new file when loading profiler --- lib/datadog/profiling.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/datadog/profiling.rb b/lib/datadog/profiling.rb index 604e9ac4462..367ce829415 100644 --- a/lib/datadog/profiling.rb +++ b/lib/datadog/profiling.rb @@ -136,6 +136,7 @@ def self.allocation_count # rubocop:disable Lint/NestedMethodDefinition (On purp return false unless supported? require_relative 'profiling/ext/forking' + require_relative 'profiling/ext/dir_monkey_patches' require_relative 'profiling/collectors/info' require_relative 'profiling/collectors/code_provenance' require_relative 'profiling/collectors/cpu_and_wall_time_worker' From 0e80c77b1378605a40f4e6e2ddc924d32d28c2bc Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Thu, 13 Jun 2024 11:48:25 +0100 Subject: [PATCH 09/21] Wire up dir interruption workaround to setting --- lib/datadog/profiling/component.rb | 13 ++++++++++ sig/datadog/profiling/component.rbs | 2 +- spec/datadog/profiling/component_spec.rb | 31 +++++++++++++++++++++++- 3 files changed, 44 insertions(+), 2 deletions(-) diff --git a/lib/datadog/profiling/component.rb b/lib/datadog/profiling/component.rb index cd6acf551b2..d6504a7582a 100644 --- a/lib/datadog/profiling/component.rb +++ b/lib/datadog/profiling/component.rb @@ -75,6 +75,10 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) crashtracker = build_crashtracker(settings, transport) profiler = Profiling::Profiler.new(worker: worker, scheduler: scheduler, optional_crashtracker: crashtracker) + if dir_interruption_workaround_enabled?(settings, no_signals_workaround_enabled) + Datadog::Profiling::Ext::DirMonkeyPatches.apply! + end + [profiler, { profiling_enabled: true }] end @@ -445,6 +449,15 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) libmysqlclient_version < Gem::Version.new('5.0.0') && header_version >= Gem::Version.new('10.0.0')) end + + private_class_method def self.dir_interruption_workaround_enabled?(settings, no_signals_workaround_enabled) + return false if no_signals_workaround_enabled + + # Note: In the future this method will evolve to check for Ruby versions affected and not apply the workaround + # when it's not needed but currently all known Ruby versions are affected. + + settings.profiling.advanced.dir_interruption_workaround_enabled + end end end end diff --git a/sig/datadog/profiling/component.rbs b/sig/datadog/profiling/component.rbs index 9e74c5c7ea0..bda16b8f78e 100644 --- a/sig/datadog/profiling/component.rbs +++ b/sig/datadog/profiling/component.rbs @@ -38,12 +38,12 @@ module Datadog def self.enable_heap_size_profiling?: (untyped settings, bool heap_profiling_enabled) -> bool def self.no_signals_workaround_enabled?: (untyped settings) -> bool - def self.incompatible_libmysqlclient_version?: (untyped settings) -> bool def self.incompatible_passenger_version?: () -> bool def self.flush_interval: (untyped settings) -> ::Numeric def self.valid_overhead_target: (::Float overhead_target_percentage) -> ::Float def self.looks_like_mariadb?: ({ header_version: ::String? }, ::Gem::Version) -> bool + def self.dir_interruption_workaround_enabled?: (untyped settings, bool no_signals_workaround_enabled) -> bool end end end diff --git a/spec/datadog/profiling/component_spec.rb b/spec/datadog/profiling/component_spec.rb index 3ff73da8c4f..d0ff00ac63a 100644 --- a/spec/datadog/profiling/component_spec.rb +++ b/spec/datadog/profiling/component_spec.rb @@ -7,9 +7,10 @@ let(:profiler_setup_task) { instance_double(Datadog::Profiling::Tasks::Setup) if Datadog::Profiling.supported? } before do - # Ensure the real task never gets run (so it doesn't apply our thread patches and other extensions to our test env) + # Make sure these never get run so they doesn't apply our monkey patches to the RSpec process if Datadog::Profiling.supported? allow(Datadog::Profiling::Tasks::Setup).to receive(:new).and_return(profiler_setup_task) + allow(Datadog::Profiling::Ext::DirMonkeyPatches).to receive(:apply!).and_return(true) end end @@ -586,6 +587,34 @@ build_profiler_component end end + + describe 'dir interruption workaround' do + it 'is enabled by default' do + expect(Datadog::Profiling::Ext::DirMonkeyPatches).to receive(:apply!) + + build_profiler_component + end + + context 'when the no signals workaround is enabled' do + before { expect(described_class).to receive(:no_signals_workaround_enabled?).and_return(true) } + + it 'is not applied' do + expect(Datadog::Profiling::Ext::DirMonkeyPatches).to_not receive(:apply!) + + build_profiler_component + end + end + + context 'when the dir interruption workaround is disabled via configuration' do + before { settings.profiling.advanced.dir_interruption_workaround_enabled = false } + + it 'is not applied' do + expect(Datadog::Profiling::Ext::DirMonkeyPatches).to_not receive(:apply!) + + build_profiler_component + end + end + end end end From 577fdb0b025cc0c4989e874abaa8c6a3294a06a6 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Thu, 13 Jun 2024 17:13:26 +0100 Subject: [PATCH 10/21] Add test coverage for DirMonkeyPatches --- .../profiling/ext/dir_monkey_patches.rb | 1 - .../profiling/ext/dir_monkey_patches_spec.rb | 345 ++++++++++++++++++ 2 files changed, 345 insertions(+), 1 deletion(-) create mode 100644 spec/datadog/profiling/ext/dir_monkey_patches_spec.rb diff --git a/lib/datadog/profiling/ext/dir_monkey_patches.rb b/lib/datadog/profiling/ext/dir_monkey_patches.rb index 3eba6bc1477..5184307c027 100644 --- a/lib/datadog/profiling/ext/dir_monkey_patches.rb +++ b/lib/datadog/profiling/ext/dir_monkey_patches.rb @@ -142,7 +142,6 @@ def home(*args, &block) ruby2_keywords :home if respond_to?(:ruby2_keywords, true) end - # Same as DirMonkeyPatches, but instance methods on Dir. module DirInstanceMonkeyPatches def each(*args, &block) if block diff --git a/spec/datadog/profiling/ext/dir_monkey_patches_spec.rb b/spec/datadog/profiling/ext/dir_monkey_patches_spec.rb new file mode 100644 index 00000000000..c634baa7a06 --- /dev/null +++ b/spec/datadog/profiling/ext/dir_monkey_patches_spec.rb @@ -0,0 +1,345 @@ +require 'datadog/profiling/spec_helper' + +require 'datadog/profiling/collectors/cpu_and_wall_time_worker' + +# NOTE: Specs in this file are written so as to not leave the DirMonkeyPatches loaded into the Ruby VM after this +# test executes. They do this by only applying these monkey patches in a separate process. +RSpec.describe Datadog::Profiling::Ext::DirMonkeyPatches do + before do + skip_if_profiling_not_supported(self) + + File.open("#{temporary_directory}/file1", 'w') { |f| f.write('file1') } + File.open("#{temporary_directory}/file2", 'w') { |f| f.write('file2') } + File.open("#{temporary_directory}/file3", 'w') { |f| f.write('file3') } + + expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to_not receive(:_native_hold_interruptions) + expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to_not receive(:_native_resume_interruptions) + end + + let(:temporary_directory) { Dir.mktmpdir } + let(:temporary_files_count) { 3 } + let(:expected_hold_resume_calls_count) { 1 } + + after do + begin + FileUtils.remove_dir(temporary_directory) + rescue Errno::ENOENT => _e + # Do nothing, it's ok + end + end + + describe 'DirClassMonkeyPatches' do + describe '.[]' do + it 'matches the ruby behavior without monkey patching' do + test_with_and_without_monkey_patch do + expect(Dir['*1', '*2', base: temporary_directory, sort: true]).to eq ['file1', 'file2'] + end + end + end + + describe '.children' do + it 'matches the ruby behavior without monkey patching' do + test_with_and_without_monkey_patch do + result = Dir.children(temporary_directory, encoding: 'US-ASCII').sort + expect(result.first.encoding).to be Encoding::US_ASCII + expect(result.first).to eq 'file1' + end + end + end + + describe '.each_child' do + let(:expected_hold_resume_calls_count) { 1 + temporary_files_count } + + context 'with a block' do + it 'matches the ruby behavior without monkey patching' do + test_with_and_without_monkey_patch do + files = [] + + Dir.each_child(temporary_directory, encoding: 'UTF-8') { |it| files << it } + + expect(files).to contain_exactly('file1', 'file2', 'file3') + end + end + + it 'allows signals to arrive inside the user block' do + test_with_monkey_patch do + ran_assertion = false + + Dir.each_child(temporary_directory, encoding: 'UTF-8') do + expect_sigprof_to_be(:unblocked) + ran_assertion = true + end + + expect(ran_assertion).to be true + end + end + end + + context 'without a block' do + it 'matches the ruby behavior without monkey patching' do + test_with_and_without_monkey_patch do + expect(Dir.each_child(temporary_directory, encoding: 'UTF-8').to_a).to include('file1', 'file2', 'file3') + end + end + end + end + + describe '.empty?' do + it 'matches the ruby behavior without monkey patching' do + test_with_and_without_monkey_patch do + expect(Dir.empty?(temporary_directory)).to be false + end + end + end + + describe '.entries' do + it 'matches the ruby behavior without monkey patching' do + test_with_and_without_monkey_patch do + expect(Dir.entries(temporary_directory)).to contain_exactly('.', '..', 'file1', 'file2', 'file3') + end + end + end + + describe '.foreach' do + let(:expected_hold_resume_calls_count) { 1 + temporary_files_count + ['.', '..'].size } + + context 'with a block' do + it 'matches the ruby behavior without monkey patching' do + test_with_and_without_monkey_patch do + files = [] + + Dir.foreach(temporary_directory, encoding: 'UTF-8') { |it| files << it } + + expect(files).to contain_exactly('file1', 'file2', 'file3', '.', '..') + end + end + + it 'allows signals to arrive inside the user block' do + test_with_monkey_patch do + ran_assertion = false + + Dir.foreach(temporary_directory, encoding: 'UTF-8') do + expect_sigprof_to_be(:unblocked) + ran_assertion = true + end + + expect(ran_assertion).to be true + end + end + end + + context 'without a block' do + it 'matches the ruby behavior without monkey patching' do + test_with_and_without_monkey_patch do + expect(Dir.foreach(temporary_directory, encoding: 'UTF-8').to_a) + .to include('file1', 'file2', 'file3', '.', '..') + end + end + end + end + + describe '.glob' do + before do + File.open("#{temporary_directory}/.hidden_file1", 'w') { |f| f.write('.hidden_file1') } + end + + let(:expected_files_result) { ['.hidden_file1', 'file1', 'file2'] } + + context 'with a block' do + let(:expected_hold_resume_calls_count) { 1 + expected_files_result.size } + + it 'matches the ruby behavior without monkey patching' do + test_with_and_without_monkey_patch do + files = [] + + Dir.glob(['*1', '*2'], base: temporary_directory, flags: File::FNM_DOTMATCH, sort: true) { |it| files << it } + + expect(files).to eq(expected_files_result) + end + end + + it 'allows signals to arrive inside the user block' do + test_with_monkey_patch do + ran_assertion = false + + Dir.glob(['*1', '*2'], base: temporary_directory, flags: File::FNM_DOTMATCH, sort: true) do + expect_sigprof_to_be(:unblocked) + ran_assertion = true + end + + expect(ran_assertion).to be true + end + end + end + + context 'without a block' do + # You may be wondering why this one has a call count of 1 when for instance .foreach and each_child have a call + # count of > 1. The difference is the "without a block" versions of those calls **return an enumerator** and + # the enumerator then just calls the block version when executed. + # + # This is not what happens with glob -- glob never returns an enumerator, so the "without a block" version + # does not get turned into a "with a block" call. + let(:expected_hold_resume_calls_count) { 1 } + + it 'matches the ruby behavior without monkey patching' do + test_with_and_without_monkey_patch do + expect(Dir.glob(['*1', '*2'], base: temporary_directory, flags: File::FNM_DOTMATCH, sort: true)) + .to eq(expected_files_result) + end + end + end + end + + describe '.home' do + it 'matches the ruby behavior without monkey patching' do + test_with_and_without_monkey_patch do + expect(Dir.home).to start_with('/') + end + end + end + end + + describe 'DirInstanceMonkeyPatches' do + let(:dir) { Dir.new(temporary_directory) } + + describe '#each' do + let(:expected_hold_resume_calls_count) { 1 + temporary_files_count + ['.', '..'].size } + + context 'with a block' do + it 'matches the ruby behavior without monkey patching' do + test_with_and_without_monkey_patch do + files = [] + + dir.each { |it| files << it } + + expect(files).to contain_exactly('file1', 'file2', 'file3', '.', '..') + end + end + + it 'allows signals to arrive inside the user block' do + test_with_monkey_patch do + ran_assertion = false + + dir.each do + expect_sigprof_to_be(:unblocked) + ran_assertion = true + end + + expect(ran_assertion).to be true + end + end + end + + context 'without a block' do + it 'matches the ruby behavior without monkey patching' do + test_with_and_without_monkey_patch do + expect(dir.each.to_a).to contain_exactly('file1', 'file2', 'file3', '.', '..') + end + end + end + end + + describe '#each_child' do + before { skip('API not available on Ruby 2.5') if RUBY_VERSION.start_with?('2.5.') } + + let(:expected_hold_resume_calls_count) { 1 + temporary_files_count } + + context 'with a block' do + it 'matches the ruby behavior without monkey patching' do + test_with_and_without_monkey_patch do + files = [] + + dir.each_child { |it| files << it } + + expect(files).to contain_exactly('file1', 'file2', 'file3') + end + end + + it 'allows signals to arrive inside the user block' do + test_with_monkey_patch do + ran_assertion = false + + dir.each_child do + expect_sigprof_to_be(:unblocked) + ran_assertion = true + end + + expect(ran_assertion).to be true + end + end + end + + context 'without a block' do + it 'matches the ruby behavior without monkey patching' do + test_with_and_without_monkey_patch do + expect(dir.each_child.to_a).to include('file1', 'file2', 'file3') + end + end + end + end + + describe '#children' do + before { skip('API not available on Ruby 2.5') if RUBY_VERSION.start_with?('2.5.') } + + it 'matches the ruby behavior without monkey patching' do + test_with_and_without_monkey_patch do + expect(dir.children).to contain_exactly('file1', 'file2', 'file3') + end + end + end + + describe '#tell' do + it 'matches the ruby behavior without monkey patching' do + test_with_and_without_monkey_patch do + expect(dir.tell).to be_a_kind_of(Integer) + end + end + end + + describe '#pos' do + it 'matches the ruby behavior without monkey patching' do + test_with_and_without_monkey_patch do + expect(dir.pos).to be_a_kind_of(Integer) + end + end + end + end + + def test_with_and_without_monkey_patch(&testcase) + yield + test_with_monkey_patch(&testcase) + end + + def test_with_monkey_patch(in_fork: true, &testcase) + wrapped_testcase = proc do + RSpec::Mocks.space.proxy_for(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).reset + + allow(Datadog::Profiling::Collectors::CpuAndWallTimeWorker) + .to receive(:_native_hold_interruptions).and_call_original + allow(Datadog::Profiling::Collectors::CpuAndWallTimeWorker) + .to receive(:_native_resume_interruptions).and_call_original + + Datadog::Profiling::Ext::DirMonkeyPatches.apply! + yield + + expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker) + .to have_received(:_native_hold_interruptions).exactly(expected_hold_resume_calls_count).times + expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker) + .to have_received(:_native_resume_interruptions).exactly(expected_hold_resume_calls_count).times + end + + if in_fork + expect_in_fork(&wrapped_testcase) + else + wrapped_testcase.call + end + end + + def expect_sigprof_to_be(state) + raise ArgumentError unless [:blocked, :unblocked].include?(state) + + expect( + Datadog::Profiling::Collectors::CpuAndWallTimeWorker::Testing._native_is_sigprof_blocked_in_current_thread + ).to be(state == :blocked), "Sigprof was expected to be #{state}, but it's actually not" + end +end From 43a3e5ee5b4d79630a1368cc14f7a6ffe8f556b3 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Fri, 14 Jun 2024 12:17:50 +0100 Subject: [PATCH 11/21] Add comments to make Rubocop happy --- lib/datadog/profiling/component.rb | 2 +- lib/datadog/profiling/ext/dir_monkey_patches.rb | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/datadog/profiling/component.rb b/lib/datadog/profiling/component.rb index d6504a7582a..ed753e2dab4 100644 --- a/lib/datadog/profiling/component.rb +++ b/lib/datadog/profiling/component.rb @@ -453,7 +453,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) private_class_method def self.dir_interruption_workaround_enabled?(settings, no_signals_workaround_enabled) return false if no_signals_workaround_enabled - # Note: In the future this method will evolve to check for Ruby versions affected and not apply the workaround + # NOTE: In the future this method will evolve to check for Ruby versions affected and not apply the workaround # when it's not needed but currently all known Ruby versions are affected. settings.profiling.advanced.dir_interruption_workaround_enabled diff --git a/lib/datadog/profiling/ext/dir_monkey_patches.rb b/lib/datadog/profiling/ext/dir_monkey_patches.rb index 5184307c027..243acfb2c6f 100644 --- a/lib/datadog/profiling/ext/dir_monkey_patches.rb +++ b/lib/datadog/profiling/ext/dir_monkey_patches.rb @@ -25,6 +25,7 @@ def self.apply! end end + # Monkey patches for Dir.singleton_class. See DirMonkeyPatches above for more details. module DirClassMonkeyPatches def [](*args, &block) Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions @@ -142,6 +143,7 @@ def home(*args, &block) ruby2_keywords :home if respond_to?(:ruby2_keywords, true) end + # Monkey patches for Dir. See DirMonkeyPatches above for more details. module DirInstanceMonkeyPatches def each(*args, &block) if block From 9152470ad34b9abaf05ed4a0de4c0f6369fd9da3 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Fri, 14 Jun 2024 14:46:11 +0100 Subject: [PATCH 12/21] Add nice description to dir interruption workaround setting ...that mentions https://github.com/DataDog/dd-trace-rb/issues/3450 . --- lib/datadog/core/configuration/settings.rb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/datadog/core/configuration/settings.rb b/lib/datadog/core/configuration/settings.rb index 6b0d3917c6c..71e43d89f8d 100644 --- a/lib/datadog/core/configuration/settings.rb +++ b/lib/datadog/core/configuration/settings.rb @@ -397,7 +397,13 @@ def initialize(*_) end end - # TODO: description + # The profiler gathers data by sending `SIGPROF` unix signals to Ruby application threads. + # + # We've discovered that this can trigger a bug in a number of Ruby APIs in the `Dir` class, as + # described in https://github.com/DataDog/dd-trace-rb/issues/3450 . This workaround prevents the issue + # from happening by monkey patching the affected APIs. + # + # (In the future, once a fix lands upstream, we'll disable this workaround for Rubies that don't need it) # # @default `DD_PROFILING_DIR_INTERRUPTION_WORKAROUND_ENABLED` environment variable as a boolean, # otherwise `true` From b30ca799eff15e733b6b86029557f30a92dae9bf Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Fri, 14 Jun 2024 14:53:06 +0100 Subject: [PATCH 13/21] Tweak spec for compatibility with Ruby 2.7 Looks like the `sort:` was added later, and it wasn't particularly critical to what we were testing so I chose to omit it. --- .../datadog/profiling/ext/dir_monkey_patches_spec.rb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/spec/datadog/profiling/ext/dir_monkey_patches_spec.rb b/spec/datadog/profiling/ext/dir_monkey_patches_spec.rb index c634baa7a06..c6cc893bf10 100644 --- a/spec/datadog/profiling/ext/dir_monkey_patches_spec.rb +++ b/spec/datadog/profiling/ext/dir_monkey_patches_spec.rb @@ -32,7 +32,7 @@ describe '.[]' do it 'matches the ruby behavior without monkey patching' do test_with_and_without_monkey_patch do - expect(Dir['*1', '*2', base: temporary_directory, sort: true]).to eq ['file1', 'file2'] + expect(Dir['*1', '*2', base: temporary_directory]).to contain_exactly('file1', 'file2') end end end @@ -152,9 +152,9 @@ test_with_and_without_monkey_patch do files = [] - Dir.glob(['*1', '*2'], base: temporary_directory, flags: File::FNM_DOTMATCH, sort: true) { |it| files << it } + Dir.glob(['*1', '*2'], base: temporary_directory, flags: File::FNM_DOTMATCH) { |it| files << it } - expect(files).to eq(expected_files_result) + expect(files).to contain_exactly(*expected_files_result) end end @@ -162,7 +162,7 @@ test_with_monkey_patch do ran_assertion = false - Dir.glob(['*1', '*2'], base: temporary_directory, flags: File::FNM_DOTMATCH, sort: true) do + Dir.glob(['*1', '*2'], base: temporary_directory, flags: File::FNM_DOTMATCH) do expect_sigprof_to_be(:unblocked) ran_assertion = true end @@ -183,8 +183,8 @@ it 'matches the ruby behavior without monkey patching' do test_with_and_without_monkey_patch do - expect(Dir.glob(['*1', '*2'], base: temporary_directory, flags: File::FNM_DOTMATCH, sort: true)) - .to eq(expected_files_result) + expect(Dir.glob(['*1', '*2'], base: temporary_directory, flags: File::FNM_DOTMATCH)) + .to contain_exactly(*expected_files_result) end end end From d6eebe9810fc41622040f620e11326ded34eceb8 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Fri, 14 Jun 2024 14:56:13 +0100 Subject: [PATCH 14/21] Load `DirMonkeyPatches` before referencing it to avoid failing on JRuby --- spec/datadog/profiling/ext/dir_monkey_patches_spec.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/spec/datadog/profiling/ext/dir_monkey_patches_spec.rb b/spec/datadog/profiling/ext/dir_monkey_patches_spec.rb index c6cc893bf10..5777f9eb4b3 100644 --- a/spec/datadog/profiling/ext/dir_monkey_patches_spec.rb +++ b/spec/datadog/profiling/ext/dir_monkey_patches_spec.rb @@ -1,6 +1,7 @@ require 'datadog/profiling/spec_helper' require 'datadog/profiling/collectors/cpu_and_wall_time_worker' +require 'datadog/profiling/ext/dir_monkey_patches' # NOTE: Specs in this file are written so as to not leave the DirMonkeyPatches loaded into the Ruby VM after this # test executes. They do this by only applying these monkey patches in a separate process. From 30601a25a2d8dc79599425d3a9194f51b68e082d Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Fri, 14 Jun 2024 15:37:08 +0100 Subject: [PATCH 15/21] Tweak spec to remove assumption that no signals workaround is disabled by default This broke Ruby 2.5, which enables it by default. --- spec/datadog/profiling/component_spec.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/spec/datadog/profiling/component_spec.rb b/spec/datadog/profiling/component_spec.rb index d0ff00ac63a..0ac649573fd 100644 --- a/spec/datadog/profiling/component_spec.rb +++ b/spec/datadog/profiling/component_spec.rb @@ -589,6 +589,10 @@ end describe 'dir interruption workaround' do + let(:no_signals_workaround_enabled) { false } + + before { expect(described_class).to receive(:no_signals_workaround_enabled?).and_return(no_signals_workaround_enabled) } + it 'is enabled by default' do expect(Datadog::Profiling::Ext::DirMonkeyPatches).to receive(:apply!) @@ -596,7 +600,7 @@ end context 'when the no signals workaround is enabled' do - before { expect(described_class).to receive(:no_signals_workaround_enabled?).and_return(true) } + let(:no_signals_workaround_enabled) { true } it 'is not applied' do expect(Datadog::Profiling::Ext::DirMonkeyPatches).to_not receive(:apply!) From 9ad82fe6797923ed09b15978e9e56bf176724aee Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Fri, 14 Jun 2024 15:43:42 +0100 Subject: [PATCH 16/21] Make rubocop happy --- spec/datadog/profiling/component_spec.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/spec/datadog/profiling/component_spec.rb b/spec/datadog/profiling/component_spec.rb index 0ac649573fd..4523dfd4082 100644 --- a/spec/datadog/profiling/component_spec.rb +++ b/spec/datadog/profiling/component_spec.rb @@ -591,7 +591,9 @@ describe 'dir interruption workaround' do let(:no_signals_workaround_enabled) { false } - before { expect(described_class).to receive(:no_signals_workaround_enabled?).and_return(no_signals_workaround_enabled) } + before do + expect(described_class).to receive(:no_signals_workaround_enabled?).and_return(no_signals_workaround_enabled) + end it 'is enabled by default' do expect(Datadog::Profiling::Ext::DirMonkeyPatches).to receive(:apply!) From 8099c94bee0519da8d708821d46feef7449bfe48 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 18 Jun 2024 11:41:17 +0100 Subject: [PATCH 17/21] Tweak naming of functions to clarify that it's signals being held The interruptions part is just a consequence of signal delivery. --- .../profiler_hold_resume_interruptions.rb | 4 +- .../collectors_cpu_and_wall_time_worker.c | 12 +-- .../profiling/ext/dir_monkey_patches.rb | 76 +++++++++---------- .../collectors/cpu_and_wall_time_worker.rbs | 4 +- .../cpu_and_wall_time_worker_spec.rb | 6 +- .../profiling/ext/dir_monkey_patches_spec.rb | 12 +-- 6 files changed, 57 insertions(+), 57 deletions(-) diff --git a/benchmarks/profiler_hold_resume_interruptions.rb b/benchmarks/profiler_hold_resume_interruptions.rb index 657fbe4262c..02f22dfca06 100644 --- a/benchmarks/profiler_hold_resume_interruptions.rb +++ b/benchmarks/profiler_hold_resume_interruptions.rb @@ -26,8 +26,8 @@ def run_benchmark ) x.report("hold / resume") do - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end x.save! 'profiler_hold_resume_interruptions-results.json' unless VALIDATE_BENCHMARK_MODE diff --git a/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c index 80637899340..dff70397c16 100644 --- a/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +++ b/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c @@ -222,8 +222,8 @@ static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self); static VALUE rescued_sample_allocation(VALUE tracepoint_data); static void delayed_error(struct cpu_and_wall_time_worker_state *state, const char *error); static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VALUE error_msg); -static VALUE _native_hold_interruptions(DDTRACE_UNUSED VALUE self); -static VALUE _native_resume_interruptions(DDTRACE_UNUSED VALUE self); +static VALUE _native_hold_signals(DDTRACE_UNUSED VALUE self); +static VALUE _native_resume_signals(DDTRACE_UNUSED VALUE self); // Note on sampler global state safety: // @@ -287,8 +287,8 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) { rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_allocation_count", _native_allocation_count, 0); rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_is_running?", _native_is_running, 1); rb_define_singleton_method(testing_module, "_native_current_sigprof_signal_handler", _native_current_sigprof_signal_handler, 0); - rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_hold_interruptions", _native_hold_interruptions, 0); - rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_resume_interruptions", _native_resume_interruptions, 0); + rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_hold_signals", _native_hold_signals, 0); + rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_resume_signals", _native_resume_signals, 0); // TODO: Remove `_native_is_running` from `testing_module` (should be in class) once `prof-correctness` has been updated to not need it rb_define_singleton_method(testing_module, "_native_is_running?", _native_is_running, 1); rb_define_singleton_method(testing_module, "_native_install_testing_signal_handler", _native_install_testing_signal_handler, 0); @@ -1166,14 +1166,14 @@ static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VA // Masks SIGPROF interruptions for the current thread. Please don't use this -- you may end up with incomplete // profiling data. -static VALUE _native_hold_interruptions(DDTRACE_UNUSED VALUE self) { +static VALUE _native_hold_signals(DDTRACE_UNUSED VALUE self) { block_sigprof_signal_handler_from_running_in_current_thread(); return Qtrue; } // Unmasks SIGPROF interruptions for the current thread. If there's a pending sample, it'll be triggered inside this // method. -static VALUE _native_resume_interruptions(DDTRACE_UNUSED VALUE self) { +static VALUE _native_resume_signals(DDTRACE_UNUSED VALUE self) { unblock_sigprof_signal_handler_from_running_in_current_thread(); return Qtrue; } diff --git a/lib/datadog/profiling/ext/dir_monkey_patches.rb b/lib/datadog/profiling/ext/dir_monkey_patches.rb index 243acfb2c6f..b6e9279ce0b 100644 --- a/lib/datadog/profiling/ext/dir_monkey_patches.rb +++ b/lib/datadog/profiling/ext/dir_monkey_patches.rb @@ -28,18 +28,18 @@ def self.apply! # Monkey patches for Dir.singleton_class. See DirMonkeyPatches above for more details. module DirClassMonkeyPatches def [](*args, &block) - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals super ensure - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end ruby2_keywords :[] if respond_to?(:ruby2_keywords, true) def children(*args, &block) - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals super ensure - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end ruby2_keywords :children if respond_to?(:ruby2_keywords, true) @@ -47,17 +47,17 @@ def each_child(*args, &block) if block begin # <-- Begin critical region - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals super do |entry_name| - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals # <-- We're safe now while running customer code yield entry_name # <-- We'll go back to the Dir internals, critical region again - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals end ensure # <-- End critical region - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end else # This returns an enumerator. We don't want/need to intercede here, the enumerator will eventually call the @@ -68,18 +68,18 @@ def each_child(*args, &block) ruby2_keywords :each_child if respond_to?(:ruby2_keywords, true) def empty?(*args, &block) - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals super ensure - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end ruby2_keywords :empty? if respond_to?(:ruby2_keywords, true) def entries(*args, &block) - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals super ensure - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end ruby2_keywords :entries if respond_to?(:ruby2_keywords, true) @@ -87,17 +87,17 @@ def foreach(*args, &block) if block begin # <-- Begin critical region - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals super do |entry_name| - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals # <-- We're safe now while running customer code yield entry_name # <-- We'll go back to the Dir internals, critical region again - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals end ensure # <-- End critical region - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end else # This returns an enumerator. We don't want/need to intercede here, the enumerator will eventually call the @@ -111,34 +111,34 @@ def glob(*args, &block) if block begin # <-- Begin critical region - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals super do |entry_name| - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals # <-- We're safe now while running customer code yield entry_name # <-- We'll go back to the Dir internals, critical region again - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals end ensure # <-- End critical region - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end else begin - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals super ensure - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end end end ruby2_keywords :glob if respond_to?(:ruby2_keywords, true) def home(*args, &block) - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals super ensure - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end ruby2_keywords :home if respond_to?(:ruby2_keywords, true) end @@ -149,16 +149,16 @@ def each(*args, &block) if block begin # <-- Begin critical region - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals super do |entry_name| - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals # <-- We're safe now while running customer code yield entry_name # <-- We'll go back to the Dir internals, critical region again - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals end ensure - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions # <-- End critical region + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals # <-- End critical region end else # This returns an enumerator. We don't want/need to intercede here, the enumerator will eventually call the @@ -173,17 +173,17 @@ def each_child(*args, &block) if block begin # <-- Begin critical region - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals super do |entry_name| - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals # <-- We're safe now while running customer code yield entry_name # <-- We'll go back to the Dir internals, critical region again - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals end ensure # <-- End critical region - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end else # This returns an enumerator. We don't want/need to intercede here, the enumerator will eventually call the @@ -194,27 +194,27 @@ def each_child(*args, &block) ruby2_keywords :each_child if respond_to?(:ruby2_keywords, true) def children(*args, &block) - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals super ensure - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end ruby2_keywords :children if respond_to?(:ruby2_keywords, true) end def tell(*args, &block) - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals super ensure - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end ruby2_keywords :tell if respond_to?(:ruby2_keywords, true) def pos(*args, &block) - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals super ensure - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_interruptions + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end ruby2_keywords :pos if respond_to?(:ruby2_keywords, true) end diff --git a/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs b/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs index 31d4992f579..bea60e8971b 100644 --- a/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs +++ b/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs @@ -44,8 +44,8 @@ module Datadog def self._native_is_running?: (CpuAndWallTimeWorker self_instance) -> bool def self._native_allocation_count: () -> ::Integer? def self._native_sampling_loop: (CpuAndWallTimeWorker self_instance) -> void - def self._native_hold_interruptions: () -> void - def self._native_resume_interruptions: () -> void + def self._native_hold_signals: () -> void + def self._native_resume_signals: () -> void def wait_until_running: (timeout_seconds: ::Integer?) -> true end diff --git a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb index 121f9c9b128..633e1271ac6 100644 --- a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb +++ b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb @@ -1176,15 +1176,15 @@ end end - describe '._native_hold_interruptions and _native_resume_interruptions' do + describe '._native_hold_signals and _native_resume_signals' do it 'blocks/unblocks interruptions for the current thread' do expect(described_class::Testing._native_is_sigprof_blocked_in_current_thread).to be false - described_class._native_hold_interruptions + described_class._native_hold_signals expect(described_class::Testing._native_is_sigprof_blocked_in_current_thread).to be true - described_class._native_resume_interruptions + described_class._native_resume_signals expect(described_class::Testing._native_is_sigprof_blocked_in_current_thread).to be false end diff --git a/spec/datadog/profiling/ext/dir_monkey_patches_spec.rb b/spec/datadog/profiling/ext/dir_monkey_patches_spec.rb index 5777f9eb4b3..4af8c55f73c 100644 --- a/spec/datadog/profiling/ext/dir_monkey_patches_spec.rb +++ b/spec/datadog/profiling/ext/dir_monkey_patches_spec.rb @@ -13,8 +13,8 @@ File.open("#{temporary_directory}/file2", 'w') { |f| f.write('file2') } File.open("#{temporary_directory}/file3", 'w') { |f| f.write('file3') } - expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to_not receive(:_native_hold_interruptions) - expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to_not receive(:_native_resume_interruptions) + expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to_not receive(:_native_hold_signals) + expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to_not receive(:_native_resume_signals) end let(:temporary_directory) { Dir.mktmpdir } @@ -316,17 +316,17 @@ def test_with_monkey_patch(in_fork: true, &testcase) RSpec::Mocks.space.proxy_for(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).reset allow(Datadog::Profiling::Collectors::CpuAndWallTimeWorker) - .to receive(:_native_hold_interruptions).and_call_original + .to receive(:_native_hold_signals).and_call_original allow(Datadog::Profiling::Collectors::CpuAndWallTimeWorker) - .to receive(:_native_resume_interruptions).and_call_original + .to receive(:_native_resume_signals).and_call_original Datadog::Profiling::Ext::DirMonkeyPatches.apply! yield expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker) - .to have_received(:_native_hold_interruptions).exactly(expected_hold_resume_calls_count).times + .to have_received(:_native_hold_signals).exactly(expected_hold_resume_calls_count).times expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker) - .to have_received(:_native_resume_interruptions).exactly(expected_hold_resume_calls_count).times + .to have_received(:_native_resume_signals).exactly(expected_hold_resume_calls_count).times end if in_fork From ba0d6b511c95098f2d4d336c788013b0295bc4ae Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 18 Jun 2024 14:56:53 +0100 Subject: [PATCH 18/21] Avoid ruby2_keywords and instead have variants for Ruby 2 and 3 Loic suggested this may provide better compatibility, and it seems to make sense given we're monkey patching a quite core Ruby class. This diff is way easier to review without whitespace. What I did was copy/paste the module, remove the `ruby2_keywords`, add `**kwargs` to the Ruby 3 variants and clean up any things that don't make sense for Ruby 3 (e.g. testing for 2.5 for some methods). --- .../profiling/ext/dir_monkey_patches.rb | 423 ++++++++++++------ .../profiling/ext/dir_monkey_patches.rbs | 11 +- 2 files changed, 300 insertions(+), 134 deletions(-) diff --git a/lib/datadog/profiling/ext/dir_monkey_patches.rb b/lib/datadog/profiling/ext/dir_monkey_patches.rb index b6e9279ce0b..9af75c406c8 100644 --- a/lib/datadog/profiling/ext/dir_monkey_patches.rb +++ b/lib/datadog/profiling/ext/dir_monkey_patches.rb @@ -2,6 +2,7 @@ module Datadog module Profiling + # Monkey patches needed for profiler features and compatibility module Ext # All Ruby versions as of this writing have bugs in the dir class implementation, causing issues such as # https://github.com/DataDog/dd-trace-rb/issues/3450 . @@ -25,151 +26,325 @@ def self.apply! end end - # Monkey patches for Dir.singleton_class. See DirMonkeyPatches above for more details. - module DirClassMonkeyPatches - def [](*args, &block) - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals - super - ensure - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals - end - ruby2_keywords :[] if respond_to?(:ruby2_keywords, true) + if RUBY_VERSION.start_with?('2.') + # Monkey patches for Dir.singleton_class (Ruby 2 version). See DirMonkeyPatches above for more details. + module DirClassMonkeyPatches + def [](*args, &block) + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + end - def children(*args, &block) - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals - super - ensure - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals - end - ruby2_keywords :children if respond_to?(:ruby2_keywords, true) + def children(*args, &block) + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + end - def each_child(*args, &block) - if block - begin - # <-- Begin critical region - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals - super do |entry_name| - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals - # <-- We're safe now while running customer code - yield entry_name - # <-- We'll go back to the Dir internals, critical region again + def each_child(*args, &block) + if block + begin + # <-- Begin critical region Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + super do |entry_name| + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + # <-- We're safe now while running customer code + yield entry_name + # <-- We'll go back to the Dir internals, critical region again + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + end + ensure + # <-- End critical region + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end - ensure - # <-- End critical region - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + else + # This returns an enumerator. We don't want/need to intercede here, the enumerator will eventually call the + # other branch once it gets going. + super end - else - # This returns an enumerator. We don't want/need to intercede here, the enumerator will eventually call the - # other branch once it gets going. + end + + def empty?(*args, &block) + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end - end - ruby2_keywords :each_child if respond_to?(:ruby2_keywords, true) - def empty?(*args, &block) - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals - super - ensure - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals - end - ruby2_keywords :empty? if respond_to?(:ruby2_keywords, true) + def entries(*args, &block) + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + end - def entries(*args, &block) - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals - super - ensure - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals - end - ruby2_keywords :entries if respond_to?(:ruby2_keywords, true) + def foreach(*args, &block) + if block + begin + # <-- Begin critical region + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + super do |entry_name| + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + # <-- We're safe now while running customer code + yield entry_name + # <-- We'll go back to the Dir internals, critical region again + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + end + ensure + # <-- End critical region + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + end + else + # This returns an enumerator. We don't want/need to intercede here, the enumerator will eventually call the + # other branch once it gets going. + super + end + end - def foreach(*args, &block) - if block - begin - # <-- Begin critical region - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals - super do |entry_name| + def glob(*args, &block) + if block + begin + # <-- Begin critical region + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + super do |entry_name| + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + # <-- We're safe now while running customer code + yield entry_name + # <-- We'll go back to the Dir internals, critical region again + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + end + ensure + # <-- End critical region Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals - # <-- We're safe now while running customer code - yield entry_name - # <-- We'll go back to the Dir internals, critical region again + end + else + begin Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end - ensure - # <-- End critical region - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end - else - # This returns an enumerator. We don't want/need to intercede here, the enumerator will eventually call the - # other branch once it gets going. + end + + def home(*args, &block) + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end end - ruby2_keywords :foreach if respond_to?(:ruby2_keywords, true) + else + # Monkey patches for Dir.singleton_class (Ruby 3 version). See DirMonkeyPatches above for more details. + module DirClassMonkeyPatches + def [](*args, **kwargs, &block) + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + end - def glob(*args, &block) - if block - begin - # <-- Begin critical region - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals - super do |entry_name| - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals - # <-- We're safe now while running customer code - yield entry_name - # <-- We'll go back to the Dir internals, critical region again + def children(*args, **kwargs, &block) + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + end + + def each_child(*args, **kwargs, &block) + if block + begin + # <-- Begin critical region Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + super do |entry_name| + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + # <-- We're safe now while running customer code + yield entry_name + # <-- We'll go back to the Dir internals, critical region again + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + end + ensure + # <-- End critical region + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end - ensure - # <-- End critical region - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + else + # This returns an enumerator. We don't want/need to intercede here, the enumerator will eventually call the + # other branch once it gets going. + super end - else - begin - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + end + + def empty?(*args, **kwargs, &block) + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + end + + def entries(*args, **kwargs, &block) + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + end + + def foreach(*args, **kwargs, &block) + if block + begin + # <-- Begin critical region + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + super do |entry_name| + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + # <-- We're safe now while running customer code + yield entry_name + # <-- We'll go back to the Dir internals, critical region again + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + end + ensure + # <-- End critical region + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + end + else + # This returns an enumerator. We don't want/need to intercede here, the enumerator will eventually call the + # other branch once it gets going. super - ensure - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end end - end - ruby2_keywords :glob if respond_to?(:ruby2_keywords, true) - def home(*args, &block) - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals - super - ensure - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + def glob(*args, **kwargs, &block) + if block + begin + # <-- Begin critical region + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + super do |entry_name| + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + # <-- We're safe now while running customer code + yield entry_name + # <-- We'll go back to the Dir internals, critical region again + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + end + ensure + # <-- End critical region + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + end + else + begin + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + end + end + end + + def home(*args, **kwargs, &block) + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + end end - ruby2_keywords :home if respond_to?(:ruby2_keywords, true) end - # Monkey patches for Dir. See DirMonkeyPatches above for more details. - module DirInstanceMonkeyPatches - def each(*args, &block) - if block - begin - # <-- Begin critical region - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals - super do |entry_name| - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals - # <-- We're safe now while running customer code - yield entry_name - # <-- We'll go back to the Dir internals, critical region again + if RUBY_VERSION.start_with?('2.') + # Monkey patches for Dir (Ruby 2 version). See DirMonkeyPatches above for more details. + module DirInstanceMonkeyPatches + def each(*args, &block) + if block + begin + # <-- Begin critical region Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + super do |entry_name| + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + # <-- We're safe now while running customer code + yield entry_name + # <-- We'll go back to the Dir internals, critical region again + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + end + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals # <-- End critical region + end + else + # This returns an enumerator. We don't want/need to intercede here, the enumerator will eventually call the + # other branch once it gets going. + super + end + end + + unless RUBY_VERSION.start_with?('2.5.') # This is Ruby 2.6+ + def each_child(*args, &block) + if block + begin + # <-- Begin critical region + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + super do |entry_name| + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + # <-- We're safe now while running customer code + yield entry_name + # <-- We'll go back to the Dir internals, critical region again + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + end + ensure + # <-- End critical region + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + end + else + # This returns an enumerator. We don't want/need to intercede here, the enumerator will eventually call the + # other branch once it gets going. + super end + end + + def children(*args, &block) + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + super ensure - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals # <-- End critical region + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end - else - # This returns an enumerator. We don't want/need to intercede here, the enumerator will eventually call the - # other branch once it gets going. + end + + def tell(*args, &block) + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + end + + def pos(*args, &block) + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end end - ruby2_keywords :each if respond_to?(:ruby2_keywords, true) + else + # Monkey patches for Dir (Ruby 3 version). See DirMonkeyPatches above for more details. + module DirInstanceMonkeyPatches + def each(*args, **kwargs, &block) + if block + begin + # <-- Begin critical region + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + super do |entry_name| + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + # <-- We're safe now while running customer code + yield entry_name + # <-- We'll go back to the Dir internals, critical region again + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + end + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals # <-- End critical region + end + else + # This returns an enumerator. We don't want/need to intercede here, the enumerator will eventually call the + # other branch once it gets going. + super + end + end - unless RUBY_VERSION.start_with?('2.5.') # This is Ruby 2.6+ - def each_child(*args, &block) + def each_child(*args, **kwargs, &block) if block begin # <-- Begin critical region @@ -191,32 +366,28 @@ def each_child(*args, &block) super end end - ruby2_keywords :each_child if respond_to?(:ruby2_keywords, true) - def children(*args, &block) + def children(*args, **kwargs, &block) Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals super ensure Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end - ruby2_keywords :children if respond_to?(:ruby2_keywords, true) - end - def tell(*args, &block) - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals - super - ensure - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals - end - ruby2_keywords :tell if respond_to?(:ruby2_keywords, true) + def tell(*args, **kwargs, &block) + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + end - def pos(*args, &block) - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals - super - ensure - Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + def pos(*args, **kwargs, &block) + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_hold_signals + super + ensure + Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals + end end - ruby2_keywords :pos if respond_to?(:ruby2_keywords, true) end end end diff --git a/sig/datadog/profiling/ext/dir_monkey_patches.rbs b/sig/datadog/profiling/ext/dir_monkey_patches.rbs index 81e98dc8822..e6cb306dbc0 100644 --- a/sig/datadog/profiling/ext/dir_monkey_patches.rbs +++ b/sig/datadog/profiling/ext/dir_monkey_patches.rbs @@ -1,11 +1,3 @@ -# TODO: @ivoanjo As of June 2024 upstream rbs signatures do not include this method on module and even one of their -# tests adds this directly -# ( https://github.com/ruby/rbs/blob/28849d4522ba18f45d4f6edfca89a515a4816373/sig/unit_test/spy.rbs#L26C1-L28C4 ) -# so I'm taking the same approach. -class Module - def ruby2_keywords: (*Symbol) -> void -end - module Datadog module Profiling module Ext @@ -16,6 +8,9 @@ module Datadog # The signatures below are somehow inspired on # https://github.com/ruby/rbs/blob/28849d4522ba18f45d4f6edfca89a515a4816373/core/dir.rbs + # NOTE: Because we have variants for Ruby 2.x and 3.x of the code, there's a bunch of warnings from steep about + # that ("Unknown variable: (kwrestarg :kwargs)"). Any suggestions on how to clean that up are welcome :/ + module DirClassMonkeyPatches def []: (*untyped) -> Array[String] def children: (*untyped) -> Array[String] From a6366f9297ed23e813ffdc89e8f8970e62955266 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Wed, 19 Jun 2024 09:21:10 +0100 Subject: [PATCH 19/21] Add test to make sure we don't forget to register new profiler benchmarks ...which I totally was almost forgetting. --- spec/datadog/profiling/validate_benchmarks_spec.rb | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/spec/datadog/profiling/validate_benchmarks_spec.rb b/spec/datadog/profiling/validate_benchmarks_spec.rb index bec1576c986..5e3b123fb7f 100644 --- a/spec/datadog/profiling/validate_benchmarks_spec.rb +++ b/spec/datadog/profiling/validate_benchmarks_spec.rb @@ -9,15 +9,24 @@ end end - [ + benchmarks_to_validate = [ 'profiler_sample_loop_v2', 'profiler_http_transport', 'profiler_sample_serialize', 'profiler_memory_sample_serialize', 'profiler_gc' - ].each do |benchmark| + ].freeze + + benchmarks_to_validate.each do |benchmark| describe benchmark do it('runs without raising errors') { expect_in_fork { load "./benchmarks/#{benchmark}.rb" } } end end + + # This test validates that we don't forget to add new benchmarks to benchmarks_to_validate + it 'tests all expected benchmarks in the benchmarks folder' do + all_benchmarks = Dir['./benchmarks/profiler_*'].map { |it| it.gsub('./benchmarks/', '').gsub('.rb', '') } + + expect(benchmarks_to_validate).to contain_exactly(*all_benchmarks) + end end From 3501237819f08077eec7780fb1670b60a3b280d7 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Wed, 19 Jun 2024 09:22:12 +0100 Subject: [PATCH 20/21] Add new benchmark to validate_benchmarks_spec --- spec/datadog/profiling/validate_benchmarks_spec.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spec/datadog/profiling/validate_benchmarks_spec.rb b/spec/datadog/profiling/validate_benchmarks_spec.rb index 5e3b123fb7f..74cad9583bb 100644 --- a/spec/datadog/profiling/validate_benchmarks_spec.rb +++ b/spec/datadog/profiling/validate_benchmarks_spec.rb @@ -14,7 +14,8 @@ 'profiler_http_transport', 'profiler_sample_serialize', 'profiler_memory_sample_serialize', - 'profiler_gc' + 'profiler_gc', + 'profiler_hold_resume_interruptions', ].freeze benchmarks_to_validate.each do |benchmark| From 259e134092eae57cb2ccbea028adc8e8d0fc7a19 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 2 Jul 2024 09:41:10 +0100 Subject: [PATCH 21/21] Add clarification for expected behavior of yield --- lib/datadog/profiling/ext/dir_monkey_patches.rb | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/lib/datadog/profiling/ext/dir_monkey_patches.rb b/lib/datadog/profiling/ext/dir_monkey_patches.rb index 9af75c406c8..efc34194055 100644 --- a/lib/datadog/profiling/ext/dir_monkey_patches.rb +++ b/lib/datadog/profiling/ext/dir_monkey_patches.rb @@ -6,6 +6,7 @@ module Profiling module Ext # All Ruby versions as of this writing have bugs in the dir class implementation, causing issues such as # https://github.com/DataDog/dd-trace-rb/issues/3450 . + # See also https://bugs.ruby-lang.org/issues/20586 for more details. # # This monkey patch for the Ruby `Dir` class works around these bugs for affected Ruby versions by temporarily # blocking the profiler from interrupting system calls. @@ -43,6 +44,12 @@ def children(*args, &block) Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end + # NOTE: When wrapping methods that yield, it's OK if the `yield` raises an exception while signals are + # enabled. This is because: + # * We can call `_native_resume_signals` many times in a row, both because it's idempotent, as well as it's + # very low overhead (see benchmarks/profiler_hold_resume_interruptions.rb) + # * When an exception is being raised, the iteration will stop anyway, so there's no longer a concern of a + # signal causing Ruby to return an incorrect value def each_child(*args, &block) if block begin @@ -80,6 +87,7 @@ def entries(*args, &block) Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end + # See note on methods that yield above. def foreach(*args, &block) if block begin @@ -103,6 +111,7 @@ def foreach(*args, &block) end end + # See note on methods that yield above. def glob(*args, &block) if block begin @@ -153,6 +162,7 @@ def children(*args, **kwargs, &block) Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end + # See note on methods that yield above. def each_child(*args, **kwargs, &block) if block begin @@ -190,6 +200,7 @@ def entries(*args, **kwargs, &block) Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_resume_signals end + # See note on methods that yield above. def foreach(*args, **kwargs, &block) if block begin @@ -213,6 +224,7 @@ def foreach(*args, **kwargs, &block) end end + # See note on methods that yield above. def glob(*args, **kwargs, &block) if block begin @@ -251,6 +263,7 @@ def home(*args, **kwargs, &block) if RUBY_VERSION.start_with?('2.') # Monkey patches for Dir (Ruby 2 version). See DirMonkeyPatches above for more details. module DirInstanceMonkeyPatches + # See note on methods that yield above. def each(*args, &block) if block begin @@ -274,6 +287,7 @@ def each(*args, &block) end unless RUBY_VERSION.start_with?('2.5.') # This is Ruby 2.6+ + # See note on methods that yield above. def each_child(*args, &block) if block begin @@ -322,6 +336,7 @@ def pos(*args, &block) else # Monkey patches for Dir (Ruby 3 version). See DirMonkeyPatches above for more details. module DirInstanceMonkeyPatches + # See note on methods that yield above. def each(*args, **kwargs, &block) if block begin @@ -344,6 +359,7 @@ def each(*args, **kwargs, &block) end end + # See note on methods that yield above. def each_child(*args, **kwargs, &block) if block begin