Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SDTEST-437] Auto test retries for RSpec #213

Merged
merged 8 commits into from
Aug 8, 2024
Merged
3 changes: 2 additions & 1 deletion lib/datadog/ci/configuration/components.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
require_relative "../test_optimisation/coverage/transport"
require_relative "../test_optimisation/coverage/writer"
require_relative "../test_retries/component"
require_relative "../test_retries/null_component"
require_relative "../test_visibility/component"
require_relative "../test_visibility/flush"
require_relative "../test_visibility/null_component"
Expand All @@ -35,7 +36,7 @@ def initialize(settings)
@test_visibility = TestVisibility::NullComponent.new
@git_tree_upload_worker = DummyWorker.new
@ci_remote = nil
@test_retries = nil
@test_retries = TestRetries::NullComponent.new

# Activate CI mode if enabled
if settings.ci.enabled
Expand Down
103 changes: 64 additions & 39 deletions lib/datadog/ci/contrib/rspec/example.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,50 +34,71 @@ def run(*args)

if ci_queue?
suite_name = "#{suite_name} (ci-queue running example [#{test_name}])"
test_suite_span = test_visibility_component.start_test_suite(suite_name)
ci_queue_test_span = test_visibility_component.start_test_suite(suite_name)
end

test_visibility_component.trace_test(
test_name,
suite_name,
tags: {
CI::Ext::Test::TAG_FRAMEWORK => Ext::FRAMEWORK,
CI::Ext::Test::TAG_FRAMEWORK_VERSION => CI::Contrib::RSpec::Integration.version.to_s,
CI::Ext::Test::TAG_SOURCE_FILE => Git::LocalRepository.relative_to_root(metadata[:file_path]),
CI::Ext::Test::TAG_SOURCE_START => metadata[:line_number].to_s,
CI::Ext::Test::TAG_PARAMETERS => Utils::TestRun.test_parameters(
metadata: {"scoped_id" => metadata[:scoped_id]}
)
},
service: datadog_configuration[:service_name]
) do |test_span|
test_span&.itr_unskippable! if metadata[CI::Ext::Test::ITR_UNSKIPPABLE_OPTION]

metadata[:skip] = CI::Ext::Test::ITR_TEST_SKIP_REASON if test_span&.skipped_by_itr?

result = super

case execution_result.status
when :passed
test_span&.passed!
test_suite_span&.passed!
when :failed
test_span&.failed!(exception: execution_result.exception)
test_suite_span&.failed!
else
# :pending or nil
test_span&.skipped!(
reason: execution_result.pending_message,
exception: execution_result.pending_exception
)

test_suite_span&.skipped!
# don't report test to RSpec::Core::Reporter until retries are done
@skip_reporting = true

test_retries_component.with_retries do |retry_callback|
test_visibility_component.trace_test(
test_name,
suite_name,
tags: {
CI::Ext::Test::TAG_FRAMEWORK => Ext::FRAMEWORK,
CI::Ext::Test::TAG_FRAMEWORK_VERSION => CI::Contrib::RSpec::Integration.version.to_s,
CI::Ext::Test::TAG_SOURCE_FILE => Git::LocalRepository.relative_to_root(metadata[:file_path]),
CI::Ext::Test::TAG_SOURCE_START => metadata[:line_number].to_s,
CI::Ext::Test::TAG_PARAMETERS => Utils::TestRun.test_parameters(
metadata: {"scoped_id" => metadata[:scoped_id]}
)
},
service: datadog_configuration[:service_name]
) do |test_span|
test_span&.itr_unskippable! if metadata[CI::Ext::Test::ITR_UNSKIPPABLE_OPTION]

metadata[:skip] = CI::Ext::Test::ITR_TEST_SKIP_REASON if test_span&.skipped_by_itr?

# before each run remove any previous exception
@exception = nil

super

case execution_result.status
when :passed
test_span&.passed!
when :failed
test_span&.failed!(exception: execution_result.exception)
else
# :pending or nil
test_span&.skipped!(
reason: execution_result.pending_message,
exception: execution_result.pending_exception
)
end

retry_callback.call(test_span)
end
end

test_suite_span&.finish
# after retries are done, we can report the test to RSpec
@skip_reporting = false

result
end
# this is a special case for ci-queue, we need to finish the test suite span
ci_queue_test_span&.finish

# Finish spec with latest retry's result
# TODO: when implementing new test retries make sure to clean @exception before calling this method
# if test passed at least once
finish(reporter)
end

def finish(reporter)
# by default finish test but do not report it to RSpec::Core::Reporter
# it is going to be reported once after retries are done
return super unless @skip_reporting

super(::RSpec::Core::NullReporter)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if we're delaying the reporting until after the retries, then isn't it true that we would potentially hide the fact that there were flakes? or will the final trace data include the flake information?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is hiding reporting (for now) to RSpec formatter that prints the final message and tracks tests that failed (I might fix it in later versions to format retries correctly).

As tested in instrumentation_spec every retry is traced as a separate test span, so the test will be correctly identified as flaky. Also, test suite and test session statuses will be correctly reported as pass.

end

private
Expand All @@ -103,6 +124,10 @@ def test_visibility_component
Datadog.send(:components).test_visibility
end

def test_retries_component
Datadog.send(:components).test_retries
end

def ci_queue?
!!defined?(::RSpec::Queue::ExampleExtension) &&
self.class.ancestors.include?(::RSpec::Queue::ExampleExtension)
Expand Down
50 changes: 49 additions & 1 deletion lib/datadog/ci/test_retries/component.rb
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
# frozen_string_literal: true

require_relative "strategy/no_retry"
require_relative "strategy/retry_failed"

module Datadog
module CI
module TestRetries
# Encapsulates the logic to enable test retries, including:
# - retrying failed tests - improve success rate of CI pipelines
# - retrying new tests - detect flaky tests as early as possible to prevent them from being merged
class Component
attr_reader :retry_failed_tests_enabled, :retry_failed_tests_max_attempts, :retry_failed_tests_total_limit
attr_reader :retry_failed_tests_enabled, :retry_failed_tests_max_attempts,
:retry_failed_tests_total_limit, :retry_failed_tests_count

def initialize(
retry_failed_tests_max_attempts:,
Expand All @@ -17,11 +21,55 @@ def initialize(
@retry_failed_tests_enabled = false
@retry_failed_tests_max_attempts = retry_failed_tests_max_attempts
@retry_failed_tests_total_limit = retry_failed_tests_total_limit
# counter that store the current number of failed tests retried
@retry_failed_tests_count = 0

@mutex = Mutex.new
end

def configure(library_settings)
@retry_failed_tests_enabled = library_settings.flaky_test_retries_enabled?
end

def with_retries(&block)
# @type var retry_strategy: Strategy::Base
retry_strategy = nil

test_finished_callback = lambda do |test_span|
if retry_strategy.nil?
# we always run test at least once and after first pass create a correct retry strategy
retry_strategy = build_strategy(test_span)
else
# after each retry we record the result, strategy will decide if we should retry again
retry_strategy&.record_retry(test_span)
end
end

loop do
yield test_finished_callback

break unless retry_strategy&.should_retry?
end
end

def build_strategy(test_span)
@mutex.synchronize do
if should_retry_failed_test?(test_span)
Datadog.logger.debug("Failed test retry starts")
@retry_failed_tests_count += 1

Strategy::RetryFailed.new(max_attempts: @retry_failed_tests_max_attempts)
else
Strategy::NoRetry.new
end
end
end

private

def should_retry_failed_test?(test_span)
@retry_failed_tests_enabled && !!test_span&.failed? && @retry_failed_tests_count < @retry_failed_tests_total_limit
end
end
end
end
Expand Down
28 changes: 28 additions & 0 deletions lib/datadog/ci/test_retries/null_component.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# frozen_string_literal: true

require_relative "component"

module Datadog
module CI
module TestRetries
class NullComponent < Component
attr_reader :retry_failed_tests_enabled, :retry_failed_tests_max_attempts, :retry_failed_tests_total_limit

def initialize
# enabled only by remote settings
@retry_failed_tests_enabled = false
@retry_failed_tests_max_attempts = 0
@retry_failed_tests_total_limit = 0
end

def configure(library_settings)
end

def with_retries(&block)
no_action = proc {}
yield no_action
end
end
end
end
end
19 changes: 19 additions & 0 deletions lib/datadog/ci/test_retries/strategy/base.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# frozen_string_literal: true

module Datadog
module CI
module TestRetries
module Strategy
class Base
def should_retry?
false
end

def record_retry(test_span)
test_span&.set_tag(Ext::Test::TAG_IS_RETRY, "true")
end
end
end
end
end
end
16 changes: 16 additions & 0 deletions lib/datadog/ci/test_retries/strategy/no_retry.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# frozen_string_literal: true

require_relative "base"

module Datadog
module CI
module TestRetries
module Strategy
class NoRetry < Base
def record_retry(test_span)
end
end
end
end
end
end
37 changes: 37 additions & 0 deletions lib/datadog/ci/test_retries/strategy/retry_failed.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# frozen_string_literal: true

require_relative "base"

require_relative "../../ext/test"

module Datadog
module CI
module TestRetries
module Strategy
class RetryFailed < Base
attr_reader :max_attempts

def initialize(max_attempts:)
@max_attempts = max_attempts

@attempts = 0
@passed_once = false
end

def should_retry?
@attempts < @max_attempts && !@passed_once
end

def record_retry(test_span)
super

@attempts += 1
@passed_once = true if test_span&.passed?

Datadog.logger.debug { "Retry Attempts [#{@attempts} / #{@max_attempts}], Passed: [#{@passed_once}]" }
end
end
end
end
end
end
5 changes: 3 additions & 2 deletions sig/datadog/ci/contrib/rspec/example.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ module Datadog
module RSpec
module Example
def self.included: (untyped base) -> untyped
module InstanceMethods
include ::RSpec::Core::Example
module InstanceMethods : ::RSpec::Core::Example
@skip_reporting: bool

def run: (untyped example_group_instance, untyped reporter) -> untyped

Expand All @@ -14,6 +14,7 @@ module Datadog
def fetch_top_level_example_group: () -> Hash[Symbol, untyped]
def datadog_configuration: () -> untyped
def test_visibility_component: () -> Datadog::CI::TestVisibility::Component
def test_retries_component: () -> Datadog::CI::TestRetries::Component
def ci_queue?: () -> bool
end
end
Expand Down
12 changes: 12 additions & 0 deletions sig/datadog/ci/test_retries/component.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,21 @@ module Datadog

attr_reader retry_failed_tests_total_limit: Integer

attr_reader retry_failed_tests_count: Integer

@mutex: Thread::Mutex

def initialize: (retry_failed_tests_max_attempts: Integer, retry_failed_tests_total_limit: Integer) -> void

def configure: (Datadog::CI::Remote::LibrarySettings library_settings) -> void

def with_retries: () { (untyped) -> void } -> void

def build_strategy: (Datadog::CI::Test test) -> Datadog::CI::TestRetries::Strategy::Base

private

def should_retry_failed_test?: (Datadog::CI::Test test) -> bool
end
end
end
Expand Down
25 changes: 25 additions & 0 deletions sig/datadog/ci/test_retries/null_component.rbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
module Datadog
module CI
module TestRetries
class NullComponent < Component
@retry_failed_tests_enabled: untyped

@retry_failed_tests_max_attempts: untyped

@retry_failed_tests_total_limit: untyped

attr_reader retry_failed_tests_enabled: untyped

attr_reader retry_failed_tests_max_attempts: untyped

attr_reader retry_failed_tests_total_limit: untyped

def initialize: () -> void

def configure: (untyped library_settings) -> nil

def with_retries: () { (untyped) -> untyped } -> untyped
end
end
end
end
13 changes: 13 additions & 0 deletions sig/datadog/ci/test_retries/strategy/base.rbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
module Datadog
module CI
module TestRetries
module Strategy
class Base
def should_retry?: () -> bool

def record_retry: (Datadog::CI::Test test_span) -> void
end
end
end
end
end
Loading
Loading