Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix retry delay going negative for large retries with exponential delays #2164

Merged
merged 7 commits into from
Jun 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 18 additions & 6 deletions src/Polly.Core/Retry/RetryHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ internal static class RetryHelper

private const double ExponentialFactor = 2.0;

// Upper-bound to prevent overflow beyond TimeSpan.MaxValue. Potential truncation during conversion from double to long
// (as described at https://docs.microsoft.com/en-us/dotnet/csharp/language-reference/builtin-types/numeric-conversions)
// is avoided by the arbitrary subtraction of 1,000.
private static readonly double MaxTimeSpanTicks = (double)TimeSpan.MaxValue.Ticks - 1_000;

public static bool IsValidDelay(TimeSpan delay) => delay >= TimeSpan.Zero;

public static TimeSpan GetRetryDelay(
Expand Down Expand Up @@ -101,20 +106,27 @@ private static TimeSpan DecorrelatedJitterBackoffV2(int attempt, TimeSpan baseDe
// This factor allows the median values to fall approximately at 1, 2, 4 etc seconds, instead of 1.4, 2.8, 5.6, 11.2.
const double RpScalingFactor = 1 / 1.4d;

// Upper-bound to prevent overflow beyond TimeSpan.MaxValue. Potential truncation during conversion from double to long
// (as described at https://docs.microsoft.com/en-us/dotnet/csharp/language-reference/builtin-types/numeric-conversions)
// is avoided by the arbitrary subtraction of 1000. Validated by unit-test Backoff_should_not_overflow_to_give_negative_timespan.
double maxTimeSpanDouble = (double)TimeSpan.MaxValue.Ticks - 1000;

long targetTicksFirstDelay = baseDelay.Ticks;

double t = attempt + randomizer();
double next = Math.Pow(ExponentialFactor, t) * Math.Tanh(Math.Sqrt(PFactor * t));

// At t >=1024, the above will tend to infinity which would otherwise cause the
// ticks to go negative. See https://github.com/App-vNext/Polly/issues/2163.
if (double.IsInfinity(next))
{
prev = next;
return TimeSpan.FromTicks((long)MaxTimeSpanTicks);
}

double formulaIntrinsicValue = next - prev;
prev = next;

return TimeSpan.FromTicks((long)Math.Min(formulaIntrinsicValue * RpScalingFactor * targetTicksFirstDelay, maxTimeSpanDouble));
long ticks = (long)Math.Min(formulaIntrinsicValue * RpScalingFactor * targetTicksFirstDelay, MaxTimeSpanTicks);

Debug.Assert(ticks >= 0, "ticks cannot be negative");

return TimeSpan.FromTicks(ticks);
}

#pragma warning disable IDE0047 // Remove unnecessary parentheses which offer less mental gymnastics
Expand Down
2 changes: 2 additions & 0 deletions src/Polly.Core/Retry/RetryResilienceStrategy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ protected internal override async ValueTask<Outcome<T>> ExecuteCore<TState>(Func
}
}

Debug.Assert(delay >= TimeSpan.Zero, "The delay cannot be negative.");
martincostello marked this conversation as resolved.
Show resolved Hide resolved

var onRetryArgs = new OnRetryArguments<T>(context, outcome, attempt, delay, executionTime);
_telemetry.Report<OnRetryArguments<T>, T>(new(ResilienceEventSeverity.Warning, RetryConstants.OnRetryEvent), onRetryArgs);

Expand Down
59 changes: 59 additions & 0 deletions test/Polly.Core.Tests/Issues/IssuesTests.InfiniteRetry_2163.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
using Microsoft.Extensions.Time.Testing;
using Polly.Retry;

namespace Polly.Core.Tests.Issues;

public partial class IssuesTests
{
[Fact(Timeout = 15_000)]
public async Task InfiniteRetry_Delay_Does_Not_Overflow_2163()
{
// Arrange
int attempts = 0;
int succeedAfter = 2049;
martincostello marked this conversation as resolved.
Show resolved Hide resolved

var options = new RetryStrategyOptions<bool>
{
BackoffType = DelayBackoffType.Exponential,
Delay = TimeSpan.FromSeconds(2),
MaxDelay = TimeSpan.FromSeconds(30),
MaxRetryAttempts = int.MaxValue,
UseJitter = true,
OnRetry = (args) =>
{
args.RetryDelay.Should().BeGreaterThan(TimeSpan.Zero, $"RetryDelay is less than zero after {args.AttemptNumber} attempts");
attempts++;
return default;
},
ShouldHandle = (args) => new ValueTask<bool>(!args.Outcome.Result),
};

var listener = new FakeTelemetryListener();
var telemetry = TestUtilities.CreateResilienceTelemetry(listener);
var timeProvider = new FakeTimeProvider();

var strategy = new RetryResilienceStrategy<bool>(options, timeProvider, telemetry);
var pipeline = strategy.AsPipeline();

using var cts = new CancellationTokenSource(Debugger.IsAttached ? TimeSpan.MaxValue : TimeSpan.FromSeconds(10));

// Act
var executing = pipeline.ExecuteAsync((_) =>
{
return new ValueTask<bool>(attempts >= succeedAfter);
}, cts.Token);

while (!executing.IsCompleted && !cts.IsCancellationRequested)
{
timeProvider.Advance(TimeSpan.FromSeconds(1));
}

// Assert
cts.Token.ThrowIfCancellationRequested();

var actual = await executing;

actual.Should().BeTrue();
attempts.Should().Be(succeedAfter);
}
}
71 changes: 64 additions & 7 deletions test/Polly.Core.Tests/Retry/RetryHelperTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,24 @@ public class RetryHelperTests
{
private Func<double> _randomizer = new RandomUtil(0).NextDouble;

public static TheoryData<int> Attempts()
{
#pragma warning disable IDE0028
return new()
{
1,
2,
3,
4,
10,
100,
1_000,
1_024,
1_025,
};
#pragma warning restore IDE0028
}

[Fact]
public void IsValidDelay_Ok()
{
Expand Down Expand Up @@ -187,14 +205,51 @@ public void GetRetryDelay_OverflowWithMaxDelay_ReturnsMaxDelay()
RetryHelper.GetRetryDelay(DelayBackoffType.Exponential, false, 1000, TimeSpan.FromDays(1), TimeSpan.FromDays(2), ref state, _randomizer).Should().Be(TimeSpan.FromDays(2));
}

[InlineData(1)]
[InlineData(2)]
[InlineData(3)]
[InlineData(4)]
[InlineData(10)]
[InlineData(100)]
[InlineData(1000)]
[Theory]
[MemberData(nameof(Attempts))]
public void GetRetryDelay_Exponential_Is_Positive_When_No_Maximum_Delay(int attempt)
{
var jitter = true;
var type = DelayBackoffType.Exponential;

var baseDelay = TimeSpan.FromSeconds(2);
TimeSpan? maxDelay = null;

var random = new RandomUtil(0).NextDouble;
double state = 0;

var first = RetryHelper.GetRetryDelay(type, jitter, attempt, baseDelay, maxDelay, ref state, random);
var second = RetryHelper.GetRetryDelay(type, jitter, attempt, baseDelay, maxDelay, ref state, random);

first.Should().BePositive();
second.Should().BePositive();
}

[Theory]
[MemberData(nameof(Attempts))]
public void GetRetryDelay_Exponential_Does_Not_Exceed_MaxDelay(int attempt)
{
var jitter = true;
var type = DelayBackoffType.Exponential;

var baseDelay = TimeSpan.FromSeconds(2);
var maxDelay = TimeSpan.FromSeconds(30);

var random = new RandomUtil(0).NextDouble;
double state = 0;

var first = RetryHelper.GetRetryDelay(type, jitter, attempt, baseDelay, maxDelay, ref state, random);
var second = RetryHelper.GetRetryDelay(type, jitter, attempt, baseDelay, maxDelay, ref state, random);

first.Should().BePositive();
first.Should().BeLessThanOrEqualTo(maxDelay);

second.Should().BePositive();
second.Should().BeLessThanOrEqualTo(maxDelay);
}

[Theory]
[MemberData(nameof(Attempts))]
public void ExponentialWithJitter_Ok(int count)
{
var delay = TimeSpan.FromSeconds(7.8);
Expand All @@ -203,6 +258,7 @@ public void ExponentialWithJitter_Ok(int count)

newDelays.Should().ContainInConsecutiveOrder(oldDelays);
newDelays.Should().HaveCount(oldDelays.Count);
newDelays.Should().AllSatisfy(delay => delay.Should().BePositive());
}

[Fact]
Expand All @@ -213,6 +269,7 @@ public void ExponentialWithJitter_EnsureRandomness()
var delays2 = GetExponentialWithJitterBackoff(false, delay, 100, RandomUtil.Instance.NextDouble);

delays1.SequenceEqual(delays2).Should().BeFalse();
delays1.Should().AllSatisfy(delay => delay.Should().BePositive());
}

private static IReadOnlyList<TimeSpan> GetExponentialWithJitterBackoff(bool contrib, TimeSpan baseDelay, int retryCount, Func<double>? randomizer = null)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ public void ArgValidation_Ok()
pool.Get(System.Threading.Timeout.InfiniteTimeSpan).Should().NotBeNull();
}

#pragma warning disable xUnit1042 // The member referenced by the MemberData attribute returns untyped data rows
[MemberData(nameof(TimeProviders))]
#pragma warning restore xUnit1042 // The member referenced by the MemberData attribute returns untyped data rows
[Theory]
public void RentReturn_Reusable_EnsureProperBehavior(object timeProvider)
{
Expand All @@ -48,7 +50,9 @@ public void RentReturn_Reusable_EnsureProperBehavior(object timeProvider)
#endif
}

#pragma warning disable xUnit1042 // The member referenced by the MemberData attribute returns untyped data rows
[MemberData(nameof(TimeProviders))]
#pragma warning restore xUnit1042 // The member referenced by the MemberData attribute returns untyped data rows
[Theory]
public void RentReturn_NotReusable_EnsureProperBehavior(object timeProvider)
{
Expand All @@ -63,7 +67,9 @@ public void RentReturn_NotReusable_EnsureProperBehavior(object timeProvider)
cts2.Token.Should().NotBeNull();
}

#pragma warning disable xUnit1042 // The member referenced by the MemberData attribute returns untyped data rows
[MemberData(nameof(TimeProviders))]
#pragma warning restore xUnit1042 // The member referenced by the MemberData attribute returns untyped data rows
[Theory]
public async Task Rent_Cancellable_EnsureCancelled(object timeProvider)
{
Expand All @@ -80,7 +86,9 @@ public async Task Rent_Cancellable_EnsureCancelled(object timeProvider)
await TestUtilities.AssertWithTimeoutAsync(() => cts.IsCancellationRequested.Should().BeTrue());
}

#pragma warning disable xUnit1042 // The member referenced by the MemberData attribute returns untyped data rows
[MemberData(nameof(TimeProviders))]
#pragma warning restore xUnit1042 // The member referenced by the MemberData attribute returns untyped data rows
[Theory]
public async Task Rent_NotCancellable_EnsureNotCancelled(object timeProvider)
{
Expand Down