Skip to content

Commit

Permalink
fix: add timeout for health checks (#1388)
Browse files Browse the repository at this point in the history
<!--- Provide a general summary of your changes in the Title above -->

## Description

- Adds a 40-second timeout; a health check that exceeds it is reported as unhealthy
- Adds a 5-second threshold; a health check slower than this is reported as degraded

<!--- Describe your changes in detail -->

## Related Issue(s)

- #{issue number}

## Verification

- [ ] **Your** code builds clean without any errors or warnings
- [ ] Manual testing done (required)
- [ ] Relevant automated test added (if you find this hard, leave it and
we'll help out)

## Documentation

- [ ] Documentation is updated (either in `docs`-directory, Altinnpedia
or a separate linked PR in
[altinn-studio-docs.](https://github.com/Altinn/altinn-studio-docs), if
applicable)


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **New Features**
- Enhanced health check functionality for Redis connections with logging
capabilities and degradation status for slow responses.
- Improved HTTP request health checks with timeout mechanisms, including
detailed logging for slow responses.

- **Bug Fixes**
- Updated error handling for connection timeouts and exceptions to
provide clearer status messages and improved logging.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Co-authored-by: Ole Jørgen Skogstad <skogstad@softis.net>
  • Loading branch information
arealmaas and oskogstad authored Nov 8, 2024
1 parent bb832d8 commit d68cc65
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 5 deletions.
Original file line number Diff line number Diff line change
@@ -1,34 +1,65 @@
using System.Diagnostics;
using Microsoft.Extensions.Diagnostics.HealthChecks;
using StackExchange.Redis;
using Microsoft.Extensions.Options;
using Microsoft.Extensions.Logging;

namespace Digdir.Domain.Dialogporten.Infrastructure.HealthChecks;

// NOTE(review): this listing appears to come from a rendered diff with the +/- markers
// stripped, so a few removed lines sit directly next to their replacements. Those spots
// are flagged inline below; the code itself is left untouched.
/// <summary>
/// Health check that connects to Redis using the connection string from
/// <see cref="InfrastructureSettings"/>, sends a ping, and reports the result as
/// healthy, degraded (slow response) or unhealthy (timeout, connection failure or
/// any other exception).
/// </summary>
internal sealed class RedisHealthCheck : IHealthCheck
{
private readonly InfrastructureSettings _settings;
private readonly ILogger<RedisHealthCheck> _logger;
// A successful connect + ping that takes longer than this many seconds is reported as degraded.
private const int DegradationThresholdInSeconds = 5;

/// <summary>
/// Stores the infrastructure settings and the logger used by the check.
/// </summary>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="options"/> (or its <c>Value</c>) or <paramref name="logger"/> is null.
/// </exception>
// NOTE(review): the next two lines look like the old and new versions of the same
// constructor signature from the diff; only the second one takes a logger.
public RedisHealthCheck(IOptions<InfrastructureSettings> options)
public RedisHealthCheck(IOptions<InfrastructureSettings> options, ILogger<RedisHealthCheck> logger)
{
_settings = options?.Value ?? throw new ArgumentNullException(nameof(options));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}

/// <summary>
/// Connects to Redis, pings it, and maps the outcome to a <see cref="HealthCheckResult"/>.
/// </summary>
/// <returns>
/// Healthy when the ping succeeds within the degradation threshold; Degraded when it
/// succeeds but the elapsed time exceeds the threshold; Unhealthy when a
/// <see cref="RedisTimeoutException"/>, <see cref="RedisConnectionException"/> or any
/// other exception is thrown.
/// </returns>
// NOTE(review): cancellationToken is not used anywhere in the visible body; the only
// time limits come from the ConfigurationOptions timeouts set below.
public async Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
{
// Timestamp taken before connecting, so the measured time covers connect + ping.
var startTime = Stopwatch.GetTimestamp();
try
{
// NOTE(review): this line looks like the removed pre-change connect call; the
// replacement that uses explicit options is further down in this block.
using var redis = await ConnectionMultiplexer.ConnectAsync(_settings.Redis.ConnectionString);
// Presumably milliseconds (i.e. 15 s) — TODO confirm against the StackExchange.Redis docs.
const int timeout = 15_000;

var options = ConfigurationOptions.Parse(_settings.Redis.ConnectionString);

// Apply the same limit to the async, connect and sync timeouts.
options.AsyncTimeout = timeout;
options.ConnectTimeout = timeout;
options.SyncTimeout = timeout;

await using var redis = await ConnectionMultiplexer.ConnectAsync(options);
var db = redis.GetDatabase();
await db.PingAsync();

var responseTime = Stopwatch.GetElapsedTime(startTime);

// Slow-but-successful responses are logged as warnings and reported as degraded.
if (responseTime > TimeSpan.FromSeconds(DegradationThresholdInSeconds))
{
_logger.LogWarning("Redis connection is slow ({Elapsed:N1}s).", responseTime.TotalSeconds);
return HealthCheckResult.Degraded($"Redis connection is slow ({responseTime.TotalSeconds:N1}s).");
}

return HealthCheckResult.Healthy("Redis connection is healthy.");
}
catch (RedisTimeoutException ex)
{
var responseTime = Stopwatch.GetElapsedTime(startTime);
// NOTE(review): unlike the other catch blocks, the exception is not passed to the
// logger here; it is only attached to the returned result.
_logger.LogWarning("Redis connection timed out ({Elapsed:N1}s).", responseTime.TotalSeconds);
return HealthCheckResult.Unhealthy($"Redis connection timed out after {responseTime.TotalSeconds:N1}s.", exception: ex);
}
catch (RedisConnectionException ex)
{
_logger.LogWarning(ex, "Unable to connect to Redis.");
return HealthCheckResult.Unhealthy("Unable to connect to Redis.", exception: ex);
}
catch (Exception ex)
{
// NOTE(review): the next line looks like the removed pre-change return from the diff;
// taken literally it would return before the logging call below is reached.
return HealthCheckResult.Unhealthy("An unexpected error occurred while checking Redis health.", exception: ex);
_logger.LogError(ex, "An unexpected error occurred while checking Redis' health.");
return HealthCheckResult.Unhealthy("An unexpected error occurred while checking Redis' health.", exception: ex);
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using System.Collections.Concurrent;
using System.Diagnostics;

namespace Digdir.Library.Utils.AspNet.HealthChecks;

Expand All @@ -10,6 +11,8 @@ internal sealed class EndpointsHealthCheck : IHealthCheck
private readonly IHttpClientFactory _httpClientFactory;
private readonly ILogger<EndpointsHealthCheck> _logger;
private readonly List<string> _endpoints;
private const int DegradationThresholdInSeconds = 5;
private const int TimeoutInSeconds = 40;

public EndpointsHealthCheck(
IHttpClientFactory httpClientFactory,
Expand All @@ -32,12 +35,28 @@ public async Task<HealthCheckResult> CheckHealthAsync(
{
try
{
var response = await client.GetAsync(url, cancellationToken);
using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
cts.CancelAfter(TimeSpan.FromSeconds(TimeoutInSeconds));

var startTime = Stopwatch.GetTimestamp();
var response = await client.GetAsync(url, cts.Token);
var responseTime = Stopwatch.GetElapsedTime(startTime);

if (!response.IsSuccessStatusCode)
{
_logger.LogWarning("Health check failed for endpoint: {Url}. Status Code: {StatusCode}", url, response.StatusCode);
unhealthyEndpoints.Add($"{url} (Status Code: {response.StatusCode})");
}
else if (responseTime > TimeSpan.FromSeconds(DegradationThresholdInSeconds))
{
_logger.LogWarning("Health check response was slow for endpoint: {Url}. Elapsed time: {Elapsed:N1}s", url, responseTime.TotalSeconds);
unhealthyEndpoints.Add($"{url} (Degraded - Response time: {responseTime.TotalSeconds:N1}s)");
}
}
catch (OperationCanceledException)
{
_logger.LogWarning("Health check timed out for endpoint: {Url}", url);
unhealthyEndpoints.Add($"{url} (Timeout after {TimeoutInSeconds}s)");
}
catch (Exception ex)
{
Expand Down

0 comments on commit d68cc65

Please sign in to comment.