Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ResourceUtilizationHealthCheck - Report both CPU and Memory issues #5407

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Diagnostics.ResourceMonitoring;
Expand All @@ -14,7 +15,6 @@ namespace Microsoft.Extensions.Diagnostics.HealthChecks;
/// </summary>
internal sealed class ResourceUtilizationHealthCheck : IHealthCheck
{
private static readonly Task<HealthCheckResult> _healthy = Task.FromResult(HealthCheckResult.Healthy());
private readonly ResourceUtilizationHealthCheckOptions _options;
private readonly IResourceMonitor _dataTracker;

Expand All @@ -39,26 +39,58 @@ public ResourceUtilizationHealthCheck(IOptions<ResourceUtilizationHealthCheckOpt
// Reads the resource utilization for the configured sampling window and compares the CPU and
// memory percentages with the configured unhealthy and degraded thresholds.
// NOTE(review): this block is a rendered diff, so lines that appear to belong to the previous
// implementation (one early return per resource) are interleaved with the revised implementation
// (combined CPU/memory message plus a data dictionary) — confirm against the merged file.
public Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
{
// Utilization snapshot taken over _options.SamplingWindow.
var utilization = _dataTracker.GetUtilization(_options.SamplingWindow);
if (utilization.CpuUsedPercentage > _options.CpuThresholds.UnhealthyUtilizationPercentage)
// The CPU and memory percentages, keyed by property name, are passed to every result built with 'data' below.
IReadOnlyDictionary<string, object> data = new Dictionary<string, object>
{
return Task.FromResult(HealthCheckResult.Unhealthy("CPU usage is above the limit"));
}
{ nameof(utilization.CpuUsedPercentage), utilization.CpuUsedPercentage },
{ nameof(utilization.MemoryUsedPercentage), utilization.MemoryUsedPercentage },
};

if (utilization.MemoryUsedPercentage > _options.MemoryThresholds.UnhealthyUtilizationPercentage)
{
return Task.FromResult(HealthCheckResult.Unhealthy("Memory usage is above the limit"));
}
// Both unhealthy thresholds are evaluated up front so one message can name either or both resources.
bool cpuUnhealthy = utilization.CpuUsedPercentage > _options.CpuThresholds.UnhealthyUtilizationPercentage;
bool memoryUnhealthy = utilization.MemoryUsedPercentage > _options.MemoryThresholds.UnhealthyUtilizationPercentage;

if (utilization.CpuUsedPercentage > _options.CpuThresholds.DegradedUtilizationPercentage)
if (cpuUnhealthy || memoryUnhealthy)
{
return Task.FromResult(HealthCheckResult.Degraded("CPU usage is close to the limit"));
string message = string.Empty;
if (cpuUnhealthy && memoryUnhealthy)
{
message = "CPU and Memory";
RussKie marked this conversation as resolved.
Show resolved Hide resolved
}
else if (cpuUnhealthy)
{
message = "CPU";
}
else
{
message = "Memory";
}

// The resource prefix chosen above is completed with the shared "above the limit" suffix.
message += " usage is above the limit";
RussKie marked this conversation as resolved.
Show resolved Hide resolved
return Task.FromResult(HealthCheckResult.Unhealthy(message, default, data));
}

if (utilization.MemoryUsedPercentage > _options.MemoryThresholds.DegradedUtilizationPercentage)
// Degraded thresholds are only reached when the unhealthy branch above did not return.
bool cpuDegraded = utilization.CpuUsedPercentage > _options.CpuThresholds.DegradedUtilizationPercentage;
bool memoryDegraded = utilization.MemoryUsedPercentage > _options.MemoryThresholds.DegradedUtilizationPercentage;

if (cpuDegraded || memoryDegraded)
{
return Task.FromResult(HealthCheckResult.Degraded("Memory usage is close to the limit"));
string message = string.Empty;
if (cpuDegraded && memoryDegraded)
{
message = "CPU and Memory";
}
else if (cpuDegraded)
{
message = "CPU";
}
else
{
message = "Memory";
}

message += " usage is close to the limit";
return Task.FromResult(HealthCheckResult.Degraded(message, default, data));
}

// Two return lines are shown: one returns the cached _healthy task, the other builds a healthy
// result that carries 'data'. Presumably the first is the removed line — verify.
return _healthy;
return Task.FromResult(HealthCheckResult.Healthy(default, data));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ public class ResourceHealthCheckTests
0UL,
1000UL,
new ResourceUsageThresholds(),
new ResourceUsageThresholds(),
"",
},
new object[]
Expand All @@ -32,6 +33,7 @@ public class ResourceHealthCheckTests
0UL,
1000UL,
new ResourceUsageThresholds { DegradedUtilizationPercentage = 0.2, UnhealthyUtilizationPercentage = 0.2 },
new ResourceUsageThresholds { DegradedUtilizationPercentage = 0.2, UnhealthyUtilizationPercentage = 0.2 },
""
},
new object[]
Expand All @@ -41,6 +43,7 @@ public class ResourceHealthCheckTests
2UL,
1000UL,
new ResourceUsageThresholds { DegradedUtilizationPercentage = 0.2, UnhealthyUtilizationPercentage = 0.2 },
new ResourceUsageThresholds { DegradedUtilizationPercentage = 0.2, UnhealthyUtilizationPercentage = 0.2 },
""
},
new object[]
Expand All @@ -50,7 +53,8 @@ public class ResourceHealthCheckTests
3UL,
1000UL,
new ResourceUsageThresholds { DegradedUtilizationPercentage = 0.2, UnhealthyUtilizationPercentage = 0.4 },
" usage is close to the limit"
new ResourceUsageThresholds { DegradedUtilizationPercentage = 0.2, UnhealthyUtilizationPercentage = 0.4 },
"CPU and Memory usage is close to the limit"
},
new object[]
{
Expand All @@ -59,7 +63,8 @@ public class ResourceHealthCheckTests
5UL,
1000UL,
new ResourceUsageThresholds { DegradedUtilizationPercentage = 0.2, UnhealthyUtilizationPercentage = 0.4 },
" usage is above the limit"
new ResourceUsageThresholds { DegradedUtilizationPercentage = 0.2, UnhealthyUtilizationPercentage = 0.4 },
"CPU and Memory usage is above the limit"
},
new object[]
{
Expand All @@ -68,7 +73,8 @@ public class ResourceHealthCheckTests
5UL,
1000UL,
new ResourceUsageThresholds { DegradedUtilizationPercentage = 0.4, UnhealthyUtilizationPercentage = 0.2 },
" usage is above the limit"
new ResourceUsageThresholds { DegradedUtilizationPercentage = 0.4, UnhealthyUtilizationPercentage = 0.2 },
"CPU and Memory usage is above the limit"
},
new object[]
{
Expand All @@ -77,7 +83,8 @@ public class ResourceHealthCheckTests
3UL,
1000UL,
new ResourceUsageThresholds { DegradedUtilizationPercentage = 0.2 },
" usage is close to the limit"
new ResourceUsageThresholds { DegradedUtilizationPercentage = 0.2 },
"CPU and Memory usage is close to the limit"
},
new object[]
{
Expand All @@ -86,67 +93,79 @@ public class ResourceHealthCheckTests
5UL,
1000UL,
new ResourceUsageThresholds { UnhealthyUtilizationPercentage = 0.4 },
" usage is above the limit"
new ResourceUsageThresholds { UnhealthyUtilizationPercentage = 0.4 },
"CPU and Memory usage is above the limit"
},
new object[]
{
HealthStatus.Degraded,
0.3,
3UL,
1000UL,
new ResourceUsageThresholds { DegradedUtilizationPercentage = 0.2, UnhealthyUtilizationPercentage = 0.4 },
new ResourceUsageThresholds { DegradedUtilizationPercentage = 0.9, UnhealthyUtilizationPercentage = 0.9 },
"CPU usage is close to the limit"
},
new object[]
{
HealthStatus.Degraded,
0.1,
3UL,
1000UL,
new ResourceUsageThresholds { DegradedUtilizationPercentage = 0.9, UnhealthyUtilizationPercentage = 0.9 },
new ResourceUsageThresholds { DegradedUtilizationPercentage = 0.2, UnhealthyUtilizationPercentage = 0.4 },
"Memory usage is close to the limit"
},
new object[]
{
HealthStatus.Unhealthy,
0.5,
5UL,
1000UL,
new ResourceUsageThresholds { DegradedUtilizationPercentage = 0.2, UnhealthyUtilizationPercentage = 0.4 },
new ResourceUsageThresholds { DegradedUtilizationPercentage = 0.9, UnhealthyUtilizationPercentage = 0.9 },
"CPU usage is above the limit"
},
new object[]
{
HealthStatus.Unhealthy,
0.1,
5UL,
1000UL,
new ResourceUsageThresholds { DegradedUtilizationPercentage = 0.9, UnhealthyUtilizationPercentage = 0.9 },
new ResourceUsageThresholds { DegradedUtilizationPercentage = 0.2, UnhealthyUtilizationPercentage = 0.4 },
"Memory usage is above the limit"
},
};

// Theory driven by the Data member: verifies the health status (and, for non-healthy results, the
// description) produced for a given CPU utilization and CPU thresholds.
// The third parameter is discarded, which is why xUnit1026 is suppressed around the signature.
[Theory]
[MemberData(nameof(Data))]
#pragma warning disable xUnit1026 // Theory methods should use all of their parameters
public async Task TestCpuChecks(HealthStatus expected, double utilization, ulong _, ulong totalMemory, ResourceUsageThresholds thresholds, string expectedDescription)
#pragma warning restore xUnit1026 // Theory methods should use all of their parameters
{
var systemResources = new SystemResources(1.0, 1.0, totalMemory, totalMemory);
var dataTracker = new Mock<IResourceMonitor>();
var samplingWindow = TimeSpan.FromSeconds(1);
// The mocked monitor reports the supplied CPU utilization with zero bytes of memory used.
dataTracker
.Setup(tracker => tracker.GetUtilization(samplingWindow))
.Returns(new ResourceUtilization(cpuUsedPercentage: utilization, memoryUsedInBytes: 0, systemResources));

var checkContext = new HealthCheckContext();
// Only the CPU thresholds are set here; MemoryThresholds is left at whatever the options type defaults to.
var cpuCheckOptions = new ResourceUtilizationHealthCheckOptions
{
CpuThresholds = thresholds,
SamplingWindow = samplingWindow
};

var options = Microsoft.Extensions.Options.Options.Create(cpuCheckOptions);
var healthCheck = new ResourceUtilizationHealthCheck(options, dataTracker.Object);
var healthCheckResult = await healthCheck.CheckHealthAsync(checkContext);
Assert.Equal(expected, healthCheckResult.Status);
// The description is only checked for non-healthy results; the expected text is prefixed with "CPU".
if (healthCheckResult.Status != HealthStatus.Healthy)
{
Assert.Equal("CPU" + expectedDescription, healthCheckResult.Description);
}
}

// Theory driven by the Data member: verifies the health status, the presence of result data and,
// for non-healthy results, the description.
// NOTE(review): this block is a rendered diff; the TestMemoryChecks signature/body lines and the
// TestCpuAndMemoryChecks signature/body lines are interleaved — presumably the former are the
// removed lines; confirm against the merged file.
[Theory]
[MemberData(nameof(Data))]
#pragma warning disable xUnit1026 // Theory methods should use all of their parameters
public async Task TestMemoryChecks(HealthStatus expected, double _, ulong memoryUsed, ulong totalMemory, ResourceUsageThresholds thresholds, string expectedDescription)
#pragma warning restore xUnit1026 // Theory methods should use all of their parameters
public async Task TestCpuAndMemoryChecks(HealthStatus expected, double utilization, ulong memoryUsed, ulong totalMemory,
ResourceUsageThresholds cpuThresholds, ResourceUsageThresholds memoryThresholds, string expectedDescription)
{
var systemResources = new SystemResources(1.0, 1.0, totalMemory, totalMemory);
var dataTracker = new Mock<IResourceMonitor>();
var samplingWindow = TimeSpan.FromSeconds(1);
// Two Returns lines are shown: one fixes CPU usage at 0, the other passes both the CPU utilization and the memory used.
dataTracker
.Setup(tracker => tracker.GetUtilization(samplingWindow))
.Returns(new ResourceUtilization(cpuUsedPercentage: 0, memoryUsedInBytes: memoryUsed, systemResources));
.Returns(new ResourceUtilization(cpuUsedPercentage: utilization, memoryUsedInBytes: memoryUsed, systemResources));

var checkContext = new HealthCheckContext();
var memCheckOptions = new ResourceUtilizationHealthCheckOptions
var checkOptions = new ResourceUtilizationHealthCheckOptions
{
MemoryThresholds = thresholds,
CpuThresholds = cpuThresholds,
MemoryThresholds = memoryThresholds,
SamplingWindow = samplingWindow
};

var options = Microsoft.Extensions.Options.Options.Create(memCheckOptions);
var options = Microsoft.Extensions.Options.Options.Create(checkOptions);
var healthCheck = new ResourceUtilizationHealthCheck(options, dataTracker.Object);
var healthCheckResult = await healthCheck.CheckHealthAsync(checkContext);
Assert.Equal(expected, healthCheckResult.Status);
// Data is asserted to be non-empty before the status check, i.e. for every status.
Assert.NotEmpty(healthCheckResult.Data);
// The description is only checked for non-healthy results; one assertion prefixes "Memory", the other compares the full expected text.
if (healthCheckResult.Status != HealthStatus.Healthy)
{
Assert.Equal("Memory" + expectedDescription, healthCheckResult.Description);
Assert.Equal(expectedDescription, healthCheckResult.Description);
}
}

Expand Down