From 86318b6bc8b2b8cfa38a9d48f365bcfb8e22c647 Mon Sep 17 00:00:00 2001
From: JunYang
Date: Wed, 2 Aug 2023 12:11:29 +0800
Subject: [PATCH] [release-0.6] cherrypick #1300 kernel bug lead to incorrect cpu usage

---
 pkg/storage/types.go      |  3 +++
 pkg/storage/types_test.go | 57 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+)

diff --git a/pkg/storage/types.go b/pkg/storage/types.go
index 7868f2393..445281de2 100644
--- a/pkg/storage/types.go
+++ b/pkg/storage/types.go
@@ -51,6 +51,9 @@ type MetricsPoint struct {
 }
 
 func resourceUsage(last, prev MetricsPoint) (corev1.ResourceList, api.TimeInfo, error) {
+	if last.StartTime.Before(prev.StartTime) {
+		return corev1.ResourceList{}, api.TimeInfo{}, fmt.Errorf("unexpected decrease in startTime of node/container")
+	}
 	if last.CumulativeCpuUsed < prev.CumulativeCpuUsed {
 		return corev1.ResourceList{}, api.TimeInfo{}, fmt.Errorf("unexpected decrease in cumulative CPU usage value")
 	}
diff --git a/pkg/storage/types_test.go b/pkg/storage/types_test.go
index be2d14bcd..0b2f521dc 100644
--- a/pkg/storage/types_test.go
+++ b/pkg/storage/types_test.go
@@ -16,8 +16,13 @@ package storage
 
 import (
 	"math"
+	"reflect"
 	"testing"
+	"time"
 
+	"sigs.k8s.io/metrics-server/pkg/api"
+
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 )
 
@@ -41,3 +46,55 @@ func TestUint64Quantity(t *testing.T) {
 		})
 	}
 }
+
+func Test_resourceUsage(t *testing.T) {
+	start := time.Now()
+	tcs := []struct {
+		name             string
+		last             MetricsPoint
+		prev             MetricsPoint
+		wantResourceList v1.ResourceList
+		wantTimeInfo     api.TimeInfo
+		wantErr          bool
+	}{
+		{
+			name: "get resource usage successfully",
+			last: newMetricsPoint(start, start.Add(20*time.Millisecond), 500, 600),
+			prev: newMetricsPoint(start, start.Add(10*time.Millisecond), 300, 400),
+			wantResourceList: v1.ResourceList{v1.ResourceCPU: uint64Quantity(uint64(20000), resource.DecimalSI, -9),
+				v1.ResourceMemory: uint64Quantity(600, resource.BinarySI, 0)},
+			wantTimeInfo: api.TimeInfo{Timestamp: start.Add(20 * time.Millisecond), Window: 10 * time.Millisecond},
+		},
+		{
+			name:             "get resource usage failed because of unexpected decrease in startTime",
+			last:             newMetricsPoint(start, start.Add(20*time.Millisecond), 500, 600),
+			prev:             newMetricsPoint(start.Add(20*time.Millisecond), start.Add(10*time.Millisecond), 300, 400),
+			wantResourceList: v1.ResourceList{},
+			wantTimeInfo:     api.TimeInfo{},
+			wantErr:          true,
+		},
+		{
+			name:             "get resource usage failed because of unexpected decrease in cumulative CPU usage value",
+			last:             newMetricsPoint(start, start.Add(20*time.Millisecond), 100, 600),
+			prev:             newMetricsPoint(start, start.Add(10*time.Millisecond), 300, 400),
+			wantResourceList: v1.ResourceList{},
+			wantTimeInfo:     api.TimeInfo{},
+			wantErr:          true,
+		},
+	}
+	for _, tc := range tcs {
+		t.Run(tc.name, func(t *testing.T) {
+			resourceList, timeInfo, err := resourceUsage(tc.last, tc.prev)
+			if (err != nil) != tc.wantErr {
+				t.Errorf("resourceUsage() error = %v, wantErr %v", err, tc.wantErr)
+				return
+			}
+			if !reflect.DeepEqual(resourceList, tc.wantResourceList) {
+				t.Errorf("resourceUsage() resourceList = %v, want %v", resourceList, tc.wantResourceList)
+			}
+			if !reflect.DeepEqual(timeInfo, tc.wantTimeInfo) {
+				t.Errorf("resourceUsage() timeInfo = %v, want %v", timeInfo, tc.wantTimeInfo)
+			}
+		})
+	}
+}