From 36e623a93f0ab6fe66c32693986c0e80a6fdd578 Mon Sep 17 00:00:00 2001 From: andyzhangx Date: Mon, 25 Mar 2024 13:01:13 +0000 Subject: [PATCH] fix: refine check disk lun collision logic --- pkg/azuredisk/azuredisk.go | 14 +++++++++----- pkg/azuredisk/controllerserver.go | 8 ++++++-- pkg/azuredisk/fake_azuredisk.go | 1 + 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/pkg/azuredisk/azuredisk.go b/pkg/azuredisk/azuredisk.go index aec0025a01..d138ab12d5 100644 --- a/pkg/azuredisk/azuredisk.go +++ b/pkg/azuredisk/azuredisk.go @@ -114,6 +114,8 @@ type Driver struct { volumeLocks *volumehelper.VolumeLocks // a timed cache for throttling throttlingCache azcache.Resource + // a timed cache for disk lun collision check throttling + checkDiskLunThrottlingCache azcache.Resource } // newDriverV1 Creates a NewCSIDriver object. Assumes vendor version is equal to driver version & @@ -160,13 +162,15 @@ func newDriverV1(options *DriverOptions) *Driver { } topologyKey = fmt.Sprintf("topology.%s/zone", driver.Name) - cache, err := azcache.NewTimedCache(5*time.Minute, func(key string) (interface{}, error) { - return nil, nil - }, false) - if err != nil { + getter := func(key string) (interface{}, error) { return nil, nil } + var err error + if driver.throttlingCache, err = azcache.NewTimedCache(5*time.Minute, getter, false); err != nil { klog.Fatalf("%v", err) } - driver.throttlingCache = cache + if driver.checkDiskLunThrottlingCache, err = azcache.NewTimedCache(30*time.Minute, getter, false); err != nil { + klog.Fatalf("%v", err) + } + userAgent := GetUserAgent(driver.Name, driver.customUserAgent, driver.userAgentSuffix) klog.V(2).Infof("driver userAgent: %s", userAgent) diff --git a/pkg/azuredisk/controllerserver.go b/pkg/azuredisk/controllerserver.go index 2097364bb1..9d1498023d 100644 --- a/pkg/azuredisk/controllerserver.go +++ b/pkg/azuredisk/controllerserver.go @@ -592,6 +592,10 @@ func (d *Driver) ValidateVolumeCapabilities(ctx context.Context, req *csi.Valida 
func (d *Driver) getOccupiedLunsFromNode(ctx context.Context, nodeName types.NodeName, diskURI string) []int { var occupiedLuns []int if d.checkDiskLUNCollision && !d.isCheckDiskLunThrottled() { + timer := time.AfterFunc(checkDiskLunThrottleLatency, func() { + klog.Warningf("checkDiskLun(%s) on node %s took longer than %v, disable disk lun check temporarily", diskURI, nodeName, checkDiskLunThrottleLatency) + d.checkDiskLunThrottlingCache.Set(consts.CheckDiskLunThrottlingKey, "") + }) now := time.Now() if usedLunsFromVA, err := d.getUsedLunsFromVolumeAttachments(ctx, string(nodeName)); err == nil { if len(usedLunsFromVA) > 0 { @@ -611,9 +615,9 @@ func (d *Driver) getOccupiedLunsFromNode(ctx context.Context, nodeName types.Nod } latency := time.Since(now) if latency > checkDiskLunThrottleLatency { - klog.Warningf("checkDiskLun(%s) on node %s took %v (limit: %v), disable disk lun check temporarily", diskURI, nodeName, latency, checkDiskLunThrottleLatency) - d.throttlingCache.Set(consts.CheckDiskLunThrottlingKey, "") + klog.Warningf("checkDiskLun(%s) on node %s took %v (limit: %v)", diskURI, nodeName, latency, checkDiskLunThrottleLatency) } else { + timer.Stop() // cancel the timer klog.V(6).Infof("checkDiskLun(%s) on node %s took %v", diskURI, nodeName, latency) } } diff --git a/pkg/azuredisk/fake_azuredisk.go b/pkg/azuredisk/fake_azuredisk.go index c499504cb4..fa88806f9f 100644 --- a/pkg/azuredisk/fake_azuredisk.go +++ b/pkg/azuredisk/fake_azuredisk.go @@ -139,6 +139,7 @@ func newFakeDriverV1(ctrl *gomock.Controller) (*fakeDriverV1, error) { return nil, err } driver.throttlingCache = cache + driver.checkDiskLunThrottlingCache = cache driver.deviceHelper = mockoptimization.NewMockInterface(ctrl) driver.AddControllerServiceCapabilities(