From 6eaa0c3e75210f0ebb57dc327dfb6be36d50cd20 Mon Sep 17 00:00:00 2001 From: Miranda Craghead Date: Wed, 16 Mar 2022 19:01:57 +0000 Subject: [PATCH 1/2] Merged PR 1379: added retry for creatingAzureManager in case of throttled requests added retry for forceRefresh in case of throttled requests ran tests MallocNanoZone=0 go test -race k8s.io/autoscaler/cluster-autoscaler/cloudprovider/azure -- passed and commented out unit test -- commented out as it takes 10 minutes to complete func TestCreateAzureManagerWithRetryError(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() mockVMClient := mockvmclient.NewMockInterface(ctrl) mockVMSSClient := mockvmssclient.NewMockInterface(ctrl) mockVMSSClient.EXPECT().List(gomock.Any(), "fakeId").Return([]compute.VirtualMachineScaleSet{}, retry.NewError(true, errors.New("test"))).AnyTimes() mockAzClient := &azClient{ virtualMachinesClient: mockVMClient, virtualMachineScaleSetsClient: mockVMSSClient, } manager, err := createAzureManagerInternal(strings.NewReader(validAzureCfg), cloudprovider.NodeGroupDiscoveryOptions{}, config.AutoscalingOptions{}, mockAzClient) assert.Nil(t, manager) assert.NotNil(t, err) } --- .../cloudprovider/azure/azure_manager.go | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/cluster-autoscaler/cloudprovider/azure/azure_manager.go b/cluster-autoscaler/cloudprovider/azure/azure_manager.go index f2fc79b1ca9c..77fe0bea019c 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_manager.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_manager.go @@ -26,10 +26,13 @@ import ( "time" "github.com/Azure/go-autorest/autorest/azure" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" "k8s.io/autoscaler/cluster-autoscaler/config" "k8s.io/autoscaler/cluster-autoscaler/config/dynamic" + kretry "k8s.io/client-go/util/retry" klog "k8s.io/klog/v2" + "sigs.k8s.io/cloud-provider-azure/pkg/retry" ) const ( @@ -106,8 +109,22 @@ func createAzureManagerInternal(configReader io.Reader, discoveryOpts cloudprovi return nil, err } + retryBackoff := wait.Backoff{ + Duration: 2 * time.Minute, + Factor: 1.0, + Jitter: 0.1, + Steps: 6, + Cap: 10 * time.Minute, + } + if err := manager.forceRefresh(); err != nil { - return nil, err + err = kretry.OnError(retryBackoff, retry.IsErrorRetriable, func() (err error) { + return manager.forceRefresh() + }) + if err != nil { + return nil, err + } + return manager, nil } return manager, nil From 0d5a71e867c0c3f9b8207b582b16cbcd0745bf08 Mon Sep 17 00:00:00 2001 From: prachigandhi Date: Thu, 28 Mar 2024 08:52:50 -0700 Subject: [PATCH 2/2] review comments - simplify retry logic --- .../cloudprovider/azure/azure_manager.go | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/azure/azure_manager.go b/cluster-autoscaler/cloudprovider/azure/azure_manager.go index 77fe0bea019c..daa449f3cf09 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_manager.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_manager.go @@ -117,14 +117,11 @@ func createAzureManagerInternal(configReader io.Reader, discoveryOpts cloudprovi Cap: 10 * time.Minute, } - if err := manager.forceRefresh(); err != nil { - err = kretry.OnError(retryBackoff, retry.IsErrorRetriable, func() (err error) { - return manager.forceRefresh() - }) - if err != nil { - return nil, err - } - return manager, nil + err = kretry.OnError(retryBackoff, retry.IsErrorRetriable, func() (err error) { + return manager.forceRefresh() + }) + if err != nil { + return nil, err } return manager, nil