diff --git a/cluster-autoscaler/cloudprovider/azure/azure_cache.go b/cluster-autoscaler/cloudprovider/azure/azure_cache.go
index bb68567e8f40..709ed5209b43 100644
--- a/cluster-autoscaler/cloudprovider/azure/azure_cache.go
+++ b/cluster-autoscaler/cloudprovider/azure/azure_cache.go
@@ -18,12 +18,14 @@ package azure

 import (
 	"context"
+	"fmt"
 	"reflect"
 	"regexp"
 	"strings"
 	"sync"
 	"time"

+	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v5"
 	"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2022-08-01/compute"
 	"github.com/Azure/go-autorest/autorest/to"
 	"github.com/Azure/skewer"
@@ -66,12 +68,17 @@ type azureCache struct {
 	// Cache content.

 	// resourceGroup specifies the name of the resource group that this cache tracks
-	resourceGroup string
+	resourceGroup        string
+	clusterResourceGroup string
+	clusterName          string
+
+	// enableVMsAgentPool specifies whether VMs agent pool type is supported.
+	enableVMsAgentPool bool

 	// vmType can be one of vmTypeVMSS (default), vmTypeStandard
 	vmType string

-	vmsPoolSet map[string]struct{} // track the nodepools that're vms pool
+	vmsPoolMap map[string]armcontainerservice.AgentPool // track the nodepools that are VMs pools

 	// scaleSets keeps the set of all known scalesets in the resource group, populated/refreshed via VMSS.List() call.
 	// It is only used/populated if vmType is vmTypeVMSS (default).
@@ -104,8 +111,11 @@ func newAzureCache(client *azClient, cacheTTL time.Duration, config Config) (*az
 		azClient:             client,
 		refreshInterval:      cacheTTL,
 		resourceGroup:        config.ResourceGroup,
+		clusterResourceGroup: config.ClusterResourceGroup,
+		clusterName:          config.ClusterName,
+		enableVMsAgentPool:   config.EnableVMsAgentPool,
 		vmType:               config.VMType,
-		vmsPoolSet:           make(map[string]struct{}),
+		vmsPoolMap:           make(map[string]armcontainerservice.AgentPool),
 		scaleSets:            make(map[string]compute.VirtualMachineScaleSet),
 		virtualMachines:      make(map[string][]compute.VirtualMachine),
 		registeredNodeGroups: make([]cloudprovider.NodeGroup, 0),
@@ -126,11 +136,11 @@ func newAzureCache(client *azClient, cacheTTL time.Duration, config Config) (*az
 	return cache, nil
 }

-func (m *azureCache) getVMsPoolSet() map[string]struct{} {
+func (m *azureCache) getVMsPoolMap() map[string]armcontainerservice.AgentPool {
 	m.mutex.Lock()
 	defer m.mutex.Unlock()

-	return m.vmsPoolSet
+	return m.vmsPoolMap
 }

 func (m *azureCache) getVirtualMachines() map[string][]compute.VirtualMachine {
@@ -226,13 +236,19 @@ func (m *azureCache) fetchAzureResources() error {
 		return err
 	}
 	m.scaleSets = vmssResult
-	vmResult, vmsPoolSet, err := m.fetchVirtualMachines()
+	// we fetch both sets of resources since CAS may operate on mixed nodepools
+	vmResult, err := m.fetchVirtualMachines()
 	if err != nil {
 		return err
 	}
-	// we fetch both sets of resources since CAS may operate on mixed nodepools
 	m.virtualMachines = vmResult
-	m.vmsPoolSet = vmsPoolSet
+	if m.enableVMsAgentPool {
+		vmsPoolMap, err := m.fetchVMsPools()
+		if err != nil {
+			return err
+		}
+		m.vmsPoolMap = vmsPoolMap
+	}

 	return nil
 }
@@ -245,19 +261,17 @@ const (
 )

 // fetchVirtualMachines returns the updated list of virtual machines in the config resource group using the Azure API.
-func (m *azureCache) fetchVirtualMachines() (map[string][]compute.VirtualMachine, map[string]struct{}, error) {
+func (m *azureCache) fetchVirtualMachines() (map[string][]compute.VirtualMachine, error) {
 	ctx, cancel := getContextWithCancel()
 	defer cancel()

 	result, err := m.azClient.virtualMachinesClient.List(ctx, m.resourceGroup)
 	if err != nil {
 		klog.Errorf("VirtualMachinesClient.List in resource group %q failed: %v", m.resourceGroup, err)
-		return nil, nil, err.Error()
+		return nil, err.Error()
 	}

 	instances := make(map[string][]compute.VirtualMachine)
-	// track the nodepools that're vms pools
-	vmsPoolSet := make(map[string]struct{})
 	for _, instance := range result {
 		if instance.Tags == nil {
 			continue
@@ -274,20 +288,8 @@ func (m *azureCache) fetchVirtualMachines() (map[string][]compute.VirtualMachine
 		}

 		instances[to.String(vmPoolName)] = append(instances[to.String(vmPoolName)], instance)
-
-		// if the nodepool is already in the map, skip it
-		if _, ok := vmsPoolSet[to.String(vmPoolName)]; ok {
-			continue
-		}
-
-		// nodes from vms pool will have tag "aks-managed-agentpool-type" set to "VirtualMachines"
-		if agentpoolType := tags[agentpoolTypeTag]; agentpoolType != nil {
-			if strings.EqualFold(to.String(agentpoolType), vmsPoolType) {
-				vmsPoolSet[to.String(vmPoolName)] = struct{}{}
-			}
-		}
 	}
-	return instances, vmsPoolSet, nil
+	return instances, nil
 }

 // fetchScaleSets returns the updated list of scale sets in the config resource group using the Azure API.
@@ -308,6 +310,39 @@ func (m *azureCache) fetchScaleSets() (map[string]compute.VirtualMachineScaleSet
 	return sets, nil
 }

+// fetchVMsPools returns a map of the VMs agent pools in the cluster
+func (m *azureCache) fetchVMsPools() (map[string]armcontainerservice.AgentPool, error) {
+	ctx, cancel := getContextWithTimeout(vmsListRequestContextTimeout)
+	defer cancel()
+
+	if m.azClient.agentPoolClient == nil {
+		return nil, fmt.Errorf("agentPoolClient is nil")
+	}
+
+	vmsPoolMap := make(map[string]armcontainerservice.AgentPool)
+
+	pager := m.azClient.agentPoolClient.NewListPager(m.clusterResourceGroup, m.clusterName, nil)
+	var aps []*armcontainerservice.AgentPool
+	for pager.More() {
+		resp, err := pager.NextPage(ctx)
+		if err != nil {
+			klog.Errorf("agentPoolClient.pager.NextPage in cluster %s resource group %s failed: %v",
+				m.clusterName, m.clusterResourceGroup, err)
+			return nil, err
+		}
+		aps = append(aps, resp.Value...)
+	}
+
+	for _, ap := range aps {
+		if ap != nil && ap.Name != nil && ap.Properties != nil && ap.Properties.Type != nil &&
+			*ap.Properties.Type == armcontainerservice.AgentPoolTypeVirtualMachines {
+			vmsPoolMap[*ap.Name] = *ap
+		}
+	}
+
+	return vmsPoolMap, nil
+}
+
 // Register registers a node group if it hasn't been registered.
 func (m *azureCache) Register(nodeGroup cloudprovider.NodeGroup) bool {
 	m.mutex.Lock()
@@ -417,7 +452,7 @@ func (m *azureCache) HasInstance(providerID string) (bool, error) {

 // FindForInstance returns node group of the given Instance
 func (m *azureCache) FindForInstance(instance *azureRef, vmType string) (cloudprovider.NodeGroup, error) {
-	vmsPoolSet := m.getVMsPoolSet()
+	vmsPoolMap := m.getVMsPoolMap()
 	m.mutex.Lock()
 	defer m.mutex.Unlock()
@@ -436,7 +471,7 @@ func (m *azureCache) FindForInstance(instance *azureRef, vmType string) (cloudpr
 	}

 	// cluster with vmss pool only
-	if vmType == vmTypeVMSS && len(vmsPoolSet) == 0 {
+	if vmType == vmTypeVMSS && len(vmsPoolMap) == 0 {
 		if m.areAllScaleSetsUniform() {
 			// Omit virtual machines not managed by vmss only in case of uniform scale set.
 			if ok := virtualMachineRE.Match([]byte(inst.Name)); ok {
diff --git a/cluster-autoscaler/cloudprovider/azure/azure_cache_test.go b/cluster-autoscaler/cloudprovider/azure/azure_cache_test.go
index 2b87ab938486..8ca1fcce1154 100644
--- a/cluster-autoscaler/cloudprovider/azure/azure_cache_test.go
+++ b/cluster-autoscaler/cloudprovider/azure/azure_cache_test.go
@@ -19,8 +19,10 @@ package azure
 import (
 	"testing"

+	"go.uber.org/mock/gomock"
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"

+	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v5"
 	"github.com/stretchr/testify/assert"
 )
@@ -70,3 +72,35 @@ func TestFindForInstance(t *testing.T) {
 	assert.NoError(t, err)
 	assert.True(t, ac.unownedInstances[inst])
 }
+
+func TestFetchVMsPools(t *testing.T) {
+	ctrl := gomock.NewController(t)
+	defer ctrl.Finish()
+
+	provider := newTestProvider(t)
+	ac := provider.azureManager.azureCache
+	mockAgentpoolclient := NewMockAgentPoolsClient(ctrl)
+	ac.azClient.agentPoolClient = mockAgentpoolclient
+
+	vmsPoolName := "vmspool1"
+	vmsPool := getTestVMsAgentPool(vmsPoolName, false)
+	vmssPoolName := "vmsspool1"
+	vmssPoolType := armcontainerservice.AgentPoolTypeVirtualMachineScaleSets
+	vmssPool := armcontainerservice.AgentPool{
+		Name: &vmssPoolName,
+		Properties: &armcontainerservice.ManagedClusterAgentPoolProfileProperties{
+			Type: &vmssPoolType,
+		},
+	}
+	invalidPool := armcontainerservice.AgentPool{}
+	fakeAPListPager := getFakeAgentpoolListPager(&vmsPool, &vmssPool, &invalidPool)
+	mockAgentpoolclient.EXPECT().NewListPager(gomock.Any(), gomock.Any(), nil).
+		Return(fakeAPListPager)
+
+	vmsPoolMap, err := ac.fetchVMsPools()
+	assert.NoError(t, err)
+	assert.Equal(t, 1, len(vmsPoolMap))
+
+	_, ok := vmsPoolMap[vmsPoolName]
+	assert.True(t, ok)
+}
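(Editor's note: the tests in this patch call the helpers getTestVMsAgentPool and getFakeAgentpoolListPager, which are defined outside the hunks shown here. A minimal sketch of what the fake pager helper could look like, using the real azcore runtime.NewPager API; the single-page behavior is an assumption, not the verbatim helper:)

import (
	"context"

	"github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime"
	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v5"
)

// getFakeAgentpoolListPager returns a pager that serves the provided agent
// pools in a single page and then reports no further pages.
func getFakeAgentpoolListPager(agentpools ...*armcontainerservice.AgentPool) *runtime.Pager[armcontainerservice.AgentPoolsClientListResponse] {
	return runtime.NewPager(runtime.PagingHandler[armcontainerservice.AgentPoolsClientListResponse]{
		More: func(resp armcontainerservice.AgentPoolsClientListResponse) bool {
			return false // everything fits in the first page
		},
		Fetcher: func(ctx context.Context, _ *armcontainerservice.AgentPoolsClientListResponse) (armcontainerservice.AgentPoolsClientListResponse, error) {
			return armcontainerservice.AgentPoolsClientListResponse{
				AgentPoolListResult: armcontainerservice.AgentPoolListResult{
					Value: agentpools,
				},
			}, nil
		},
	})
}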
diff --git a/cluster-autoscaler/cloudprovider/azure/azure_client.go b/cluster-autoscaler/cloudprovider/azure/azure_client.go
index 2bf337a4e8d4..14f464376cde 100644
--- a/cluster-autoscaler/cloudprovider/azure/azure_client.go
+++ b/cluster-autoscaler/cloudprovider/azure/azure_client.go
@@ -33,7 +33,7 @@ import (
 	azurecore_policy "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
 	"github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime"
 	"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
-	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4"
+	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v5"
 	"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-07-01/compute"
 	"github.com/Azure/azure-sdk-for-go/services/resources/mgmt/2017-05-10/resources"
 	"github.com/Azure/azure-sdk-for-go/services/storage/mgmt/2021-02-01/storage"
@@ -148,7 +148,7 @@ func (az *azDeploymentsClient) Delete(ctx context.Context, resourceGroupName, de
 	return future.Response(), err
 }

-//go:generate sh -c "mockgen k8s.io/autoscaler/cluster-autoscaler/cloudprovider/azure AgentPoolsClient >./agentpool_client.go"
+//go:generate sh -c "mockgen -source=azure_client.go -destination azure_mock_agentpool_client.go -package azure -exclude_interfaces DeploymentsClient"

 // AgentPoolsClient interface defines the methods needed for scaling vms pool.
 // it is implemented by track2 sdk armcontainerservice.AgentPoolsClient
@@ -169,41 +169,47 @@ type AgentPoolsClient interface {
 		machines armcontainerservice.AgentPoolDeleteMachinesParameter,
 		options *armcontainerservice.AgentPoolsClientBeginDeleteMachinesOptions) (
 		*runtime.Poller[armcontainerservice.AgentPoolsClientDeleteMachinesResponse], error)
+	NewListPager(
+		resourceGroupName, resourceName string,
+		options *armcontainerservice.AgentPoolsClientListOptions,
+	) *runtime.Pager[armcontainerservice.AgentPoolsClientListResponse]
 }

 func getAgentpoolClientCredentials(cfg *Config) (azcore.TokenCredential, error) {
-	var cred azcore.TokenCredential
-	var err error
-	if cfg.AuthMethod == authMethodCLI {
-		cred, err = azidentity.NewAzureCLICredential(&azidentity.AzureCLICredentialOptions{
-			TenantID: cfg.TenantID})
-		if err != nil {
-			klog.Errorf("NewAzureCLICredential failed: %v", err)
-			return nil, err
+	if cfg.AuthMethod == "" || cfg.AuthMethod == authMethodPrincipal {
+		// Use MSI
+		if cfg.UseManagedIdentityExtension {
+			// Use System Assigned MSI
+			if len(cfg.UserAssignedIdentityID) == 0 {
+				klog.V(4).Info("Agentpool client: using System Assigned MSI to retrieve access token")
+				return azidentity.NewManagedIdentityCredential(nil)
+			}
+			// Use User Assigned MSI
+			klog.V(4).Info("Agentpool client: using User Assigned MSI to retrieve access token")
+			return azidentity.NewManagedIdentityCredential(&azidentity.ManagedIdentityCredentialOptions{
+				ID: azidentity.ClientID(cfg.UserAssignedIdentityID),
+			})
 		}
-	} else if cfg.AuthMethod == "" || cfg.AuthMethod == authMethodPrincipal {
-		cred, err = azidentity.NewClientSecretCredential(cfg.TenantID, cfg.AADClientID, cfg.AADClientSecret, nil)
-		if err != nil {
-			klog.Errorf("NewClientSecretCredential failed: %v", err)
-			return nil, err
+
+		// Use Service Principal
+		if len(cfg.AADClientID) > 0 && len(cfg.AADClientSecret) > 0 {
+			klog.V(2).Infoln("Agentpool client: using client_id+client_secret to retrieve access token")
+			return azidentity.NewClientSecretCredential(cfg.TenantID, cfg.AADClientID, cfg.AADClientSecret, nil)
 		}
-	} else {
-		return nil, fmt.Errorf("unsupported authorization method: %s", cfg.AuthMethod)
 	}
-	return cred, nil
-}

-func getAgentpoolClientRetryOptions(cfg *Config) azurecore_policy.RetryOptions {
-	if cfg.AuthMethod == authMethodCLI {
-		return azurecore_policy.RetryOptions{
-			MaxRetries: -1, // no retry when using CLI auth for UT
-		}
+	if cfg.UseWorkloadIdentityExtension {
+		klog.V(4).Info("Agentpool client: using workload identity for access token")
+		return azidentity.NewWorkloadIdentityCredential(&azidentity.WorkloadIdentityCredentialOptions{
+			TokenFilePath: cfg.AADFederatedTokenFile,
+		})
 	}
-	return azextensions.DefaultRetryOpts()
+
+	return nil, fmt.Errorf("unsupported authorization method: %s", cfg.AuthMethod)
 }
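(Editor's note: a sketch of how the credential returned above is typically consumed. armcontainerservice.NewAgentPoolsClient is the real track2 constructor, but this exact call site is an assumption rather than part of the patch, which also handles a custom ARMBaseURLForAPClient endpoint:)

cred, err := getAgentpoolClientCredentials(cfg)
if err != nil {
	return nil, err
}
// passing nil options targets the default (public) ARM endpoint
apClient, err := armcontainerservice.NewAgentPoolsClient(cfg.SubscriptionID, cred, nil)
if err != nil {
	return nil, err
}
return apClient, nil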
 func newAgentpoolClient(cfg *Config) (AgentPoolsClient, error) {
-	retryOptions := getAgentpoolClientRetryOptions(cfg)
+	retryOptions := azextensions.DefaultRetryOpts()

 	if cfg.ARMBaseURLForAPClient != "" {
 		klog.V(10).Infof("Using ARMBaseURLForAPClient to create agent pool client")
@@ -404,9 +410,11 @@ func newAzClient(cfg *Config, env *azure.Environment) (*azClient, error) {

 	agentPoolClient, err := newAgentpoolClient(cfg)
 	if err != nil {
-		// we don't want to fail the whole process so we don't break any existing functionality
-		// since this may not be fatal - it is only used by vms pool which is still under development.
-		klog.Warningf("newAgentpoolClient failed with error: %s", err)
+		klog.Errorf("newAgentpoolClient failed with error: %s", err)
+		if cfg.EnableVMsAgentPool {
+			// only return error if VMs agent pool is enabled
+			return nil, err
+		}
 	}

 	return &azClient{
diff --git a/cluster-autoscaler/cloudprovider/azure/azure_cloud_provider_test.go b/cluster-autoscaler/cloudprovider/azure/azure_cloud_provider_test.go
index da37ed8492da..7b17be9fba74 100644
--- a/cluster-autoscaler/cloudprovider/azure/azure_cloud_provider_test.go
+++ b/cluster-autoscaler/cloudprovider/azure/azure_cloud_provider_test.go
@@ -20,6 +20,7 @@ import (
 	"fmt"
 	"testing"

+	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v5"
 	"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2022-08-01/compute"
 	"github.com/Azure/azure-sdk-for-go/services/resources/mgmt/2017-05-10/resources"
 	"github.com/Azure/go-autorest/autorest/to"
@@ -142,9 +143,14 @@ func TestHasInstance(t *testing.T) {
 	mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
 	mockVMClient := mockvmclient.NewMockInterface(ctrl)
 	mockVMSSVMClient := mockvmssvmclient.NewMockInterface(ctrl)
+	mockAgentpoolclient := NewMockAgentPoolsClient(ctrl)
 	provider.azureManager.azClient.virtualMachinesClient = mockVMClient
 	provider.azureManager.azClient.virtualMachineScaleSetsClient = mockVMSSClient
 	provider.azureManager.azClient.virtualMachineScaleSetVMsClient = mockVMSSVMClient
+	provider.azureManager.azClient.agentPoolClient = mockAgentpoolclient
+	provider.azureManager.azureCache.clusterName = "test-cluster"
+	provider.azureManager.azureCache.clusterResourceGroup = "test-rg"
+	provider.azureManager.azureCache.enableVMsAgentPool = true // enable VMs agent pool to support mixed node group types

 	// Simulate node groups and instances
 	expectedScaleSets := newTestVMSSList(3, "test-asg", "eastus", compute.Uniform)
@@ -154,6 +160,18 @@ func TestHasInstance(t *testing.T) {
 	mockVMSSClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup).Return(expectedScaleSets, nil).AnyTimes()
 	mockVMClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup).Return(expectedVMsPoolVMs, nil).AnyTimes()
 	mockVMSSVMClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup, "test-asg", gomock.Any()).Return(expectedVMSSVMs, nil).AnyTimes()
+	vmssType := armcontainerservice.AgentPoolTypeVirtualMachineScaleSets
+	vmssPool := armcontainerservice.AgentPool{
+		Name: to.StringPtr("test-asg"),
+		Properties: &armcontainerservice.ManagedClusterAgentPoolProfileProperties{
+			Type: &vmssType,
+		},
+	}
+
+	vmsPool := getTestVMsAgentPool("test-vms-pool", false)
+	fakeAPListPager := getFakeAgentpoolListPager(&vmssPool, &vmsPool)
+	mockAgentpoolclient.EXPECT().NewListPager(provider.azureManager.azureCache.clusterResourceGroup, provider.azureManager.azureCache.clusterName, nil).
+		Return(fakeAPListPager).AnyTimes()

 	// Register node groups
 	assert.Equal(t, len(provider.NodeGroups()), 0)
@@ -257,12 +275,17 @@ func TestHasInstanceProviderIDErrorValidation(t *testing.T) {
 func TestMixedNodeGroups(t *testing.T) {
 	ctrl := gomock.NewController(t)
 	provider := newTestProvider(t)
+	provider.azureManager.azureCache.clusterName = "test-cluster"
+	provider.azureManager.azureCache.clusterResourceGroup = "test-rg"
+	provider.azureManager.azureCache.enableVMsAgentPool = true // enable VMs agent pool to support mixed node group types
 	mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
 	mockVMClient := mockvmclient.NewMockInterface(ctrl)
 	mockVMSSVMClient := mockvmssvmclient.NewMockInterface(ctrl)
+	mockAgentpoolclient := NewMockAgentPoolsClient(ctrl)
 	provider.azureManager.azClient.virtualMachinesClient = mockVMClient
 	provider.azureManager.azClient.virtualMachineScaleSetsClient = mockVMSSClient
 	provider.azureManager.azClient.virtualMachineScaleSetVMsClient = mockVMSSVMClient
+	provider.azureManager.azClient.agentPoolClient = mockAgentpoolclient

 	expectedScaleSets := newTestVMSSList(3, "test-asg", "eastus", compute.Uniform)
 	expectedVMsPoolVMs := newTestVMsPoolVMList(3)
@@ -272,6 +295,19 @@ func TestMixedNodeGroups(t *testing.T) {
 	mockVMClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup).Return(expectedVMsPoolVMs, nil).AnyTimes()
 	mockVMSSVMClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup, "test-asg", gomock.Any()).Return(expectedVMSSVMs, nil).AnyTimes()

+	vmssType := armcontainerservice.AgentPoolTypeVirtualMachineScaleSets
+	vmssPool := armcontainerservice.AgentPool{
+		Name: to.StringPtr("test-asg"),
+		Properties: &armcontainerservice.ManagedClusterAgentPoolProfileProperties{
+			Type: &vmssType,
+		},
+	}
+
+	vmsPool := getTestVMsAgentPool("test-vms-pool", false)
+	fakeAPListPager := getFakeAgentpoolListPager(&vmssPool, &vmsPool)
+	mockAgentpoolclient.EXPECT().NewListPager(provider.azureManager.azureCache.clusterResourceGroup, provider.azureManager.azureCache.clusterName, nil).
+		Return(fakeAPListPager).AnyTimes()
+
 	assert.Equal(t, len(provider.NodeGroups()), 0)
 	registered := provider.azureManager.RegisterNodeGroup(
 		newTestScaleSet(provider.azureManager, "test-asg"),
diff --git a/cluster-autoscaler/cloudprovider/azure/azure_config.go b/cluster-autoscaler/cloudprovider/azure/azure_config.go
index 6c354c2a23e4..6617030f0d19 100644
--- a/cluster-autoscaler/cloudprovider/azure/azure_config.go
+++ b/cluster-autoscaler/cloudprovider/azure/azure_config.go
@@ -66,6 +66,7 @@ const (
 	// toggle
 	dynamicInstanceListDefault = false
 	enableVmssFlexDefault      = false
+	enableVMsAgentPoolsDefault = false
 )

 // CloudProviderRateLimitConfig indicates the rate limit config for each clients.
@@ -155,6 +156,9 @@ type Config struct {

 	// (DEPRECATED, DO NOT USE) GetVmssSizeRefreshPeriod (seconds) defines how frequently to call GET VMSS API to fetch VMSS info per nodegroup instance
 	GetVmssSizeRefreshPeriod int `json:"getVmssSizeRefreshPeriod,omitempty" yaml:"getVmssSizeRefreshPeriod,omitempty"`
+
+	// EnableVMsAgentPool defines whether to support VMs agentpool type in addition to VMSS type
+	EnableVMsAgentPool bool `json:"enableVMsAgentPool,omitempty" yaml:"enableVMsAgentPool,omitempty"`
 }

 // BuildAzureConfig returns a Config object for the Azure clients
@@ -283,6 +287,16 @@ func BuildAzureConfig(configReader io.Reader) (*Config, error) {
 		cfg.EnableVmssFlex = enableVmssFlexDefault
 	}

+	// if enableVMsAgentPools is true, CAS can handle both vmss and vms pool at the same time
+	if enableVMsAgentPools := os.Getenv("AZURE_ENABLE_VMS_AGENT_POOLS"); enableVMsAgentPools != "" {
+		cfg.EnableVMsAgentPool, err = strconv.ParseBool(enableVMsAgentPools)
+		if err != nil {
+			return nil, fmt.Errorf("failed to parse AZURE_ENABLE_VMS_AGENT_POOLS %q: %v", enableVMsAgentPools, err)
+		}
+	} else {
+		cfg.EnableVMsAgentPool = enableVMsAgentPoolsDefault
+	}
+
 	if cfg.CloudProviderBackoff {
 		if backoffRetries := os.Getenv("BACKOFF_RETRIES"); backoffRetries != "" {
 			retries, err := strconv.ParseInt(backoffRetries, 10, 0)
diff --git a/cluster-autoscaler/cloudprovider/azure/azure_instance.go b/cluster-autoscaler/cloudprovider/azure/azure_instance.go
index 9a04c441b51f..29d31f6897d9 100644
--- a/cluster-autoscaler/cloudprovider/azure/azure_instance.go
+++ b/cluster-autoscaler/cloudprovider/azure/azure_instance.go
@@ -22,80 +22,79 @@ import (
 	"regexp"
 	"strings"

-	"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2022-08-01/compute"
 	"k8s.io/klog/v2"
 )

 // GetVMSSTypeStatically uses static list of vmss generated at azure_instance_types.go to fetch vmss instance information.
 // It is declared as a variable for testing purpose.
-var GetVMSSTypeStatically = func(template compute.VirtualMachineScaleSet) (*InstanceType, error) {
-	var vmssType *InstanceType
+var GetVMSSTypeStatically = func(template NodeTemplate) (*InstanceType, error) {
+	var instanceType *InstanceType
 	for k := range InstanceTypes {
-		if strings.EqualFold(k, *template.Sku.Name) {
-			vmssType = InstanceTypes[k]
+		if strings.EqualFold(k, template.SkuName) {
+			instanceType = InstanceTypes[k]
 			break
 		}
 	}

 	promoRe := regexp.MustCompile(`(?i)_promo`)
-	if promoRe.MatchString(*template.Sku.Name) {
-		if vmssType == nil {
+	if promoRe.MatchString(template.SkuName) {
+		if instanceType == nil {
 			// We didn't find an exact match but this is a promo type, check for matching standard
-			klog.V(4).Infof("No exact match found for %s, checking standard types", *template.Sku.Name)
-			skuName := promoRe.ReplaceAllString(*template.Sku.Name, "")
+			klog.V(4).Infof("No exact match found for %s, checking standard types", template.SkuName)
+			skuName := promoRe.ReplaceAllString(template.SkuName, "")
 			for k := range InstanceTypes {
 				if strings.EqualFold(k, skuName) {
-					vmssType = InstanceTypes[k]
+					instanceType = InstanceTypes[k]
 					break
 				}
 			}
 		}
 	}
-	if vmssType == nil {
-		return vmssType, fmt.Errorf("instance type %q not supported", *template.Sku.Name)
+	if instanceType == nil {
+		return instanceType, fmt.Errorf("instance type %q not supported", template.SkuName)
 	}
-	return vmssType, nil
+	return instanceType, nil
 }

 // GetVMSSTypeDynamically fetched vmss instance information using sku api calls.
 // It is declared as a variable for testing purpose.
-var GetVMSSTypeDynamically = func(template compute.VirtualMachineScaleSet, azCache *azureCache) (InstanceType, error) {
+var GetVMSSTypeDynamically = func(template NodeTemplate, azCache *azureCache) (InstanceType, error) {
 	ctx := context.Background()
-	var vmssType InstanceType
+	var instanceType InstanceType

-	sku, err := azCache.GetSKU(ctx, *template.Sku.Name, *template.Location)
+	sku, err := azCache.GetSKU(ctx, template.SkuName, template.Location)
 	if err != nil {
 		// We didn't find an exact match but this is a promo type, check for matching standard
 		promoRe := regexp.MustCompile(`(?i)_promo`)
-		skuName := promoRe.ReplaceAllString(*template.Sku.Name, "")
-		if skuName != *template.Sku.Name {
-			klog.V(1).Infof("No exact match found for %q, checking standard type %q. Error %v", *template.Sku.Name, skuName, err)
-			sku, err = azCache.GetSKU(ctx, skuName, *template.Location)
+		skuName := promoRe.ReplaceAllString(template.SkuName, "")
+		if skuName != template.SkuName {
+			klog.V(1).Infof("No exact match found for %q, checking standard type %q. Error %v", template.SkuName, skuName, err)
+			sku, err = azCache.GetSKU(ctx, skuName, template.Location)
 		}
 		if err != nil {
-			return vmssType, fmt.Errorf("instance type %q not supported. Error %v", *template.Sku.Name, err)
+			return instanceType, fmt.Errorf("instance type %q not supported. Error %v", template.SkuName, err)
 		}
 	}

-	vmssType.VCPU, err = sku.VCPU()
+	instanceType.VCPU, err = sku.VCPU()
 	if err != nil {
-		klog.V(1).Infof("Failed to parse vcpu from sku %q %v", *template.Sku.Name, err)
-		return vmssType, err
+		klog.V(1).Infof("Failed to parse vcpu from sku %q %v", template.SkuName, err)
+		return instanceType, err
 	}
 	gpu, err := getGpuFromSku(sku)
 	if err != nil {
-		klog.V(1).Infof("Failed to parse gpu from sku %q %v", *template.Sku.Name, err)
-		return vmssType, err
+		klog.V(1).Infof("Failed to parse gpu from sku %q %v", template.SkuName, err)
+		return instanceType, err
 	}
-	vmssType.GPU = gpu
+	instanceType.GPU = gpu

 	memoryGb, err := sku.Memory()
 	if err != nil {
-		klog.V(1).Infof("Failed to parse memoryMb from sku %q %v", *template.Sku.Name, err)
-		return vmssType, err
+		klog.V(1).Infof("Failed to parse memoryMb from sku %q %v", template.SkuName, err)
+		return instanceType, err
 	}
-	vmssType.MemoryMb = int64(memoryGb) * 1024
+	instanceType.MemoryMb = int64(memoryGb) * 1024

-	return vmssType, nil
+	return instanceType, nil
 }
diff --git a/cluster-autoscaler/cloudprovider/azure/azure_manager.go b/cluster-autoscaler/cloudprovider/azure/azure_manager.go
index 0802d40e085c..bc145ef4591c 100644
--- a/cluster-autoscaler/cloudprovider/azure/azure_manager.go
+++ b/cluster-autoscaler/cloudprovider/azure/azure_manager.go
@@ -173,9 +173,11 @@ func (m *AzureManager) buildNodeGroupFromSpec(spec string) (cloudprovider.NodeGr
 	if err != nil {
 		return nil, fmt.Errorf("failed to parse node group spec: %v", err)
 	}
-	vmsPoolSet := m.azureCache.getVMsPoolSet()
-	if _, ok := vmsPoolSet[s.Name]; ok {
-		return NewVMsPool(s, m), nil
+	if m.config.EnableVMsAgentPool {
+		vmsPoolMap := m.azureCache.getVMsPoolMap()
+		if _, ok := vmsPoolMap[s.Name]; ok {
+			return NewVMsPool(s, m)
+		}
 	}

 	switch m.config.VMType {
diff --git a/cluster-autoscaler/cloudprovider/azure/azure_manager_test.go b/cluster-autoscaler/cloudprovider/azure/azure_manager_test.go
index baddccff26b1..f5a66f209ba1 100644
--- a/cluster-autoscaler/cloudprovider/azure/azure_manager_test.go
+++ b/cluster-autoscaler/cloudprovider/azure/azure_manager_test.go
@@ -167,6 +167,7 @@ func TestCreateAzureManagerValidConfig(t *testing.T) {
 		VmssVmsCacheTTL:     240,
 		VmssVmsCacheJitter:  120,
 		MaxDeploymentsCount: 8,
+		EnableVMsAgentPool:  false,
 		CloudProviderRateLimitConfig: CloudProviderRateLimitConfig{
 			RateLimitConfig: azclients.RateLimitConfig{
 				CloudProviderRateLimit: false,
@@ -344,9 +345,14 @@ func TestCreateAzureManagerWithNilConfig(t *testing.T) {
 	mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
 	mockVMSSClient.EXPECT().List(gomock.Any(), "resourceGroup").Return([]compute.VirtualMachineScaleSet{}, nil).AnyTimes()
 	mockVMClient.EXPECT().List(gomock.Any(), "resourceGroup").Return([]compute.VirtualMachine{}, nil).AnyTimes()
+	mockAgentpoolclient := NewMockAgentPoolsClient(ctrl)
+	vmspool := getTestVMsAgentPool("vmspool", false)
+	fakeAPListPager := getFakeAgentpoolListPager(&vmspool)
+	mockAgentpoolclient.EXPECT().NewListPager(gomock.Any(), gomock.Any(), nil).Return(fakeAPListPager).AnyTimes()
 	mockAzClient := &azClient{
 		virtualMachinesClient:         mockVMClient,
 		virtualMachineScaleSetsClient: mockVMSSClient,
+		agentPoolClient:               mockAgentpoolclient,
 	}

 	expectedConfig := &Config{
@@ -376,6 +382,7 @@ func TestCreateAzureManagerWithNilConfig(t *testing.T) {
 		CloudProviderBackoffExponent: 1,
 		CloudProviderBackoffDuration: 1,
 		CloudProviderBackoffJitter:   1,
+		EnableVMsAgentPool:           true,
 		CloudProviderRateLimitConfig: CloudProviderRateLimitConfig{
 			RateLimitConfig: azclients.RateLimitConfig{
 				CloudProviderRateLimit: true,
@@ -459,6 +466,7 @@ func TestCreateAzureManagerWithNilConfig(t *testing.T) {
 	t.Setenv("CLUSTER_NAME", "mycluster")
 	t.Setenv("ARM_CLUSTER_RESOURCE_GROUP", "myrg")
 	t.Setenv("ARM_BASE_URL_FOR_AP_CLIENT", "nodeprovisioner-svc.nodeprovisioner.svc.cluster.local")
+	t.Setenv("AZURE_ENABLE_VMS_AGENT_POOLS", "true")

 	t.Run("environment variables correctly set", func(t *testing.T) {
 		manager, err := createAzureManagerInternal(nil, cloudprovider.NodeGroupDiscoveryOptions{}, mockAzClient)
diff --git a/cluster-autoscaler/cloudprovider/azure/azure_mock_agentpool_client.go b/cluster-autoscaler/cloudprovider/azure/azure_mock_agentpool_client.go
index eaad11f01d4c..0e63d30b6465 100644
--- a/cluster-autoscaler/cloudprovider/azure/azure_mock_agentpool_client.go
+++ b/cluster-autoscaler/cloudprovider/azure/azure_mock_agentpool_client.go
@@ -21,7 +21,7 @@ import (
 	reflect "reflect"

 	runtime "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime"
-	armcontainerservice "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4"
+	armcontainerservice "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v5"
 	gomock "go.uber.org/mock/gomock"
 )

@@ -49,46 +49,60 @@ func (m *MockAgentPoolsClient) EXPECT() *MockAgentPoolsClientMockRecorder {
 }

 // BeginCreateOrUpdate mocks base method.
-func (m *MockAgentPoolsClient) BeginCreateOrUpdate(arg0 context.Context, arg1, arg2, arg3 string, arg4 armcontainerservice.AgentPool, arg5 *armcontainerservice.AgentPoolsClientBeginCreateOrUpdateOptions) (*runtime.Poller[armcontainerservice.AgentPoolsClientCreateOrUpdateResponse], error) {
+func (m *MockAgentPoolsClient) BeginCreateOrUpdate(ctx context.Context, resourceGroupName, resourceName, agentPoolName string, parameters armcontainerservice.AgentPool, options *armcontainerservice.AgentPoolsClientBeginCreateOrUpdateOptions) (*runtime.Poller[armcontainerservice.AgentPoolsClientCreateOrUpdateResponse], error) {
 	m.ctrl.T.Helper()
-	ret := m.ctrl.Call(m, "BeginCreateOrUpdate", arg0, arg1, arg2, arg3, arg4, arg5)
+	ret := m.ctrl.Call(m, "BeginCreateOrUpdate", ctx, resourceGroupName, resourceName, agentPoolName, parameters, options)
 	ret0, _ := ret[0].(*runtime.Poller[armcontainerservice.AgentPoolsClientCreateOrUpdateResponse])
 	ret1, _ := ret[1].(error)
 	return ret0, ret1
 }

 // BeginCreateOrUpdate indicates an expected call of BeginCreateOrUpdate.
-func (mr *MockAgentPoolsClientMockRecorder) BeginCreateOrUpdate(arg0, arg1, arg2, arg3, arg4, arg5 any) *gomock.Call {
+func (mr *MockAgentPoolsClientMockRecorder) BeginCreateOrUpdate(ctx, resourceGroupName, resourceName, agentPoolName, parameters, options any) *gomock.Call {
 	mr.mock.ctrl.T.Helper()
-	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "BeginCreateOrUpdate", reflect.TypeOf((*MockAgentPoolsClient)(nil).BeginCreateOrUpdate), arg0, arg1, arg2, arg3, arg4, arg5)
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "BeginCreateOrUpdate", reflect.TypeOf((*MockAgentPoolsClient)(nil).BeginCreateOrUpdate), ctx, resourceGroupName, resourceName, agentPoolName, parameters, options)
 }

 // BeginDeleteMachines mocks base method.
-func (m *MockAgentPoolsClient) BeginDeleteMachines(arg0 context.Context, arg1, arg2, arg3 string, arg4 armcontainerservice.AgentPoolDeleteMachinesParameter, arg5 *armcontainerservice.AgentPoolsClientBeginDeleteMachinesOptions) (*runtime.Poller[armcontainerservice.AgentPoolsClientDeleteMachinesResponse], error) {
+func (m *MockAgentPoolsClient) BeginDeleteMachines(ctx context.Context, resourceGroupName, resourceName, agentPoolName string, machines armcontainerservice.AgentPoolDeleteMachinesParameter, options *armcontainerservice.AgentPoolsClientBeginDeleteMachinesOptions) (*runtime.Poller[armcontainerservice.AgentPoolsClientDeleteMachinesResponse], error) {
 	m.ctrl.T.Helper()
-	ret := m.ctrl.Call(m, "BeginDeleteMachines", arg0, arg1, arg2, arg3, arg4, arg5)
+	ret := m.ctrl.Call(m, "BeginDeleteMachines", ctx, resourceGroupName, resourceName, agentPoolName, machines, options)
 	ret0, _ := ret[0].(*runtime.Poller[armcontainerservice.AgentPoolsClientDeleteMachinesResponse])
 	ret1, _ := ret[1].(error)
 	return ret0, ret1
 }

 // BeginDeleteMachines indicates an expected call of BeginDeleteMachines.
-func (mr *MockAgentPoolsClientMockRecorder) BeginDeleteMachines(arg0, arg1, arg2, arg3, arg4, arg5 any) *gomock.Call {
+func (mr *MockAgentPoolsClientMockRecorder) BeginDeleteMachines(ctx, resourceGroupName, resourceName, agentPoolName, machines, options any) *gomock.Call {
 	mr.mock.ctrl.T.Helper()
-	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "BeginDeleteMachines", reflect.TypeOf((*MockAgentPoolsClient)(nil).BeginDeleteMachines), arg0, arg1, arg2, arg3, arg4, arg5)
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "BeginDeleteMachines", reflect.TypeOf((*MockAgentPoolsClient)(nil).BeginDeleteMachines), ctx, resourceGroupName, resourceName, agentPoolName, machines, options)
 }

 // Get mocks base method.
-func (m *MockAgentPoolsClient) Get(arg0 context.Context, arg1, arg2, arg3 string, arg4 *armcontainerservice.AgentPoolsClientGetOptions) (armcontainerservice.AgentPoolsClientGetResponse, error) {
+func (m *MockAgentPoolsClient) Get(ctx context.Context, resourceGroupName, resourceName, agentPoolName string, options *armcontainerservice.AgentPoolsClientGetOptions) (armcontainerservice.AgentPoolsClientGetResponse, error) {
 	m.ctrl.T.Helper()
-	ret := m.ctrl.Call(m, "Get", arg0, arg1, arg2, arg3, arg4)
+	ret := m.ctrl.Call(m, "Get", ctx, resourceGroupName, resourceName, agentPoolName, options)
 	ret0, _ := ret[0].(armcontainerservice.AgentPoolsClientGetResponse)
 	ret1, _ := ret[1].(error)
 	return ret0, ret1
 }

 // Get indicates an expected call of Get.
-func (mr *MockAgentPoolsClientMockRecorder) Get(arg0, arg1, arg2, arg3, arg4 any) *gomock.Call {
+func (mr *MockAgentPoolsClientMockRecorder) Get(ctx, resourceGroupName, resourceName, agentPoolName, options any) *gomock.Call {
 	mr.mock.ctrl.T.Helper()
-	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Get", reflect.TypeOf((*MockAgentPoolsClient)(nil).Get), arg0, arg1, arg2, arg3, arg4)
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Get", reflect.TypeOf((*MockAgentPoolsClient)(nil).Get), ctx, resourceGroupName, resourceName, agentPoolName, options)
+}
+
+// NewListPager mocks base method.
+func (m *MockAgentPoolsClient) NewListPager(resourceGroupName, resourceName string, options *armcontainerservice.AgentPoolsClientListOptions) *runtime.Pager[armcontainerservice.AgentPoolsClientListResponse] {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "NewListPager", resourceGroupName, resourceName, options)
+	ret0, _ := ret[0].(*runtime.Pager[armcontainerservice.AgentPoolsClientListResponse])
+	return ret0
+}
+
+// NewListPager indicates an expected call of NewListPager.
+func (mr *MockAgentPoolsClientMockRecorder) NewListPager(resourceGroupName, resourceName, options any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "NewListPager", reflect.TypeOf((*MockAgentPoolsClient)(nil).NewListPager), resourceGroupName, resourceName, options)
 }
diff --git a/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go b/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go
index 3d03591f5e0c..058f570dd56d 100644
--- a/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go
+++ b/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go
@@ -628,11 +628,12 @@ func (scaleSet *ScaleSet) Debug() string {

 // TemplateNodeInfo returns a node template for this scale set.
 func (scaleSet *ScaleSet) TemplateNodeInfo() (*schedulerframework.NodeInfo, error) {
-	template, err := scaleSet.getVMSSFromCache()
+	vmss, err := scaleSet.getVMSSFromCache()
 	if err != nil {
 		return nil, err
 	}

+	template := buildNodeTemplateFromVMSS(vmss)
 	node, err := buildNodeFromTemplate(scaleSet.Name, template, scaleSet.manager, scaleSet.enableDynamicInstanceList)

 	if err != nil {
diff --git a/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go b/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go
index f3aa0dc356d8..af269fcb732d 100644
--- a/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go
+++ b/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go
@@ -1131,7 +1131,7 @@ func TestTemplateNodeInfo(t *testing.T) {

 	t.Run("Checking dynamic workflow", func(t *testing.T) {
 		asg.enableDynamicInstanceList = true

-		GetVMSSTypeDynamically = func(template compute.VirtualMachineScaleSet, azCache *azureCache) (InstanceType, error) {
+		GetVMSSTypeDynamically = func(template NodeTemplate, azCache *azureCache) (InstanceType, error) {
 			vmssType := InstanceType{}
 			vmssType.VCPU = 1
 			vmssType.GPU = 2
@@ -1149,10 +1149,10 @@ func TestTemplateNodeInfo(t *testing.T) {
 	t.Run("Checking static workflow if dynamic fails", func(t *testing.T) {
 		asg.enableDynamicInstanceList = true

-		GetVMSSTypeDynamically = func(template compute.VirtualMachineScaleSet, azCache *azureCache) (InstanceType, error) {
+		GetVMSSTypeDynamically = func(template NodeTemplate, azCache *azureCache) (InstanceType, error) {
 			return InstanceType{}, fmt.Errorf("dynamic error exists")
 		}
-		GetVMSSTypeStatically = func(template compute.VirtualMachineScaleSet) (*InstanceType, error) {
+		GetVMSSTypeStatically = func(template NodeTemplate) (*InstanceType, error) {
 			vmssType := InstanceType{}
 			vmssType.VCPU = 1
 			vmssType.GPU = 2
@@ -1170,10 +1170,10 @@ func TestTemplateNodeInfo(t *testing.T) {
 	t.Run("Fails to find vmss instance information using static and dynamic workflow, instance not supported", func(t *testing.T) {
 		asg.enableDynamicInstanceList = true

-		GetVMSSTypeDynamically = func(template compute.VirtualMachineScaleSet, azCache *azureCache) (InstanceType, error) {
+		GetVMSSTypeDynamically = func(template NodeTemplate, azCache *azureCache) (InstanceType, error) {
 			return InstanceType{}, fmt.Errorf("dynamic error exists")
 		}
-		GetVMSSTypeStatically = func(template compute.VirtualMachineScaleSet) (*InstanceType, error) {
+		GetVMSSTypeStatically = func(template NodeTemplate) (*InstanceType, error) {
 			return &InstanceType{}, fmt.Errorf("static error exists")
 		}
 		nodeInfo, err := asg.TemplateNodeInfo()
@@ -1186,7 +1186,7 @@ func TestTemplateNodeInfo(t *testing.T) {
 	t.Run("Checking static-only workflow", func(t *testing.T) {
 		asg.enableDynamicInstanceList = false

-		GetVMSSTypeStatically = func(template compute.VirtualMachineScaleSet) (*InstanceType, error) {
+		GetVMSSTypeStatically = func(template NodeTemplate) (*InstanceType, error) {
 			vmssType := InstanceType{}
 			vmssType.VCPU = 1
 			vmssType.GPU = 2
diff --git a/cluster-autoscaler/cloudprovider/azure/azure_template.go b/cluster-autoscaler/cloudprovider/azure/azure_template.go
index 39afb09af222..03bb486a24b3 100644
--- a/cluster-autoscaler/cloudprovider/azure/azure_template.go
+++ b/cluster-autoscaler/cloudprovider/azure/azure_template.go
@@ -24,6 +24,7 @@ import (
 	"strings"
 	"time"

+	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v5"
 	"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2022-08-01/compute"
 	apiv1 "k8s.io/api/core/v1"
"k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -41,7 +42,88 @@ const ( AKSLabelKeyPrefixValue = AKSLabelPrefixValue + "/" ) -func buildNodeFromTemplate(nodeGroupName string, template compute.VirtualMachineScaleSet, manager *AzureManager, enableDynamicInstanceList bool) (*apiv1.Node, error) { +// NodeTemplate represents a template for a Azure nodepool +type NodeTemplate struct { + SkuName string + InstanceOS string + Location string + Zones *[]string + Tags map[string]*string + Taints []string +} + +func buildNodeTemplateFromVMSS(vmss compute.VirtualMachineScaleSet) NodeTemplate { + instanceOS := cloudprovider.DefaultOS + if vmss.VirtualMachineProfile != nil && + vmss.VirtualMachineProfile.OsProfile != nil && + vmss.VirtualMachineProfile.OsProfile.WindowsConfiguration != nil { + instanceOS = "windows" + } + return NodeTemplate{ + SkuName: *vmss.Sku.Name, + Tags: vmss.Tags, + Location: *vmss.Location, + Zones: vmss.Zones, + InstanceOS: instanceOS, + } +} + +func buildNodeTemplateFromVMsPool(vmsPool armcontainerservice.AgentPool, location string) NodeTemplate { + var skuName string + if vmsPool.Properties != nil && + vmsPool.Properties.VirtualMachinesProfile != nil && + vmsPool.Properties.VirtualMachinesProfile.Scale != nil { + if len(vmsPool.Properties.VirtualMachinesProfile.Scale.Manual) > 0 && + len(vmsPool.Properties.VirtualMachinesProfile.Scale.Manual[0].Sizes) > 0 && + vmsPool.Properties.VirtualMachinesProfile.Scale.Manual[0].Sizes[0] != nil { + skuName = *vmsPool.Properties.VirtualMachinesProfile.Scale.Manual[0].Sizes[0] + } + if len(vmsPool.Properties.VirtualMachinesProfile.Scale.Autoscale) > 0 && + len(vmsPool.Properties.VirtualMachinesProfile.Scale.Autoscale[0].Sizes) > 0 && + vmsPool.Properties.VirtualMachinesProfile.Scale.Autoscale[0].Sizes[0] != nil { + skuName = *vmsPool.Properties.VirtualMachinesProfile.Scale.Autoscale[0].Sizes[0] + } + } + + var labels map[string]*string + if vmsPool.Properties != nil && vmsPool.Properties.NodeLabels != nil { + labels = vmsPool.Properties.NodeLabels + } + + var taints []string + if vmsPool.Properties != nil && vmsPool.Properties.NodeTaints != nil { + for _, taint := range vmsPool.Properties.NodeTaints { + if taint != nil { + taints = append(taints, *taint) + } + } + } + + var zones []string + if vmsPool.Properties != nil && vmsPool.Properties.AvailabilityZones != nil { + for _, zone := range vmsPool.Properties.AvailabilityZones { + if zone != nil { + zones = append(zones, *zone) + } + } + } + + var instanceOS string + if vmsPool.Properties != nil && vmsPool.Properties.OSType != nil { + instanceOS = strings.ToLower(string(*vmsPool.Properties.OSType)) + } + + return NodeTemplate{ + SkuName: skuName, + Tags: labels, + Taints: taints, + Zones: &zones, + InstanceOS: instanceOS, + Location: location, + } +} + +func buildNodeFromTemplate(nodeGroupName string, template NodeTemplate, manager *AzureManager, enableDynamicInstanceList bool) (*apiv1.Node, error) { node := apiv1.Node{} nodeName := fmt.Sprintf("%s-asg-%d", nodeGroupName, rand.Int63()) @@ -61,7 +143,7 @@ func buildNodeFromTemplate(nodeGroupName string, template compute.VirtualMachine var dynamicErr error if enableDynamicInstanceList { var vmssTypeDynamic InstanceType - klog.V(1).Infof("Fetching instance information for SKU: %s from SKU API", *template.Sku.Name) + klog.V(1).Infof("Fetching instance information for SKU: %s from SKU API", template.SkuName) vmssTypeDynamic, dynamicErr = GetVMSSTypeDynamically(template, manager.azureCache) if dynamicErr == nil { vcpu = 
@@ -72,7 +154,7 @@ func buildNodeFromTemplate(nodeGroupName string, template compute.VirtualMachine
 		}
 	}
 	if !enableDynamicInstanceList || dynamicErr != nil {
-		klog.V(1).Infof("Falling back to static SKU list for SKU: %s", *template.Sku.Name)
+		klog.V(1).Infof("Falling back to static SKU list for SKU: %s", template.SkuName)
 		// fall-back on static list of vmss if dynamic workflow fails.
 		vmssTypeStatic, staticErr := GetVMSSTypeStatically(template)
 		if staticErr == nil {
@@ -81,7 +163,7 @@ func buildNodeFromTemplate(nodeGroupName string, template compute.VirtualMachine
 			memoryMb = vmssTypeStatic.MemoryMb
 		} else {
 			// return error if neither of the workflows results with vmss data.
-			klog.V(1).Infof("Instance type %q not supported, err: %v", *template.Sku.Name, staticErr)
+			klog.V(1).Infof("Instance type %q not supported, err: %v", template.SkuName, staticErr)
 			return nil, staticErr
 		}
 	}
@@ -90,7 +172,7 @@ func buildNodeFromTemplate(nodeGroupName string, template compute.VirtualMachine
 	node.Status.Capacity[apiv1.ResourceCPU] = *resource.NewQuantity(vcpu, resource.DecimalSI)
 	// isNPSeries returns if a SKU is an NP-series SKU
 	// SKU API reports GPUs for NP-series but it's actually FPGAs
-	if !isNPSeries(*template.Sku.Name) {
+	if !isNPSeries(template.SkuName) {
 		node.Status.Capacity[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(gpuCount, resource.DecimalSI)
 	}
@@ -121,35 +203,29 @@ func buildNodeFromTemplate(nodeGroupName string, template compute.VirtualMachine
 		node.Status.Capacity[apiv1.ResourceName(resourceName)] = *val
 	}

-	// Taints from the Scale Set's Tags
-	node.Spec.Taints = extractTaintsFromScaleSet(template.Tags)
+	if len(template.Taints) > 0 {
+		node.Spec.Taints = extractTaintsFromTemplate(template.Taints) // Taints from the VMs Node Template
+	} else {
+		node.Spec.Taints = extractTaintsFromScaleSet(template.Tags) // Taints from the Scale Set's Tags
+	}

 	node.Status.Conditions = cloudprovider.BuildReadyConditions()
 	return &node, nil
 }

-func buildInstanceOS(template compute.VirtualMachineScaleSet) string {
-	instanceOS := cloudprovider.DefaultOS
-	if template.VirtualMachineProfile != nil && template.VirtualMachineProfile.OsProfile != nil && template.VirtualMachineProfile.OsProfile.WindowsConfiguration != nil {
-		instanceOS = "windows"
-	}
-
-	return instanceOS
-}
-
-func buildGenericLabels(template compute.VirtualMachineScaleSet, nodeName string) map[string]string {
+func buildGenericLabels(template NodeTemplate, nodeName string) map[string]string {
 	result := make(map[string]string)

 	result[apiv1.LabelArchStable] = cloudprovider.DefaultArch
-	result[apiv1.LabelOSStable] = buildInstanceOS(template)
+	result[apiv1.LabelOSStable] = template.InstanceOS

-	result[apiv1.LabelInstanceTypeStable] = *template.Sku.Name
-	result[apiv1.LabelTopologyRegion] = strings.ToLower(*template.Location)
+	result[apiv1.LabelInstanceTypeStable] = template.SkuName
+	result[apiv1.LabelTopologyRegion] = strings.ToLower(template.Location)

 	if template.Zones != nil && len(*template.Zones) > 0 {
 		failureDomains := make([]string, len(*template.Zones))
 		for k, v := range *template.Zones {
-			failureDomains[k] = strings.ToLower(*template.Location) + "-" + v
+			failureDomains[k] = strings.ToLower(template.Location) + "-" + v
 		}
 		//Picks random zones for Multi-zone nodepool when scaling from zero.
 		//This random zone will not be the same as the zone of the VMSS that is being created, the purpose of creating
@@ -212,6 +288,54 @@ func extractTaintsFromScaleSet(tags map[string]*string) []apiv1.Taint {
 	return taints
 }

+func extractTaintsFromTemplate(taints []string) []apiv1.Taint {
+	result := make([]apiv1.Taint, 0)
+	for _, taint := range taints {
+		parsedTaint, err := parseTaint(taint)
+		if err != nil {
+			klog.Warningf("failed to parse taint %q: %v", taint, err)
+			continue
+		}
+		result = append(result, parsedTaint)
+	}
+
+	return result
+}
+
+// parseTaint parses a taint string, whose format must be either
+// '<key>=<value>:<effect>', '<key>:<effect>', or '<key>'.
+func parseTaint(taintStr string) (apiv1.Taint, error) {
+	var taint apiv1.Taint
+	var key string
+	var value string
+	var effect apiv1.TaintEffect
+
+	parts := strings.Split(taintStr, ":")
+	switch len(parts) {
+	case 1:
+		key = parts[0]
+	case 2:
+		effect = apiv1.TaintEffect(parts[1])
+
+		partsKV := strings.Split(parts[0], "=")
+		if len(partsKV) > 2 {
+			return taint, fmt.Errorf("invalid taint spec: %v", taintStr)
+		}
+		key = partsKV[0]
+		if len(partsKV) == 2 {
+			value = partsKV[1]
+		}
+	default:
+		return taint, fmt.Errorf("invalid taint spec: %v", taintStr)
+	}
+
+	taint.Key = key
+	taint.Value = value
+	taint.Effect = effect
+
+	return taint, nil
+}
+
 func extractAutoscalingOptionsFromScaleSetTags(tags map[string]*string) map[string]string {
 	options := make(map[string]string)
 	for tagName, tagValue := range tags {
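(Editor's note: a quick usage sketch for the parseTaint helper added above; the inputs and resulting fields follow directly from its string-splitting logic:)

t, err := parseTaint("dedicated=myapp:NoSchedule")
// t.Key == "dedicated", t.Value == "myapp", t.Effect == "NoSchedule", err == nil

t, err = parseTaint("critical:NoExecute") // key and effect, no value
t, err = parseTaint("bare-key")           // key only; value and effect left empty
t, err = parseTaint("a=b=c:NoSchedule")   // err: invalid taint spec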
diff --git a/cluster-autoscaler/cloudprovider/azure/azure_template_test.go b/cluster-autoscaler/cloudprovider/azure/azure_template_test.go
index d8eaeb1d8fe0..38e8e2546844 100644
--- a/cluster-autoscaler/cloudprovider/azure/azure_template_test.go
+++ b/cluster-autoscaler/cloudprovider/azure/azure_template_test.go
@@ -18,8 +18,10 @@ package azure
 import (
 	"fmt"
+	"strings"
 	"testing"

+	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v5"
 	"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2022-08-01/compute"
 	"github.com/Azure/go-autorest/autorest"
 	"github.com/Azure/go-autorest/autorest/to"
@@ -143,8 +145,9 @@ func TestTopologyFromScaleSet(t *testing.T) {
 		Zones:    &[]string{"1", "2", "3"},
 		Location: to.StringPtr("westus"),
 	}
+	template := buildNodeTemplateFromVMSS(testVmss)
 	expectedZoneValues := []string{"westus-1", "westus-2", "westus-3"}
-	labels := buildGenericLabels(testVmss, testNodeName)
+	labels := buildGenericLabels(template, testNodeName)
 	topologyZone, ok := labels[apiv1.LabelTopologyZone]
 	assert.True(t, ok)
 	azureDiskTopology, ok := labels[azureDiskTopologyKey]
@@ -167,7 +170,8 @@ func TestEmptyTopologyFromScaleSet(t *testing.T) {
 	}
 	expectedTopologyZone := "0"
 	expectedAzureDiskTopology := ""
-	labels := buildGenericLabels(testVmss, testNodeName)
+	template := buildNodeTemplateFromVMSS(testVmss)
+	labels := buildGenericLabels(template, testNodeName)

 	topologyZone, ok := labels[apiv1.LabelTopologyZone]
 	assert.True(t, ok)
@@ -177,3 +181,29 @@ func TestEmptyTopologyFromScaleSet(t *testing.T) {
 	assert.True(t, ok)
 	assert.Equal(t, expectedAzureDiskTopology, azureDiskTopology)
 }
+
+func TestBuildNodeTemplateFromVMsPool(t *testing.T) {
+	vmsPool := getTestVMsAgentPool("vmspool", false)
+	vmsPool.Properties.NodeLabels = map[string]*string{
+		"foo": to.StringPtr("bar"),
+	}
+	taint := "dedicated=myapp:NoSchedule"
+	vmsPool.Properties.NodeTaints = []*string{&taint}
+	vmsPool.Properties.AvailabilityZones = []*string{to.StringPtr("1"), to.StringPtr("3")}
+	osType := armcontainerservice.OSTypeWindows
+	vmsPool.Properties.OSType = &osType
+
+	location := "westus"
+	template := buildNodeTemplateFromVMsPool(vmsPool, location)
+	assert.Equal(t, location, template.Location)
+	assert.Equal(t, "Standard_D2_v2", template.SkuName)
+
+	assert.Equal(t, len(vmsPool.Properties.NodeLabels), len(template.Tags))
+	assert.Contains(t, template.Tags, "foo")
+	assert.Equal(t, *template.Tags["foo"], "bar")
+	assert.Equal(t, len(vmsPool.Properties.NodeTaints), len(template.Taints))
+	assert.Contains(t, template.Taints, taint)
+
+	assert.Equal(t, 2, len(*template.Zones))
+	assert.Equal(t, strings.ToLower(string(osType)), template.InstanceOS)
+}
diff --git a/cluster-autoscaler/cloudprovider/azure/azure_vms_pool.go b/cluster-autoscaler/cloudprovider/azure/azure_vms_pool.go
index c86eff66ce6e..a3f8bfc2b397 100644
--- a/cluster-autoscaler/cloudprovider/azure/azure_vms_pool.go
+++ b/cluster-autoscaler/cloudprovider/azure/azure_vms_pool.go
@@ -18,48 +18,77 @@ package azure

 import (
 	"fmt"
+	"strings"
+	"sync"
+	"time"

+	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v5"
 	"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2022-08-01/compute"
+	"github.com/Azure/go-autorest/autorest/to"
+
+	apiv1 "k8s.io/api/core/v1"
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
 	"k8s.io/autoscaler/cluster-autoscaler/config"
 	"k8s.io/autoscaler/cluster-autoscaler/config/dynamic"
+	klog "k8s.io/klog/v2"
 	schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
 )

 // VMsPool is single instance VM pool
-// this is a placeholder for now, no real implementation
 type VMsPool struct {
 	azureRef
-	manager       *AzureManager
-	resourceGroup string
+	manager              *AzureManager
+	resourceGroup        string // MC_ resource group for nodes
+	clusterResourceGroup string // resource group for the cluster itself
+	clusterName          string
+	location             string

 	minSize int
 	maxSize int

-	curSize int64
-	// sizeMutex       sync.Mutex
-	// lastSizeRefresh time.Time
+	curSize           int64
+	sizeMutex         sync.Mutex
+	lastSizeRefresh   time.Time
+	sizeRefreshPeriod time.Duration
+
+	enableDynamicInstanceList bool
 }

 // NewVMsPool creates a new VMsPool
-func NewVMsPool(spec *dynamic.NodeGroupSpec, am *AzureManager) *VMsPool {
+func NewVMsPool(spec *dynamic.NodeGroupSpec, am *AzureManager) (*VMsPool, error) {
+	if am.azClient.agentPoolClient == nil {
+		return nil, fmt.Errorf("agentPoolClient is nil")
+	}
 	nodepool := &VMsPool{
 		azureRef: azureRef{
 			Name: spec.Name,
 		},
-		manager:       am,
-		resourceGroup: am.config.ResourceGroup,
+		manager:                   am,
+		resourceGroup:             am.config.ResourceGroup,
+		clusterResourceGroup:      am.config.ClusterResourceGroup,
+		clusterName:               am.config.ClusterName,
+		location:                  am.config.Location,
+		sizeRefreshPeriod:         am.azureCache.refreshInterval,
+		enableDynamicInstanceList: am.config.EnableDynamicInstanceList,

 		curSize: -1,
 		minSize: spec.MinSize,
 		maxSize: spec.MaxSize,
 	}

-	return nodepool
+	return nodepool, nil
 }

+const (
+	vmsGetRequestContextTimeout    = 1 * time.Minute
+	vmsListRequestContextTimeout   = 3 * time.Minute
+	vmsPutRequestContextTimeout    = 5 * time.Minute
+	vmsDeleteRequestContextTimeout = 10 * time.Minute
+
+	spotPoolCacheTTL = 15 * time.Second
+)
+
 // MinSize returns the minimum size the cluster is allowed to scaled down
 // to as provided by the node spec in --node parameter.
 func (agentPool *VMsPool) MinSize() int {
@@ -89,8 +118,8 @@ func (agentPool *VMsPool) Autoprovisioned() bool {

 // GetOptions returns NodeGroupAutoscalingOptions that should be used for this particular
 // NodeGroup. Returning a nil will result in using default options.
 func (agentPool *VMsPool) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*config.NodeGroupAutoscalingOptions, error) {
-	// TODO(wenxuan): Implement this method
-	return nil, cloudprovider.ErrNotImplemented
+	// TODO(wenxuan): implement this method when vms pool can fully support GPU nodepool
+	return nil, nil
 }

 // MaxSize returns the maximum size scale limit provided by --node
@@ -102,28 +131,264 @@ func (agentPool *VMsPool) MaxSize() int {

 // TargetSize returns the current TARGET size of the node group. It is possible that the
 // number is different from the number of nodes registered in Kubernetes.
 func (agentPool *VMsPool) TargetSize() (int, error) {
-	// TODO(wenxuan): Implement this method
-	return -1, cloudprovider.ErrNotImplemented
+	size, err := agentPool.getVMsPoolSize()
+	return int(size), err
+}
+
+func (agentPool *VMsPool) getAgentpoolFromAzure() (armcontainerservice.AgentPool, error) {
+	ctx, cancel := getContextWithTimeout(vmsGetRequestContextTimeout)
+	defer cancel()
+	resp, err := agentPool.manager.azClient.agentPoolClient.Get(
+		ctx,
+		agentPool.clusterResourceGroup,
+		agentPool.clusterName,
+		agentPool.Name, nil)
+	if err != nil {
+		return resp.AgentPool, fmt.Errorf("failed to get agentpool %s in cluster %s with error: %v",
+			agentPool.Name, agentPool.clusterName, err)
+	}
+	return resp.AgentPool, nil
 }

-// IncreaseSize increase the size through a PUT AP call. It calculates the expected size
-// based on a delta provided as parameter
+// getVMsPoolSize returns the current size of the vms pool, which is the vm count in the vms pool
+func (agentPool *VMsPool) getVMsPoolSize() (int64, error) {
+	size, err := agentPool.getCurSize()
+	if err != nil {
+		klog.Errorf("Failed to get vms pool %s node count with error: %s", agentPool.Name, err)
+		return size, err
+	}
+	if size == -1 {
+		klog.Errorf("vms pool %s size is -1, it is still being initialized", agentPool.Name)
+		return size, fmt.Errorf("getVMsPoolSize: size is -1 for vms pool %s", agentPool.Name)
+	}
+	return size, nil
+}
+
+// IncreaseSize increases the size through a PUT AP call.
 func (agentPool *VMsPool) IncreaseSize(delta int) error {
-	// TODO(wenxuan): Implement this method
-	return cloudprovider.ErrNotImplemented
+	if delta <= 0 {
+		return fmt.Errorf("size increase must be positive, current delta: %d", delta)
+	}
+
+	currentSize, err := agentPool.getVMsPoolSize()
+	if err != nil {
+		return err
+	}
+
+	if currentSize == -1 {
+		return fmt.Errorf("the vms pool %s is still under initialization, skip the size increase", agentPool.Name)
+	}
+
+	if int(currentSize)+delta > agentPool.MaxSize() {
+		return fmt.Errorf("size-increasing request of %d is bigger than max size %d", int(currentSize)+delta, agentPool.MaxSize())
+	}
+
+	return agentPool.scaleUpToCount(currentSize + int64(delta))
+}
+
+func (agentPool *VMsPool) buildRequestBodyForScaleUp(count int64) (armcontainerservice.AgentPool, error) {
+	versionedAP, err := agentPool.getAgentpoolFromCache()
+	if err != nil {
+		klog.Errorf("Failed to get vms pool %s, error: %s", agentPool.Name, err)
+		return armcontainerservice.AgentPool{}, err
+	}
+
+	if versionedAP.Properties != nil &&
+		versionedAP.Properties.VirtualMachinesProfile != nil &&
+		versionedAP.Properties.VirtualMachinesProfile.Scale != nil {
+		requestBody := armcontainerservice.AgentPool{
+			Properties: &armcontainerservice.ManagedClusterAgentPoolProfileProperties{
+				Type: versionedAP.Properties.Type,
+			},
+		}
+
+		// the request body must have the same mode as the original agentpool
+		// otherwise the PUT request will fail
+		if versionedAP.Properties.Mode != nil &&
+			*versionedAP.Properties.Mode == armcontainerservice.AgentPoolModeSystem {
+			systemMode := armcontainerservice.AgentPoolModeSystem
+			requestBody.Properties.Mode = &systemMode
+		}
+
+		// self-hosted CAS will be using the Manual scale profile
+		if len(versionedAP.Properties.VirtualMachinesProfile.Scale.Manual) > 0 {
+			// set the count of the first manual scale profile to the new target value
+			virtualMachineProfile := *versionedAP.Properties.VirtualMachinesProfile
+			virtualMachineProfile.Scale.Manual[0].Count = to.Int32Ptr(int32(count))
+			requestBody.Properties.VirtualMachinesProfile = &virtualMachineProfile
+			return requestBody, nil
+		}
+
+		// aks-managed CAS will be using the Autoscale profile
+		if len(versionedAP.Properties.VirtualMachinesProfile.Scale.Autoscale) > 0 {
+			// set the MinCount and MaxCount of the first Autoscale profile to the new target value
+			virtualMachineProfile := *versionedAP.Properties.VirtualMachinesProfile
+			virtualMachineProfile.Scale.Autoscale[0].MinCount = to.Int32Ptr(int32(count))
+			virtualMachineProfile.Scale.Autoscale[0].MaxCount = to.Int32Ptr(int32(count))
+			requestBody.Properties.VirtualMachinesProfile = &virtualMachineProfile
+			return requestBody, nil
+		}
+	}
+	return armcontainerservice.AgentPool{}, fmt.Errorf("failed to build request body for scale up, agentpool doesn't have a valid virtualMachinesProfile")
+}
+
+// scaleUpToCount sets node count for vms agent pool to target value through PUT AP call.
+func (agentPool *VMsPool) scaleUpToCount(count int64) error {
+	agentPool.sizeMutex.Lock()
+	defer agentPool.sizeMutex.Unlock()
+
+	updateCtx, updateCancel := getContextWithCancel()
+	defer updateCancel()
+
+	requestBody, err := agentPool.buildRequestBodyForScaleUp(count)
+	if err != nil {
+		klog.Errorf("Failed to build request body for scale up, error: %s", err)
+		return err
+	}
+
+	poller, err := agentPool.manager.azClient.agentPoolClient.BeginCreateOrUpdate(
+		updateCtx,
+		agentPool.clusterResourceGroup,
+		agentPool.clusterName,
+		agentPool.Name,
+		requestBody, nil)
+
+	if err != nil {
+		klog.Errorf("Failed to update agentpool %s in cluster %s with error: %v",
+			agentPool.azureRef.Name, agentPool.clusterName, err)
+		return err
+	}
+
+	updateCtx, cancel := getContextWithTimeout(vmsPutRequestContextTimeout)
+	defer cancel()
+
+	if _, err = poller.PollUntilDone(updateCtx, nil); err == nil {
+		// success path
+		agentPool.curSize = count
+		agentPool.lastSizeRefresh = time.Now()
+		agentPool.manager.invalidateCache()
+		return nil
+	}
+
+	klog.Errorf("agentPoolClient.BeginCreateOrUpdate for aks cluster %s agentpool %s failed with error %s",
+		agentPool.clusterName, agentPool.Name, err)
+	// surface the polling error so a failed scale-up is not reported as success
+	return err
 }
 
 // DeleteNodes extracts the providerIDs from the node spec and
-// delete or deallocate the nodes from the agent pool based on the scale down policy.
+// deletes or deallocates the nodes based on the scale down policy of the agentpool.
 func (agentPool *VMsPool) DeleteNodes(nodes []*apiv1.Node) error {
-	// TODO(wenxuan): Implement this method
-	return cloudprovider.ErrNotImplemented
+	currentSize, err := agentPool.getVMsPoolSize()
+	if err != nil {
+		return err
+	}
+
+	// refuse to delete nodes if the pool is already at or below its min size
+	if int(currentSize) <= agentPool.MinSize() {
+		return fmt.Errorf("min size %d reached, nodes will not be deleted", agentPool.MinSize())
+	}
+
+	var providerIDs []string
+	for _, node := range nodes {
+		belongs, err := agentPool.Belongs(node)
+		if err != nil {
+			return err
+		}
+		if !belongs {
+			return fmt.Errorf("node %s does not belong to agent pool %s", node.Name, agentPool.Name)
+		}
+
+		providerIDs = append(providerIDs, node.Spec.ProviderID)
+	}
+
+	return agentPool.scaleDownNodes(providerIDs)
+}
+
+// scaleDownNodes deletes or deallocates the nodes from the agent pool based on the scale down policy.
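+//
+// For illustration, each machine name sent to BeginDeleteMachines is the last
+// '/'-separated segment of the node's provider ID (hypothetical, elided value):
+//
+//	providerID := "azure:///subscriptions/.../virtualMachines/aks-nodes-32301838-vms0"
+//	machineName, _ := resourceName(providerID) // "aks-nodes-32301838-vms0"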
+func (agentPool *VMsPool) scaleDownNodes(providerIDs []string) error {
+	agentPool.sizeMutex.Lock()
+	defer agentPool.sizeMutex.Unlock()
+
+	if len(providerIDs) == 0 {
+		return nil
+	}
+
+	klog.V(3).Infof("Deleting nodes from agent pool %s: %v", agentPool.Name, providerIDs)
+
+	deleteCtx, deleteCancel := getContextWithTimeout(vmsDeleteRequestContextTimeout)
+	defer deleteCancel()
+
+	machineNames := make([]*string, len(providerIDs))
+	for i, providerID := range providerIDs {
+		// extract the machine name from the providerID by taking the last '/'-separated segment.
+		// A providerID looks like this:
+		// "azure:///subscriptions/0000000-0000-0000-0000-00000000000/resourceGroups/mc_wxrg_play-vms_eastus/providers/Microsoft.Compute/virtualMachines/aks-nodes-32301838-vms0"
+		machineName, err := resourceName(providerID)
+		if err != nil {
+			return err
+		}
+		machineNames[i] = &machineName
+	}
+
+	requestBody := armcontainerservice.AgentPoolDeleteMachinesParameter{
+		MachineNames: machineNames,
+	}
+
+	poller, err := agentPool.manager.azClient.agentPoolClient.BeginDeleteMachines(
+		deleteCtx,
+		agentPool.clusterResourceGroup,
+		agentPool.clusterName,
+		agentPool.Name,
+		requestBody, nil)
+	if err != nil {
+		klog.Errorf("Failed to delete machines from agentpool %s in cluster %s with error: %v",
+			agentPool.azureRef.Name, agentPool.clusterName, err)
+		return err
+	}
+
+	defer agentPool.manager.invalidateCache()
+
+	updateCtx, cancel := getContextWithTimeout(vmsPutRequestContextTimeout)
+	defer cancel()
+	if _, err = poller.PollUntilDone(updateCtx, nil); err == nil {
+		return nil
+	}
+
+	klog.Errorf("agentPoolClient.BeginDeleteMachines for aks cluster %s agentpool %s failed with error %s",
+		agentPool.clusterName, agentPool.Name, err)
+	// surface the polling error so a failed delete is not reported as success
+	return err
+}
+
+// Belongs returns true if the given k8s node belongs to this vms nodepool.
+func (agentPool *VMsPool) Belongs(node *apiv1.Node) (bool, error) {
+	klog.V(6).Infof("Check if node belongs to this vms pool: %s, node: %v", agentPool, node)
+
+	ref := &azureRef{
+		Name: node.Spec.ProviderID,
+	}
+
+	nodeGroup, err := agentPool.manager.GetNodeGroupForInstance(ref)
+	if err != nil {
+		return false, err
+	}
+	if nodeGroup == nil {
+		return false, fmt.Errorf("%s doesn't belong to a known node group", node.Name)
+	}
+	if !strings.EqualFold(nodeGroup.Id(), agentPool.Id()) {
+		return false, nil
+	}
+	return true, nil
 }
 
 // DecreaseTargetSize decreases the target size of the node group.
 func (agentPool *VMsPool) DecreaseTargetSize(delta int) error {
-	// TODO(wenxuan): Implement this method
-	return cloudprovider.ErrNotImplemented
+	agentPool.manager.invalidateCache()
+	_, err := agentPool.getVMsPoolSize()
+	if err != nil {
+		klog.Warningf("DecreaseTargetSize: failed with error: %v", err)
+	}
+	return err
 }
 
 // Id returns the name of the agentPool
@@ -136,14 +401,89 @@ func (agentPool *VMsPool) Debug() string {
 	return fmt.Sprintf("%s (%d:%d)", agentPool.Id(), agentPool.MinSize(), agentPool.MaxSize())
 }
 
+func isSpotVMsPool(ap armcontainerservice.AgentPool) bool {
+	if ap.Properties != nil && ap.Properties.ScaleSetPriority != nil {
+		return strings.EqualFold(string(*ap.Properties.ScaleSetPriority), "Spot")
+	}
+	return false
+}
+
+func getNodeCountFromAgentPool(ap armcontainerservice.AgentPool) int32 {
+	size := int32(0)
+	if ap.Properties != nil {
+		// the VirtualMachineNodesStatus returned by AKS-RP is constructed from the vm list
+		// returned from CRP. It excludes the nodes in deallocated/deallocating states, thus
+		// can be used as the source of truth for VMs agent pools
+		for _, status := range ap.Properties.VirtualMachineNodesStatus {
+			if status.Count != nil {
+				size += *status.Count
+			}
+		}
+	}
+	return size
+}
+
+// getCurSize returns the current vm count in the vms agent pool.
+// It uses the azure cache as the source of truth for non-spot pools, and falls back to AKS-RP for spot pools.
+func (agentPool *VMsPool) getCurSize() (int64, error) {
+	agentPool.sizeMutex.Lock()
+	defer agentPool.sizeMutex.Unlock()
+
+	vmsPool, err := agentPool.getAgentpoolFromCache()
+	if err != nil {
+		klog.Errorf("Failed to get vms pool %s from cache with error: %v", agentPool.Name, err)
+		return -1, err
+	}
+
+	// spot pool has a shorter cache TTL
+	cacheTTL := agentPool.sizeRefreshPeriod
+	if isSpotVMsPool(vmsPool) {
+		cacheTTL = spotPoolCacheTTL
+	}
+
+	if agentPool.lastSizeRefresh.Add(cacheTTL).After(time.Now()) {
+		klog.V(3).Infof("VMs Agentpool: %s, returning in-memory size: %d", agentPool.Name, agentPool.curSize)
+		return agentPool.curSize, nil
+	}
+
+	if isSpotVMsPool(vmsPool) {
+		vmsPool, err = agentPool.getAgentpoolFromAzure()
+		if err != nil {
+			klog.Errorf("Failed to get vms pool %s from azure with error: %v", agentPool.Name, err)
+			return -1, err
+		}
+	}
+
+	realSize := int64(getNodeCountFromAgentPool(vmsPool))
+
+	if agentPool.curSize != realSize {
+		// invalidate the instance cache if the size has changed
+		klog.V(5).Infof("VMs Agentpool %s getCurSize: curSize(%d) != real size (%d), invalidating azure cache", agentPool.Name, agentPool.curSize, realSize)
+		agentPool.manager.invalidateCache()
+	}
+
+	agentPool.curSize = realSize
+	agentPool.lastSizeRefresh = time.Now()
+	return agentPool.curSize, nil
+}
+
 func (agentPool *VMsPool) getVMsFromCache() ([]compute.VirtualMachine, error) {
-	// vmsPoolMap is a map of agent pool name to the list of virtual machines
-	vmsPoolMap := agentPool.manager.azureCache.getVirtualMachines()
-	if _, ok := vmsPoolMap[agentPool.Name]; !ok {
-		return []compute.VirtualMachine{}, fmt.Errorf("vms pool %s not found in the cache", agentPool.Name)
+	curSize, err := agentPool.getCurSize()
+	if err != nil {
+		klog.Errorf("Failed to get current size for VMs pool %q: %v", agentPool.Name, err)
+		return nil, err
 	}
-	return vmsPoolMap[agentPool.Name], nil
+	// vmsMap is a map of agent pool name to the list of virtual machines belonging to the agent pool
+	// this method may return an empty list if the agentpool has no nodes, e.g. when minCount is set to 0
+	vmsMap := agentPool.manager.azureCache.getVirtualMachines()
+	if int64(len(vmsMap[agentPool.Name])) != curSize {
+		klog.V(5).Infof("VMs Agentpool %s vm list size (%d) != curSize (%d), invalidating azure cache",
+			agentPool.Name, len(vmsMap[agentPool.Name]), curSize)
+		agentPool.manager.invalidateCache()
+	}
+
+	return vmsMap[agentPool.Name], nil
 }
 
 // Nodes returns the list of nodes in the vms agentPool.
@@ -170,7 +510,29 @@ func (agentPool *VMsPool) Nodes() ([]cloudprovider.Instance, error) {
-// TemplateNodeInfo is not implemented.
+// TemplateNodeInfo returns a NodeInfo object that can be used to simulate a new node in the vms agent pool.
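+//
+// Illustrative only: the returned NodeInfo is what the autoscaler simulates
+// scale-ups from when the pool has no ready nodes (hypothetical caller):
+//
+//	nodeInfo, err := pool.TemplateNodeInfo()
+//	if err == nil && nodeInfo != nil {
+//	    klog.V(4).Infof("template node: %s", nodeInfo.Node().Name)
+//	}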
func (agentPool *VMsPool) TemplateNodeInfo() (*schedulerframework.NodeInfo, error) { - return nil, cloudprovider.ErrNotImplemented + vmsPool, err := agentPool.getAgentpoolFromCache() + if err != nil { + return nil, err + } + + template := buildNodeTemplateFromVMsPool(vmsPool, agentPool.location) + node, err := buildNodeFromTemplate(agentPool.Name, template, agentPool.manager, agentPool.enableDynamicInstanceList) + + if err != nil { + return nil, err + } + + nodeInfo := schedulerframework.NewNodeInfo(cloudprovider.BuildKubeProxy(agentPool.Name)) + nodeInfo.SetNode(node) + return nodeInfo, nil +} + +func (agentPool *VMsPool) getAgentpoolFromCache() (armcontainerservice.AgentPool, error) { + vmsPoolMap := agentPool.manager.azureCache.getVMsPoolMap() + if _, exists := vmsPoolMap[agentPool.Name]; !exists { + return armcontainerservice.AgentPool{}, fmt.Errorf("VMs agent pool %s not found in cache", agentPool.Name) + } + return vmsPoolMap[agentPool.Name], nil } // AtomicIncreaseSize is not implemented. diff --git a/cluster-autoscaler/cloudprovider/azure/azure_vms_pool_test.go b/cluster-autoscaler/cloudprovider/azure/azure_vms_pool_test.go index a3b0ebe45e4a..95b1524ad96c 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_vms_pool_test.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_vms_pool_test.go @@ -17,11 +17,25 @@ limitations under the License. package azure import ( + "context" "fmt" + "net/http" + "testing" + "time" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v5" "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2022-08-01/compute" "github.com/Azure/go-autorest/autorest/to" + "go.uber.org/mock/gomock" + + "github.com/stretchr/testify/assert" apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/autoscaler/cluster-autoscaler/config/dynamic" + "sigs.k8s.io/cloud-provider-azure/pkg/azureclients/vmclient/mockvmclient" ) func newTestVMsPool(manager *AzureManager, name string) *VMsPool { @@ -29,9 +43,14 @@ func newTestVMsPool(manager *AzureManager, name string) *VMsPool { azureRef: azureRef{ Name: name, }, - manager: manager, - minSize: 3, - maxSize: 10, + manager: manager, + minSize: 3, + maxSize: 10, + clusterName: manager.config.ClusterName, + resourceGroup: manager.config.ResourceGroup, + clusterResourceGroup: manager.config.ClusterResourceGroup, + enableDynamicInstanceList: true, + sizeRefreshPeriod: 30 * time.Second, } } @@ -65,3 +84,378 @@ func newVMsNode(vmID int64) *apiv1.Node { } return node } + +func getTestVMsAgentPool(agentpoolName string, isSystemPool bool) armcontainerservice.AgentPool { + mode := armcontainerservice.AgentPoolModeUser + if isSystemPool { + mode = armcontainerservice.AgentPoolModeSystem + } + vmsPoolType := armcontainerservice.AgentPoolTypeVirtualMachines + return armcontainerservice.AgentPool{ + Name: &agentpoolName, + Properties: &armcontainerservice.ManagedClusterAgentPoolProfileProperties{ + Type: &vmsPoolType, + Mode: &mode, + VirtualMachinesProfile: &armcontainerservice.VirtualMachinesProfile{ + Scale: &armcontainerservice.ScaleProfile{ + Manual: []*armcontainerservice.ManualScaleProfile{ + { + Count: to.Int32Ptr(3), + Sizes: []*string{to.StringPtr("Standard_D2_v2"), to.StringPtr("Standard_D4_v2")}, + }, + }, + }, + }, + VirtualMachineNodesStatus: []*armcontainerservice.VirtualMachineNodes{ + { + Count: to.Int32Ptr(3), + Size: 
to.StringPtr("Standard_D2_v2"), + }, + }, + }, + } +} + +func TestNewVMsPool(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockAgentpoolclient := NewMockAgentPoolsClient(ctrl) + manager := newTestAzureManager(t) + manager.azClient.agentPoolClient = mockAgentpoolclient + manager.config.ResourceGroup = "MC_rg" + manager.config.ClusterResourceGroup = "rg" + manager.config.ClusterName = "mycluster" + + agentpoolName := "pool1" + spec := &dynamic.NodeGroupSpec{ + Name: agentpoolName, + MinSize: 1, + MaxSize: 10, + } + + ap, err := NewVMsPool(spec, manager) + assert.NoError(t, err) + assert.Equal(t, agentpoolName, ap.azureRef.Name) + assert.Equal(t, "MC_rg", ap.resourceGroup) + assert.Equal(t, "rg", ap.clusterResourceGroup) + assert.Equal(t, "mycluster", ap.clusterName) + assert.Equal(t, 1, ap.minSize) + assert.Equal(t, 10, ap.maxSize) +} + +func TestGetVMsFromCacheForVMsPool(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + agentpoolName := "pool1" + + ap := newTestVMsPool(newTestAzureManager(t), agentpoolName) + expectedVMs := []compute.VirtualMachine{ + { + Name: to.StringPtr("aks-pool1-13222729-vms0"), + Tags: map[string]*string{"aks-managed-poolName": &agentpoolName}, + }, + { + Name: to.StringPtr("aks-pool1-13222729-vms1"), + Tags: map[string]*string{"aks-managed-poolName": &agentpoolName}, + }, + } + + mockVMClient := mockvmclient.NewMockInterface(ctrl) + ap.manager.azClient.virtualMachinesClient = mockVMClient + ap.manager.config.EnableVMsAgentPool = true + mockAgentpoolclient := NewMockAgentPoolsClient(ctrl) + ap.manager.azClient.agentPoolClient = mockAgentpoolclient + mockVMClient.EXPECT().List(gomock.Any(), ap.resourceGroup).Return(expectedVMs, nil) + + agentpool := getTestVMsAgentPool(agentpoolName, false) + fakeAPListPager := getFakeAgentpoolListPager(&agentpool) + mockAgentpoolclient.EXPECT().NewListPager(gomock.Any(), gomock.Any(), nil). 
+ Return(fakeAPListPager) + + ac, err := newAzureCache(ap.manager.azClient, refreshInterval, *ap.manager.config) + assert.NoError(t, err) + ac.enableVMsAgentPool = true + ap.manager.azureCache = ac + + vms, err := ap.getVMsFromCache() + assert.Equal(t, 2, len(vms)) + assert.NoError(t, err) +} + +func TestNodes(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + agentpoolName := "pool1" + vmssPoolName := "test-vmss-pool" + + ap := newTestVMsPool(newTestAzureManager(t), agentpoolName) + expectedVMs := []compute.VirtualMachine{ + { + ID: to.StringPtr(fmt.Sprintf(fakeVMsPoolVMID, 0)), + Name: to.StringPtr("aks-pool1-13222729-vms0"), + Tags: map[string]*string{"aks-managed-poolName": &agentpoolName}, + }, + { + ID: to.StringPtr(fmt.Sprintf(fakeVMsPoolVMID, 1)), + Name: to.StringPtr("aks-pool1-13222729-vms1"), + Tags: map[string]*string{"aks-managed-poolName": &agentpoolName}, + }, + { + ID: to.StringPtr(fmt.Sprint("vmss-", 4)), + Name: to.StringPtr("aks-vmssnp-38484957-vmss000004"), + Tags: map[string]*string{"aks-managed-poolName": &vmssPoolName}, + }, + } + + mockVMClient := mockvmclient.NewMockInterface(ctrl) + ap.manager.azClient.virtualMachinesClient = mockVMClient + mockVMClient.EXPECT().List(gomock.Any(), ap.resourceGroup).Return(expectedVMs, nil) + + ap.manager.config.EnableVMsAgentPool = true + mockAgentpoolclient := NewMockAgentPoolsClient(ctrl) + ap.manager.azClient.agentPoolClient = mockAgentpoolclient + agentpool := getTestVMsAgentPool(agentpoolName, false) + fakeAPListPager := getFakeAgentpoolListPager(&agentpool) + mockAgentpoolclient.EXPECT().NewListPager(gomock.Any(), gomock.Any(), nil). + Return(fakeAPListPager) + + ac, err := newAzureCache(ap.manager.azClient, refreshInterval, *ap.manager.config) + assert.NoError(t, err) + ap.manager.azureCache = ac + + vms, err := ap.Nodes() + assert.Equal(t, 2, len(vms)) + assert.NoError(t, err) +} + +func TestGetCurSizeForVMsPool(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + agentpoolName := "pool1" + + ap := newTestVMsPool(newTestAzureManager(t), agentpoolName) + expectedVMs := []compute.VirtualMachine{ + { + Name: to.StringPtr("aks-pool1-13222729-vms0"), + Tags: map[string]*string{"aks-managed-poolName": &agentpoolName}, + }, + { + Name: to.StringPtr("aks-pool1-13222729-vms1"), + Tags: map[string]*string{"aks-managed-poolName": &agentpoolName}, + }, + { + Name: to.StringPtr("aks-pool1-13222729-vms2"), + Tags: map[string]*string{"aks-managed-poolName": &agentpoolName}, + }, + } + + mockVMClient := mockvmclient.NewMockInterface(ctrl) + ap.manager.azClient.virtualMachinesClient = mockVMClient + mockVMClient.EXPECT().List(gomock.Any(), ap.resourceGroup).Return(expectedVMs, nil) + + ap.manager.config.EnableVMsAgentPool = true + mockAgentpoolclient := NewMockAgentPoolsClient(ctrl) + ap.manager.azClient.agentPoolClient = mockAgentpoolclient + agentpool := getTestVMsAgentPool(agentpoolName, false) + fakeAPListPager := getFakeAgentpoolListPager(&agentpool) + mockAgentpoolclient.EXPECT().NewListPager(gomock.Any(), gomock.Any(), nil). 
+ Return(fakeAPListPager) + + ac, err := newAzureCache(ap.manager.azClient, refreshInterval, *ap.manager.config) + assert.NoError(t, err) + ap.manager.azureCache = ac + ap.curSize = -1 // not initialized + + ap.lastSizeRefresh = time.Now() + curSize, err := ap.getCurSize() + assert.NoError(t, err) + assert.Equal(t, int64(-1), curSize) + + ap.lastSizeRefresh = time.Now().Add(-1 * 30 * time.Second) + curSize, err = ap.getCurSize() + assert.NoError(t, err) + assert.Equal(t, int64(3), curSize) +} + +func TestGetVMsPoolSize(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + agentpoolName := "pool1" + + ap := newTestVMsPool(newTestAzureManager(t), agentpoolName) + ap.curSize = -1 // not initialized + ap.lastSizeRefresh = time.Now().Add(-1 * time.Second) + + curSize, err := ap.getVMsPoolSize() + assert.Equal(t, int64(-1), curSize) + assert.Error(t, err) + assert.Contains(t, err.Error(), "VMs agent pool pool1 not found in cache") +} + +func TestVMsPoolIncreaseSize(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + manager := newTestAzureManager(t) + agentpoolName := "pool1" + + ap := newTestVMsPool(manager, agentpoolName) + ap.curSize = 3 + ap.lastSizeRefresh = time.Now().Add(-1 * time.Second) + expectedVMs := []compute.VirtualMachine{ + { + Name: to.StringPtr("aks-pool1-13222729-vms0"), + Tags: map[string]*string{"aks-managed-poolName": &agentpoolName}, + }, + { + Name: to.StringPtr("aks-pool1-13222729-vms1"), + Tags: map[string]*string{"aks-managed-poolName": &agentpoolName}, + }, + { + Name: to.StringPtr("aks-pool1-13222729-vms2"), + Tags: map[string]*string{"aks-managed-poolName": &agentpoolName}, + }, + } + + mockVMClient := mockvmclient.NewMockInterface(ctrl) + ap.manager.azClient.virtualMachinesClient = mockVMClient + mockVMClient.EXPECT().List(gomock.Any(), ap.resourceGroup).Return(expectedVMs, nil) + + ap.manager.config.EnableVMsAgentPool = true + mockAgentpoolclient := NewMockAgentPoolsClient(ctrl) + ap.manager.azClient.agentPoolClient = mockAgentpoolclient + agentpool := getTestVMsAgentPool(agentpoolName, false) + fakeAPListPager := getFakeAgentpoolListPager(&agentpool) + mockAgentpoolclient.EXPECT().NewListPager(gomock.Any(), gomock.Any(), nil). 
+ Return(fakeAPListPager) + + ac, err := newAzureCache(ap.manager.azClient, refreshInterval, *ap.manager.config) + assert.NoError(t, err) + ap.manager.azureCache = ac + + // failure case 1 + err = ap.IncreaseSize(-1) + expectedErr := fmt.Errorf("size increase must be positive, current delta: -1") + assert.Equal(t, expectedErr, err) + + // failure case 2 + err = ap.IncreaseSize(8) + expectedErr = fmt.Errorf("size-increasing request of 11 is bigger than max size 10") + assert.Equal(t, expectedErr, err) + + // success case 3 + resp := &http.Response{ + Header: map[string][]string{ + "Fake-Poller-Status": {"Done"}, + }, + } + + fakePoller, err := runtime.NewPoller(resp, runtime.Pipeline{}, + &runtime.NewPollerOptions[armcontainerservice.AgentPoolsClientCreateOrUpdateResponse]{ + Handler: &fakehandler[armcontainerservice.AgentPoolsClientCreateOrUpdateResponse]{}, + }) + + assert.NoError(t, err) + + mockAgentpoolclient.EXPECT().BeginCreateOrUpdate( + gomock.Any(), manager.config.ClusterResourceGroup, + manager.config.ClusterName, + agentpoolName, + gomock.Any(), gomock.Any()).Return(fakePoller, nil) + + err = ap.IncreaseSize(1) + assert.NoError(t, err) +} + +func TestDeleteVMsPoolNodes(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockAgentpoolclient := NewMockAgentPoolsClient(ctrl) + manager := newTestAzureManager(t) + manager.azClient.agentPoolClient = mockAgentpoolclient + agentpoolName := "pool1" + nodeName := "aks-pool1-13222729-vms0" + providerID := "azure:///subscriptions/feb5b150-60fe-4441-be73-8c02a524f55a/resourceGroups/rg/providers/Microsoft.Compute/virtualMachines/aks-pool1-13222729-vms0" + + node := &apiv1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + }, + Spec: apiv1.NodeSpec{ + ProviderID: providerID, + }, + } + + ap := newTestVMsPool(manager, agentpoolName) + + manager.azureCache = &azureCache{ + vmsPoolMap: map[string]armcontainerservice.AgentPool{ + agentpoolName: getTestVMsAgentPool(agentpoolName, false), + }, + instanceToNodeGroup: map[azureRef]cloudprovider.NodeGroup{ + {Name: providerID}: ap, + }, + } + + // failure case + ap.curSize = 2 + ap.lastSizeRefresh = time.Now().Add(-1 * time.Second) + err := ap.DeleteNodes([]*apiv1.Node{node}) + assert.Error(t, err) + assert.Contains(t, err.Error(), "min size 3 reached, nodes will not be deleted") + + // success case + ap.curSize = 4 + ap.lastSizeRefresh = time.Now().Add(-1 * time.Second) + + resp := &http.Response{ + Header: map[string][]string{ + "Fake-Poller-Status": {"Done"}, + }, + } + fakePoller, err := runtime.NewPoller(resp, runtime.Pipeline{}, + &runtime.NewPollerOptions[armcontainerservice.AgentPoolsClientDeleteMachinesResponse]{ + Handler: &fakehandler[armcontainerservice.AgentPoolsClientDeleteMachinesResponse]{}, + }) + assert.NoError(t, err) + + mockAgentpoolclient.EXPECT().BeginDeleteMachines( + gomock.Any(), manager.config.ClusterResourceGroup, + manager.config.ClusterName, + agentpoolName, + gomock.Any(), gomock.Any()).Return(fakePoller, nil) + + derr := ap.DeleteNodes([]*apiv1.Node{node}) + assert.NoError(t, derr) +} + +type fakehandler[T any] struct{} + +func (f *fakehandler[T]) Done() bool { + return true +} + +func (f *fakehandler[T]) Poll(ctx context.Context) (*http.Response, error) { + return nil, nil +} + +func (f *fakehandler[T]) Result(ctx context.Context, out *T) error { + return nil +} + +func getFakeAgentpoolListPager(agentpool ...*armcontainerservice.AgentPool) *runtime.Pager[armcontainerservice.AgentPoolsClientListResponse] { + fakeFetcher := func(ctx 
context.Context, response *armcontainerservice.AgentPoolsClientListResponse) (armcontainerservice.AgentPoolsClientListResponse, error) { + return armcontainerservice.AgentPoolsClientListResponse{ + AgentPoolListResult: armcontainerservice.AgentPoolListResult{ + Value: agentpool, + }, + }, nil + } + + return runtime.NewPager(runtime.PagingHandler[armcontainerservice.AgentPoolsClientListResponse]{ + More: func(response armcontainerservice.AgentPoolsClientListResponse) bool { + return false + }, + Fetcher: fakeFetcher, + }) + +} diff --git a/cluster-autoscaler/go.mod b/cluster-autoscaler/go.mod index 7341cbd08217..c3d21f690fbd 100644 --- a/cluster-autoscaler/go.mod +++ b/cluster-autoscaler/go.mod @@ -8,9 +8,9 @@ require ( cloud.google.com/go/compute/metadata v0.3.0 github.com/Azure/azure-sdk-for-go v68.0.0+incompatible github.com/Azure/azure-sdk-for-go-extensions v0.1.6 - github.com/Azure/azure-sdk-for-go/sdk/azcore v1.11.1 - github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.5.2 - github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4 v4.9.0-beta.1 + github.com/Azure/azure-sdk-for-go/sdk/azcore v1.13.0 + github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0 + github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v5 v5.1.0-beta.2 github.com/Azure/go-autorest/autorest v0.11.29 github.com/Azure/go-autorest/autorest/adal v0.9.23 github.com/Azure/go-autorest/autorest/azure/auth v0.5.8 @@ -34,9 +34,9 @@ require ( github.com/stretchr/testify v1.9.0 github.com/vburenin/ifacemaker v1.2.1 go.uber.org/mock v0.4.0 - golang.org/x/net v0.26.0 + golang.org/x/net v0.27.0 golang.org/x/oauth2 v0.21.0 - golang.org/x/sys v0.21.0 + golang.org/x/sys v0.22.0 google.golang.org/api v0.151.0 google.golang.org/grpc v1.65.0 google.golang.org/protobuf v1.34.2 @@ -61,11 +61,12 @@ require ( ) require ( - github.com/Azure/azure-sdk-for-go/sdk/internal v1.5.2 // indirect + github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 // indirect github.com/Azure/azure-sdk-for-go/sdk/keyvault/azsecrets v0.12.0 // indirect github.com/Azure/azure-sdk-for-go/sdk/keyvault/internal v0.7.1 // indirect github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5 v5.6.0 // indirect github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerregistry/armcontainerregistry v1.2.0 // indirect + github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4 v4.8.0 // indirect github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/keyvault/armkeyvault v1.4.0 // indirect github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4 v4.3.0 // indirect github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/privatedns/armprivatedns v1.2.0 // indirect @@ -187,11 +188,11 @@ require ( go.opentelemetry.io/proto/otlp v1.3.1 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.26.0 // indirect - golang.org/x/crypto v0.24.0 // indirect + golang.org/x/crypto v0.25.0 // indirect golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect golang.org/x/mod v0.17.0 // indirect golang.org/x/sync v0.7.0 // indirect - golang.org/x/term v0.21.0 // indirect + golang.org/x/term v0.22.0 // indirect golang.org/x/text v0.16.0 // indirect golang.org/x/time v0.5.0 // indirect golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect diff --git a/cluster-autoscaler/go.sum b/cluster-autoscaler/go.sum index d00b52672e1d..c47ead8591c3 100644 --- a/cluster-autoscaler/go.sum +++ b/cluster-autoscaler/go.sum 
@@ -8,12 +8,12 @@ github.com/Azure/azure-sdk-for-go v68.0.0+incompatible h1:fcYLmCpyNYRnvJbPerq7U0 github.com/Azure/azure-sdk-for-go v68.0.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= github.com/Azure/azure-sdk-for-go-extensions v0.1.6 h1:EXGvDcj54u98XfaI/Cy65Ds6vNsIJeGKYf0eNLB1y4Q= github.com/Azure/azure-sdk-for-go-extensions v0.1.6/go.mod h1:27StPiXJp6Xzkq2AQL7gPK7VC0hgmCnUKlco1dO1jaM= -github.com/Azure/azure-sdk-for-go/sdk/azcore v1.11.1 h1:E+OJmp2tPvt1W+amx48v1eqbjDYsgN+RzP4q16yV5eM= -github.com/Azure/azure-sdk-for-go/sdk/azcore v1.11.1/go.mod h1:a6xsAQUZg+VsS3TJ05SRp524Hs4pZ/AeFSr5ENf0Yjo= -github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.5.2 h1:FDif4R1+UUR+00q6wquyX90K7A8dN+R5E8GEadoP7sU= -github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.5.2/go.mod h1:aiYBYui4BJ/BJCAIKs92XiPyQfTaBWqvHujDwKb6CBU= -github.com/Azure/azure-sdk-for-go/sdk/internal v1.5.2 h1:LqbJ/WzJUwBf8UiaSzgX7aMclParm9/5Vgp+TY51uBQ= -github.com/Azure/azure-sdk-for-go/sdk/internal v1.5.2/go.mod h1:yInRyqWXAuaPrgI7p70+lDDgh3mlBohis29jGMISnmc= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.13.0 h1:GJHeeA2N7xrG3q30L2UXDyuWRzDM900/65j70wcM4Ww= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.13.0/go.mod h1:l38EPgmsp71HHLq9j7De57JcKOWPyhrsW1Awm1JS6K0= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0 h1:tfLQ34V6F7tVSwoTf/4lH5sE0o6eCJuNDTmH09nDpbc= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0/go.mod h1:9kIvujWAA58nmPmWB1m23fyWic1kYZMxD9CxaWn4Qpg= +github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 h1:ywEEhmNahHBihViHepv3xPBn1663uRv2t2q/ESv9seY= +github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0/go.mod h1:iZDifYGJTIgIIkYRNWPENUnqx6bJ2xnSDFI2tjwZNuY= github.com/Azure/azure-sdk-for-go/sdk/keyvault/azsecrets v0.12.0 h1:xnO4sFyG8UH2fElBkcqLTOZsAajvKfnSlgBBW8dXYjw= github.com/Azure/azure-sdk-for-go/sdk/keyvault/azsecrets v0.12.0/go.mod h1:XD3DIOOVgBCO03OleB1fHjgktVRFxlT++KwKgIOewdM= github.com/Azure/azure-sdk-for-go/sdk/keyvault/internal v0.7.1 h1:FbH3BbSb4bvGluTesZZ+ttN/MDsnMmQP36OSnDuSXqw= @@ -24,10 +24,14 @@ github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerregistry/armconta github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerregistry/armcontainerregistry v1.2.0/go.mod h1:E7ltexgRDmeJ0fJWv0D/HLwY2xbDdN+uv+X2uZtOx3w= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v2 v2.4.0 h1:1u/K2BFv0MwkG6he8RYuUcbbeK22rkoZbg4lKa/msZU= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v2 v2.4.0/go.mod h1:U5gpsREQZE6SLk1t/cFfc1eMhYAlYpEzvaYXuDfefy8= -github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4 v4.9.0-beta.1 h1:iqhrjj9w9/AQZsHjaOVyloamkeAFRbWI0iHNy6INMYk= -github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4 v4.9.0-beta.1/go.mod h1:gYq8wyDgv6JLhGbAU6gg8amCPgQWRE+aCvrV2gyzdfs= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4 v4.8.0 h1:0nGmzwBv5ougvzfGPCO2ljFRHvun57KpNrVCMrlk0ns= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4 v4.8.0/go.mod h1:gYq8wyDgv6JLhGbAU6gg8amCPgQWRE+aCvrV2gyzdfs= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v5 v5.1.0-beta.2 h1:re+BEe/OafvSyRy2vM+Fyu+EcUK34O2o/Fa6WO3ITZM= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v5 v5.1.0-beta.2/go.mod h1:5zx285T5OLk+iQbfOuexhhO7J6dfzkqVkFgS/+s7XaA= 
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v2 v2.0.0 h1:PTFGRSlMKCQelWwxUyYVEUqseBJVemLyqWJjvMyt0do= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v2 v2.0.0/go.mod h1:LRr2FzBTQlONPPa5HREE5+RjSCTXl7BwOvYOaWTqCaI= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v3 v3.1.0 h1:2qsIIvxVT+uE6yrNldntJKlLRgxGbZ85kgtz5SNBhMw= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v3 v3.1.0/go.mod h1:AW8VEadnhw9xox+VaVd9sP7NjzOAnaZBLRH6Tq3cJ38= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/keyvault/armkeyvault v1.4.0 h1:HlZMUZW8S4P9oob1nCHxCCKrytxyLc+24nUJGssoEto= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/keyvault/armkeyvault v1.4.0/go.mod h1:StGsLbuJh06Bd8IBfnAlIFV3fLb+gkczONWf15hpX2E= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/managementgroups/armmanagementgroups v1.0.0 h1:pPvTJ1dY0sA35JOeFq6TsY2xj6Z85Yo23Pj4wCCvu4o= @@ -565,8 +569,8 @@ golang.org/x/crypto v0.0.0-20210513164829-c07d793c2f9a/go.mod h1:P+XmwS30IXTQdn5 golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58= -golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI= -golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM= +golang.org/x/crypto v0.25.0 h1:ypSNr+bnYL2YhwoMt2zPxHFmbAN1KZs/njMG3hxUp30= +golang.org/x/crypto v0.25.0/go.mod h1:T+wALwcMOSE0kXgUAnPAHqTLW+XHgcELELW8VaDgm/M= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20230905200255-921286631fa9 h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g= golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k= @@ -598,8 +602,8 @@ golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= -golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= +golang.org/x/net v0.27.0 h1:5K3Njcw06/l2y9vpGCSdcxWOYHOUk3dVNGDXN+FvAys= +golang.org/x/net v0.27.0/go.mod h1:dDi0PyhWNoiUOrAS8uXv/vnScO4wnHQO4mj9fn/RytE= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs= @@ -639,14 +643,14 @@ golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= -golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= +golang.org/x/sys v0.22.0/go.mod 
h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= -golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA= -golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0= +golang.org/x/term v0.22.0 h1:BbsgPEJULsl2fV/AT3v15Mjva5yXKQDyKf+TbDz7QJk= +golang.org/x/term v0.22.0/go.mod h1:F3qCibpT5AMpCRfhfT53vVJwhLtIVHhB9XDjfFvnMI4= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=