Skip to content

Commit

Permalink
Merge branch 'kubernetes:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
Dharma-09 authored Jul 9, 2024
2 parents a48f732 + 7dfb90a commit 084242e
Show file tree
Hide file tree
Showing 51 changed files with 3,278 additions and 300 deletions.
2 changes: 1 addition & 1 deletion cluster-autoscaler/FAQ.md
Original file line number Diff line number Diff line change
Expand Up @@ -836,7 +836,7 @@ The following startup parameters are supported for cluster autoscaler:
| `ok-total-unready-count` | Number of allowed unready nodes, irrespective of max-total-unready-percentage | 3
| `max-node-provision-time` | Maximum time CA waits for node to be provisioned | 15 minutes
| `nodes` | sets min,max size and other configuration data for a node group in a format accepted by cloud provider. Can be used multiple times. Format: \<min>:\<max>:<other...> | ""
| `node-group-auto-discovery` | One or more definition(s) of node group auto-discovery.<br>A definition is expressed `<name of discoverer>:[<key>[=<value>]]`<br>The `aws`, `gce`, and `azure` cloud providers are currently supported. AWS matches by ASG tags, e.g. `asg:tag=tagKey,anotherTagKey`<br>GCE matches by IG name prefix, and requires you to specify min and max nodes per IG, e.g. `mig:namePrefix=pfx,min=0,max=10`<br> Azure matches by tags on VMSS, e.g. `label:foo=bar`, and will auto-detect `min` and `max` tags on the VMSS to set scaling limits.<br>Can be used multiple times | ""
| `node-group-auto-discovery` | One or more definition(s) of node group auto-discovery.<br>A definition is expressed `<name of discoverer>:[<key>[=<value>]]`<br>The `aws`, `gce`, and `azure` cloud providers are currently supported. AWS matches by ASG tags, e.g. `asg:tag=tagKey,anotherTagKey`<br>GCE matches by IG name prefix, and requires you to specify min and max nodes per IG, e.g. `mig:namePrefix=pfx,min=0,max=10`<br> Azure matches by VMSS tags, similar to AWS. And you can optionally specify a default min and max size for VMSSs, e.g. `label:tag=tagKey,anotherTagKey=bar,min=0,max=600`.<br>Can be used multiple times | ""
| `emit-per-nodegroup-metrics` | If true, emit per node group metrics. | false
| `estimator` | Type of resource estimator to be used in scale up | binpacking
| `expander` | Type of node group expander to be used in scale up. | random
Expand Down
64 changes: 55 additions & 9 deletions cluster-autoscaler/cloudprovider/azure/azure_autodiscovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,31 @@ package azure

import (
"fmt"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"strconv"
"strings"

"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
)

const (
autoDiscovererTypeLabel = "label"
autoDiscovererTypeLabel = "label"
vmssAutoDiscovererKeyMinNodes = "min"
vmssAutoDiscovererKeyMaxNodes = "max"
)

// A labelAutoDiscoveryConfig specifies how to auto-discover Azure node groups.
type labelAutoDiscoveryConfig struct {
// Key-values to match on.
Selector map[string]string
// MinSize specifies the minimum size for all VMSSs that match Selector.
MinSize *int
// MazSize specifies the maximum size for all VMSSs that match Selector.
MaxSize *int
}

type autoDiscoveryConfigSizes struct {
Min int
Max int
}

// ParseLabelAutoDiscoverySpecs returns any provided NodeGroupAutoDiscoverySpecs
Expand Down Expand Up @@ -70,34 +83,67 @@ func parseLabelAutoDiscoverySpec(spec string) (labelAutoDiscoveryConfig, error)
if k == "" || v == "" {
return cfg, fmt.Errorf("empty value not allowed in key=value tag pairs")
}
cfg.Selector[k] = v

switch k {
case vmssAutoDiscovererKeyMinNodes:
minSize, err := strconv.Atoi(v)
if err != nil || minSize < 0 {
return cfg, fmt.Errorf("invalid minimum nodes: %s", v)
}
cfg.MinSize = &minSize
case vmssAutoDiscovererKeyMaxNodes:
maxSize, err := strconv.Atoi(v)
if err != nil || maxSize < 0 {
return cfg, fmt.Errorf("invalid maximum nodes: %s", v)
}
cfg.MaxSize = &maxSize
default:
cfg.Selector[k] = v
}
}
if cfg.MaxSize != nil && cfg.MinSize != nil && *cfg.MaxSize < *cfg.MinSize {
return cfg, fmt.Errorf("maximum size %d must be greater than or equal to minimum size %d", *cfg.MaxSize, *cfg.MinSize)
}
return cfg, nil
}

func matchDiscoveryConfig(labels map[string]*string, configs []labelAutoDiscoveryConfig) bool {
// returns an autoDiscoveryConfigSizes struct if the VMSS's tags match the autodiscovery configs
// if the VMSS's tags do not match then return nil
// if there are multiple min/max sizes defined, return the highest min value and the lowest max value
func matchDiscoveryConfig(labels map[string]*string, configs []labelAutoDiscoveryConfig) *autoDiscoveryConfigSizes {
if len(configs) == 0 {
return false
return nil
}
minSize := -1
maxSize := -1

for _, c := range configs {
if len(c.Selector) == 0 {
return false
return nil
}

for k, v := range c.Selector {
value, ok := labels[k]
if !ok {
return false
return nil
}

if len(v) > 0 {
if value == nil || *value != v {
return false
return nil
}
}
}
if c.MinSize != nil && minSize < *c.MinSize {
minSize = *c.MinSize
}
if c.MaxSize != nil && (maxSize == -1 || maxSize > *c.MaxSize) {
maxSize = *c.MaxSize
}
}

return true
return &autoDiscoveryConfigSizes{
Min: minSize,
Max: maxSize,
}
}
71 changes: 69 additions & 2 deletions cluster-autoscaler/cloudprovider/azure/azure_autodiscovery_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,15 @@ limitations under the License.
package azure

import (
"testing"

"github.com/stretchr/testify/assert"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"testing"
)

func TestParseLabelAutoDiscoverySpecs(t *testing.T) {
minVal := 1
maxVal := 2
testCases := []struct {
name string
specs []string
Expand All @@ -46,7 +49,7 @@ func TestParseLabelAutoDiscoverySpecs(t *testing.T) {
expectedErr: true,
},
{
name: "InvalidAutoDiscoerLabel",
name: "InvalidAutoDiscoverLabel",
specs: []string{"invalid:test-tag=test-value,another-test-tag"},
expectedErr: true,
},
Expand All @@ -60,6 +63,70 @@ func TestParseLabelAutoDiscoverySpecs(t *testing.T) {
specs: []string{"label:=test-val"},
expectedErr: true,
},
{
name: "ValidSpecWithSizes",
specs: []string{
"label:cluster-autoscaler-enabled=true,cluster-autoscaler-name=fake-cluster,min=1,max=2",
"label:test-tag=test-value,another-test-tag=another-test-value,min=1,max=2",
},
expected: []labelAutoDiscoveryConfig{
{Selector: map[string]string{"cluster-autoscaler-enabled": "true", "cluster-autoscaler-name": "fake-cluster"}, MinSize: &minVal, MaxSize: &maxVal},
{Selector: map[string]string{"test-tag": "test-value", "another-test-tag": "another-test-value"}, MinSize: &minVal, MaxSize: &maxVal},
},
},
{
name: "ValidSpecWithSizesOnlyMax",
specs: []string{
"label:cluster-autoscaler-enabled=true,max=2",
},
expected: []labelAutoDiscoveryConfig{
{Selector: map[string]string{"cluster-autoscaler-enabled": "true"}, MaxSize: &maxVal},
},
},
{
name: "ValidSpecWithSizesOnlyMin",
specs: []string{
"label:cluster-autoscaler-enabled=true,min=1",
},
expected: []labelAutoDiscoveryConfig{
{Selector: map[string]string{"cluster-autoscaler-enabled": "true"}, MinSize: &minVal},
},
},
{
name: "NonIntegerMin",
specs: []string{
"label:cluster-autoscaler-enabled=true,min=random,max=2",
},
expectedErr: true,
},
{
name: "NegativeMin",
specs: []string{
"label:cluster-autoscaler-enabled=true,min=-5,max=2",
},
expectedErr: true,
},
{
name: "NonIntegerMax",
specs: []string{
"label:cluster-autoscaler-enabled=true,min=1,max=random",
},
expectedErr: true,
},
{
name: "NegativeMax",
specs: []string{
"label:cluster-autoscaler-enabled=true,min=1,max=-5",
},
expectedErr: true,
},
{
name: "LowerMaxThanMin",
specs: []string{
"label:cluster-autoscaler-enabled=true,min=5,max=1",
},
expectedErr: true,
},
}

for _, tc := range testCases {
Expand Down
9 changes: 7 additions & 2 deletions cluster-autoscaler/cloudprovider/azure/azure_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -309,12 +309,13 @@ func (m *AzureManager) getFilteredScaleSets(filter []labelAutoDiscoveryConfig) (

var nodeGroups []cloudprovider.NodeGroup
for _, scaleSet := range vmssList {
var cfgSizes *autoDiscoveryConfigSizes
if len(filter) > 0 {
if scaleSet.Tags == nil || len(scaleSet.Tags) == 0 {
continue
}

if !matchDiscoveryConfig(scaleSet.Tags, filter) {
if cfgSizes = matchDiscoveryConfig(scaleSet.Tags, filter); cfgSizes == nil {
continue
}
}
Expand All @@ -332,6 +333,8 @@ func (m *AzureManager) getFilteredScaleSets(filter []labelAutoDiscoveryConfig) (
klog.Warningf("ignoring vmss %q because of invalid minimum size specified for vmss: %s", *scaleSet.Name, err)
continue
}
} else if cfgSizes.Min >= 0 {
spec.MinSize = cfgSizes.Min
} else {
klog.Warningf("ignoring vmss %q because of no minimum size specified for vmss", *scaleSet.Name)
continue
Expand All @@ -347,12 +350,14 @@ func (m *AzureManager) getFilteredScaleSets(filter []labelAutoDiscoveryConfig) (
klog.Warningf("ignoring vmss %q because of invalid maximum size specified for vmss: %s", *scaleSet.Name, err)
continue
}
} else if cfgSizes.Max >= 0 {
spec.MaxSize = cfgSizes.Max
} else {
klog.Warningf("ignoring vmss %q because of no maximum size specified for vmss", *scaleSet.Name)
continue
}
if spec.MaxSize < spec.MinSize {
klog.Warningf("ignoring vmss %q because of maximum size must be greater than minimum size: max=%d < min=%d", *scaleSet.Name, spec.MaxSize, spec.MinSize)
klog.Warningf("ignoring vmss %q because of maximum size must be greater than or equal to minimum size: max=%d < min=%d", *scaleSet.Name, spec.MaxSize, spec.MinSize)
continue
}

Expand Down
47 changes: 47 additions & 0 deletions cluster-autoscaler/cloudprovider/azure/azure_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,53 @@ func TestGetFilteredAutoscalingGroupsVmss(t *testing.T) {
assert.True(t, assert.ObjectsAreEqualValues(expectedAsgs, asgs), "expected %#v, but found: %#v", expectedAsgs, asgs)
}

func TestGetFilteredAutoscalingGroupsVmssWithConfiguredSizes(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()

vmssName := "test-vmss"
vmssTag := "fake-tag"
vmssTagValue := "fake-value"
vmssTag2 := "fake-tag2"
vmssTagValue2 := "fake-value2"
minVal := 2
maxVal := 4

ngdo := cloudprovider.NodeGroupDiscoveryOptions{
NodeGroupAutoDiscoverySpecs: []string{
fmt.Sprintf("label:%s=%s,min=2,max=5", vmssTag, vmssTagValue),
fmt.Sprintf("label:%s=%s,min=1,max=4", vmssTag2, vmssTagValue2),
},
}

manager := newTestAzureManager(t)
expectedScaleSets := []compute.VirtualMachineScaleSet{fakeVMSSWithTags(vmssName, map[string]*string{vmssTag: &vmssTagValue, vmssTag2: &vmssTagValue2})}
mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
mockVMSSClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup).Return(expectedScaleSets, nil).AnyTimes()
manager.azClient.virtualMachineScaleSetsClient = mockVMSSClient
err := manager.forceRefresh()
assert.NoError(t, err)

specs, err := ParseLabelAutoDiscoverySpecs(ngdo)
assert.NoError(t, err)

asgs, err := manager.getFilteredNodeGroups(specs)
assert.NoError(t, err)
expectedAsgs := []cloudprovider.NodeGroup{&ScaleSet{
azureRef: azureRef{
Name: vmssName,
},
minSize: minVal,
maxSize: maxVal,
manager: manager,
enableForceDelete: manager.config.EnableForceDelete,
curSize: 3,
sizeRefreshPeriod: manager.azureCache.refreshInterval,
instancesRefreshPeriod: defaultVmssInstancesRefreshPeriod,
}}
assert.True(t, assert.ObjectsAreEqualValues(expectedAsgs, asgs), "expected %#v, but found: %#v", expectedAsgs, asgs)
}

func TestGetFilteredAutoscalingGroupsWithInvalidVMType(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
Expand Down
1 change: 1 addition & 0 deletions cluster-autoscaler/cloudprovider/azure/test/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
_artifacts
61 changes: 13 additions & 48 deletions cluster-autoscaler/cloudprovider/azure/test/Makefile
Original file line number Diff line number Diff line change
@@ -1,60 +1,25 @@
REPO_ROOT:=$(shell git rev-parse --show-toplevel)
CAS_ROOT:=$(REPO_ROOT)/cluster-autoscaler
CAS_CHART:=$(REPO_ROOT)/charts/cluster-autoscaler

include $(CAS_ROOT)/Makefile

TOOLS_BIN_DIR := $(abspath bin)

export PATH := $(TOOLS_BIN_DIR):$(PATH)

HELM_VER := v3.15.2
HELM_BIN := helm
HELM := $(TOOLS_BIN_DIR)/$(HELM_BIN)-$(HELM_VER)

helm: $(HELM)

$(HELM):
mkdir -p $(TOOLS_BIN_DIR)
rm -f "$(TOOLS_BIN_DIR)/$(HELM_BIN)*"
curl --retry 3 -fsSL -o $(TOOLS_BIN_DIR)/get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
chmod 700 $(TOOLS_BIN_DIR)/get_helm.sh
USE_SUDO=false HELM_INSTALL_DIR=$(TOOLS_BIN_DIR) DESIRED_VERSION=$(HELM_VER) BINARY_NAME=$(HELM_BIN)-$(HELM_VER) $(TOOLS_BIN_DIR)/get_helm.sh
ln -sf $(HELM) $(TOOLS_BIN_DIR)/$(HELM_BIN)
rm -f $(TOOLS_BIN_DIR)/get_helm.sh

CLUSTER_AUTOSCALER_NAMESPACE?=default
CLUSTER_AUTOSCALER_SERVICEACCOUNT_NAME?=cluster-autoscaler

.PHONY: install-e2e
install-e2e: $(HELM)
.PHONY: build-e2e
build-e2e:
$(MAKE) -C $(CAS_ROOT) build-arch-$(GOARCH) make-image-arch-$(GOARCH)
docker push $(IMAGE)-$(GOARCH):$(TAG)
$(HELM) upgrade --install cluster-autoscaler $(CAS_CHART) \
--namespace $(CLUSTER_AUTOSCALER_NAMESPACE) --create-namespace \
--set cloudProvider=azure \
--set azureTenantID=$(AZURE_TENANT_ID) \
--set azureSubscriptionID=$(AZURE_SUBSCRIPTION_ID) \
--set azureUseWorkloadIdentityExtension=true \
--set-string podLabels."azure\.workload\.identity/use"=true \
--set rbac.serviceAccount.name=$(CLUSTER_AUTOSCALER_SERVICEACCOUNT_NAME) \
--set rbac.serviceAccount.annotations."azure\.workload\.identity/tenant-id"=$(AZURE_TENANT_ID) \
--set rbac.serviceAccount.annotations."azure\.workload\.identity/client-id"="$$(KUBECONFIG= kubectl get userassignedidentities -o jsonpath='{.items[0].status.clientId}')" \
--set autoDiscovery.clusterName="$$(KUBECONFIG= kubectl get cluster -o jsonpath='{.items[0].metadata.name}')" \
--set azureResourceGroup="$$(KUBECONFIG= kubectl get managedclusters -o jsonpath='{.items[0].status.nodeResourceGroup}')" \
--set nodeSelector."kubernetes\.io/os"=linux \
--set image.repository=$(IMAGE)-$(GOARCH) \
--set image.tag=$(TAG) \
--set image.pullPolicy=Always \
--set extraArgs.scale-down-delay-after-add=10s \
--set extraArgs.scale-down-unneeded-time=10s \
--set extraArgs.scale-down-candidates-pool-ratio=1.0 \
--set extraArgs.unremovable-node-recheck-timeout=10s \
--set extraArgs.skip-nodes-with-system-pods=false \
--set extraArgs.skip-nodes-with-local-storage=false \
--wait

ARTIFACTS?=_artifacts

.PHONY: test-e2e
test-e2e: install-e2e
go run github.com/onsi/ginkgo/v2/ginkgo e2e -v -- \
-resource-group="$$(KUBECONFIG= kubectl get managedclusters -o jsonpath='{.items[0].status.nodeResourceGroup}')"
test-e2e: build-e2e
go run github.com/onsi/ginkgo/v2/ginkgo -v --trace --output-dir "$(ARTIFACTS)" --junit-report="junit.e2e_suite.1.xml" e2e -- \
-resource-group="$$(KUBECONFIG= kubectl get managedclusters -o jsonpath='{.items[0].status.nodeResourceGroup}')" \
-cluster-name="$$(KUBECONFIG= kubectl get cluster -o jsonpath='{.items[0].metadata.name}')" \
-client-id="$$(KUBECONFIG= kubectl get userassignedidentities -o jsonpath='{.items[0].status.clientId}')" \
-cas-namespace="$(CLUSTER_AUTOSCALER_NAMESPACE)" \
-cas-serviceaccount-name="$(CLUSTER_AUTOSCALER_SERVICEACCOUNT_NAME)" \
-cas-image-repository="$(IMAGE)-$(GOARCH)" \
-cas-image-tag="$(TAG)"
Loading

0 comments on commit 084242e

Please sign in to comment.