Skip to content

Commit

Permalink
Merge branch 'kubernetes:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
Dharma-09 authored Jun 30, 2024
2 parents bdbe951 + 069aab7 commit a48f732
Show file tree
Hide file tree
Showing 196 changed files with 14,407 additions and 2,701 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v2
with:
go-version: '1.22.1'
go-version: '1.22.2'

- uses: actions/checkout@v2
with:
Expand Down
142 changes: 100 additions & 42 deletions cluster-autoscaler/FAQ.md

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions cluster-autoscaler/OWNERS
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
approvers:
- aleksandra-malinowska
- BigDarkClown
- feiskyer
- towca
- x13n
reviewers:
- aleksandra-malinowska
- BigDarkClown
- feiskyer
- vadasambar
- x13n
emeritus_approvers:
- aleksandra-malinowska # 2022-09-30
labels:
- area/cluster-autoscaler
56 changes: 29 additions & 27 deletions cluster-autoscaler/apis/go.mod
Original file line number Diff line number Diff line change
@@ -1,32 +1,32 @@
module k8s.io/autoscaler/cluster-autoscaler/apis

go 1.21
go 1.22.0

require (
github.com/onsi/ginkgo/v2 v2.16.0
github.com/onsi/gomega v1.31.1
k8s.io/apimachinery v0.29.2
k8s.io/client-go v0.29.2
k8s.io/code-generator v0.29.2
github.com/onsi/ginkgo/v2 v2.19.0
github.com/onsi/gomega v1.33.1
k8s.io/apimachinery v0.31.0-alpha.2
k8s.io/client-go v0.31.0-alpha.2
k8s.io/code-generator v0.31.0-alpha.2
sigs.k8s.io/structured-merge-diff/v4 v4.4.1
)

require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
github.com/evanphx/json-patch v4.12.0+incompatible // indirect
github.com/fxamacker/cbor/v2 v2.7.0-beta // indirect
github.com/go-logr/logr v1.4.1 // indirect
github.com/go-openapi/jsonpointer v0.19.6 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect
github.com/go-openapi/swag v0.22.3 // indirect
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
github.com/go-openapi/swag v0.22.4 // indirect
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/gnostic-models v0.6.8 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af // indirect
github.com/google/uuid v1.3.1 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
Expand All @@ -35,24 +35,26 @@ require (
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/spf13/pflag v1.0.5 // indirect
golang.org/x/mod v0.14.0 // indirect
golang.org/x/net v0.20.0 // indirect
golang.org/x/oauth2 v0.10.0 // indirect
golang.org/x/sys v0.16.0 // indirect
golang.org/x/term v0.16.0 // indirect
golang.org/x/text v0.14.0 // indirect
github.com/x448/float16 v0.8.4 // indirect
golang.org/x/mod v0.17.0 // indirect
golang.org/x/net v0.25.0 // indirect
golang.org/x/oauth2 v0.20.0 // indirect
golang.org/x/sync v0.7.0 // indirect
golang.org/x/sys v0.20.0 // indirect
golang.org/x/term v0.20.0 // indirect
golang.org/x/text v0.15.0 // indirect
golang.org/x/time v0.3.0 // indirect
golang.org/x/tools v0.17.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/protobuf v1.31.0 // indirect
golang.org/x/tools v0.21.0 // indirect
google.golang.org/protobuf v1.33.0 // indirect
gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/api v0.29.2 // indirect
k8s.io/gengo v0.0.0-20230829151522-9cce18d56c01 // indirect
k8s.io/klog/v2 v2.110.1 // indirect
k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 // indirect
k8s.io/api v0.31.0-alpha.2 // indirect
k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70 // indirect
k8s.io/klog/v2 v2.120.1 // indirect
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect
k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/yaml v1.3.0 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect
)
135 changes: 60 additions & 75 deletions cluster-autoscaler/apis/go.sum

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,11 @@ const (
// ProvisioningClassCheckCapacity denotes that CA will check if current cluster state can fulfill this request,
// and reserve the capacity for a specified time.
ProvisioningClassCheckCapacity string = "check-capacity.autoscaling.x-k8s.io"
// ProvisioningClassAtomicScaleUp denotes that CA try to provision the capacity
// ProvisioningClassBestEffortAtomicScaleUp denotes that CA try to provision the capacity
// in an atomic manner.
ProvisioningClassAtomicScaleUp string = "atomic-scale-up.autoscaling.x-k8s.io"
ProvisioningClassBestEffortAtomicScaleUp string = "best-effort-atomic-scale-up.autoscaling.x-k8s.io"
// ProvisioningRequestPodAnnotationKey is a key used to annotate pods consuming provisioning request.
ProvisioningRequestPodAnnotationKey = "autoscaling.x-k8s.io/consume-provisioning-request"
// ProvisioningClassPodAnnotationKey is a key used to add annotation about Provisioning Class
ProvisioningClassPodAnnotationKey = "autoscaling.x-k8s.io/provisioning-class-name"
)
6 changes: 3 additions & 3 deletions cluster-autoscaler/cloudprovider/POLICY.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ in-tree cloudprovider follows the following rules:
handles. There is an expectation that at least some of the owners will
join Kubernetes organization (by following the
[process](https://github.com/kubernetes/community/blob/master/community-membership.md))
within one release cycly, so that they can approve PRs to their
within one release cycle, so that they can approve PRs to their
cloudprovider.
* Cloudprovider shouldn't introduce new dependencies (such as clients/SDKs)
to top-level go.mod vendor, unless those dependencies are already imported
Expand Down Expand Up @@ -109,8 +109,8 @@ maintenance request_ (CMR) mechanism.
(ex. 1.26).
* CMR will need to be discussed on sig-autoscaling meeting and approved by
sig leads before being issued. It will also be announced on sig-autoscaling
slack channel and highlited in sig-autoscaling meeting notes.
* A CMR may be issued no later then [enhancements
slack channel and highlighted in sig-autoscaling meeting notes.
* A CMR may be issued no later than [enhancements
freeze](https://github.com/kubernetes/sig-release/blob/master/releases/release_phases.md#enhancements-freeze)
of a given Kubernetes minor version.
* If a given cloud provider was added more than one release cycle ago and there
Expand Down
115 changes: 80 additions & 35 deletions cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go
Original file line number Diff line number Diff line change
Expand Up @@ -308,46 +308,78 @@ func (m *asgCache) DeleteInstances(instances []*AwsInstanceRef) error {
}
}

placeHolderInstancesCount := m.GetPlaceHolderInstancesCount(instances)
// Check if there are any placeholder instances in the list.
if placeHolderInstancesCount > 0 {
// Log the check for placeholders in the ASG.
klog.V(4).Infof("Detected %d placeholder instance(s) in ASG %s",
placeHolderInstancesCount, commonAsg.Name)

asgNames := []string{commonAsg.Name}
asgDetail, err := m.awsService.getAutoscalingGroupsByNames(asgNames)

if err != nil {
klog.Errorf("Error retrieving ASG details %s: %v", commonAsg.Name, err)
return err
}

activeInstancesInAsg := len(asgDetail[0].Instances)
desiredCapacityInAsg := int(*asgDetail[0].DesiredCapacity)
klog.V(4).Infof("asg %s has placeholders instances with desired capacity = %d and active instances = %d. updating ASG to match active instances count",
commonAsg.Name, desiredCapacityInAsg, activeInstancesInAsg)

// If the difference between the active instances and the desired capacity is greater than 1,
// it means that the ASG is under-provisioned and the desired capacity is not being reached.
// In this case, we would reduce the size of ASG by the count of unprovisioned instances
// which is equal to the total count of active instances in ASG

err = m.setAsgSizeNoLock(commonAsg, activeInstancesInAsg)

if err != nil {
klog.Errorf("Error reducing ASG %s size to %d: %v", commonAsg.Name, activeInstancesInAsg, err)
return err
}
}

for _, instance := range instances {
// check if the instance is a placeholder - a requested instance that was never created by the node group
// if it is, just decrease the size of the node group, as there's no specific instance we can remove
if m.isPlaceholderInstance(instance) {
klog.V(4).Infof("instance %s is detected as a placeholder, decreasing ASG requested size instead "+
"of deleting instance", instance.Name)
m.decreaseAsgSizeByOneNoLock(commonAsg)
} else {
// check if the instance is already terminating - if it is, don't bother terminating again
// as doing so causes unnecessary API calls and can cause the curSize cached value to decrement
// unnecessarily.
lifecycle, err := m.findInstanceLifecycle(*instance)
if err != nil {
return err
}

if lifecycle != nil &&
*lifecycle == autoscaling.LifecycleStateTerminated ||
*lifecycle == autoscaling.LifecycleStateTerminating ||
*lifecycle == autoscaling.LifecycleStateTerminatingWait ||
*lifecycle == autoscaling.LifecycleStateTerminatingProceed {
klog.V(2).Infof("instance %s is already terminating in state %s, will skip instead", instance.Name, *lifecycle)
continue
}
if m.isPlaceholderInstance(instance) {
// skipping placeholder as placeholder instances don't exist
// and we have already reduced ASG size during placeholder check.
continue
}
// check if the instance is already terminating - if it is, don't bother terminating again
// as doing so causes unnecessary API calls and can cause the curSize cached value to decrement
// unnecessarily.
lifecycle, err := m.findInstanceLifecycle(*instance)
if err != nil {
return err
}

params := &autoscaling.TerminateInstanceInAutoScalingGroupInput{
InstanceId: aws.String(instance.Name),
ShouldDecrementDesiredCapacity: aws.Bool(true),
}
start := time.Now()
resp, err := m.awsService.TerminateInstanceInAutoScalingGroup(params)
observeAWSRequest("TerminateInstanceInAutoScalingGroup", err, start)
if err != nil {
return err
}
klog.V(4).Infof(*resp.Activity.Description)
if lifecycle != nil &&
*lifecycle == autoscaling.LifecycleStateTerminated ||
*lifecycle == autoscaling.LifecycleStateTerminating ||
*lifecycle == autoscaling.LifecycleStateTerminatingWait ||
*lifecycle == autoscaling.LifecycleStateTerminatingProceed {
klog.V(2).Infof("instance %s is already terminating in state %s, will skip instead", instance.Name, *lifecycle)
continue
}

// Proactively decrement the size so autoscaler makes better decisions
commonAsg.curSize--
params := &autoscaling.TerminateInstanceInAutoScalingGroupInput{
InstanceId: aws.String(instance.Name),
ShouldDecrementDesiredCapacity: aws.Bool(true),
}
start := time.Now()
resp, err := m.awsService.TerminateInstanceInAutoScalingGroup(params)
observeAWSRequest("TerminateInstanceInAutoScalingGroup", err, start)
if err != nil {
return err
}
klog.V(4).Infof(*resp.Activity.Description)

// Proactively decrement the size so autoscaler makes better decisions
commonAsg.curSize--

}
return nil
}
Expand Down Expand Up @@ -624,3 +656,16 @@ func (m *asgCache) buildInstanceRefFromAWS(instance *autoscaling.Instance) AwsIn
func (m *asgCache) Cleanup() {
close(m.interrupt)
}

// GetPlaceHolderInstancesCount returns count of placeholder instances in the cache
func (m *asgCache) GetPlaceHolderInstancesCount(instances []*AwsInstanceRef) int {

placeholderInstancesCount := 0
for _, instance := range instances {
if strings.HasPrefix(instance.Name, placeholderInstanceNamePrefix) {
placeholderInstancesCount++

}
}
return placeholderInstancesCount
}
Loading

0 comments on commit a48f732

Please sign in to comment.