Add support for all-or-nothing scale-up strategy #6821
@@ -89,6 +89,7 @@ func (o *ScaleUpOrchestrator) ScaleUp(
    nodes []*apiv1.Node,
    daemonSets []*appsv1.DaemonSet,
    nodeInfos map[string]*schedulerframework.NodeInfo,
    allOrNothing bool, // Either request enough capacity for all unschedulablePods, or don't request it at all.
) (*status.ScaleUpStatus, errors.AutoscalerError) {
    if !o.initialized {
        return status.UpdateScaleUpError(&status.ScaleUpStatus{}, errors.NewAutoscalerError(errors.InternalError, "ScaleUpOrchestrator is not initialized"))
@@ -146,11 +147,13 @@ func (o *ScaleUpOrchestrator) ScaleUp(
    }

    for _, nodeGroup := range validNodeGroups {
        option := o.ComputeExpansionOption(nodeGroup, schedulablePodGroups, nodeInfos, len(nodes)+len(upcomingNodes), now)
        option := o.ComputeExpansionOption(nodeGroup, schedulablePodGroups, nodeInfos, len(nodes)+len(upcomingNodes), now, allOrNothing)
        o.processors.BinpackingLimiter.MarkProcessed(o.autoscalingContext, nodeGroup.Id())

        if len(option.Pods) == 0 || option.NodeCount == 0 {
            klog.V(4).Infof("No pod can fit to %s", nodeGroup.Id())
        } else if allOrNothing && len(option.Pods) < len(unschedulablePods) {
            klog.V(4).Infof("Some pods can't fit to %s, giving up due to all-or-nothing scale-up strategy", nodeGroup.Id())
        } else {
            options = append(options, option)
        }
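To make the new filtering rule concrete, here is a minimal, self-contained sketch of the decision applied in the loop above. It uses a simplified stand-in type rather than the autoscaler's real expander.Option, so the type, field, and function names are illustrative only:

package main

import "fmt"

// expansionOption is a simplified stand-in for the autoscaler's expander.Option;
// only the fields relevant to the filtering decision are modeled here.
type expansionOption struct {
    nodeGroupID string
    podCount    int // pods from the unschedulable set that would fit this option
    nodeCount   int
}

// keepOption mirrors the filtering in the loop above: an option is kept only if it
// fits at least one pod and, under all-or-nothing, accommodates every unschedulable pod.
func keepOption(opt expansionOption, totalUnschedulable int, allOrNothing bool) bool {
    if opt.podCount == 0 || opt.nodeCount == 0 {
        return false // no pod can fit this node group
    }
    if allOrNothing && opt.podCount < totalUnschedulable {
        return false // a partial fit is rejected under the all-or-nothing strategy
    }
    return true
}

func main() {
    const totalUnschedulable = 5
    options := []expansionOption{
        {nodeGroupID: "ng-small", podCount: 3, nodeCount: 1},
        {nodeGroupID: "ng-large", podCount: 5, nodeCount: 2},
    }
    for _, opt := range options {
        fmt.Printf("%s kept=%v\n", opt.nodeGroupID, keepOption(opt, totalUnschedulable, true))
    }
    // With allOrNothing=true, ng-small is dropped and only ng-large remains a candidate.
}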
@@ -211,9 +214,26 @@ func (o *ScaleUpOrchestrator) ScaleUp(
            aErr)
    }

    if newNodes < bestOption.NodeCount {
        klog.V(1).Infof("Only %d nodes can be added to %s due to cluster-wide limits", newNodes, bestOption.NodeGroup.Id())
        if allOrNothing {
            // Can't execute a scale-up that will accommodate all pods, so nothing is considered schedulable.
            klog.V(1).Info("Not attempting scale-up due to all-or-nothing strategy: not all pods would be accommodated")
            markedEquivalenceGroups := markAllGroupsAsUnschedulable(podEquivalenceGroups, AllOrNothingReason)
            return buildNoOptionsAvailableStatus(markedEquivalenceGroups, skippedNodeGroups, nodeGroups), nil
Review comment: Could you use the helper function for the 2 other ScaleUpNoOptionsAvailable returns from this method?
Review comment: This can be fixed in the follow-up PR as well if you prefer.
Reply: Will do!
        }
    }

    // If necessary, create the node group. This is no longer simulation, an empty node group will be created by cloud provider if supported.
    createNodeGroupResults := make([]nodegroups.CreateNodeGroupResult, 0)
    if !bestOption.NodeGroup.Exist() {
        if allOrNothing && bestOption.NodeGroup.MaxSize() < newNodes {
            klog.V(1).Infof("Can only create a new node group with max %d nodes, need %d nodes", bestOption.NodeGroup.MaxSize(), newNodes)
            // Can't execute a scale-up that will accommodate all pods, so nothing is considered schedulable.
            klog.V(1).Info("Not attempting scale-up due to all-or-nothing strategy: not all pods would be accommodated")
            markedEquivalenceGroups := markAllGroupsAsUnschedulable(podEquivalenceGroups, AllOrNothingReason)
            return buildNoOptionsAvailableStatus(markedEquivalenceGroups, skippedNodeGroups, nodeGroups), nil
        }
        var scaleUpStatus *status.ScaleUpStatus
        createNodeGroupResults, scaleUpStatus, aErr = o.CreateNodeGroup(bestOption, nodeInfos, schedulablePodGroups, podEquivalenceGroups, daemonSets)
        if aErr != nil {
@@ -256,9 +276,24 @@ func (o *ScaleUpOrchestrator) ScaleUp(
            aErr)
    }

    // Last check before scale-up. Node group capacity (both due to max size limits & current size) is only checked when balancing.
    totalCapacity := 0
    for _, sui := range scaleUpInfos {
        totalCapacity += sui.NewSize - sui.CurrentSize
    }
    if totalCapacity < newNodes {
        klog.V(1).Infof("Can only add %d nodes due to node group limits, need %d nodes", totalCapacity, newNodes)
        if allOrNothing {
            // Can't execute a scale-up that will accommodate all pods, so nothing is considered schedulable.
            klog.V(1).Info("Not attempting scale-up due to all-or-nothing strategy: not all pods would be accommodated")
            markedEquivalenceGroups := markAllGroupsAsUnschedulable(podEquivalenceGroups, AllOrNothingReason)
            return buildNoOptionsAvailableStatus(markedEquivalenceGroups, skippedNodeGroups, nodeGroups), nil
        }
    }

    // Execute scale up.
    klog.V(1).Infof("Final scale-up plan: %v", scaleUpInfos)
    aErr, failedNodeGroups := o.scaleUpExecutor.ExecuteScaleUps(scaleUpInfos, nodeInfos, now)
    aErr, failedNodeGroups := o.scaleUpExecutor.ExecuteScaleUps(scaleUpInfos, nodeInfos, now, allOrNothing)
    if aErr != nil {
        return status.UpdateScaleUpError(
            &status.ScaleUpStatus{
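The capacity check above only matters once balancing has spread the request across several node groups. Below is a small self-contained sketch of when an all-or-nothing scale-up gives up at this point; it uses a simplified stand-in for the autoscaler's ScaleUpInfo, so the type and field names are illustrative:

package main

import "fmt"

// scaleUpInfo is a simplified stand-in for the autoscaler's ScaleUpInfo; only the
// sizes needed for the capacity check above are modeled.
type scaleUpInfo struct {
    group       string
    currentSize int
    newSize     int
}

// enoughCapacity mirrors the last check before execution: the balanced plan must add
// at least newNodes nodes in total, otherwise an all-or-nothing scale-up aborts.
func enoughCapacity(plan []scaleUpInfo, newNodes int) bool {
    total := 0
    for _, sui := range plan {
        total += sui.newSize - sui.currentSize
    }
    return total >= newNodes
}

func main() {
    // Balancing spread 7 requested nodes over two groups, but their limits only
    // allow 3+2=5 extra nodes, so an all-or-nothing scale-up would give up here.
    plan := []scaleUpInfo{
        {group: "ng-a", currentSize: 4, newSize: 7},
        {group: "ng-b", currentSize: 1, newSize: 3},
    }
    fmt.Println(enoughCapacity(plan, 7)) // false
}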
@@ -364,7 +399,7 @@ func (o *ScaleUpOrchestrator) ScaleUpToNodeGroupMinSize(
    }

    klog.V(1).Infof("ScaleUpToNodeGroupMinSize: final scale-up plan: %v", scaleUpInfos)
    aErr, failedNodeGroups := o.scaleUpExecutor.ExecuteScaleUps(scaleUpInfos, nodeInfos, now)
    aErr, failedNodeGroups := o.scaleUpExecutor.ExecuteScaleUps(scaleUpInfos, nodeInfos, now, false /* allOrNothing disabled */)
    if aErr != nil {
        return status.UpdateScaleUpError(
            &status.ScaleUpStatus{
@@ -447,6 +482,7 @@ func (o *ScaleUpOrchestrator) ComputeExpansionOption(
    nodeInfos map[string]*schedulerframework.NodeInfo,
    currentNodeCount int,
    now time.Time,
    allOrNothing bool,
) expander.Option {
    option := expander.Option{NodeGroup: nodeGroup}
    podGroups := schedulablePodGroups[nodeGroup.Id()]
@@ -471,11 +507,22 @@ func (o *ScaleUpOrchestrator) ComputeExpansionOption(
    if err != nil && err != cloudprovider.ErrNotImplemented {
        klog.Errorf("Failed to get autoscaling options for node group %s: %v", nodeGroup.Id(), err)
    }

    // Special handling for groups that only scale from zero to max.
    if autoscalingOptions != nil && autoscalingOptions.ZeroOrMaxNodeScaling {
        if option.NodeCount > 0 && option.NodeCount != nodeGroup.MaxSize() {
            // For zero-or-max scaling groups, the only valid value of node count is node group's max size.
        if allOrNothing && option.NodeCount > nodeGroup.MaxSize() {
            // We would have to cap the node count, which means not all pods will be
            // accommodated. This violates the principle of all-or-nothing strategy.
            option.Pods = nil
            option.NodeCount = 0
        }
        if option.NodeCount > 0 {
            // Cap or increase the number of nodes to the only valid value - node group's max size.
            option.NodeCount = nodeGroup.MaxSize()
        }
    }

    return option
}
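A compact sketch of the zero-or-max adjustment above, reduced to plain integers; the helper name and parameters below are illustrative, not part of the PR:

package main

import "fmt"

// capForZeroOrMaxScaling reproduces the zero-or-max adjustment above with plain values:
// nodeCount/podsFit describe the option before adjustment, maxSize is the node group's
// maximum size. It returns the adjusted (nodeCount, podsFit).
func capForZeroOrMaxScaling(nodeCount, podsFit, maxSize int, allOrNothing bool) (int, int) {
    if allOrNothing && nodeCount > maxSize {
        // Capping would leave some pods unaccommodated, which violates the
        // all-or-nothing strategy, so the option is emptied instead.
        return 0, 0
    }
    if nodeCount > 0 {
        // The only valid non-zero size for a zero-or-max group is its max size.
        return maxSize, podsFit
    }
    return nodeCount, podsFit
}

func main() {
    // Needing 12 nodes from a group capped at 10: all-or-nothing drops the option,
    // while the default strategy sets the count to the group's max size instead.
    for _, allOrNothing := range []bool{true, false} {
        nodes, pods := capForZeroOrMaxScaling(12, 30, 10, allOrNothing)
        fmt.Printf("allOrNothing=%v -> nodeCount=%d podsFit=%d\n", allOrNothing, nodes, pods)
    }
}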
@@ -564,6 +611,7 @@ func (o *ScaleUpOrchestrator) SchedulablePodGroups(
            })
            // Mark pod group as (theoretically) schedulable.
            eg.Schedulable = true
            eg.SchedulableGroups = append(eg.SchedulableGroups, nodeGroup.Id())
        } else {
            klog.V(2).Infof("Pod %s/%s can't be scheduled on %s, predicate checking error: %v", samplePod.Namespace, samplePod.Name, nodeGroup.Id(), err.VerboseMessage())
            if podCount := len(eg.Pods); podCount > 1 {
@@ -709,6 +757,29 @@ func matchingSchedulablePodGroups(podGroups []estimator.PodEquivalenceGroup, sim
    return true
}

func markAllGroupsAsUnschedulable(egs []*equivalence.PodGroup, reason status.Reasons) []*equivalence.PodGroup {
    for _, eg := range egs {
        if eg.Schedulable {
            if eg.SchedulingErrors == nil {
                eg.SchedulingErrors = map[string]status.Reasons{}
            }
            for _, sg := range eg.SchedulableGroups {
                eg.SchedulingErrors[sg] = reason
            }
            eg.Schedulable = false
        }
    }
    return egs
}

func buildNoOptionsAvailableStatus(egs []*equivalence.PodGroup, skipped map[string]status.Reasons, ngs []cloudprovider.NodeGroup) *status.ScaleUpStatus {
    return &status.ScaleUpStatus{
        Result:                  status.ScaleUpNoOptionsAvailable,
        PodsRemainUnschedulable: GetRemainingPods(egs, skipped),
        ConsideredNodeGroups:    ngs,
    }
}

// GetRemainingPods returns information about pods which CA is unable to help
// at this moment.
func GetRemainingPods(egs []*equivalence.PodGroup, skipped map[string]status.Reasons) []status.NoScaleUpInfo {
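The SchedulableGroups field added in SchedulablePodGroups exists so that markAllGroupsAsUnschedulable can record a rejection reason for every node group that previously accepted a pod group. The self-contained sketch below ties the two together using simplified stand-in types; the real equivalence.PodGroup and status.Reasons are richer than this:

package main

import "fmt"

// podGroup is a simplified stand-in for equivalence.PodGroup, keeping only the fields
// touched by markAllGroupsAsUnschedulable; the type itself is illustrative.
type podGroup struct {
    name              string
    schedulable       bool
    schedulableGroups []string
    schedulingErrors  map[string]string
}

// markAllUnschedulable mirrors the helper above: every group previously marked
// schedulable gets the rejection reason recorded for each node group that accepted
// it (tracked via schedulableGroups), and is flipped back to unschedulable.
func markAllUnschedulable(egs []*podGroup, reason string) []*podGroup {
    for _, eg := range egs {
        if !eg.schedulable {
            continue
        }
        if eg.schedulingErrors == nil {
            eg.schedulingErrors = map[string]string{}
        }
        for _, sg := range eg.schedulableGroups {
            eg.schedulingErrors[sg] = reason
        }
        eg.schedulable = false
    }
    return egs
}

func main() {
    groups := []*podGroup{
        {name: "web", schedulable: true, schedulableGroups: []string{"ng-a", "ng-b"}},
        {name: "batch", schedulable: false},
    }
    markAllUnschedulable(groups, "not all pods would fit and scale-up is using all-or-nothing strategy")
    for _, g := range groups {
        fmt.Printf("%s schedulable=%v errors=%v\n", g.name, g.schedulable, g.schedulingErrors)
    }
}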
@@ -0,0 +1,37 @@
/*
Copyright 2024 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package orchestrator

// RejectedReasons contains information why given node group was rejected as a scale-up option.
type RejectedReasons struct {
    messages []string
}

// NewRejectedReasons creates a new RejectedReasons object.
func NewRejectedReasons(m string) *RejectedReasons {
    return &RejectedReasons{[]string{m}}
}

// Reasons returns a slice of reasons why the node group was not considered for scale up.
func (sr *RejectedReasons) Reasons() []string {
    return sr.messages
}

var (
    // AllOrNothingReason means the node group was rejected because not all pods would fit it when using all-or-nothing strategy.
    AllOrNothingReason = NewRejectedReasons("not all pods would fit and scale-up is using all-or-nothing strategy")
)
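RejectedReasons is assigned into SchedulingErrors maps typed as map[string]status.Reasons, which suggests status.Reasons is an interface with a single Reasons() []string method; that is an assumption, not something this diff shows. Under that assumption, a self-contained sketch of how a consumer could surface the AllOrNothingReason message:

package main

import "fmt"

// reasonsProvider stands in for the status.Reasons interface that RejectedReasons
// appears to satisfy; the real interface lives in the autoscaler's status package
// and may differ in detail.
type reasonsProvider interface {
    Reasons() []string
}

// rejectedReasons is a local copy of the new type, for illustration only.
type rejectedReasons struct {
    messages []string
}

func (r *rejectedReasons) Reasons() []string { return r.messages }

func main() {
    var why reasonsProvider = &rejectedReasons{messages: []string{
        "not all pods would fit and scale-up is using all-or-nothing strategy",
    }}
    // A status consumer (e.g. when building NoScaleUpInfo entries) can surface the message.
    fmt.Println(why.Reasons())
}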
Review comment: nit: Could you also copy the comment to the Orchestrator interface (e.g. in the follow-up PR)?
Reply: Will do in follow-up PR.