Skip to content

Commit

Permalink
Merge pull request #4848 from chaosi-zju/reschedule-sche
Browse files Browse the repository at this point in the history
Introduce a mechanism to scheduler to actively trigger rescheduling
  • Loading branch information
karmada-bot authored Apr 22, 2024
2 parents e100008 + 0372cab commit aded7c0
Show file tree
Hide file tree
Showing 16 changed files with 454 additions and 223 deletions.
8 changes: 8 additions & 0 deletions api/openapi-spec/swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -19151,6 +19151,10 @@
"$ref": "#/definitions/com.github.karmada-io.karmada.pkg.apis.work.v1alpha2.BindingSnapshot"
}
},
"rescheduleTriggeredAt": {
"description": "RescheduleTriggeredAt is a timestamp representing when the referenced resource is triggered rescheduling. When this field is updated, it means a rescheduling is manually triggered by user, and the expected behavior of this action is to do a complete recalculation without referring to last scheduling results. It works with the status.lastScheduledTime field, and only when this timestamp is later than timestamp in status.lastScheduledTime will the rescheduling actually execute, otherwise, ignored.\n\nIt is represented in RFC3339 form (like '2006-01-02T15:04:05Z') and is in UTC.",
"$ref": "#/definitions/io.k8s.apimachinery.pkg.apis.meta.v1.Time"
},
"resource": {
"description": "Resource represents the Kubernetes resource to be propagated.",
"default": {},
Expand Down Expand Up @@ -19182,6 +19186,10 @@
"$ref": "#/definitions/io.k8s.apimachinery.pkg.apis.meta.v1.Condition"
}
},
"lastScheduledTime": {
"description": "LastScheduledTime representing the latest timestamp when scheduler successfully finished a scheduling. It is represented in RFC3339 form (like '2006-01-02T15:04:05Z') and is in UTC.",
"$ref": "#/definitions/io.k8s.apimachinery.pkg.apis.meta.v1.Time"
},
"schedulerObservedGeneration": {
"description": "SchedulerObservedGeneration is the generation(.metadata.generation) observed by the scheduler. If SchedulerObservedGeneration is less than the generation in metadata means the scheduler hasn't confirmed the scheduling result or hasn't done the schedule yet.",
"type": "integer",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1128,6 +1128,18 @@ spec:
- name
type: object
type: array
rescheduleTriggeredAt:
description: "RescheduleTriggeredAt is a timestamp representing when
the referenced resource is triggered rescheduling. When this field
is updated, it means a rescheduling is manually triggered by user,
and the expected behavior of this action is to do a complete recalculation
without referring to last scheduling results. It works with the
status.lastScheduledTime field, and only when this timestamp is
later than timestamp in status.lastScheduledTime will the rescheduling
actually execute, otherwise, ignored. \n It is represented in RFC3339
form (like '2006-01-02T15:04:05Z') and is in UTC."
format: date-time
type: string
resource:
description: Resource represents the Kubernetes resource to be propagated.
properties:
Expand Down Expand Up @@ -1279,6 +1291,12 @@ spec:
- type
type: object
type: array
lastScheduledTime:
description: LastScheduledTime representing the latest timestamp when
scheduler successfully finished a scheduling. It is represented
in RFC3339 form (like '2006-01-02T15:04:05Z') and is in UTC.
format: date-time
type: string
schedulerObservedGeneration:
description: SchedulerObservedGeneration is the generation(.metadata.generation)
observed by the scheduler. If SchedulerObservedGeneration is less
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1128,6 +1128,18 @@ spec:
- name
type: object
type: array
rescheduleTriggeredAt:
description: "RescheduleTriggeredAt is a timestamp representing when
the referenced resource is triggered rescheduling. When this field
is updated, it means a rescheduling is manually triggered by user,
and the expected behavior of this action is to do a complete recalculation
without referring to last scheduling results. It works with the
status.lastScheduledTime field, and only when this timestamp is
later than timestamp in status.lastScheduledTime will the rescheduling
actually execute, otherwise, ignored. \n It is represented in RFC3339
form (like '2006-01-02T15:04:05Z') and is in UTC."
format: date-time
type: string
resource:
description: Resource represents the Kubernetes resource to be propagated.
properties:
Expand Down Expand Up @@ -1279,6 +1291,12 @@ spec:
- type
type: object
type: array
lastScheduledTime:
description: LastScheduledTime representing the latest timestamp when
scheduler successfully finished a scheduling. It is represented
in RFC3339 form (like '2006-01-02T15:04:05Z') and is in UTC.
format: date-time
type: string
schedulerObservedGeneration:
description: SchedulerObservedGeneration is the generation(.metadata.generation)
observed by the scheduler. If SchedulerObservedGeneration is less
Expand Down
15 changes: 15 additions & 0 deletions pkg/apis/work/v1alpha2/binding_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,16 @@ type ResourceBindingSpec struct {
// +kubebuilder:validation:Enum=Abort;Overwrite
// +optional
ConflictResolution policyv1alpha1.ConflictResolution `json:"conflictResolution,omitempty"`

// RescheduleTriggeredAt is a timestamp representing when the referenced resource is triggered rescheduling.
// When this field is updated, it means a rescheduling is manually triggered by user, and the expected behavior
// of this action is to do a complete recalculation without referring to last scheduling results.
// It works with the status.lastScheduledTime field, and only when this timestamp is later than timestamp in
// status.lastScheduledTime will the rescheduling actually execute, otherwise, ignored.
//
// It is represented in RFC3339 form (like '2006-01-02T15:04:05Z') and is in UTC.
// +optional
RescheduleTriggeredAt *metav1.Time `json:"rescheduleTriggeredAt,omitempty"`
}

// ObjectReference contains enough information to locate the referenced object inside current cluster.
Expand Down Expand Up @@ -297,6 +307,11 @@ type ResourceBindingStatus struct {
// +optional
SchedulerObservedAffinityName string `json:"schedulerObservingAffinityName,omitempty"`

// LastScheduledTime representing the latest timestamp when scheduler successfully finished a scheduling.
// It is represented in RFC3339 form (like '2006-01-02T15:04:05Z') and is in UTC.
// +optional
LastScheduledTime *metav1.Time `json:"lastScheduledTime,omitempty"`

// Conditions contain the different condition statuses.
// +optional
Conditions []metav1.Condition `json:"conditions,omitempty"`
Expand Down
8 changes: 8 additions & 0 deletions pkg/apis/work/v1alpha2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 14 additions & 2 deletions pkg/generated/openapi/zz_generated.openapi.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

49 changes: 44 additions & 5 deletions pkg/scheduler/core/assignment.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,22 @@ const (
DynamicWeightStrategy = "DynamicWeight"
)

// assignmentMode indicates how replicas are assigned to clusters, especially
// in the case of re-assignment. newAssignState defaults to Steady and switches
// to Fresh only when util.RescheduleRequired reports that a reschedule is due.
type assignmentMode string

const (
// Steady represents a steady, incremental approach to re-assigning replicas
// across clusters. It aims to keep the existing replica distribution as
// close to unchanged as possible, making only minimal adjustments when
// necessary. It minimizes disruption and preserves the balance across clusters.
Steady assignmentMode = "Steady"

// Fresh disregards the previous assignment entirely and seeks to establish
// a completely new replica distribution across clusters. It accepts
// significant changes, even if they involve disruption.
// assignByDynamicStrategy handles this mode via dynamicFreshScale.
Fresh assignmentMode = "Fresh"
)

// assignState is a wrapper of the input for assigning function.
type assignState struct {
candidates []*clusterv1alpha1.Cluster
Expand All @@ -58,6 +74,9 @@ type assignState struct {
// fields below are indirect results
strategyType string

// assignmentMode is the mode used to assign replicas (Steady or Fresh)
assignmentMode assignmentMode

scheduledClusters []workv1alpha2.TargetCluster
assignedReplicas int32
availableClusters []workv1alpha2.TargetCluster
Expand All @@ -67,26 +86,35 @@ type assignState struct {
targetReplicas int32
}

func newAssignState(candidates []*clusterv1alpha1.Cluster, placement *policyv1alpha1.Placement, obj *workv1alpha2.ResourceBindingSpec) *assignState {
func newAssignState(candidates []*clusterv1alpha1.Cluster, spec *workv1alpha2.ResourceBindingSpec,
status *workv1alpha2.ResourceBindingStatus) *assignState {
var strategyType string

switch placement.ReplicaSchedulingType() {
switch spec.Placement.ReplicaSchedulingType() {
case policyv1alpha1.ReplicaSchedulingTypeDuplicated:
strategyType = DuplicatedStrategy
case policyv1alpha1.ReplicaSchedulingTypeDivided:
switch placement.ReplicaScheduling.ReplicaDivisionPreference {
switch spec.Placement.ReplicaScheduling.ReplicaDivisionPreference {
case policyv1alpha1.ReplicaDivisionPreferenceAggregated:
strategyType = AggregatedStrategy
case policyv1alpha1.ReplicaDivisionPreferenceWeighted:
if placement.ReplicaScheduling.WeightPreference != nil && len(placement.ReplicaScheduling.WeightPreference.DynamicWeight) != 0 {
if spec.Placement.ReplicaScheduling.WeightPreference != nil && len(spec.Placement.ReplicaScheduling.WeightPreference.DynamicWeight) != 0 {
strategyType = DynamicWeightStrategy
} else {
strategyType = StaticWeightStrategy
}
}
}

return &assignState{candidates: candidates, strategy: placement.ReplicaScheduling, spec: obj, strategyType: strategyType}
// the assignment mode defaults to Steady, to minimize disruption and preserve the balance across clusters.
expectAssignmentMode := Steady
// when spec.rescheduleTriggeredAt is updated, it means a rescheduling has been manually triggered by the user, and
// the expected behavior of this action is to do a complete recalculation without referring to the last scheduling results.
if util.RescheduleRequired(spec.RescheduleTriggeredAt, status.LastScheduledTime) {
expectAssignmentMode = Fresh
}

return &assignState{candidates: candidates, strategy: spec.Placement.ReplicaScheduling, spec: spec, strategyType: strategyType, assignmentMode: expectAssignmentMode}
}

func (as *assignState) buildScheduledClusters() {
Expand Down Expand Up @@ -179,6 +207,17 @@ func assignByStaticWeightStrategy(state *assignState) ([]workv1alpha2.TargetClus

func assignByDynamicStrategy(state *assignState) ([]workv1alpha2.TargetCluster, error) {
state.buildScheduledClusters()

// 1. when Fresh mode expected, do a complete recalculation without referring to the last scheduling results.
if state.assignmentMode == Fresh {
result, err := dynamicFreshScale(state)
if err != nil {
return nil, fmt.Errorf("failed to do fresh scale: %v", err)
}
return result, nil
}

// 2. when Steady mode expected, try minimizing large changes in scheduling results.
if state.assignedReplicas > state.spec.Replicas {
// We need to reduce the replicas in terms of the previous result.
result, err := dynamicScaleDown(state)
Expand Down
Loading

0 comments on commit aded7c0

Please sign in to comment.