Skip to content

Commit

Permalink
scheduler: reduce cycleState overhead for reservation (#2120)
Browse files Browse the repository at this point in the history
Signed-off-by: saintube <saintube@foxmail.com>
  • Loading branch information
saintube committed Jul 2, 2024
1 parent 89e9e5c commit 4a61e34
Show file tree
Hide file tree
Showing 6 changed files with 631 additions and 542 deletions.
8 changes: 5 additions & 3 deletions pkg/scheduler/plugins/reservation/nominator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -289,9 +289,11 @@ func TestNominateReservation(t *testing.T) {
cycleState := framework.NewCycleState()
requests := apiresource.PodRequests(tt.pod, apiresource.PodResourcesOptions{})
state := &stateData{
nodeReservationStates: map[string]nodeReservationState{},
podRequests: requests,
podRequestsResources: framework.NewResource(requests),
schedulingStateData: schedulingStateData{
nodeReservationStates: map[string]nodeReservationState{},
podRequests: requests,
podRequestsResources: framework.NewResource(requests),
},
}
for _, reservation := range tt.reservations {
rInfo := frameworkext.NewReservationInfo(reservation)
Expand Down
40 changes: 32 additions & 8 deletions pkg/scheduler/plugins/reservation/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,19 @@ func (pl *Plugin) EventsToRegister() []framework.ClusterEventWithHint {
// Compile-time assertion that *stateData satisfies framework.StateData.
var _ framework.StateData = &stateData{}

// stateData is the state the reservation plugin keeps for a pod. It is split into two parts:
// the embedded schedulingStateData, which CleanSchedulingData resets to its zero value, and
// the remaining fields, which are left intact by that reset.
type stateData struct {
// Scheduling cycle data. Only needed while the pod is being scheduled; see schedulingStateData.
schedulingStateData

// All cycle data.
// NOTE: This part of the data is kept during both the scheduling cycle and the binding cycle. Because
// many binding goroutines may reference it at the same time, its memory overhead must stay small to
// avoid OOM issues: its space complexity should be no more than O(1) with respect to the number of
// pods, reservations and nodes.
// assumed is set in Reserve to a clone of the nominated reservation.
assumed *frameworkext.ReservationInfo
}

// schedulingStateData holds the data that is only needed during the scheduling cycle. It can be
// cleaned up before entering the binding cycle to reduce memory cost.
type schedulingStateData struct {
preemptLock sync.RWMutex
hasAffinity bool
podRequests corev1.ResourceList
Expand All @@ -159,7 +172,6 @@ type stateData struct {
nodeReservationStates map[string]nodeReservationState
nodeReservationDiagnosis map[string]*nodeDiagnosisState
preferredNode string
assumed *frameworkext.ReservationInfo
}

type nodeReservationState struct {
Expand All @@ -183,13 +195,15 @@ type nodeDiagnosisState struct {

func (s *stateData) Clone() framework.StateData {
ns := &stateData{
hasAffinity: s.hasAffinity,
podRequests: s.podRequests,
podRequestsResources: s.podRequestsResources,
nodeReservationStates: s.nodeReservationStates,
nodeReservationDiagnosis: s.nodeReservationDiagnosis,
preferredNode: s.preferredNode,
assumed: s.assumed,
schedulingStateData: schedulingStateData{
hasAffinity: s.hasAffinity,
podRequests: s.podRequests,
podRequestsResources: s.podRequestsResources,
nodeReservationStates: s.nodeReservationStates,
nodeReservationDiagnosis: s.nodeReservationDiagnosis,
preferredNode: s.preferredNode,
},
assumed: s.assumed,
}

s.preemptLock.RLock()
Expand Down Expand Up @@ -217,6 +231,12 @@ func (s *stateData) Clone() framework.StateData {
return ns
}

// CleanSchedulingData resets the embedded schedulingStateData to its zero value so that this stateData
// no longer references the scheduling cycle data. It is intended to reduce memory cost before entering
// the binding cycle. The assumed field is not part of schedulingStateData and is left untouched.
// NOTE(review): the assignment also replaces preemptLock with a fresh, unlocked mutex. This presumably
// relies on no other goroutine holding or waiting on that lock when this is called — confirm against callers.
func (s *stateData) CleanSchedulingData() {
s.schedulingStateData = schedulingStateData{}
}

func getStateData(cycleState *framework.CycleState) *stateData {
v, err := cycleState.Read(stateKey)
if err != nil {
Expand Down Expand Up @@ -669,6 +689,8 @@ func (pl *Plugin) Reserve(ctx context.Context, cycleState *framework.CycleState,
return framework.NewStatus(framework.Unschedulable, ErrReasonReservationAffinity)
}
klog.V(5).Infof("Skip reserve with reservation since there are no matched reservations, pod %v, node: %v", klog.KObj(pod), nodeName)
// clean the scheduling cycle data to avoid unnecessary memory cost before entering the binding cycle
state.CleanSchedulingData()
return nil
}
pl.handle.GetReservationNominator().AddNominatedReservation(pod, nodeName, nominatedReservation)
Expand All @@ -680,6 +702,8 @@ func (pl *Plugin) Reserve(ctx context.Context, cycleState *framework.CycleState,
return framework.AsStatus(err)
}
state.assumed = nominatedReservation.Clone()
// clean the scheduling cycle data to avoid unnecessary memory cost before entering the binding cycle
state.CleanSchedulingData()
klog.V(4).InfoS("Reserve pod to node with reservations", "pod", klog.KObj(pod), "node", nodeName, "assumed", klog.KObj(nominatedReservation))
return nil
}
Expand Down
Loading

0 comments on commit 4a61e34

Please sign in to comment.