From 43c1c09029cdfa78674ed6b8de4cebe53e2e88ae Mon Sep 17 00:00:00 2001 From: Xuzheng Chang Date: Fri, 1 Sep 2023 15:50:37 +0800 Subject: [PATCH] Add reSync task callback Signed-off-by: Xuzheng Chang (cherry picked from commit 5249bba7182148af1a6768870a04a9be7dc4bfeb) --- pkg/scheduler/api/job_info.go | 6 ++++++ pkg/scheduler/cache/cache.go | 15 +++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/pkg/scheduler/api/job_info.go b/pkg/scheduler/api/job_info.go index 68b2d7f5d4..03d58fe3ba 100644 --- a/pkg/scheduler/api/job_info.go +++ b/pkg/scheduler/api/job_info.go @@ -33,6 +33,7 @@ import ( batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" "volcano.sh/apis/pkg/apis/scheduling" "volcano.sh/apis/pkg/apis/scheduling/v1beta1" + volumescheduling "volcano.sh/volcano/pkg/scheduler/capabilities/volumebinding" ) @@ -130,6 +131,11 @@ type TaskInfo struct { NumaInfo *TopologyInfo PodVolumes *volumescheduling.PodVolumes Pod *v1.Pod + + // CustomBindErrHandler is a custom callback func called when task bind err. + CustomBindErrHandler func() error + // CustomBindErrHandlerSucceeded indicates whether CustomBindErrHandler is executed successfully. + CustomBindErrHandlerSucceeded bool } func getJobID(pod *v1.Pod) JobID { diff --git a/pkg/scheduler/cache/cache.go b/pkg/scheduler/cache/cache.go index 1eb1558287..3a9735e2ab 100644 --- a/pkg/scheduler/cache/cache.go +++ b/pkg/scheduler/cache/cache.go @@ -955,9 +955,24 @@ func (sc *SchedulerCache) processResyncTask() { return } + reSynced := false if err := sc.syncTask(task); err != nil { klog.Errorf("Failed to sync pod <%v/%v>, retry it.", task.Namespace, task.Name) sc.resyncTask(task) + reSynced = true + } + + // execute custom bind err handler call back func if exists. + if task.CustomBindErrHandler != nil && !task.CustomBindErrHandlerSucceeded { + err := task.CustomBindErrHandler() + if err != nil { + klog.ErrorS(err, "Failed to execute custom bind err handler, retry it.") + } else { + task.CustomBindErrHandlerSucceeded = true + } + if !task.CustomBindErrHandlerSucceeded && !reSynced { + sc.resyncTask(task) + } } }