From 96f88839e8c7631eef0a1beba81fd9fca65ff7e4 Mon Sep 17 00:00:00 2001 From: Kiichiro YUKAWA Date: Fri, 22 Mar 2024 12:21:21 +0900 Subject: [PATCH] Bugfix recreate benchmark job when operator reboot (#2463) * :bug: Fix recreate benchmark job already runnnig when operator rebooted Signed-off-by: vankichi * :bug: Fix benchmarkJobReconcile status handling Signed-off-by: vankichi * :white_check_mark: Fix test Signed-off-by: vankichi * :recycle: Update k8s dir Signed-off-by: vankichi * :recycle: Fix Signed-off-by: vankichi --------- Signed-off-by: vankichi --- .../crds/valdbenchmarkjob.yaml | 1 + .../crds/valdbenchmarkscenario.yaml | 1 + .../operator/crds/valdbenchmarkjob.yaml | 1 + .../operator/crds/valdbenchmarkscenario.yaml | 1 + .../benchmark/operator/service/operator.go | 70 ++++++++++--------- .../operator/service/operator_test.go | 2 +- 6 files changed, 43 insertions(+), 33 deletions(-) diff --git a/charts/vald-benchmark-operator/crds/valdbenchmarkjob.yaml b/charts/vald-benchmark-operator/crds/valdbenchmarkjob.yaml index b0aad134ae..a51277c261 100644 --- a/charts/vald-benchmark-operator/crds/valdbenchmarkjob.yaml +++ b/charts/vald-benchmark-operator/crds/valdbenchmarkjob.yaml @@ -61,6 +61,7 @@ spec: - Completed - Available - Healthy + default: Available type: string spec: type: object diff --git a/charts/vald-benchmark-operator/crds/valdbenchmarkscenario.yaml b/charts/vald-benchmark-operator/crds/valdbenchmarkscenario.yaml index 3f7492d8b3..692493433d 100644 --- a/charts/vald-benchmark-operator/crds/valdbenchmarkscenario.yaml +++ b/charts/vald-benchmark-operator/crds/valdbenchmarkscenario.yaml @@ -58,6 +58,7 @@ spec: - Completed - Available - Healthy + default: Available type: string spec: type: object diff --git a/k8s/tools/benchmark/operator/crds/valdbenchmarkjob.yaml b/k8s/tools/benchmark/operator/crds/valdbenchmarkjob.yaml index b0aad134ae..a51277c261 100644 --- a/k8s/tools/benchmark/operator/crds/valdbenchmarkjob.yaml +++ b/k8s/tools/benchmark/operator/crds/valdbenchmarkjob.yaml @@ -61,6 +61,7 @@ spec: - Completed - Available - Healthy + default: Available type: string spec: type: object diff --git a/k8s/tools/benchmark/operator/crds/valdbenchmarkscenario.yaml b/k8s/tools/benchmark/operator/crds/valdbenchmarkscenario.yaml index 3f7492d8b3..692493433d 100644 --- a/k8s/tools/benchmark/operator/crds/valdbenchmarkscenario.yaml +++ b/k8s/tools/benchmark/operator/crds/valdbenchmarkscenario.yaml @@ -58,6 +58,7 @@ spec: - Completed - Available - Healthy + default: Available type: string spec: type: object diff --git a/pkg/tools/benchmark/operator/service/operator.go b/pkg/tools/benchmark/operator/service/operator.go index fbd4a05ab8..78830b1d03 100644 --- a/pkg/tools/benchmark/operator/service/operator.go +++ b/pkg/tools/benchmark/operator/service/operator.go @@ -203,7 +203,7 @@ func (o *operator) jobReconcile(ctx context.Context, jobList map[string][]job.Jo jobNames[job.GetName()] = struct{}{} if _, ok := cjobs[job.Name]; !ok && job.Status.CompletionTime == nil { cjobs[job.GetName()] = job.Namespace - benchmarkJobStatus[job.GetName()] = v1.BenchmarkJobAvailable + benchmarkJobStatus[job.GetName()] = v1.BenchmarkJobHealthy continue } name = job.GetName() @@ -248,22 +248,22 @@ func (o *operator) benchJobReconcile(ctx context.Context, benchJobList map[strin // jobStatus is used for update benchmarkJob CR status if updating is needed. jobStatus := make(map[string]v1.BenchmarkJobStatus) for k := range benchJobList { - // update scenario status job := benchJobList[k] - hasOwner := false + // update scenario status if len(job.GetOwnerReferences()) > 0 { - hasOwner = true - } - if scenarios := o.getAtomicScenario(); scenarios != nil && hasOwner { - on := job.GetOwnerReferences()[0].Name - if _, ok := scenarios[on]; ok { - if scenarios[on].BenchJobStatus == nil { - scenarios[on].BenchJobStatus = map[string]v1.BenchmarkJobStatus{} + if scenarios := o.getAtomicScenario(); scenarios != nil { + ownerRefs := job.GetOwnerReferences() + ownerName := ownerRefs[0].Name + if _, ok := scenarios[ownerName]; ok { + if scenarios[ownerName].BenchJobStatus == nil { + scenarios[ownerName].BenchJobStatus = map[string]v1.BenchmarkJobStatus{} + } + scenarios[ownerName].BenchJobStatus[job.Name] = job.Status } - scenarios[on].BenchJobStatus[job.Name] = job.Status + o.scenarios.Store(&scenarios) } - o.scenarios.Store(&scenarios) } + // update benchmark job if oldJob := cbjl[k]; oldJob != nil { if oldJob.GetGeneration() != job.GetGeneration() { if job.Status != "" && oldJob.Status != v1.BenchmarkJobCompleted { @@ -282,13 +282,15 @@ func (o *operator) benchJobReconcile(ctx context.Context, benchJobList map[strin } else if oldJob.Status == "" { jobStatus[oldJob.GetName()] = v1.BenchmarkJobAvailable } - } else if len(job.Status) == 0 || job.Status == v1.BenchmarkJobNotReady { - log.Info("[reconcile benchmark job resource] create job: ", k) - err := o.createJob(ctx, job) - if err != nil { - log.Errorf("[reconcile benchmark job resource] failed to create job: %s", err.Error()) + } else { + if job.Status == "" || job.Status == v1.BenchmarkJobAvailable { + log.Info("[reconcile benchmark job resource] create job: ", k) + err := o.createJob(ctx, job) + if err != nil { + log.Errorf("[reconcile benchmark job resource] failed to create job: %s", err.Error()) + } + jobStatus[job.Name] = v1.BenchmarkJobHealthy } - jobStatus[job.Name] = v1.BenchmarkJobAvailable cbjl[k] = &job } } @@ -325,22 +327,26 @@ func (o *operator) benchScenarioReconcile(ctx context.Context, scenarioList map[ for name := range scenarioList { sc := scenarioList[name] if oldScenario := cbsl[name]; oldScenario == nil { - // apply new crd which is not set yet. - jobNames, err := o.createBenchmarkJob(ctx, sc) - if err != nil { - log.Errorf("[reconcile benchmark scenario resource] failed to create benchmark job resource: %s", err.Error()) - } + // init atomic values for current scenario cbsl[name] = &scenario{ Crd: &sc, - BenchJobStatus: func() map[string]v1.BenchmarkJobStatus { - s := map[string]v1.BenchmarkJobStatus{} - for _, v := range jobNames { - s[v] = v1.BenchmarkJobNotReady - } - return s - }(), } - scenarioStatus[sc.GetName()] = v1.BenchmarkScenarioHealthy + scenarioStatus[sc.GetName()] = sc.Status + // apply new crd which is not set yet. + if sc.Status == "" || sc.Status == v1.BenchmarkScenarioAvailable { + jobNames, err := o.createBenchmarkJob(ctx, sc) + if err != nil { + log.Errorf("[reconcile benchmark scenario resource] failed to create benchmark job resource: %s", err.Error()) + } + // benchmark job resource status to store benchmarkScenario Atomic + s := map[string]v1.BenchmarkJobStatus{} + for _, v := range jobNames { + s[v] = v1.BenchmarkJobNotReady + } + cbsl[name].BenchJobStatus = s + // use for updating benchmarkScenario CR status + scenarioStatus[sc.GetName()] = v1.BenchmarkScenarioHealthy + } } else { // apply updated crd which is already applied. if oldScenario.Crd.GetGeneration() < sc.GetGeneration() { @@ -606,7 +612,7 @@ func (o *operator) checkAtomics() error { return errors.ErrMismatchBenchmarkAtomics(cjl, cbjl, cbsl) } } - // check scenario and bench + // check scenario resource and bench resource if owners := bj.GetOwnerReferences(); len(owners) > 0 { var scenarioName string for _, o := range owners { diff --git a/pkg/tools/benchmark/operator/service/operator_test.go b/pkg/tools/benchmark/operator/service/operator_test.go index 6de4237312..a2c130aa30 100644 --- a/pkg/tools/benchmark/operator/service/operator_test.go +++ b/pkg/tools/benchmark/operator/service/operator_test.go @@ -1084,7 +1084,7 @@ func Test_operator_benchJobReconcile(t *testing.T) { Timestamp: "", }, }, - Status: v1.BenchmarkJobAvailable, + Status: v1.BenchmarkJobHealthy, }, }, },