Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix: recreate benchmark job when operator reboots #2463

Merged
merged 5 commits into from
Mar 22, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions charts/vald-benchmark-operator/crds/valdbenchmarkjob.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ spec:
- Completed
- Available
- Healthy
default: Available
type: string
spec:
type: object
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ spec:
- Completed
- Available
- Healthy
default: Available
type: string
spec:
type: object
Expand Down
59 changes: 32 additions & 27 deletions pkg/tools/benchmark/operator/service/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@
jobNames[job.GetName()] = struct{}{}
if _, ok := cjobs[job.Name]; !ok && job.Status.CompletionTime == nil {
cjobs[job.GetName()] = job.Namespace
benchmarkJobStatus[job.GetName()] = v1.BenchmarkJobAvailable
benchmarkJobStatus[job.GetName()] = v1.BenchmarkJobHealthy
continue
}
name = job.GetName()
Expand Down Expand Up @@ -248,22 +248,21 @@
// jobStatus is used to update the benchmarkJob CR status when an update is needed.
jobStatus := make(map[string]v1.BenchmarkJobStatus)
for k := range benchJobList {
// update scenario status
job := benchJobList[k]
hasOwner := false
// update scenario status
if len(job.GetOwnerReferences()) > 0 {
hasOwner = true
}
if scenarios := o.getAtomicScenario(); scenarios != nil && hasOwner {
on := job.GetOwnerReferences()[0].Name
if _, ok := scenarios[on]; ok {
if scenarios[on].BenchJobStatus == nil {
scenarios[on].BenchJobStatus = map[string]v1.BenchmarkJobStatus{}
if scenarios := o.getAtomicScenario(); scenarios != nil {
on := job.GetOwnerReferences()[0].Name
ykadowak marked this conversation as resolved.
Show resolved Hide resolved
if _, ok := scenarios[on]; ok {
ykadowak marked this conversation as resolved.
Show resolved Hide resolved
if scenarios[on].BenchJobStatus == nil {
ykadowak marked this conversation as resolved.
Show resolved Hide resolved
scenarios[on].BenchJobStatus = map[string]v1.BenchmarkJobStatus{}
}
scenarios[on].BenchJobStatus[job.Name] = job.Status
}
scenarios[on].BenchJobStatus[job.Name] = job.Status
o.scenarios.Store(&scenarios)
}
o.scenarios.Store(&scenarios)
}
// update benchmark job
if oldJob := cbjl[k]; oldJob != nil {
if oldJob.GetGeneration() != job.GetGeneration() {
if job.Status != "" && oldJob.Status != v1.BenchmarkJobCompleted {
Expand All @@ -282,13 +281,15 @@
} else if oldJob.Status == "" {
jobStatus[oldJob.GetName()] = v1.BenchmarkJobAvailable
}
} else if len(job.Status) == 0 || job.Status == v1.BenchmarkJobNotReady {
log.Info("[reconcile benchmark job resource] create job: ", k)
err := o.createJob(ctx, job)
if err != nil {
log.Errorf("[reconcile benchmark job resource] failed to create job: %s", err.Error())
} else {
if job.Status == "" || job.Status == v1.BenchmarkJobAvailable {
log.Info("[reconcile benchmark job resource] create job: ", k)
err := o.createJob(ctx, job)
if err != nil {
log.Errorf("[reconcile benchmark job resource] failed to create job: %s", err.Error())

Check warning on line 289 in pkg/tools/benchmark/operator/service/operator.go

View check run for this annotation

Codecov / codecov/patch

pkg/tools/benchmark/operator/service/operator.go#L289

Added line #L289 was not covered by tests
}
jobStatus[job.Name] = v1.BenchmarkJobHealthy
}
jobStatus[job.Name] = v1.BenchmarkJobAvailable
cbjl[k] = &job
}
}
Expand Down Expand Up @@ -325,22 +326,26 @@
for name := range scenarioList {
sc := scenarioList[name]
if oldScenario := cbsl[name]; oldScenario == nil {
// apply new crd which is not set yet.
jobNames, err := o.createBenchmarkJob(ctx, sc)
if err != nil {
log.Errorf("[reconcile benchmark scenario resource] failed to create benchmark job resource: %s", err.Error())
}
// init atomic values for current scenario
cbsl[name] = &scenario{
Crd: &sc,
BenchJobStatus: func() map[string]v1.BenchmarkJobStatus {
}
scenarioStatus[sc.GetName()] = sc.Status
// apply new crd which is not set yet.
if sc.Status == "" || sc.Status == v1.BenchmarkScenarioAvailable {
jobNames, err := o.createBenchmarkJob(ctx, sc)
if err != nil {
log.Errorf("[reconcile benchmark scenario resource] failed to create benchmark job resource: %s", err.Error())

Check warning on line 338 in pkg/tools/benchmark/operator/service/operator.go

View check run for this annotation

Codecov / codecov/patch

pkg/tools/benchmark/operator/service/operator.go#L338

Added line #L338 was not covered by tests
}
cbsl[name].BenchJobStatus = func() map[string]v1.BenchmarkJobStatus {
s := map[string]v1.BenchmarkJobStatus{}
for _, v := range jobNames {
s[v] = v1.BenchmarkJobNotReady
}
return s
}(),
}()
ykadowak marked this conversation as resolved.
Show resolved Hide resolved
scenarioStatus[sc.GetName()] = v1.BenchmarkScenarioHealthy
}
scenarioStatus[sc.GetName()] = v1.BenchmarkScenarioHealthy
} else {
// apply updated crd which is already applied.
if oldScenario.Crd.GetGeneration() < sc.GetGeneration() {
Expand Down Expand Up @@ -606,7 +611,7 @@
return errors.ErrMismatchBenchmarkAtomics(cjl, cbjl, cbsl)
}
}
// check scenario and bench
// check scenario resource and bench resource
if owners := bj.GetOwnerReferences(); len(owners) > 0 {
var scenarioName string
for _, o := range owners {
Expand Down
2 changes: 1 addition & 1 deletion pkg/tools/benchmark/operator/service/operator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1084,7 +1084,7 @@ func Test_operator_benchJobReconcile(t *testing.T) {
Timestamp: "",
},
},
Status: v1.BenchmarkJobAvailable,
Status: v1.BenchmarkJobHealthy,
},
},
},
Expand Down
Loading