From 1b4e684a34f4de494837f233568ca8c44cfff9ae Mon Sep 17 00:00:00 2001 From: Daniel Vega-Myhre Date: Fri, 5 May 2023 23:37:51 +0000 Subject: [PATCH 1/6] fix default success policy --- pkg/controllers/jobset_controller.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pkg/controllers/jobset_controller.go b/pkg/controllers/jobset_controller.go index f93ad4500..ebfe50c4d 100644 --- a/pkg/controllers/jobset_controller.go +++ b/pkg/controllers/jobset_controller.go @@ -111,7 +111,7 @@ func (r *JobSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr } // If all jobs have succeeded, JobSet has succeeded. - if len(ownedJobs.successful) == len(js.Spec.ReplicatedJobs) { + if len(ownedJobs.successful) == totalJobs(&js) { if err := r.ensureCondition(ctx, &js, corev1.EventTypeNormal, metav1.Condition{ Type: string(jobset.JobSetCompleted), Status: metav1.ConditionStatus(corev1.ConditionTrue), @@ -589,6 +589,14 @@ func dnsHostnamesEnabled(rjob *jobset.ReplicatedJob) bool { return rjob.Network.EnableDNSHostnames != nil && *rjob.Network.EnableDNSHostnames } +func totalJobs(js *jobset.JobSet) int { + totalJobs := 0 + for _, rjob := range js.Spec.ReplicatedJobs { + totalJobs += rjob.Replicas + } + return totalJobs +} + func concat[T any](slices ...[]T) []T { var result []T for _, slice := range slices { From 63437edea4ebec0c2497880601e48de81a6f8340 Mon Sep 17 00:00:00 2001 From: Daniel Vega-Myhre Date: Fri, 5 May 2023 23:52:03 +0000 Subject: [PATCH 2/6] add TODO --- pkg/controllers/jobset_controller.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/controllers/jobset_controller.go b/pkg/controllers/jobset_controller.go index ebfe50c4d..9040baf90 100644 --- a/pkg/controllers/jobset_controller.go +++ b/pkg/controllers/jobset_controller.go @@ -111,6 +111,7 @@ func (r *JobSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr } // If all jobs have succeeded, JobSet has succeeded. + // TODO(#120): ensure that the successful jobs counted are actually the ones that we expect JobSet to have if len(ownedJobs.successful) == totalJobs(&js) { if err := r.ensureCondition(ctx, &js, corev1.EventTypeNormal, metav1.Condition{ Type: string(jobset.JobSetCompleted), From d181fcb83dcc83edb0d08b5e85d5d21093b737c8 Mon Sep 17 00:00:00 2001 From: Daniel Vega-Myhre Date: Fri, 5 May 2023 23:56:58 +0000 Subject: [PATCH 3/6] add integration test --- .../controller/jobset_controller_test.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/integration/controller/jobset_controller_test.go b/test/integration/controller/jobset_controller_test.go index 0826f8c23..dfdf49ca9 100644 --- a/test/integration/controller/jobset_controller_test.go +++ b/test/integration/controller/jobset_controller_test.go @@ -267,6 +267,20 @@ var _ = ginkgo.Describe("JobSet controller", func() { }, }, }), + ginkgo.Entry("jobset should not succeed if any job is not completed", &testCase{ + makeJobSet: testJobSet, + updates: []*update{ + { + jobUpdateFn: func(jobList *batchv1.JobList) { + ginkgo.By("completing all but 1 job") + for i := 0; i < len(jobList.Items)-1; i++ { + completeJob(&jobList.Items[i]) + } + }, + expectedJobSetCondition: "", // active + }, + }, + }), ginkgo.Entry("jobset with no failure policy should fail if any jobs fail", &testCase{ makeJobSet: testJobSet, updates: []*update{ From ea48c37db642bd204493655cf58c8431fd2c3114 Mon Sep 17 00:00:00 2001 From: Daniel Vega-Myhre Date: Sat, 6 May 2023 17:26:22 +0000 Subject: [PATCH 4/6] update integrationt tests --- .../controller/jobset_controller_test.go | 99 ++++++++++++++----- test/util/util.go | 13 ++- 2 files changed, 81 insertions(+), 31 deletions(-) diff --git a/test/integration/controller/jobset_controller_test.go b/test/integration/controller/jobset_controller_test.go index dfdf49ca9..4c7e80e3a 100644 --- a/test/integration/controller/jobset_controller_test.go +++ b/test/integration/controller/jobset_controller_test.go @@ -176,7 +176,7 @@ var _ = ginkgo.Describe("JobSet controller", func() { // Create test namespace before each test. ns = &corev1.Namespace{ ObjectMeta: metav1.ObjectMeta{ - GenerateName: "test-ns-", + GenerateName: "jobset-ns-", }, } gomega.Expect(k8sClient.Create(ctx, ns)).To(gomega.Succeed()) @@ -199,11 +199,11 @@ var _ = ginkgo.Describe("JobSet controller", func() { // update contains the mutations to perform on the jobs/jobset and the // checks to perform afterwards. type update struct { - jobSetUpdateFn func(*jobset.JobSet) - jobUpdateFn func(*batchv1.JobList) - checkJobSetState func(*jobset.JobSet) - checkJobState func(*jobset.JobSet) - expectedJobSetCondition jobset.JobSetConditionType + jobSetUpdateFn func(*jobset.JobSet) + jobUpdateFn func(*batchv1.JobList) + checkJobSetState func(*jobset.JobSet) + checkJobState func(*jobset.JobSet) + checkJobSetCondition func(*jobset.JobSet) } type testCase struct { @@ -252,9 +252,8 @@ var _ = ginkgo.Describe("JobSet controller", func() { } // Check jobset status if specified. - if up.expectedJobSetCondition != "" { - ginkgo.By(fmt.Sprintf("checking jobset status is: %s", up.expectedJobSetCondition)) - gomega.Eventually(util.CheckJobSetStatus, timeout, interval).WithArguments(ctx, k8sClient, js, up.expectedJobSetCondition).Should(gomega.Equal(true)) + if up.checkJobSetCondition != nil { + up.checkJobSetCondition(&jobSet) } } }, @@ -262,8 +261,8 @@ var _ = ginkgo.Describe("JobSet controller", func() { makeJobSet: testJobSet, updates: []*update{ { - jobUpdateFn: completeAllJobs, - expectedJobSetCondition: jobset.JobSetCompleted, + jobUpdateFn: completeAllJobs, + checkJobSetCondition: jobSetCompleted, }, }, }), @@ -277,7 +276,10 @@ var _ = ginkgo.Describe("JobSet controller", func() { completeJob(&jobList.Items[i]) } }, - expectedJobSetCondition: "", // active + checkJobSetCondition: func(js *jobset.JobSet) { + ginkgo.By(fmt.Sprintf(`checking jobset status is active`)) + gomega.Consistently(util.CheckJobSetStatus, timeout, interval).WithArguments(ctx, k8sClient, js, []metav1.Condition{}).Should(gomega.Equal(true)) + }, }, }, }), @@ -288,7 +290,7 @@ var _ = ginkgo.Describe("JobSet controller", func() { jobUpdateFn: func(jobList *batchv1.JobList) { failJob(&jobList.Items[0]) }, - expectedJobSetCondition: jobset.JobSetFailed, + checkJobSetCondition: jobSetFailed, }, }, }), @@ -299,8 +301,8 @@ var _ = ginkgo.Describe("JobSet controller", func() { checkJobSetState: checkExpectedServices, }, { - jobUpdateFn: completeAllJobs, - expectedJobSetCondition: jobset.JobSetCompleted, + jobUpdateFn: completeAllJobs, + checkJobSetCondition: jobSetCompleted, }, }, }), @@ -311,8 +313,8 @@ var _ = ginkgo.Describe("JobSet controller", func() { checkJobSetState: checkExpectedServices, }, { - jobUpdateFn: completeAllJobs, - expectedJobSetCondition: jobset.JobSetCompleted, + jobUpdateFn: completeAllJobs, + checkJobSetCondition: jobSetCompleted, }, }, }), @@ -326,7 +328,7 @@ var _ = ginkgo.Describe("JobSet controller", func() { jobUpdateFn: func(jobList *batchv1.JobList) { failJob(&jobList.Items[0]) }, - expectedJobSetCondition: jobset.JobSetFailed, + checkJobSetCondition: jobSetFailed, }, }, }), @@ -351,7 +353,7 @@ var _ = ginkgo.Describe("JobSet controller", func() { jobUpdateFn: func(jobList *batchv1.JobList) { failJob(&jobList.Items[1]) }, - expectedJobSetCondition: jobset.JobSetFailed, + checkJobSetCondition: jobSetFailed, }, }, }), @@ -374,8 +376,8 @@ var _ = ginkgo.Describe("JobSet controller", func() { }, }, { - jobUpdateFn: completeAllJobs, - expectedJobSetCondition: jobset.JobSetCompleted, + jobUpdateFn: completeAllJobs, + checkJobSetCondition: jobSetCompleted, }, }, }), @@ -386,11 +388,11 @@ var _ = ginkgo.Describe("JobSet controller", func() { }, updates: []*update{ { - expectedJobSetCondition: jobset.JobSetSuspended, checkJobSetState: func(js *jobset.JobSet) { ginkgo.By("checking all jobs are suspended") gomega.Eventually(matchJobsSuspendState, timeout, interval).WithArguments(js, true).Should(gomega.Equal(true)) }, + checkJobSetCondition: jobSetSuspended, }, }, }), @@ -400,11 +402,11 @@ var _ = ginkgo.Describe("JobSet controller", func() { }, updates: []*update{ { - expectedJobSetCondition: jobset.JobSetSuspended, checkJobSetState: func(js *jobset.JobSet) { ginkgo.By("checking all jobs are suspended") gomega.Eventually(matchJobsSuspendState, timeout, interval).WithArguments(js, true).Should(gomega.Equal(true)) }, + checkJobSetCondition: jobSetSuspended, }, { jobSetUpdateFn: func(js *jobset.JobSet) { @@ -414,10 +416,11 @@ var _ = ginkgo.Describe("JobSet controller", func() { ginkgo.By("checking all jobs are resumed") gomega.Eventually(matchJobsSuspendState, timeout, interval).WithArguments(js, false).Should(gomega.Equal(true)) }, + checkJobSetCondition: jobSetResumed, }, { - jobUpdateFn: completeAllJobs, - expectedJobSetCondition: jobset.JobSetCompleted, + jobUpdateFn: completeAllJobs, + checkJobSetCondition: jobSetCompleted, }, }, }), @@ -436,11 +439,11 @@ var _ = ginkgo.Describe("JobSet controller", func() { jobSetUpdateFn: func(js *jobset.JobSet) { suspendJobSet(js, true) }, - expectedJobSetCondition: jobset.JobSetSuspended, checkJobSetState: func(js *jobset.JobSet) { ginkgo.By("checking all jobs are suspended") gomega.Eventually(matchJobsSuspendState, timeout, interval).WithArguments(js, true).Should(gomega.Equal(true)) }, + checkJobSetCondition: jobSetSuspended, }, }, }), @@ -555,6 +558,50 @@ func checkExpectedServices(js *jobset.JobSet) { }).Should(gomega.Equal(numExpectedServices(js))) } +func jobSetCompleted(js *jobset.JobSet) { + ginkgo.By(fmt.Sprintf("checking jobset status is: %s", jobset.JobSetCompleted)) + conditions := []metav1.Condition{ + { + Type: string(jobset.JobSetCompleted), + Status: metav1.ConditionTrue, + }, + } + gomega.Eventually(util.CheckJobSetStatus, timeout, interval).WithArguments(ctx, k8sClient, js, conditions).Should(gomega.Equal(true)) +} + +func jobSetFailed(js *jobset.JobSet) { + ginkgo.By(fmt.Sprintf("checking jobset status is: %s", jobset.JobSetFailed)) + conditions := []metav1.Condition{ + { + Type: string(jobset.JobSetFailed), + Status: metav1.ConditionTrue, + }, + } + gomega.Eventually(util.CheckJobSetStatus, timeout, interval).WithArguments(ctx, k8sClient, js, conditions).Should(gomega.Equal(true)) +} + +func jobSetSuspended(js *jobset.JobSet) { + ginkgo.By(fmt.Sprintf("checking jobset status is: %s", jobset.JobSetSuspended)) + conditions := []metav1.Condition{ + { + Type: string(jobset.JobSetSuspended), + Status: metav1.ConditionTrue, + }, + } + gomega.Eventually(util.CheckJobSetStatus, timeout, interval).WithArguments(ctx, k8sClient, js, conditions).Should(gomega.Equal(true)) +} + +func jobSetResumed(js *jobset.JobSet) { + ginkgo.By("checking jobset status is resumed") + conditions := []metav1.Condition{ + { + Type: string(jobset.JobSetSuspended), + Status: metav1.ConditionFalse, + }, + } + gomega.Eventually(util.CheckJobSetStatus, timeout, interval).WithArguments(ctx, k8sClient, js, conditions).Should(gomega.Equal(true)) +} + // 2 replicated jobs: // - one with 1 replica // - one with 3 replicas and DNS hostnames enabled diff --git a/test/util/util.go b/test/util/util.go index 496f00cbc..03ff9fe66 100644 --- a/test/util/util.go +++ b/test/util/util.go @@ -17,23 +17,26 @@ import ( "context" batchv1 "k8s.io/api/batch/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" jobset "sigs.k8s.io/jobset/api/v1alpha1" ) -func CheckJobSetStatus(ctx context.Context, k8sClient client.Client, js *jobset.JobSet, condition jobset.JobSetConditionType) (bool, error) { +func CheckJobSetStatus(ctx context.Context, k8sClient client.Client, js *jobset.JobSet, conditions []metav1.Condition) (bool, error) { var fetchedJS jobset.JobSet if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: js.Namespace, Name: js.Name}, &fetchedJS); err != nil { return false, err } - for _, c := range fetchedJS.Status.Conditions { - if c.Type == string(condition) { - return true, nil + for _, want := range conditions { + for _, c := range fetchedJS.Status.Conditions { + if c.Type == want.Type && c.Status == want.Status { + return true, nil + } } } - return false, nil + return len(conditions) == len(fetchedJS.Status.Conditions), nil } func NumExpectedJobs(js *jobset.JobSet) int { From a295a9d6d2e4345bd384e8ca9177098b63376598 Mon Sep 17 00:00:00 2001 From: Daniel Vega-Myhre Date: Sat, 6 May 2023 18:04:36 +0000 Subject: [PATCH 5/6] refactor e2e --- test/e2e/e2e_test.go | 11 ++- .../controller/jobset_controller_test.go | 79 ++++------------- test/util/util.go | 85 +++++++++++++++---- 3 files changed, 94 insertions(+), 81 deletions(-) diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index c0e5a75c1..201f1d6a1 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -75,11 +75,11 @@ var _ = ginkgo.Describe("JobSet", func() { gomega.Eventually(k8sClient.Get(ctx, types.NamespacedName{Name: js.Name, Namespace: js.Namespace}, &jobset.JobSet{}), timeout, interval).Should(gomega.Succeed()) ginkgo.By("checking all jobs were created successfully") - gomega.Eventually(util.CheckNumJobs, timeout, interval).WithArguments(ctx, k8sClient, js).Should(gomega.Equal(util.NumExpectedJobs(js))) + gomega.Eventually(util.NumJobs, timeout, interval).WithArguments(ctx, k8sClient, js).Should(gomega.Equal(util.NumExpectedJobs(js))) // Check jobset status if specified. ginkgo.By("checking jobset condition") - gomega.Eventually(util.CheckJobSetStatus, timeout, interval).WithArguments(ctx, k8sClient, js, jobset.JobSetCompleted).Should(gomega.Equal(true)) + util.JobSetCompleted(ctx, k8sClient, js, timeout) }) }) }) // end of Describe @@ -96,6 +96,10 @@ func pingTestJobSet(ns *corev1.Namespace) *testing.JobSetWrapper { podHostnames = append(podHostnames, fmt.Sprintf("%s-%s-%d-0.%s-%s", jsName, rjobName, jobIdx, jsName, rjobName)) } + // This bash script loops infinitely until it successfully pings all pods by hostname. + // Once successful, it sleeps for 5 seconds to reduce flakiness, since occasionally + // all pods but one will successfully ping eachother and complete before the last one + // successfully pings them all, resulting in a failed test run. cmd := fmt.Sprintf(`for pod in {"%s","%s","%s","%s"} do gotStatus="-1" @@ -110,7 +114,8 @@ do fi done echo "Successfully pinged pod: $pod" -done`, podHostnames[0], podHostnames[1], podHostnames[2], podHostnames[3]) +done +sleep 5`, podHostnames[0], podHostnames[1], podHostnames[2], podHostnames[3]) return testing.MakeJobSet(jsName, ns.Name). ReplicatedJob(testing.MakeReplicatedJob(rjobName). diff --git a/test/integration/controller/jobset_controller_test.go b/test/integration/controller/jobset_controller_test.go index 4c7e80e3a..151dd0b31 100644 --- a/test/integration/controller/jobset_controller_test.go +++ b/test/integration/controller/jobset_controller_test.go @@ -203,7 +203,7 @@ var _ = ginkgo.Describe("JobSet controller", func() { jobUpdateFn func(*batchv1.JobList) checkJobSetState func(*jobset.JobSet) checkJobState func(*jobset.JobSet) - checkJobSetCondition func(*jobset.JobSet) + checkJobSetCondition func(context.Context, client.Client, *jobset.JobSet, time.Duration) } type testCase struct { @@ -228,7 +228,7 @@ var _ = ginkgo.Describe("JobSet controller", func() { gomega.Eventually(k8sClient.Get(ctx, types.NamespacedName{Name: js.Name, Namespace: js.Namespace}, &jobset.JobSet{}), timeout, interval).Should(gomega.Succeed()) ginkgo.By("checking all jobs were created successfully") - gomega.Eventually(util.CheckNumJobs, timeout, interval).WithArguments(ctx, k8sClient, js).Should(gomega.Equal(util.NumExpectedJobs(js))) + gomega.Eventually(util.NumJobs, timeout, interval).WithArguments(ctx, k8sClient, js).Should(gomega.Equal(util.NumExpectedJobs(js))) // Perform a series of updates to jobset resources and check resulting jobset state after each update. for _, up := range tc.updates { @@ -253,7 +253,7 @@ var _ = ginkgo.Describe("JobSet controller", func() { // Check jobset status if specified. if up.checkJobSetCondition != nil { - up.checkJobSetCondition(&jobSet) + up.checkJobSetCondition(ctx, k8sClient, &jobSet, timeout) } } }, @@ -262,7 +262,7 @@ var _ = ginkgo.Describe("JobSet controller", func() { updates: []*update{ { jobUpdateFn: completeAllJobs, - checkJobSetCondition: jobSetCompleted, + checkJobSetCondition: util.JobSetCompleted, }, }, }), @@ -276,10 +276,7 @@ var _ = ginkgo.Describe("JobSet controller", func() { completeJob(&jobList.Items[i]) } }, - checkJobSetCondition: func(js *jobset.JobSet) { - ginkgo.By(fmt.Sprintf(`checking jobset status is active`)) - gomega.Consistently(util.CheckJobSetStatus, timeout, interval).WithArguments(ctx, k8sClient, js, []metav1.Condition{}).Should(gomega.Equal(true)) - }, + checkJobSetCondition: util.JobSetActive, }, }, }), @@ -290,7 +287,7 @@ var _ = ginkgo.Describe("JobSet controller", func() { jobUpdateFn: func(jobList *batchv1.JobList) { failJob(&jobList.Items[0]) }, - checkJobSetCondition: jobSetFailed, + checkJobSetCondition: util.JobSetFailed, }, }, }), @@ -302,7 +299,7 @@ var _ = ginkgo.Describe("JobSet controller", func() { }, { jobUpdateFn: completeAllJobs, - checkJobSetCondition: jobSetCompleted, + checkJobSetCondition: util.JobSetCompleted, }, }, }), @@ -314,7 +311,7 @@ var _ = ginkgo.Describe("JobSet controller", func() { }, { jobUpdateFn: completeAllJobs, - checkJobSetCondition: jobSetCompleted, + checkJobSetCondition: util.JobSetCompleted, }, }, }), @@ -328,7 +325,7 @@ var _ = ginkgo.Describe("JobSet controller", func() { jobUpdateFn: func(jobList *batchv1.JobList) { failJob(&jobList.Items[0]) }, - checkJobSetCondition: jobSetFailed, + checkJobSetCondition: util.JobSetFailed, }, }, }), @@ -353,7 +350,7 @@ var _ = ginkgo.Describe("JobSet controller", func() { jobUpdateFn: func(jobList *batchv1.JobList) { failJob(&jobList.Items[1]) }, - checkJobSetCondition: jobSetFailed, + checkJobSetCondition: util.JobSetFailed, }, }, }), @@ -377,7 +374,7 @@ var _ = ginkgo.Describe("JobSet controller", func() { }, { jobUpdateFn: completeAllJobs, - checkJobSetCondition: jobSetCompleted, + checkJobSetCondition: util.JobSetCompleted, }, }, }), @@ -392,7 +389,7 @@ var _ = ginkgo.Describe("JobSet controller", func() { ginkgo.By("checking all jobs are suspended") gomega.Eventually(matchJobsSuspendState, timeout, interval).WithArguments(js, true).Should(gomega.Equal(true)) }, - checkJobSetCondition: jobSetSuspended, + checkJobSetCondition: util.JobSetSuspended, }, }, }), @@ -406,7 +403,7 @@ var _ = ginkgo.Describe("JobSet controller", func() { ginkgo.By("checking all jobs are suspended") gomega.Eventually(matchJobsSuspendState, timeout, interval).WithArguments(js, true).Should(gomega.Equal(true)) }, - checkJobSetCondition: jobSetSuspended, + checkJobSetCondition: util.JobSetSuspended, }, { jobSetUpdateFn: func(js *jobset.JobSet) { @@ -416,11 +413,11 @@ var _ = ginkgo.Describe("JobSet controller", func() { ginkgo.By("checking all jobs are resumed") gomega.Eventually(matchJobsSuspendState, timeout, interval).WithArguments(js, false).Should(gomega.Equal(true)) }, - checkJobSetCondition: jobSetResumed, + checkJobSetCondition: util.JobSetResumed, }, { jobUpdateFn: completeAllJobs, - checkJobSetCondition: jobSetCompleted, + checkJobSetCondition: util.JobSetCompleted, }, }, }), @@ -443,7 +440,7 @@ var _ = ginkgo.Describe("JobSet controller", func() { ginkgo.By("checking all jobs are suspended") gomega.Eventually(matchJobsSuspendState, timeout, interval).WithArguments(js, true).Should(gomega.Equal(true)) }, - checkJobSetCondition: jobSetSuspended, + checkJobSetCondition: util.JobSetSuspended, }, }, }), @@ -558,50 +555,6 @@ func checkExpectedServices(js *jobset.JobSet) { }).Should(gomega.Equal(numExpectedServices(js))) } -func jobSetCompleted(js *jobset.JobSet) { - ginkgo.By(fmt.Sprintf("checking jobset status is: %s", jobset.JobSetCompleted)) - conditions := []metav1.Condition{ - { - Type: string(jobset.JobSetCompleted), - Status: metav1.ConditionTrue, - }, - } - gomega.Eventually(util.CheckJobSetStatus, timeout, interval).WithArguments(ctx, k8sClient, js, conditions).Should(gomega.Equal(true)) -} - -func jobSetFailed(js *jobset.JobSet) { - ginkgo.By(fmt.Sprintf("checking jobset status is: %s", jobset.JobSetFailed)) - conditions := []metav1.Condition{ - { - Type: string(jobset.JobSetFailed), - Status: metav1.ConditionTrue, - }, - } - gomega.Eventually(util.CheckJobSetStatus, timeout, interval).WithArguments(ctx, k8sClient, js, conditions).Should(gomega.Equal(true)) -} - -func jobSetSuspended(js *jobset.JobSet) { - ginkgo.By(fmt.Sprintf("checking jobset status is: %s", jobset.JobSetSuspended)) - conditions := []metav1.Condition{ - { - Type: string(jobset.JobSetSuspended), - Status: metav1.ConditionTrue, - }, - } - gomega.Eventually(util.CheckJobSetStatus, timeout, interval).WithArguments(ctx, k8sClient, js, conditions).Should(gomega.Equal(true)) -} - -func jobSetResumed(js *jobset.JobSet) { - ginkgo.By("checking jobset status is resumed") - conditions := []metav1.Condition{ - { - Type: string(jobset.JobSetSuspended), - Status: metav1.ConditionFalse, - }, - } - gomega.Eventually(util.CheckJobSetStatus, timeout, interval).WithArguments(ctx, k8sClient, js, conditions).Should(gomega.Equal(true)) -} - // 2 replicated jobs: // - one with 1 replica // - one with 3 replicas and DNS hostnames enabled diff --git a/test/util/util.go b/test/util/util.go index 03ff9fe66..c760de2c3 100644 --- a/test/util/util.go +++ b/test/util/util.go @@ -15,7 +15,11 @@ package util import ( "context" + "fmt" + "time" + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" batchv1 "k8s.io/api/batch/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" @@ -24,20 +28,7 @@ import ( jobset "sigs.k8s.io/jobset/api/v1alpha1" ) -func CheckJobSetStatus(ctx context.Context, k8sClient client.Client, js *jobset.JobSet, conditions []metav1.Condition) (bool, error) { - var fetchedJS jobset.JobSet - if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: js.Namespace, Name: js.Name}, &fetchedJS); err != nil { - return false, err - } - for _, want := range conditions { - for _, c := range fetchedJS.Status.Conditions { - if c.Type == want.Type && c.Status == want.Status { - return true, nil - } - } - } - return len(conditions) == len(fetchedJS.Status.Conditions), nil -} +const interval = time.Millisecond * 250 func NumExpectedJobs(js *jobset.JobSet) int { expectedJobs := 0 @@ -47,10 +38,74 @@ func NumExpectedJobs(js *jobset.JobSet) int { return expectedJobs } -func CheckNumJobs(ctx context.Context, k8sClient client.Client, js *jobset.JobSet) (int, error) { +func NumJobs(ctx context.Context, k8sClient client.Client, js *jobset.JobSet) (int, error) { var jobList batchv1.JobList if err := k8sClient.List(ctx, &jobList, client.InNamespace(js.Namespace)); err != nil { return -1, err } return len(jobList.Items), nil } + +func JobSetCompleted(ctx context.Context, k8sClient client.Client, js *jobset.JobSet, timeout time.Duration) { + ginkgo.By(fmt.Sprintf("checking jobset status is: %s", jobset.JobSetCompleted)) + conditions := []metav1.Condition{ + { + Type: string(jobset.JobSetCompleted), + Status: metav1.ConditionTrue, + }, + } + gomega.Eventually(checkJobSetStatus, timeout, interval).WithArguments(ctx, k8sClient, js, conditions).Should(gomega.Equal(true)) +} + +func JobSetFailed(ctx context.Context, k8sClient client.Client, js *jobset.JobSet, timeout time.Duration) { + ginkgo.By(fmt.Sprintf("checking jobset status is: %s", jobset.JobSetFailed)) + conditions := []metav1.Condition{ + { + Type: string(jobset.JobSetFailed), + Status: metav1.ConditionTrue, + }, + } + gomega.Eventually(checkJobSetStatus, timeout, interval).WithArguments(ctx, k8sClient, js, conditions).Should(gomega.Equal(true)) +} + +func JobSetSuspended(ctx context.Context, k8sClient client.Client, js *jobset.JobSet, timeout time.Duration) { + ginkgo.By(fmt.Sprintf("checking jobset status is: %s", jobset.JobSetSuspended)) + conditions := []metav1.Condition{ + { + Type: string(jobset.JobSetSuspended), + Status: metav1.ConditionTrue, + }, + } + gomega.Eventually(checkJobSetStatus, timeout, interval).WithArguments(ctx, k8sClient, js, conditions).Should(gomega.Equal(true)) +} + +func JobSetResumed(ctx context.Context, k8sClient client.Client, js *jobset.JobSet, timeout time.Duration) { + ginkgo.By("checking jobset status is resumed") + conditions := []metav1.Condition{ + { + Type: string(jobset.JobSetSuspended), + Status: metav1.ConditionFalse, + }, + } + gomega.Eventually(checkJobSetStatus, timeout, interval).WithArguments(ctx, k8sClient, js, conditions).Should(gomega.Equal(true)) +} + +func JobSetActive(ctx context.Context, k8sClient client.Client, js *jobset.JobSet, timeout time.Duration) { + ginkgo.By("checking jobset status is active") + gomega.Consistently(checkJobSetStatus, timeout, interval).WithArguments(ctx, k8sClient, js, []metav1.Condition{}).Should(gomega.Equal(true)) +} + +func checkJobSetStatus(ctx context.Context, k8sClient client.Client, js *jobset.JobSet, conditions []metav1.Condition) (bool, error) { + var fetchedJS jobset.JobSet + if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: js.Namespace, Name: js.Name}, &fetchedJS); err != nil { + return false, err + } + for _, want := range conditions { + for _, c := range fetchedJS.Status.Conditions { + if c.Type == want.Type && c.Status == want.Status { + return true, nil + } + } + } + return len(conditions) == len(fetchedJS.Status.Conditions), nil +} From c176aa7176c3eba29ad8969653620bf29333ea3f Mon Sep 17 00:00:00 2001 From: Daniel Vega-Myhre Date: Sat, 6 May 2023 18:11:52 +0000 Subject: [PATCH 6/6] fix checkJobSetStatus --- test/util/util.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/util/util.go b/test/util/util.go index c760de2c3..65de801cf 100644 --- a/test/util/util.go +++ b/test/util/util.go @@ -100,12 +100,13 @@ func checkJobSetStatus(ctx context.Context, k8sClient client.Client, js *jobset. if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: js.Namespace, Name: js.Name}, &fetchedJS); err != nil { return false, err } + found := 0 for _, want := range conditions { for _, c := range fetchedJS.Status.Conditions { if c.Type == want.Type && c.Status == want.Status { - return true, nil + found += 1 } } } - return len(conditions) == len(fetchedJS.Status.Conditions), nil + return found == len(conditions), nil }