Skip to content

Commit

Permalink
Add global-job-replicas label/annotation (#677)
Browse files Browse the repository at this point in the history
* Add global-job-replicas label/annotation

* Change the name to global replicas
  • Loading branch information
GiuseppeTT authored Sep 18, 2024
1 parent 0f08912 commit ef7f910
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 0 deletions.
4 changes: 4 additions & 0 deletions api/jobset/v1alpha2/jobset_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ import (
const (
JobSetNameKey string = "jobset.sigs.k8s.io/jobset-name"
ReplicatedJobReplicas string = "jobset.sigs.k8s.io/replicatedjob-replicas"
// GlobalReplicasKey is a label/annotation set to the total number of replicatedJob replicas in the JobSet.
// For each JobSet, this value is the sum of jobset.spec.replicatedJobs[*].replicas
// taken over all of its replicatedJobs.
GlobalReplicasKey string = "jobset.sigs.k8s.io/global-replicas"
// ReplicatedJobNameKey is used to index into a Jobs labels and retrieve the name of the parent ReplicatedJob
ReplicatedJobNameKey string = "jobset.sigs.k8s.io/replicatedjob-name"
// JobIndexKey is a label/annotation set to the index of the Job replica within its parent replicatedJob.
Expand Down
11 changes: 11 additions & 0 deletions pkg/controllers/jobset_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -728,6 +728,7 @@ func labelAndAnnotateObject(obj metav1.Object, js *jobset.JobSet, rjob *jobset.R
labels[jobset.ReplicatedJobNameKey] = rjob.Name
labels[constants.RestartsKey] = strconv.Itoa(int(js.Status.Restarts))
labels[jobset.ReplicatedJobReplicas] = strconv.Itoa(int(rjob.Replicas))
labels[jobset.GlobalReplicasKey] = globalReplicas(js)
labels[jobset.JobIndexKey] = strconv.Itoa(jobIdx)
labels[jobset.JobKey] = jobHashKey(js.Namespace, jobName)
labels[jobset.JobGlobalIndexKey] = globalJobIndex(js, rjob.Name, jobIdx)
Expand All @@ -738,6 +739,7 @@ func labelAndAnnotateObject(obj metav1.Object, js *jobset.JobSet, rjob *jobset.R
annotations[jobset.ReplicatedJobNameKey] = rjob.Name
annotations[constants.RestartsKey] = strconv.Itoa(int(js.Status.Restarts))
annotations[jobset.ReplicatedJobReplicas] = strconv.Itoa(int(rjob.Replicas))
annotations[jobset.GlobalReplicasKey] = globalReplicas(js)
annotations[jobset.JobIndexKey] = strconv.Itoa(jobIdx)
annotations[jobset.JobKey] = jobHashKey(js.Namespace, jobName)
annotations[jobset.JobGlobalIndexKey] = globalJobIndex(js, rjob.Name, jobIdx)
Expand Down Expand Up @@ -1063,3 +1065,12 @@ func globalJobIndex(js *jobset.JobSet, replicatedJobName string, jobIdx int) str
}
return ""
}

// globalReplicas sums the Replicas field of every entry in
// js.Spec.ReplicatedJobs and returns that total as a base-10 string.
// A JobSet with no replicated jobs yields "0".
func globalReplicas(js *jobset.JobSet) string {
	rjobs := js.Spec.ReplicatedJobs
	total := 0
	for i := range rjobs {
		total += int(rjobs[i].Replicas)
	}
	return strconv.Itoa(total)
}
67 changes: 67 additions & 0 deletions pkg/controllers/jobset_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,7 @@ func TestConstructJobsFromTemplate(t *testing.T) {
// full JobSet spec to calculate a unique ID for each Job.
for _, expectedJob := range tc.want {
addJobGlobalIndex(t, tc.js, expectedJob)
addGlobalReplicas(t, tc.js, expectedJob)
}

// Now get the actual output of constructJobsFromTemplate, and diff the results.
Expand Down Expand Up @@ -728,6 +729,18 @@ func addJobGlobalIndex(t *testing.T, js *jobset.JobSet, job *batchv1.Job) {
job.Spec.Template.Annotations[jobset.JobGlobalIndexKey] = globalJobIndex(js, rjobName, jobIdx)
}

// addGlobalReplicas is a test helper that writes the global-replicas value
// computed from js under jobset.GlobalReplicasKey into the labels and
// annotations of job and of job's pod template.
func addGlobalReplicas(t *testing.T, js *jobset.JobSet, job *batchv1.Job) {
	t.Helper()

	value := globalReplicas(js)

	// Order matches the original assignments: Job labels, Job annotations,
	// then the pod template's labels and annotations.
	targets := []map[string]string{
		job.Labels,
		job.Annotations,
		job.Spec.Template.Labels,
		job.Spec.Template.Annotations,
	}
	for _, target := range targets {
		target[jobset.GlobalReplicasKey] = value
	}
}

func TestUpdateConditions(t *testing.T) {
var (
jobSetName = "test-jobset"
Expand Down Expand Up @@ -1471,3 +1484,57 @@ func TestGlobalJobIndex(t *testing.T) {
})
}
}

// TestGlobalReplicas verifies that globalReplicas returns the string form of
// the summed replica counts for a JobSet with zero, one, and several
// replicated jobs.
func TestGlobalReplicas(t *testing.T) {
	type testCase struct {
		name string
		js   *jobset.JobSet
		want string
	}

	// newJobSet wraps the given replicated jobs in an otherwise empty JobSet.
	newJobSet := func(rjobs ...jobset.ReplicatedJob) *jobset.JobSet {
		return &jobset.JobSet{
			Spec: jobset.JobSetSpec{ReplicatedJobs: rjobs},
		}
	}

	cases := []testCase{
		{
			name: "empty jobset",
			js:   &jobset.JobSet{},
			want: "0",
		},
		{
			name: "single replicated job",
			js:   newJobSet(jobset.ReplicatedJob{Replicas: 3}),
			want: "3",
		},
		{
			name: "multiple replicated jobs",
			js: newJobSet(
				jobset.ReplicatedJob{Replicas: 2},
				jobset.ReplicatedJob{Replicas: 5},
			),
			want: "7",
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			got := globalReplicas(c.js)
			diff := cmp.Diff(c.want, got)
			if diff == "" {
				return
			}
			t.Errorf("unexpected global replicas (-want/+got): %s", diff)
		})
	}
}

0 comments on commit ef7f910

Please sign in to comment.