Skip to content

Commit

Permalink
Remove Info from Initializer builder
Browse files Browse the repository at this point in the history
Signed-off-by: Andrey Velichkevich <andrey.velichkevich@gmail.com>
  • Loading branch information
andreyvelich committed Nov 1, 2024
1 parent ba9e215 commit 58d563d
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 12 deletions.
2 changes: 1 addition & 1 deletion pkg/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ var (
// JobCompletionIndexFieldPath is the field path for the Job completion index annotation.
JobCompletionIndexFieldPath string = fmt.Sprintf("metadata.annotations['%s']", batchv1.JobCompletionIndexAnnotation)

// This is temp container that we use in the initializer ReplicatedJob.
// This is the temporary container that we use in the initializer ReplicatedJob.
// TODO (andreyvelich): Once JobSet supports execution policy, we can remove it.
// TODO (andreyvelich): Add validation to check that initializer ReplicatedJob has this container.
ContainerBusyBox corev1.Container = corev1.Container{
Expand Down
4 changes: 2 additions & 2 deletions pkg/runtime.v2/core/trainingruntime_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ func TestTrainingRuntimeNewObjects(t *testing.T) {
MinMember(31). // 31 replicas = 30 Trainer nodes + 1 Initializer.
MinResources(corev1.ResourceList{
// Every replica has 1 CPU = 31 CPUs in total.
// Since initializers use init containers, they execute sequentially.
// MinResources is equal to the maximum from the initContainer resources.
// Initializer uses InitContainers which execute sequentially.
// Thus, the MinResources is equal to the maximum from the initContainer resources.
corev1.ResourceCPU: resource.MustParse("31"),
}).
SchedulingTimeout(120).
Expand Down
14 changes: 6 additions & 8 deletions pkg/runtime.v2/framework/plugins/jobset/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func NewBuilder(objectKey client.ObjectKey, jobSetTemplateSpec kubeflowv2.JobSet
}
}

// mergeInitializerEnvs merge the TrainJob and Runtime Pod envs.
// mergeInitializerEnvs merges the TrainJob and Runtime Pod envs.
func mergeInitializerEnvs(storageUri *string, trainJobEnvs, containerEnv []corev1.EnvVar) []corev1.EnvVar {
envNames := sets.New[string]()
envs := []corev1.EnvVar{}
Expand All @@ -66,11 +66,9 @@ func mergeInitializerEnvs(storageUri *string, trainJobEnvs, containerEnv []corev
}
// Add the rest TrainJob envs.
// TODO (andreyvelich): Validate that TrainJob dataset and model envs don't have the STORAGE_URI env.
if trainJobEnvs != nil {
for _, e := range trainJobEnvs {
envNames.Insert(e.Name)
envs = append(envs, e)
}
for _, e := range trainJobEnvs {
envNames.Insert(e.Name)
envs = append(envs, e)
}

// TrainJob envs take precedence over the TrainingRuntime envs.
Expand All @@ -83,7 +81,7 @@ func mergeInitializerEnvs(storageUri *string, trainJobEnvs, containerEnv []corev
}

// Initializer updates JobSet values for the initializer Job.
func (b *Builder) Initializer(info *runtime.Info, trainJob *kubeflowv2.TrainJob) *Builder {
func (b *Builder) Initializer(trainJob *kubeflowv2.TrainJob) *Builder {
for i, rJob := range b.Spec.ReplicatedJobs {
if rJob.Name == constants.JobInitializer {
// TODO (andreyvelich): Currently, we use initContainers for the initializers.
Expand All @@ -110,7 +108,7 @@ func (b *Builder) Initializer(info *runtime.Info, trainJob *kubeflowv2.TrainJob)
)
}
}
// TODO (andreyvelich): Add support for the model exporter when we support it.
// TODO (andreyvelich): Add the model exporter when we support it.
// Update values for the model initializer container.
if container.Name == constants.ContainerModelInitializer && trainJob.Spec.ModelConfig != nil && trainJob.Spec.ModelConfig.Input != nil {
// Update the model initializer envs.
Expand Down
2 changes: 1 addition & 1 deletion pkg/runtime.v2/framework/plugins/jobset/jobset.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ func (j *JobSet) Build(ctx context.Context, runtimeJobTemplate client.Object, in
// TODO (andreyvelich): Add support for the PodSpecOverride.
// TODO (andreyvelich): Refactor the builder with wrappers for PodSpec.
jobSet := jobSetBuilder.
Initializer(info, trainJob).
Initializer(trainJob).
Trainer(info, trainJob).
PodLabels(info.PodLabels).
Suspend(trainJob.Spec.Suspend).
Expand Down

0 comments on commit 58d563d

Please sign in to comment.