Skip to content

Commit

Permalink
Feature: support setting the Volcano queue for PyTorchJob (#1415)
Browse files Browse the repository at this point in the history
* support using a Volcano queue with PyTorchJob

* support using a Volcano queue with PyTorchJob

Signed-off-by: bert.li <qiankun.li@qq.com>

* set SchedulingPolicy for runPolicy

Signed-off-by: bert.li <qiankun.li@qq.com>

* use pytorchjob.Spec.RunPolicy directly
  • Loading branch information
qiankunli authored Sep 24, 2021
1 parent 7fb592f commit 557ba80
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 20 deletions.
11 changes: 1 addition & 10 deletions pkg/controller.v1/mxnet/mxjob_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,17 +166,8 @@ func (r *MXJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
 		replicas[commonv1.ReplicaType(k)] = v
 	}

-	// Construct RunPolicy based on MXJob.Spec
-	runPolicy := &commonv1.RunPolicy{
-		CleanPodPolicy: mxjob.Spec.RunPolicy.CleanPodPolicy,
-		TTLSecondsAfterFinished: mxjob.Spec.RunPolicy.TTLSecondsAfterFinished,
-		ActiveDeadlineSeconds: mxjob.Spec.RunPolicy.ActiveDeadlineSeconds,
-		BackoffLimit: mxjob.Spec.RunPolicy.BackoffLimit,
-		SchedulingPolicy: nil,
-	}
-
 	// Use common to reconcile the job related pod and service
-	err = r.ReconcileJobs(mxjob, replicas, mxjob.Status, runPolicy)
+	err = r.ReconcileJobs(mxjob, replicas, mxjob.Status, &mxjob.Spec.RunPolicy)
 	if err != nil {
 		logrus.Warnf("Reconcile MX Job error %v", err)
 		return ctrl.Result{}, err
Expand Down
11 changes: 1 addition & 10 deletions pkg/controller.v1/pytorch/pytorchjob_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,17 +155,8 @@ func (r *PyTorchJobReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 	// Set default priorities to pytorch job
 	r.Scheme.Default(pytorchjob)

-	// Construct RunPolicy based on PyTorchJob.Spec
-	runPolicy := &commonv1.RunPolicy{
-		CleanPodPolicy: pytorchjob.Spec.RunPolicy.CleanPodPolicy,
-		TTLSecondsAfterFinished: pytorchjob.Spec.RunPolicy.TTLSecondsAfterFinished,
-		ActiveDeadlineSeconds: pytorchjob.Spec.RunPolicy.ActiveDeadlineSeconds,
-		BackoffLimit: pytorchjob.Spec.RunPolicy.BackoffLimit,
-		SchedulingPolicy: nil,
-	}
-
 	// Use common to reconcile the job related pod and service
-	err = r.ReconcileJobs(pytorchjob, pytorchjob.Spec.PyTorchReplicaSpecs, pytorchjob.Status, runPolicy)
+	err = r.ReconcileJobs(pytorchjob, pytorchjob.Spec.PyTorchReplicaSpecs, pytorchjob.Status, &pytorchjob.Spec.RunPolicy)
 	if err != nil {
 		logrus.Warnf("Reconcile PyTorch Job error %v", err)
 		return ctrl.Result{}, err
Expand Down

0 comments on commit 557ba80

Please sign in to comment.