Skip to content

Commit

Permalink
Upgrade RayJob API to v1 (kubernetes-sigs#1802)
Browse files Browse the repository at this point in the history
  • Loading branch information
astefanutti authored and vsoch committed Apr 18, 2024
1 parent 18a3852 commit 3f8c555
Show file tree
Hide file tree
Showing 14 changed files with 97 additions and 99 deletions.
8 changes: 4 additions & 4 deletions charts/kueue/templates/webhook/webhook.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -222,14 +222,14 @@ webhooks:
service:
name: '{{ include "kueue.fullname" . }}-webhook-service'
namespace: '{{ .Release.Namespace }}'
path: /mutate-ray-io-v1alpha1-rayjob
path: /mutate-ray-io-v1-rayjob
failurePolicy: Fail
name: mrayjob.kb.io
rules:
- apiGroups:
- ray.io
apiVersions:
- v1alpha1
- v1
operations:
- CREATE
resources:
Expand Down Expand Up @@ -526,14 +526,14 @@ webhooks:
service:
name: '{{ include "kueue.fullname" . }}-webhook-service'
namespace: '{{ .Release.Namespace }}'
path: /validate-ray-io-v1alpha1-rayjob
path: /validate-ray-io-v1-rayjob
failurePolicy: Fail
name: vrayjob.kb.io
rules:
- apiGroups:
- ray.io
apiVersions:
- v1alpha1
- v1
operations:
- CREATE
- UPDATE
Expand Down
8 changes: 4 additions & 4 deletions config/components/webhook/manifests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -200,14 +200,14 @@ webhooks:
service:
name: webhook-service
namespace: system
path: /mutate-ray-io-v1alpha1-rayjob
path: /mutate-ray-io-v1-rayjob
failurePolicy: Fail
name: mrayjob.kb.io
rules:
- apiGroups:
- ray.io
apiVersions:
- v1alpha1
- v1
operations:
- CREATE
resources:
Expand Down Expand Up @@ -482,14 +482,14 @@ webhooks:
service:
name: webhook-service
namespace: system
path: /validate-ray-io-v1alpha1-rayjob
path: /validate-ray-io-v1-rayjob
failurePolicy: Fail
name: vrayjob.kb.io
rules:
- apiGroups:
- ray.io
apiVersions:
- v1alpha1
- v1
operations:
- CREATE
- UPDATE
Expand Down
3 changes: 1 addition & 2 deletions pkg/controller/jobframework/setup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ import (
kubeflow "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v2beta1"
kftraining "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1"
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
rayjobapi "github.com/ray-project/kuberay/ray-operator/apis/ray/v1alpha1"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
apimeta "k8s.io/apimachinery/pkg/api/meta"
Expand Down Expand Up @@ -75,7 +74,7 @@ func TestSetupControllers(t *testing.T) {
for name, tc := range cases {
t.Run(name, func(t *testing.T) {
_, logger := utiltesting.ContextWithLog(t)
k8sClient := utiltesting.NewClientBuilder(jobset.AddToScheme, kubeflow.AddToScheme, rayjobapi.AddToScheme, kftraining.AddToScheme, rayv1.AddToScheme).Build()
k8sClient := utiltesting.NewClientBuilder(jobset.AddToScheme, kubeflow.AddToScheme, kftraining.AddToScheme, rayv1.AddToScheme).Build()

mgrOpts := ctrlmgr.Options{
Scheme: k8sClient.Scheme(),
Expand Down
2 changes: 1 addition & 1 deletion pkg/controller/jobs/pod/pod_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -1171,7 +1171,7 @@ func (p *Pod) ReclaimablePods() ([]kueue.ReclaimablePod, error) {

func IsPodOwnerManagedByKueue(p *Pod) bool {
if owner := metav1.GetControllerOf(&p.pod); owner != nil {
return jobframework.IsOwnerManagedByKueue(owner) || (owner.Kind == "RayCluster" && strings.HasPrefix(owner.APIVersion, "ray.io/v1alpha1"))
return jobframework.IsOwnerManagedByKueue(owner) || (owner.Kind == "RayCluster" && (strings.HasPrefix(owner.APIVersion, "ray.io/v1alpha1") || strings.HasPrefix(owner.APIVersion, "ray.io/v1")))
}
return false
}
Expand Down
6 changes: 3 additions & 3 deletions pkg/controller/jobs/pod/pod_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import (

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
rayjobapi "github.com/ray-project/kuberay/ray-operator/apis/ray/v1alpha1"
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -120,11 +120,11 @@ func TestDefault(t *testing.T) {
namespaceSelector: defaultNamespaceSelector,
pod: testingpod.MakePod("test-pod", defaultNamespace.Name).
Queue("test-queue").
OwnerReference("parent-ray-cluster", rayjobapi.GroupVersion.WithKind("RayCluster")).
OwnerReference("parent-ray-cluster", rayv1.GroupVersion.WithKind("RayCluster")).
Obj(),
want: testingpod.MakePod("test-pod", defaultNamespace.Name).
Queue("test-queue").
OwnerReference("parent-ray-cluster", rayjobapi.GroupVersion.WithKind("RayCluster")).
OwnerReference("parent-ray-cluster", rayv1.GroupVersion.WithKind("RayCluster")).
Obj(),
},
"pod with owner managed by kueue (MPIJob)": {
Expand Down
18 changes: 9 additions & 9 deletions pkg/controller/jobs/rayjob/rayjob_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import (
"context"
"strings"

rayjobapi "github.com/ray-project/kuberay/ray-operator/apis/ray/v1alpha1"
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
Expand All @@ -33,7 +33,7 @@ import (
)

var (
gvk = rayjobapi.GroupVersion.WithKind("RayJob")
gvk = rayv1.GroupVersion.WithKind("RayJob")
)

const (
Expand All @@ -46,8 +46,8 @@ func init() {
SetupIndexes: SetupIndexes,
NewReconciler: NewReconciler,
SetupWebhook: SetupRayJobWebhook,
JobType: &rayjobapi.RayJob{},
AddToScheme: rayjobapi.AddToScheme,
JobType: &rayv1.RayJob{},
AddToScheme: rayv1.AddToScheme,
IsManagingObjectsOwner: isRayJob,
}))
}
Expand All @@ -64,20 +64,20 @@ func init() {

var NewReconciler = jobframework.NewGenericReconcilerFactory(func() jobframework.GenericJob { return &RayJob{} })

type RayJob rayjobapi.RayJob
type RayJob rayv1.RayJob

var _ jobframework.GenericJob = (*RayJob)(nil)

func (j *RayJob) Object() client.Object {
return (*rayjobapi.RayJob)(j)
return (*rayv1.RayJob)(j)
}

func (j *RayJob) IsSuspended() bool {
return j.Spec.Suspend
}

func (j *RayJob) IsActive() bool {
return j.Status.JobDeploymentStatus != rayjobapi.JobDeploymentStatusSuspended
return j.Status.JobDeploymentStatus != rayv1.JobDeploymentStatusSuspended
}

func (j *RayJob) Suspend() {
Expand Down Expand Up @@ -168,11 +168,11 @@ func (j *RayJob) Finished() (metav1.Condition, bool) {
Message: j.Status.Message,
}

return condition, j.Status.JobStatus == rayjobapi.JobStatusFailed || j.Status.JobStatus == rayjobapi.JobStatusSucceeded
return condition, j.Status.JobStatus == rayv1.JobStatusFailed || j.Status.JobStatus == rayv1.JobStatusSucceeded
}

func (j *RayJob) PodsReady() bool {
return j.Status.RayClusterStatus.State == rayjobapi.Ready
return j.Status.RayClusterStatus.State == rayv1.Ready
}

func SetupIndexes(ctx context.Context, indexer client.FieldIndexer) error {
Expand Down
26 changes: 13 additions & 13 deletions pkg/controller/jobs/rayjob/rayjob_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import (

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
rayjobapi "github.com/ray-project/kuberay/ray-operator/apis/ray/v1alpha1"
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/utils/ptr"

Expand All @@ -33,7 +33,7 @@ import (
func TestPodSets(t *testing.T) {
job := testingrayutil.MakeJob("job", "ns").
WithHeadGroupSpec(
rayjobapi.HeadGroupSpec{
rayv1.HeadGroupSpec{
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
Containers: []corev1.Container{
Expand All @@ -46,7 +46,7 @@ func TestPodSets(t *testing.T) {
},
).
WithWorkerGroups(
rayjobapi.WorkerGroupSpec{
rayv1.WorkerGroupSpec{
GroupName: "group1",
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
Expand All @@ -58,7 +58,7 @@ func TestPodSets(t *testing.T) {
},
},
},
rayjobapi.WorkerGroupSpec{
rayv1.WorkerGroupSpec{
GroupName: "group2",
Replicas: ptr.To[int32](3),
Template: corev1.PodTemplateSpec{
Expand Down Expand Up @@ -125,22 +125,22 @@ func TestPodSets(t *testing.T) {

func TestNodeSelectors(t *testing.T) {
baseJob := testingrayutil.MakeJob("job", "ns").
WithHeadGroupSpec(rayjobapi.HeadGroupSpec{
WithHeadGroupSpec(rayv1.HeadGroupSpec{
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
NodeSelector: map[string]string{},
},
},
}).
WithWorkerGroups(rayjobapi.WorkerGroupSpec{
WithWorkerGroups(rayv1.WorkerGroupSpec{
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
NodeSelector: map[string]string{
"key-wg1": "value-wg1",
},
},
},
}, rayjobapi.WorkerGroupSpec{
}, rayv1.WorkerGroupSpec{
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
NodeSelector: map[string]string{
Expand All @@ -152,12 +152,12 @@ func TestNodeSelectors(t *testing.T) {
Obj()

cases := map[string]struct {
job *rayjobapi.RayJob
job *rayv1.RayJob
runInfo []podset.PodSetInfo
restoreInfo []podset.PodSetInfo
wantRunError error
wantAfterRun *rayjobapi.RayJob
wantFinal *rayjobapi.RayJob
wantAfterRun *rayv1.RayJob
wantFinal *rayv1.RayJob
}{
"valid configuration": {
job: baseJob.DeepCopy(),
Expand Down Expand Up @@ -197,7 +197,7 @@ func TestNodeSelectors(t *testing.T) {
},
wantAfterRun: testingrayutil.MakeJob("job", "ns").
Suspend(false).
WithHeadGroupSpec(rayjobapi.HeadGroupSpec{
WithHeadGroupSpec(rayv1.HeadGroupSpec{
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
NodeSelector: map[string]string{
Expand All @@ -206,15 +206,15 @@ func TestNodeSelectors(t *testing.T) {
},
},
}).
WithWorkerGroups(rayjobapi.WorkerGroupSpec{
WithWorkerGroups(rayv1.WorkerGroupSpec{
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
NodeSelector: map[string]string{
"key-wg1": "value-wg1",
},
},
},
}, rayjobapi.WorkerGroupSpec{
}, rayv1.WorkerGroupSpec{
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
NodeSelector: map[string]string{
Expand Down
20 changes: 10 additions & 10 deletions pkg/controller/jobs/rayjob/rayjob_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import (
"context"
"fmt"

rayjobapi "github.com/ray-project/kuberay/ray-operator/apis/ray/v1alpha1"
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/validation/field"
"k8s.io/klog/v2"
Expand All @@ -37,45 +37,45 @@ type RayJobWebhook struct {
manageJobsWithoutQueueName bool
}

// SetupRayJobWebhook configures the webhook for rayjobapi RayJob.
// SetupRayJobWebhook configures the webhook for RayJob.
func SetupRayJobWebhook(mgr ctrl.Manager, opts ...jobframework.Option) error {
options := jobframework.ProcessOptions(opts...)
wh := &RayJobWebhook{
manageJobsWithoutQueueName: options.ManageJobsWithoutQueueName,
}
return ctrl.NewWebhookManagedBy(mgr).
For(&rayjobapi.RayJob{}).
For(&rayv1.RayJob{}).
WithDefaulter(wh).
WithValidator(wh).
Complete()
}

// +kubebuilder:webhook:path=/mutate-ray-io-v1alpha1-rayjob,mutating=true,failurePolicy=fail,sideEffects=None,groups=ray.io,resources=rayjobs,verbs=create,versions=v1alpha1,name=mrayjob.kb.io,admissionReviewVersions=v1
// +kubebuilder:webhook:path=/mutate-ray-io-v1-rayjob,mutating=true,failurePolicy=fail,sideEffects=None,groups=ray.io,resources=rayjobs,verbs=create,versions=v1,name=mrayjob.kb.io,admissionReviewVersions=v1

var _ webhook.CustomDefaulter = &RayJobWebhook{}

// Default implements webhook.CustomDefaulter so a webhook will be registered for the type
func (w *RayJobWebhook) Default(ctx context.Context, obj runtime.Object) error {
job := obj.(*rayjobapi.RayJob)
job := obj.(*rayv1.RayJob)
log := ctrl.LoggerFrom(ctx).WithName("rayjob-webhook")
log.V(5).Info("Applying defaults", "job", klog.KObj(job))
jobframework.ApplyDefaultForSuspend((*RayJob)(job), w.manageJobsWithoutQueueName)
return nil
}

// +kubebuilder:webhook:path=/validate-ray-io-v1alpha1-rayjob,mutating=false,failurePolicy=fail,sideEffects=None,groups=ray.io,resources=rayjobs,verbs=create;update,versions=v1alpha1,name=vrayjob.kb.io,admissionReviewVersions=v1
// +kubebuilder:webhook:path=/validate-ray-io-v1-rayjob,mutating=false,failurePolicy=fail,sideEffects=None,groups=ray.io,resources=rayjobs,verbs=create;update,versions=v1,name=vrayjob.kb.io,admissionReviewVersions=v1

var _ webhook.CustomValidator = &RayJobWebhook{}

// ValidateCreate implements webhook.CustomValidator so a webhook will be registered for the type
func (w *RayJobWebhook) ValidateCreate(ctx context.Context, obj runtime.Object) (admission.Warnings, error) {
job := obj.(*rayjobapi.RayJob)
job := obj.(*rayv1.RayJob)
log := ctrl.LoggerFrom(ctx).WithName("rayjob-webhook")
log.Info("Validating create", "job", klog.KObj(job))
return nil, w.validateCreate(job).ToAggregate()
}

func (w *RayJobWebhook) validateCreate(job *rayjobapi.RayJob) field.ErrorList {
func (w *RayJobWebhook) validateCreate(job *rayv1.RayJob) field.ErrorList {
var allErrors field.ErrorList
kueueJob := (*RayJob)(job)

Expand Down Expand Up @@ -120,8 +120,8 @@ func (w *RayJobWebhook) validateCreate(job *rayjobapi.RayJob) field.ErrorList {

// ValidateUpdate implements webhook.CustomValidator so a webhook will be registered for the type
func (w *RayJobWebhook) ValidateUpdate(ctx context.Context, oldObj, newObj runtime.Object) (admission.Warnings, error) {
oldJob := oldObj.(*rayjobapi.RayJob)
newJob := newObj.(*rayjobapi.RayJob)
oldJob := oldObj.(*rayv1.RayJob)
newJob := newObj.(*rayv1.RayJob)
log := ctrl.LoggerFrom(ctx).WithName("rayjob-webhook")
if w.manageJobsWithoutQueueName || jobframework.QueueName((*RayJob)(newJob)) != "" {
log.Info("Validating update", "job", klog.KObj(newJob))
Expand Down
Loading

0 comments on commit 3f8c555

Please sign in to comment.