Skip to content

Commit

Permalink
Added SparkApplication integration
Browse files Browse the repository at this point in the history
that requires pod integration when dynamicAllocation was enabled

Signed-off-by: Shingo Omura <everpeace@gmail.com>
  • Loading branch information
everpeace committed Jan 27, 2025
1 parent 298decd commit 6f7a6ca
Show file tree
Hide file tree
Showing 11 changed files with 2,104 additions and 1 deletion.
2 changes: 2 additions & 0 deletions config/components/manager/controller_manager_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,12 @@ integrations:
- "kubeflow.org/tfjob"
- "kubeflow.org/xgboostjob"
- "workload.codeflare.dev/appwrapper"
- "codeflare.dev/appwrapper"
# - "pod"
# - "deployment" # requires enabling pod integration
# - "statefulset" # requires enabling pod integration
# - "leaderworkerset.x-k8s.io/leaderworkerset" # requires enabling pod integration
# - "sparkoperator.k8s.io/sparkapplication" # requires enabling pod integration
# externalFrameworks:
# - "Foo.v1.example.com"
# podOptions:
Expand Down
2 changes: 1 addition & 1 deletion config/components/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ spec:
- /manager
args:
- "--zap-log-level=2"
imagePullPolicy: Always
imagePullPolicy: IfNotPresent
image: controller:latest
name: manager
securityContext:
Expand Down
25 changes: 25 additions & 0 deletions config/components/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,31 @@ rules:
- get
- list
- watch
- apiGroups:
- sparkoperator.k8s.io
resources:
- sparkapplications
verbs:
- get
- list
- patch
- update
- watch
- apiGroups:
- sparkoperator.k8s.io
resources:
- sparkapplications/finalizers
verbs:
- get
- update
- apiGroups:
- sparkoperator.k8s.io
resources:
- sparkapplications/status
verbs:
- get
- patch
- update
- apiGroups:
- workload.codeflare.dev
resources:
Expand Down
40 changes: 40 additions & 0 deletions config/components/webhook/manifests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,26 @@ webhooks:
resources:
- rayjobs
sideEffects: None
- admissionReviewVersions:
- v1
clientConfig:
service:
name: webhook-service
namespace: system
path: /mutate-sparkoperator-k8s-io-v1beta2-sparkapplication
failurePolicy: Fail
name: msparkapplication.kb.io
reinvocationPolicy: IfNeeded
rules:
- apiGroups:
- sparkoperator.k8s.io
apiVersions:
- v1beta2
operations:
- CREATE
resources:
- sparkapplications
sideEffects: None
- admissionReviewVersions:
- v1
clientConfig:
Expand Down Expand Up @@ -596,6 +616,26 @@ webhooks:
resources:
- rayjobs
sideEffects: None
- admissionReviewVersions:
- v1
clientConfig:
service:
name: webhook-service
namespace: system
path: /validate-sparkoperator-k8s-io-v1beta2-sparkapplication
failurePolicy: Fail
name: vsparkapplication.kb.io
rules:
- apiGroups:
- sparkoperator.k8s.io
apiVersions:
- v1beta2
operations:
- CREATE
- UPDATE
resources:
- sparkapplications
sideEffects: None
- admissionReviewVersions:
- v1
clientConfig:
Expand Down
1 change: 1 addition & 0 deletions pkg/controller/jobs/jobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,6 @@ import (
_ "sigs.k8s.io/kueue/pkg/controller/jobs/pod"
_ "sigs.k8s.io/kueue/pkg/controller/jobs/raycluster"
_ "sigs.k8s.io/kueue/pkg/controller/jobs/rayjob"
_ "sigs.k8s.io/kueue/pkg/controller/jobs/sparkapplication"
_ "sigs.k8s.io/kueue/pkg/controller/jobs/statefulset"
)
23 changes: 23 additions & 0 deletions pkg/controller/jobs/pod/pod_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/validation/field"
"k8s.io/klog/v2"
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -56,6 +57,20 @@ const (
SuspendedByParentAnnotation = "kueue.x-k8s.io/pod-suspending-parent"
RoleHashAnnotation = "kueue.x-k8s.io/role-hash"
RetriableInGroupAnnotation = "kueue.x-k8s.io/retriable-in-group"

// The pod annotation "kueue.x-k8s.io/skip-pod-integration-webhook=true"
// forces the pod integration webhook to skip the pod.
//
// Why is such an annotation needed?
// Some controllers (e.g., the SparkApplication controller) use a webhook
// to set the OwnerReference on the pods they manage.
// In that case, even though the CRD integration is enabled in Kueue,
// the pod integration webhook might be called WITHOUT the OwnerReference.
// The pod integration webhook then cannot detect that the pod is
// controlled by a CRD and, consequently, mistakenly applies
// the pod integration information to the pod.
SkipWebhookAnnotationKey = "kueue.x-k8s.io/skip-pod-integration-webhook"
SkipWebhookAnnotationValue = "true"
)

var (
Expand Down Expand Up @@ -139,6 +154,14 @@ func (w *PodWebhook) Default(ctx context.Context, obj runtime.Object) error {
log := ctrl.LoggerFrom(ctx).WithName("pod-webhook")
log.V(5).Info("Applying defaults")

if v, ok := pod.pod.Annotations[SkipWebhookAnnotationKey]; ok && v == SkipWebhookAnnotationValue {
log.V(5).Info(fmt.Sprintf(
"Skip applying defaults due to %s=%s annotation", SkipWebhookAnnotationKey, SkipWebhookAnnotationValue),
"pod", klog.KObj(pod.Object()),
)
return nil
}

_, suspend := pod.pod.GetAnnotations()[SuspendedByParentAnnotation]
if !suspend {
// Namespace filtering
Expand Down
Loading

0 comments on commit 6f7a6ca

Please sign in to comment.