From e55bb8c9cecf0e23ce248879ece764a1b4a6eccd Mon Sep 17 00:00:00 2001 From: vladikkuzn <51460778+vladikkuzn@users.noreply.github.com> Date: Thu, 18 Apr 2024 16:51:50 +0300 Subject: [PATCH 01/49] Non-admitted workloads with QuotaReserved condition are shown as Admitted by kubectl (#1991) * * Rename "Admitted by" to "Reserving in" printed column * Add "Admitted" printed column * * Output Admitted column without -wide * * Remove priority=0 as it's default * * Documentation update * Rename "Reserving in" to "Reserved in" --- apis/kueue/v1beta1/workload_types.go | 7 ++++--- charts/kueue/templates/crd/kueue.x-k8s.io_workloads.yaml | 8 ++++++-- config/components/crd/bases/kueue.x-k8s.io_workloads.yaml | 8 ++++++-- site/content/en/docs/tasks/run/jobs.md | 8 ++++---- .../en/docs/tasks/troubleshooting/troubleshooting_jobs.md | 8 ++++---- 5 files changed, 24 insertions(+), 15 deletions(-) diff --git a/apis/kueue/v1beta1/workload_types.go b/apis/kueue/v1beta1/workload_types.go index fc8c48c95d..698787d8c1 100644 --- a/apis/kueue/v1beta1/workload_types.go +++ b/apis/kueue/v1beta1/workload_types.go @@ -330,9 +330,10 @@ const ( // +kubebuilder:object:root=true // +kubebuilder:storageversion // +kubebuilder:subresource:status -// +kubebuilder:printcolumn:name="Queue",JSONPath=".spec.queueName",type=string,description="Name of the queue this workload was submitted to" -// +kubebuilder:printcolumn:name="Admitted by",JSONPath=".status.admission.clusterQueue",type=string,description="Name of the ClusterQueue that admitted this workload" -// +kubebuilder:printcolumn:name="Age",JSONPath=".metadata.creationTimestamp",type=date,description="Time this workload was created" +// +kubebuilder:printcolumn:name="Queue",JSONPath=".spec.queueName",type="string",description="Name of the queue this workload was submitted to" +// +kubebuilder:printcolumn:name="Reserved in",JSONPath=".status.admission.clusterQueue",type="string",description="Name of the ClusterQueue where the workload is reserving quota" +// +kubebuilder:printcolumn:name="Admitted",JSONPath=".status.conditions[?(@.type=='Admitted')].status",type="string",description="Admission status" +// +kubebuilder:printcolumn:name="Age",JSONPath=".metadata.creationTimestamp",type="date",description="Time this workload was created" // +kubebuilder:resource:shortName={wl} // Workload is the Schema for the workloads API diff --git a/charts/kueue/templates/crd/kueue.x-k8s.io_workloads.yaml b/charts/kueue/templates/crd/kueue.x-k8s.io_workloads.yaml index 8697489777..3f2afaa75a 100644 --- a/charts/kueue/templates/crd/kueue.x-k8s.io_workloads.yaml +++ b/charts/kueue/templates/crd/kueue.x-k8s.io_workloads.yaml @@ -36,9 +36,13 @@ spec: jsonPath: .spec.queueName name: Queue type: string - - description: Name of the ClusterQueue that admitted this workload + - description: Name of the ClusterQueue where the workload is reserving quota jsonPath: .status.admission.clusterQueue - name: Admitted by + name: Reserved in + type: string + - description: Admission status + jsonPath: .status.conditions[?(@.type=='Admitted')].status + name: Admitted type: string - description: Time this workload was created jsonPath: .metadata.creationTimestamp diff --git a/config/components/crd/bases/kueue.x-k8s.io_workloads.yaml b/config/components/crd/bases/kueue.x-k8s.io_workloads.yaml index 095dcb1072..9bc7c62a89 100644 --- a/config/components/crd/bases/kueue.x-k8s.io_workloads.yaml +++ b/config/components/crd/bases/kueue.x-k8s.io_workloads.yaml @@ -21,9 +21,13 @@ spec: jsonPath: .spec.queueName name: 
Queue type: string - - description: Name of the ClusterQueue that admitted this workload + - description: Name of the ClusterQueue where the workload is reserving quota jsonPath: .status.admission.clusterQueue - name: Admitted by + name: Reserved in + type: string + - description: Admission status + jsonPath: .status.conditions[?(@.type=='Admitted')].status + name: Admitted type: string - description: Time this workload was created jsonPath: .metadata.creationTimestamp diff --git a/site/content/en/docs/tasks/run/jobs.md b/site/content/en/docs/tasks/run/jobs.md index 86121ed6bb..9d5d2e8112 100644 --- a/site/content/en/docs/tasks/run/jobs.md +++ b/site/content/en/docs/tasks/run/jobs.md @@ -77,8 +77,8 @@ kubectl -n default get workloads The output will be similar to the following: ```shell -NAME QUEUE ADMITTED BY AGE -sample-job-sl4bm user-queue 1s +NAME QUEUE RESERVED IN ADMITTED AGE +sample-job-sl4bm user-queue 1s ``` ## 3. (Optional) Monitor the status of the workload @@ -124,8 +124,8 @@ kubectl -n default get workloads The output is similar to the following: ```shell -NAME QUEUE ADMITTED BY AGE -sample-job-sl4bm user-queue cluster-queue 45s +NAME QUEUE RESERVED IN ADMITTED AGE +sample-job-sl4bm user-queue cluster-queue True 1s ``` To view the event for the Workload admission, run the following command: diff --git a/site/content/en/docs/tasks/troubleshooting/troubleshooting_jobs.md b/site/content/en/docs/tasks/troubleshooting/troubleshooting_jobs.md index c3411049d0..db8dc8b7dc 100644 --- a/site/content/en/docs/tasks/troubleshooting/troubleshooting_jobs.md +++ b/site/content/en/docs/tasks/troubleshooting/troubleshooting_jobs.md @@ -41,8 +41,8 @@ Job is called `my-job` in the `my-namespace` namespace. The output looks like the following: ``` - NAME QUEUE ADMITTED BY AGE - job-my-job-19797 user-queue cluster-queue 9m45s + NAME QUEUE RESERVED IN ADMITTED AGE + job-my-job-19797 user-queue cluster-queue True 9m45s ``` 3. You can list all of the workloads in the same namespace of your job and identify the one @@ -56,8 +56,8 @@ Job is called `my-job` in the `my-namespace` namespace. The output looks like the following: ``` - NAME QUEUE ADMITTED BY AGE - job-my-job-19797 user-queue cluster-queue 9m45s + NAME QUEUE RESERVED IN ADMITTED AGE + job-my-job-19797 user-queue cluster-queue True 9m45s ``` ## Is my Job running? From 472ce6d2c6bbfab2903db065dbb8f117415bc8d5 Mon Sep 17 00:00:00 2001 From: Vanessasaurus <814322+vsoch@users.noreply.github.com> Date: Thu, 18 Apr 2024 08:45:59 -0600 Subject: [PATCH 02/49] docs: add troubleshooting resource requests (#2001) * docs: add note about resources matching cluster-queue Problem: the troubleshooting guide should demonstrate how to debug the case where jobs are not admitted. Solution: add a small section to show that resource types need to match resource requests, and other small debug tips. 
Signed-off-by: vsoch * fix: typos in provisioning and troubleshooting Signed-off-by: vsoch * fix: code indent Signed-off-by: vsoch * review: aldo Signed-off-by: vsoch --------- Signed-off-by: vsoch Co-authored-by: vsoch --- .../provisioning.md | 2 +- .../troubleshooting/troubleshooting_jobs.md | 38 +++++++++++++++++++ .../troubleshooting/troubleshooting_pods.md | 2 +- 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/site/content/en/docs/admission-check-controllers/provisioning.md b/site/content/en/docs/admission-check-controllers/provisioning.md index aec9c66250..620261400c 100644 --- a/site/content/en/docs/admission-check-controllers/provisioning.md +++ b/site/content/en/docs/admission-check-controllers/provisioning.md @@ -15,7 +15,7 @@ The Provisioning Admission Check Controller is supported on [Kubernetes cluster- ## Usage To use the Provisioning AdmissionCheck, create an [AdmissionCheck](docs/concepts/admission_check) -with `kueue.x-k8s.io/provisioning-request` as a `.spec.controllerName` and create a ProvisioningRequest configuration usign a `ProvisioningRequestConfig` object. See an example below. +with `kueue.x-k8s.io/provisioning-request` as a `.spec.controllerName` and create a ProvisioningRequest configuration using a `ProvisioningRequestConfig` object. See an example below. ## ProvisioningRequest configuration diff --git a/site/content/en/docs/tasks/troubleshooting/troubleshooting_jobs.md b/site/content/en/docs/tasks/troubleshooting/troubleshooting_jobs.md index db8dc8b7dc..c517b15186 100644 --- a/site/content/en/docs/tasks/troubleshooting/troubleshooting_jobs.md +++ b/site/content/en/docs/tasks/troubleshooting/troubleshooting_jobs.md @@ -133,6 +133,44 @@ status: type: QuotaReserved ``` +### Does my ClusterQueue have the resource requests that the job requires? + +When you submit a job that has a resource request, for example: + +```bash +$ kubectl get jobs job-0-9-size-6 -o json | jq -r .spec.template.spec.containers[0].resources +``` +```console +{ + "limits": { + "cpu": "2" + }, + "requests": { + "cpu": "2" + } +} +``` + +If your ClusterQueue does not have a definition for the `requests`, Kueue cannot admit the job. For the job above, you should define `cpu` quotas under `resourceGroups`. A ClusterQueue defining `cpu` quota looks like the following: + +```yaml +apiVersion: kueue.x-k8s.io/v1beta1 +kind: ClusterQueue +metadata: + name: "cluster-queue" +spec: + namespaceSelector: {} + resourceGroups: + - coveredResources: ["cpu"] + flavors: + - name: "default-flavor" + resources: + - name: "cpu" + nominalQuota: 40 +``` + +See [resources groups](https://kueue.sigs.k8s.io/docs/concepts/cluster_queue/#resource-groups) for more information. + ### Unattempted Workload When using a [ClusterQueue](/docs/concepts/cluster_queue) with the `StrictFIFO` diff --git a/site/content/en/docs/tasks/troubleshooting/troubleshooting_pods.md b/site/content/en/docs/tasks/troubleshooting/troubleshooting_pods.md index 3dc4f7f8d0..0dbfc6ff37 100644 --- a/site/content/en/docs/tasks/troubleshooting/troubleshooting_pods.md +++ b/site/content/en/docs/tasks/troubleshooting/troubleshooting_pods.md @@ -78,7 +78,7 @@ Events: ## Why did my Pod disappear? When you enable [preemption](/docs/concepts/cluster_queue/#preemption), Kueue might preempt Pods -to accomodate higher priority jobs or reclaim quota. Preemption is implemented via `DELETE` calls, +to accommodate higher priority jobs or reclaim quota. Preemption is implemented via `DELETE` calls, the standard way of terminating a Pod in Kubernetes. 
When using single Pods, Kubernetes will delete Workload object along with the Pod, as there is From df18528c880b211190949fa485cc6ce4effaef64 Mon Sep 17 00:00:00 2001 From: Marty Mcfly Date: Fri, 19 Apr 2024 14:02:21 +0800 Subject: [PATCH 03/49] site: fix kubectl command of tasks example (#2013) --- site/content/en/docs/tasks/run/plain_pods.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/site/content/en/docs/tasks/run/plain_pods.md b/site/content/en/docs/tasks/run/plain_pods.md index 66ff545526..314daf70bf 100644 --- a/site/content/en/docs/tasks/run/plain_pods.md +++ b/site/content/en/docs/tasks/run/plain_pods.md @@ -107,7 +107,7 @@ Here is a sample Pod that just sleeps for a few seconds: You can create the Pod using the following command: ```sh # Create the pod -kubectl apply -f kueue-pod.yaml +kubectl create -f kueue-pod.yaml ``` ## Running a group of Pods to be admitted together @@ -164,7 +164,7 @@ Here is a sample Pod group that just sleeps for a few seconds: You can create the Pod group using the following command: ```sh -kubectl apply -f kueue-pod-group.yaml +kubectl create -f kueue-pod-group.yaml ``` The name of the associated Workload created by Kueue equals the name of the Pod From 69fb8d317c064591539b066a26fa4b3d959c65ab Mon Sep 17 00:00:00 2001 From: Oleksandr Redko Date: Fri, 19 Apr 2024 09:30:56 +0300 Subject: [PATCH 04/49] Fix deprecated comment for constants.QueueAnnotation (#1976) --- .golangci.yaml | 7 ++++++- pkg/controller/constants/constants.go | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.golangci.yaml b/.golangci.yaml index 8daf73f55a..b4ba30a602 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -11,7 +11,6 @@ linters-settings: - assignOp - captLocal - commentFormatting - - deprecatedComment - elseif - exitAfterDefer - ifElseChain @@ -38,6 +37,12 @@ issues: # Which dirs to exclude: issues from them won't be reported exclude-dirs: - bin + # Excluding configuration per-path, per-linter, per-text and per-source + exclude-rules: + - linters: + - staticcheck + # TODO(#768): Drop when incrementing the API version. + text: "SA1019: constants.QueueAnnotation is deprecated" # Show all issues from a linter max-issues-per-linter: 0 # Show all issues with the same text diff --git a/pkg/controller/constants/constants.go b/pkg/controller/constants/constants.go index 0287cc4154..718f9ad251 100644 --- a/pkg/controller/constants/constants.go +++ b/pkg/controller/constants/constants.go @@ -22,7 +22,7 @@ const ( // QueueAnnotation is the annotation key in the workload that holds the queue name. // - // DEPRECATED: Use QueueLabel as a label key. + // Deprecated: Use QueueLabel as a label key. QueueAnnotation = QueueLabel // PrebuiltWorkloadLabel is the label key of the job holding the name of the pre-built workload to use. From a4354c359df7c39a2bc6fe9ba25c99fa78697399 Mon Sep 17 00:00:00 2001 From: jiangjiang <86391540+googs1025@users.noreply.github.com> Date: Fri, 19 Apr 2024 15:07:41 +0800 Subject: [PATCH 05/49] fix: website jobset url (#2011) --- site/content/en/docs/tasks/run/jobsets.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/site/content/en/docs/tasks/run/jobsets.md b/site/content/en/docs/tasks/run/jobsets.md index e91e48e22c..5d57953578 100644 --- a/site/content/en/docs/tasks/run/jobsets.md +++ b/site/content/en/docs/tasks/run/jobsets.md @@ -15,11 +15,11 @@ This guide is for [batch users](/docs/tasks#batch-user) that have a basic unders 1. 
Check [Administer cluster quotas](/docs/tasks/manage/administer_cluster_quotas) for details on the initial Kueue setup. -2. See [JobSet Installation](https://github.com/kubernetes-sigs/jobset/blob/main/docs/setup/install.md) for installation and configuration details of JobSet Operator. +2. See [JobSet Installation](https://jobset.sigs.k8s.io/docs/installation/) for installation and configuration details of JobSet Operator. ## JobSet definition -When running [JobSets](https://github.com/kubernetes-sigs/jobset/blob/main/docs/concepts/README.md) on +When running [JobSets](https://jobset.sigs.k8s.io/docs/concepts/) on Kueue, take into consideration the following aspects: ### a. Queue selection From 4e5eb4366249b1a483ef5d4244b966fa6d0aca0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Wo=C5=BAniak?= Date: Fri, 19 Apr 2024 12:13:05 +0200 Subject: [PATCH 06/49] Fix config for e2e tests (#2017) --- test/e2e/config/controller_manager_config.yaml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/test/e2e/config/controller_manager_config.yaml b/test/e2e/config/controller_manager_config.yaml index a3b5950bdb..b21739e9a2 100644 --- a/test/e2e/config/controller_manager_config.yaml +++ b/test/e2e/config/controller_manager_config.yaml @@ -5,10 +5,11 @@ leaderElection: controller: groupKindConcurrency: Job.batch: 5 + Pod: 5 + Workload.kueue.x-k8s.io: 5 LocalQueue.kueue.x-k8s.io: 1 ClusterQueue.kueue.x-k8s.io: 1 ResourceFlavor.kueue.x-k8s.io: 1 - Workload.kueue.x-k8s.io: 1 clientConnection: qps: 50 burst: 100 @@ -24,8 +25,3 @@ integrations: - "kubeflow.org/tfjob" - "kubeflow.org/xgboostjob" - "pod" -controller: - groupKindConcurrency: - Job.batch: 5 - Pod.: 5 - Workload.kueue.x-k8s.io: 5 From bfb148b97c43a306ded8e85a2655d240f289f178 Mon Sep 17 00:00:00 2001 From: Christian Zaccaria <73656840+ChristianZaccaria@users.noreply.github.com> Date: Fri, 19 Apr 2024 11:33:11 +0100 Subject: [PATCH 07/49] CVE fixes - Upgrade mpi-operator (#1989) * CVE fixes - Upgrading mpi-operator * Adjust type for mpi-operator v0.5.0 --- go.mod | 35 +++++----- go.sum | 70 +++++++++---------- .../jobs/mpijob/mpijob_controller_test.go | 13 ++-- .../testingjobs/mpijob/wrappers_mpijob.go | 3 +- 4 files changed, 58 insertions(+), 63 deletions(-) diff --git a/go.mod b/go.mod index 307e28448c..ee0d082ba9 100644 --- a/go.mod +++ b/go.mod @@ -5,8 +5,7 @@ go 1.22 require ( github.com/go-logr/logr v1.4.1 github.com/google/go-cmp v0.6.0 - github.com/kubeflow/common v0.4.7 - github.com/kubeflow/mpi-operator v0.4.0 + github.com/kubeflow/mpi-operator v0.5.0 github.com/kubeflow/training-operator v1.7.0 github.com/onsi/ginkgo/v2 v2.17.1 github.com/onsi/gomega v1.32.0 @@ -16,18 +15,18 @@ require ( github.com/ray-project/kuberay/ray-operator v1.1.0 github.com/spf13/cobra v1.8.0 go.uber.org/zap v1.27.0 - k8s.io/api v0.29.3 - k8s.io/apimachinery v0.29.3 - k8s.io/apiserver v0.29.3 + k8s.io/api v0.29.4 + k8s.io/apimachinery v0.29.4 + k8s.io/apiserver v0.29.4 k8s.io/autoscaler/cluster-autoscaler/apis v0.0.0-20240325113845-0130d33747bb - k8s.io/client-go v0.29.3 - k8s.io/code-generator v0.29.3 - k8s.io/component-base v0.29.3 - k8s.io/component-helpers v0.29.3 + k8s.io/client-go v0.29.4 + k8s.io/code-generator v0.29.4 + k8s.io/component-base v0.29.4 + k8s.io/component-helpers v0.29.4 k8s.io/klog/v2 v2.110.1 k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 - k8s.io/metrics v0.29.3 - k8s.io/utils v0.0.0-20230726121419-3b25d923346b + k8s.io/metrics v0.29.4 + k8s.io/utils v0.0.0-20240102154912-e7106e64919e 
sigs.k8s.io/controller-runtime v0.17.3 sigs.k8s.io/controller-tools v0.14.0 sigs.k8s.io/jobset v0.5.0 @@ -50,7 +49,7 @@ require ( github.com/evanphx/json-patch v5.6.0+incompatible // indirect github.com/evanphx/json-patch/v5 v5.8.0 // indirect github.com/fatih/color v1.16.0 // indirect - github.com/felixge/httpsnoop v1.0.3 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-logr/zapr v1.3.0 // indirect @@ -91,7 +90,7 @@ require ( go.etcd.io/etcd/client/v3 v3.5.11 // indirect go.etcd.io/etcd/server/v3 v3.5.11 // indirect go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.46.0 // indirect - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.44.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.0 // indirect go.opentelemetry.io/otel v1.20.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.20.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.20.0 // indirect @@ -101,14 +100,14 @@ require ( go.opentelemetry.io/proto/otlp v1.0.0 // indirect go.uber.org/atomic v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/crypto v0.18.0 // indirect + golang.org/x/crypto v0.21.0 // indirect golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect golang.org/x/mod v0.14.0 // indirect - golang.org/x/net v0.20.0 // indirect + golang.org/x/net v0.23.0 // indirect golang.org/x/oauth2 v0.12.0 // indirect golang.org/x/sync v0.6.0 // indirect - golang.org/x/sys v0.16.0 // indirect - golang.org/x/term v0.16.0 // indirect + golang.org/x/sys v0.18.0 // indirect + golang.org/x/term v0.18.0 // indirect golang.org/x/text v0.14.0 // indirect golang.org/x/time v0.3.0 // indirect golang.org/x/tools v0.17.0 // indirect @@ -125,7 +124,7 @@ require ( gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/apiextensions-apiserver v0.29.2 // indirect k8s.io/gengo v0.0.0-20230829151522-9cce18d56c01 // indirect - k8s.io/kms v0.29.3 // indirect + k8s.io/kms v0.29.4 // indirect sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.28.0 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect ) diff --git a/go.sum b/go.sum index b9ca19d18f..8dd1eabf2a 100644 --- a/go.sum +++ b/go.sum @@ -40,8 +40,8 @@ github.com/evanphx/json-patch/v5 v5.8.0 h1:lRj6N9Nci7MvzrXuX6HFzU8XjmhPiXPlsKEy1 github.com/evanphx/json-patch/v5 v5.8.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= -github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk= -github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= @@ -127,10 +127,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 
h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/kubeflow/common v0.4.7 h1:zz6QS4k2u2FY838M/FjOtwjJq39MRZVZcvPahRYL97M= -github.com/kubeflow/common v0.4.7/go.mod h1:43MAof/uhpJA2C0urynqatE3oKFQc7m2HLmJty7waqY= -github.com/kubeflow/mpi-operator v0.4.0 h1:PS4jLoMuRyrk/DHuYkI0D46sQQYpQt375HjOV4KVMFs= -github.com/kubeflow/mpi-operator v0.4.0/go.mod h1:/A4mTy/RYh2UIgaGUiXUaW70eThjsogu80WbbcZpuMg= +github.com/kubeflow/mpi-operator v0.5.0 h1:XvBwyXXQ9103DNMa22sxsaQlaktvaT2LY/g0UniGn5U= +github.com/kubeflow/mpi-operator v0.5.0/go.mod h1:SeZQJW8KJxSTWD++eQYKRFpoDg1v8yrdC6fjx2/3mG0= github.com/kubeflow/training-operator v1.7.0 h1:Zh61GlOWrlRi4UFOtJeV+/5REo/OndhwQ25KYd0llzc= github.com/kubeflow/training-operator v1.7.0/go.mod h1:BZCLX1h06wY3YSeSZZcGYAqI9/nVi7isVCRkfgZe9nE= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= @@ -224,8 +222,8 @@ go.etcd.io/etcd/server/v3 v3.5.11 h1:FEa0ImvoXdIPa81/vZUKpnJ74fpQ5ZivseoIKMPzfpg go.etcd.io/etcd/server/v3 v3.5.11/go.mod h1:CS0+TwcuRlhg1I5CpA3YlisOcoqJB1h1GMRgje75uDs= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.46.0 h1:PzIubN4/sjByhDRHLviCjJuweBXWFZWhghjg7cS28+M= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.46.0/go.mod h1:Ct6zzQEuGK3WpJs2n4dn+wfJYzd/+hNnxMRTWjGn30M= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.44.0 h1:KfYpVmrjI7JuToy5k8XV3nkapjWx48k4E4JOtVstzQI= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.44.0/go.mod h1:SeQhzAEccGVZVEy7aH87Nh0km+utSpo1pTv6eMMop48= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.0 h1:1eHu3/pUSWaOgltNK3WJFaywKsTIr/PwvHyDmi0lQA0= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.0/go.mod h1:HyABWq60Uy1kjJSa2BVOxUVao8Cdick5AWSKPutqy6U= go.opentelemetry.io/otel v1.20.0 h1:vsb/ggIY+hUjD/zCAQHpzTmndPqv/ml2ArbsbfBYTAc= go.opentelemetry.io/otel v1.20.0/go.mod h1:oUIGj3D77RwJdM6PPZImDpSZGDvkD9fhesHny69JFrs= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.20.0 h1:DeFD0VgTZ+Cj6hxravYYZE2W4GlneVH81iAOPjZkzk8= @@ -252,8 +250,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.18.0 h1:PGVlW0xEltQnzFZ55hkuX5+KLyrMYhHld1YHO4AKcdc= -golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg= +golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= +golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= golang.org/x/exp v0.0.0-20230905200255-921286631fa9 h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g= golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= @@ -267,8 +265,8 @@ golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net 
v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= -golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= +golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= +golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/oauth2 v0.12.0 h1:smVPGxink+n1ZI5pkQa8y6fZT0RW0MgCO5bFpepy4B4= golang.org/x/oauth2 v0.12.0/go.mod h1:A74bZ3aGXgCY0qaIC9Ahg6Lglin4AMAco8cIv9baba4= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -287,12 +285,12 @@ golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU= -golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= +golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.16.0 h1:m+B6fahuftsE9qjo0VWp2FW0mB3MTJvR0BaMQrq0pmE= -golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY= +golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= +golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= @@ -345,39 +343,39 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -k8s.io/api v0.29.3 h1:2ORfZ7+bGC3YJqGpV0KSDDEVf8hdGQ6A03/50vj8pmw= -k8s.io/api v0.29.3/go.mod h1:y2yg2NTyHUUkIoTC+phinTnEa3KFM6RZ3szxt014a80= +k8s.io/api v0.29.4 h1:WEnF/XdxuCxdG3ayHNRR8yH3cI1B/llkWBma6bq4R3w= +k8s.io/api v0.29.4/go.mod h1:DetSv0t4FBTcEpfA84NJV3g9a7+rSzlUHk5ADAYHUv0= k8s.io/apiextensions-apiserver v0.29.2 h1:UK3xB5lOWSnhaCk0RFZ0LUacPZz9RY4wi/yt2Iu+btg= k8s.io/apiextensions-apiserver v0.29.2/go.mod h1:aLfYjpA5p3OwtqNXQFkhJ56TB+spV8Gc4wfMhUA3/b8= -k8s.io/apimachinery v0.29.3 h1:2tbx+5L7RNvqJjn7RIuIKu9XTsIZ9Z5wX2G22XAa5EU= -k8s.io/apimachinery v0.29.3/go.mod h1:hx/S4V2PNW4OMg3WizRrHutyB5la0iCUbZym+W0EQIU= -k8s.io/apiserver v0.29.3 h1:xR7ELlJ/BZSr2n4CnD3lfA4gzFivh0wwfNfz9L0WZcE= -k8s.io/apiserver v0.29.3/go.mod h1:hrvXlwfRulbMbBgmWRQlFru2b/JySDpmzvQwwk4GUOs= +k8s.io/apimachinery v0.29.4 h1:RaFdJiDmuKs/8cm1M6Dh1Kvyh59YQFDcFuFTSmXes6Q= +k8s.io/apimachinery v0.29.4/go.mod h1:i3FJVwhvSp/6n8Fl4K97PJEP8C+MM+aoDq4+ZJBf70Y= +k8s.io/apiserver v0.29.4 h1:wPwGOO58GQOpRiZu59P5eRoDcB7QtV+QBglkRiXwCiM= +k8s.io/apiserver v0.29.4/go.mod 
h1:VqTF9t98HVfhKZVRohCPezsdUt9u2g3bHKftxGcXoRo= k8s.io/autoscaler/cluster-autoscaler/apis v0.0.0-20240325113845-0130d33747bb h1:ycQ/tSpcJEUHHx0pv6MXdq4NcRflCvFX6SMwmKROiis= k8s.io/autoscaler/cluster-autoscaler/apis v0.0.0-20240325113845-0130d33747bb/go.mod h1:LPhCVj3E5Lp9W6HGVlW664m/X+KN2firfF3wtBBji54= -k8s.io/client-go v0.29.3 h1:R/zaZbEAxqComZ9FHeQwOh3Y1ZUs7FaHKZdQtIc2WZg= -k8s.io/client-go v0.29.3/go.mod h1:tkDisCvgPfiRpxGnOORfkljmS+UrW+WtXAy2fTvXJB0= -k8s.io/code-generator v0.29.3 h1:m7E25/t9R9NvejspO2zBdyu+/Gl0Z5m7dCRc680KS14= -k8s.io/code-generator v0.29.3/go.mod h1:x47ofBhN4gxYFcxeKA1PYXeaPreAGaDN85Y/lNUsPoM= -k8s.io/component-base v0.29.3 h1:Oq9/nddUxlnrCuuR2K/jp6aflVvc0uDvxMzAWxnGzAo= -k8s.io/component-base v0.29.3/go.mod h1:Yuj33XXjuOk2BAaHsIGHhCKZQAgYKhqIxIjIr2UXYio= -k8s.io/component-helpers v0.29.3 h1:1dqZswuZgT2ZMixYeORyCUOAApXxgsvjVSgfoUT+P4o= -k8s.io/component-helpers v0.29.3/go.mod h1:yiDqbRQrnQY+sPju/bL7EkwDJb6LVOots53uZNMZBos= +k8s.io/client-go v0.29.4 h1:79ytIedxVfyXV8rpH3jCBW0u+un0fxHDwX5F9K8dPR8= +k8s.io/client-go v0.29.4/go.mod h1:kC1thZQ4zQWYwldsfI088BbK6RkxK+aF5ebV8y9Q4tk= +k8s.io/code-generator v0.29.4 h1:8ESudFNbY5/9BzB8KOEFG2uV9Q0AQxkc4mrQESr30Ks= +k8s.io/code-generator v0.29.4/go.mod h1:7TYnI0dYItL2cKuhhgPSuF3WED9uMdELgbVXFfn/joE= +k8s.io/component-base v0.29.4 h1:xeKzuuHI/1tjleu5jycDAcYbhAxeGHCQBZUY2eRIkOo= +k8s.io/component-base v0.29.4/go.mod h1:pYjt+oEZP9gtmwSikwAJgfSBikqKX2gOqRat0QjmQt0= +k8s.io/component-helpers v0.29.4 h1:lbVFhywtv64KlaIYTKszkHaFAqwCjNn7xyRTeWorzfI= +k8s.io/component-helpers v0.29.4/go.mod h1:rMOVMGYEju7/GKMV0USfYAYJBIQdxlMMN1VFl/Mf2so= k8s.io/gengo v0.0.0-20230829151522-9cce18d56c01 h1:pWEwq4Asjm4vjW7vcsmijwBhOr1/shsbSYiWXmNGlks= k8s.io/gengo v0.0.0-20230829151522-9cce18d56c01/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E= k8s.io/klog/v2 v2.2.0/go.mod h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y= k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0= k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo= -k8s.io/kms v0.29.3 h1:ReljsAUhYlm2spdT4yXmY+9a8x8dc/OT4mXvwQPPteQ= -k8s.io/kms v0.29.3/go.mod h1:TBGbJKpRUMk59neTMDMddjIDL+D4HuFUbpuiuzmOPg0= +k8s.io/kms v0.29.4 h1:cFGEoCLwoXk/eqYZppLZxybCdmEWeRKMCbm9f13IdRQ= +k8s.io/kms v0.29.4/go.mod h1:vWVImKkJd+1BQY4tBwdfSwjQBiLrnbNtHADcDEDQFtk= k8s.io/kube-aggregator v0.28.1 h1:rvG4llYnQKHjj6YjjoBPEJxfD1uH0DJwkrJTNKGAaCs= k8s.io/kube-aggregator v0.28.1/go.mod h1:JaLizMe+AECSpO2OmrWVsvnG0V3dX1RpW+Wq/QHbu18= k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 h1:aVUu9fTY98ivBPKR9Y5w/AuzbMm96cd3YHRTU83I780= k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00/go.mod h1:AsvuZPBlUDVuCdzJ87iajxtXuR9oktsTctW/R9wwouA= -k8s.io/metrics v0.29.3 h1:nN+eavbMQ7Kuif2tIdTr2/F2ec2E/SIAWSruTZ+Ye6U= -k8s.io/metrics v0.29.3/go.mod h1:kb3tGGC4ZcIDIuvXyUE291RwJ5WmDu0tB4wAVZM6h2I= -k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= -k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +k8s.io/metrics v0.29.4 h1:06sZ63/Kt9HEb5GP/1y6xbHDz6XkxnHpu949UdXfoXQ= +k8s.io/metrics v0.29.4/go.mod h1:ZN9peB0nLTqPZuwQna8ZUrPFJQ0i8QNH4pqRJopS+9c= +k8s.io/utils v0.0.0-20240102154912-e7106e64919e h1:eQ/4ljkx21sObifjzXwlPKpdGLrCfRziVtos3ofG/sQ= +k8s.io/utils v0.0.0-20240102154912-e7106e64919e/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.28.0 h1:TgtAeesdhpm2SGwkQasmbeqDo8th5wOBA5h/AjTKA4I= 
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.28.0/go.mod h1:VHVDI/KrK4fjnV61bE2g3sA7tiETLn8sooImelsCx3Y= sigs.k8s.io/controller-runtime v0.17.3 h1:65QmN7r3FWgTxDMz9fvGnO1kbf2nu+acg9p2R9oYYYk= diff --git a/pkg/controller/jobs/mpijob/mpijob_controller_test.go b/pkg/controller/jobs/mpijob/mpijob_controller_test.go index 0de713b22e..71f3e68e46 100644 --- a/pkg/controller/jobs/mpijob/mpijob_controller_test.go +++ b/pkg/controller/jobs/mpijob/mpijob_controller_test.go @@ -21,7 +21,6 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" - common "github.com/kubeflow/common/pkg/apis/common/v1" kubeflow "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v2beta1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -53,7 +52,7 @@ func TestCalcPriorityClassName(t *testing.T) { PriorityClass: "scheduling-priority", }, }, - MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{ + MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*kubeflow.ReplicaSpec{ kubeflow.MPIReplicaTypeLauncher: { Template: corev1.PodTemplateSpec{ Spec: corev1.PodSpec{ @@ -79,7 +78,7 @@ func TestCalcPriorityClassName(t *testing.T) { RunPolicy: kubeflow.RunPolicy{ SchedulingPolicy: &kubeflow.SchedulingPolicy{}, }, - MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{ + MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*kubeflow.ReplicaSpec{ kubeflow.MPIReplicaTypeLauncher: { Template: corev1.PodTemplateSpec{ Spec: corev1.PodSpec{ @@ -95,7 +94,7 @@ func TestCalcPriorityClassName(t *testing.T) { "specified on launcher takes precedence over worker": { job: kubeflow.MPIJob{ Spec: kubeflow.MPIJobSpec{ - MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{ + MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*kubeflow.ReplicaSpec{ kubeflow.MPIReplicaTypeLauncher: { Template: corev1.PodTemplateSpec{ Spec: corev1.PodSpec{ @@ -118,7 +117,7 @@ func TestCalcPriorityClassName(t *testing.T) { "launcher present, but without priority; fallback to worker": { job: kubeflow.MPIJob{ Spec: kubeflow.MPIJobSpec{ - MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{ + MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*kubeflow.ReplicaSpec{ kubeflow.MPIReplicaTypeLauncher: { Template: corev1.PodTemplateSpec{ Spec: corev1.PodSpec{}, @@ -139,7 +138,7 @@ func TestCalcPriorityClassName(t *testing.T) { "specified on worker only": { job: kubeflow.MPIJob{ Spec: kubeflow.MPIJobSpec{ - MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{ + MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*kubeflow.ReplicaSpec{ kubeflow.MPIReplicaTypeLauncher: {}, kubeflow.MPIReplicaTypeWorker: { Template: corev1.PodTemplateSpec{ @@ -156,7 +155,7 @@ func TestCalcPriorityClassName(t *testing.T) { "worker present, but without priority; fallback to empty": { job: kubeflow.MPIJob{ Spec: kubeflow.MPIJobSpec{ - MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{ + MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*kubeflow.ReplicaSpec{ kubeflow.MPIReplicaTypeLauncher: {}, kubeflow.MPIReplicaTypeWorker: { Template: corev1.PodTemplateSpec{ diff --git a/pkg/util/testingjobs/mpijob/wrappers_mpijob.go b/pkg/util/testingjobs/mpijob/wrappers_mpijob.go index 57228bb463..42be69ce49 100644 --- a/pkg/util/testingjobs/mpijob/wrappers_mpijob.go +++ b/pkg/util/testingjobs/mpijob/wrappers_mpijob.go @@ -17,7 +17,6 @@ limitations under the License. 
package testing import ( - common "github.com/kubeflow/common/pkg/apis/common/v1" kubeflow "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v2beta1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -43,7 +42,7 @@ func MakeMPIJob(name, ns string) *MPIJobWrapper { RunPolicy: kubeflow.RunPolicy{ Suspend: ptr.To(true), }, - MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{ + MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*kubeflow.ReplicaSpec{ kubeflow.MPIReplicaTypeLauncher: { Replicas: ptr.To[int32](1), Template: corev1.PodTemplateSpec{ From 59a77a8fb3e98b294308e2df13592a61862f8713 Mon Sep 17 00:00:00 2001 From: peng Date: Fri, 19 Apr 2024 20:49:28 +0800 Subject: [PATCH 08/49] Update kubeflow sample pytorchjob to pytorch 2.x and cuda 12.x (#1910) (#1992) Signed-off-by: wangdepeng --- site/static/examples/jobs/sample-pytorchjob.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/site/static/examples/jobs/sample-pytorchjob.yaml b/site/static/examples/jobs/sample-pytorchjob.yaml index 37acc3bc7f..bbb5c9e28c 100644 --- a/site/static/examples/jobs/sample-pytorchjob.yaml +++ b/site/static/examples/jobs/sample-pytorchjob.yaml @@ -14,7 +14,9 @@ spec: spec: containers: - name: pytorch - image: docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727 + image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v1beta1-21320b6 +# If you have gpu, pytorch-mnist-gpu would be helpful. pytorch-mnist-gpu is approximately 22GB +# image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest imagePullPolicy: Always command: - "python3" @@ -31,7 +33,9 @@ spec: spec: containers: - name: pytorch - image: docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727 + image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v1beta1-21320b6 +# If you have gpu, pytorch-mnist-gpu would be helpful. pytorch-mnist-gpu is approximately 22GB +# image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest imagePullPolicy: Always command: - "python3" From 497050a385444bb2f60845675486f8713738ea13 Mon Sep 17 00:00:00 2001 From: Traian Schiau <55734665+trasc@users.noreply.github.com> Date: Fri, 19 Apr 2024 17:42:29 +0300 Subject: [PATCH 09/49] [test] Scalability runner metrics (#1987) * Add runner metrics This reverts commit c64bb7d36acc04487539f6ec28abc5b4e74a0bc6. 
* Review Remarks * Review remarks * Use different channels for wl and cq events --- Makefile | 1 + test/scalability/checker/checker_test.go | 45 ++ test/scalability/default_rangespec.yaml | 8 + .../runner/controller/controller.go | 11 +- test/scalability/runner/main.go | 16 + test/scalability/runner/recorder/recorder.go | 387 +++++++++++++++++- 6 files changed, 451 insertions(+), 17 deletions(-) diff --git a/Makefile b/Makefile index 86b6a1db1b..240beee7f8 100644 --- a/Makefile +++ b/Makefile @@ -246,6 +246,7 @@ run-scalability: envtest scalability-runner minimalkueue .PHONY: test-scalability test-scalability: gotestsum run-scalability $(GOTESTSUM) --junitfile $(ARTIFACTS)/junit.xml -- $(GO_TEST_FLAGS) ./test/scalability/checker \ + --summary=$(SCALABILITY_RUN_DIR)/summary.yaml \ --cmdStats=$(SCALABILITY_RUN_DIR)/minimalkueue.stats.yaml \ --range=$(PROJECT_DIR)/test/scalability/default_rangespec.yaml diff --git a/test/scalability/checker/checker_test.go b/test/scalability/checker/checker_test.go index 57aa5eff70..09a0d40d91 100644 --- a/test/scalability/checker/checker_test.go +++ b/test/scalability/checker/checker_test.go @@ -23,10 +23,12 @@ import ( "sigs.k8s.io/yaml" + "sigs.k8s.io/kueue/test/scalability/runner/recorder" "sigs.k8s.io/kueue/test/scalability/runner/stats" ) var ( + summaryFile = flag.String("summary", "", "the runner summary report") cmdStatsFile = flag.String("cmdStats", "", "command stats yaml file") rangeFile = flag.String("range", "", "expectations range file") ) @@ -38,9 +40,23 @@ type RangeSpec struct { MaxSysMs int64 `json:"maxSysMs"` Maxrss uint64 `json:"maxrss"` } `json:"cmd"` + ClusterQueueClassesMinUsage map[string]float64 `json:"clusterQueueClassesMinUsage"` + WlClassesMaxAvgTimeToAdmissionMs map[string]int64 `json:"wlClassesMaxAvgTimeToAdmissionMs"` } func TestScalability(t *testing.T) { + summaryBytes, err := os.ReadFile(*summaryFile) + if err != nil { + t.Fatalf("Unable to read summary: %s", err) + } + + summary := recorder.Summary{} + + err = yaml.UnmarshalStrict(summaryBytes, &summary) + if err != nil { + t.Fatalf("Unable to unmarshal summary: %s", err) + } + cmdStatsBytes, err := os.ReadFile(*cmdStatsFile) if err != nil { t.Fatalf("Unable to read command stats: %s", err) @@ -77,4 +93,33 @@ func TestScalability(t *testing.T) { t.Errorf("Maxrss %dKib is greater than maximum expected %dKib", cmdStats.Maxrss, rangeSpec.Cmd.Maxrss) } }) + + t.Run("ClusterQueueClasses", func(t *testing.T) { + for c, cqcSummarry := range summary.ClusterQueueClasses { + t.Run(c, func(t *testing.T) { + expected, found := rangeSpec.ClusterQueueClassesMinUsage[c] + if !found { + t.Fatalf("Unexpected class") + } + actual := float64(cqcSummarry.CPUUsed) * 100 / (float64(cqcSummarry.NominalQuota) * float64(cqcSummarry.LastEventTime.Sub(cqcSummarry.FirstEventTime).Milliseconds())) + if actual < expected { + t.Errorf("Usage %.2f%% is less then expected %.2f%%", actual, expected) + } + }) + } + }) + + t.Run("WorkloadClasses", func(t *testing.T) { + for c, wlcSummary := range summary.WorkloadClasses { + t.Run(c, func(t *testing.T) { + expected, found := rangeSpec.WlClassesMaxAvgTimeToAdmissionMs[c] + if !found { + t.Fatalf("Unexpected class") + } + if wlcSummary.AverageTimeToAdmissionMs > expected { + t.Errorf("Average wait for admission %dms is more then expected %dms", wlcSummary.AverageTimeToAdmissionMs, expected) + } + }) + } + }) } diff --git a/test/scalability/default_rangespec.yaml b/test/scalability/default_rangespec.yaml index e57e111793..cd13b6714b 100644 --- 
a/test/scalability/default_rangespec.yaml +++ b/test/scalability/default_rangespec.yaml @@ -5,3 +5,11 @@ cmd: maxUserMs: 3600_000 maxSysMs: 3600_000 maxrss: 1024_000 #1000MiB + +clusterQueueClassesMinUsage: + cq: 10 #10% + +wlClassesMaxAvgTimeToAdmissionMs: + large: 3600_000 #1h + medium: 3600_000 + small: 3600_000 diff --git a/test/scalability/runner/controller/controller.go b/test/scalability/runner/controller/controller.go index 9c37e121ef..3e6cab6ca2 100644 --- a/test/scalability/runner/controller/controller.go +++ b/test/scalability/runner/controller/controller.go @@ -73,7 +73,10 @@ var _ reconcile.Reconciler = (*reconciler)(nil) var _ predicate.Predicate = (*reconciler)(nil) func (r *reconciler) Create(ev event.CreateEvent) bool { - _, isWl := (ev.Object).(*kueue.Workload) + wl, isWl := (ev.Object).(*kueue.Workload) + if isWl { + r.recorder.RecordWorkloadState(wl) + } return !isWl } @@ -90,6 +93,8 @@ func (r *reconciler) Update(ev event.UpdateEvent) bool { admitted := apimeta.IsStatusConditionTrue(wl.Status.Conditions, kueue.WorkloadAdmitted) r.setAdmittedTime(wl.UID, admitted) + r.recorder.RecordWorkloadState(wl) + return admitted && !apimeta.IsStatusConditionTrue(wl.Status.Conditions, kueue.WorkloadFinished) } @@ -155,12 +160,12 @@ func (r *reconciler) SetupWithManager(mgr ctrl.Manager) error { cqHandler := handler.Funcs{ CreateFunc: func(_ context.Context, ev event.CreateEvent, _ workqueue.RateLimitingInterface) { if cq, isCq := ev.Object.(*kueue.ClusterQueue); isCq { - r.recorder.RecordCQStatus(cq) + r.recorder.RecordCQState(cq) } }, UpdateFunc: func(_ context.Context, ev event.UpdateEvent, _ workqueue.RateLimitingInterface) { if cq, isCq := ev.ObjectNew.(*kueue.ClusterQueue); isCq { - r.recorder.RecordCQStatus(cq) + r.recorder.RecordCQState(cq) } }, } diff --git a/test/scalability/runner/main.go b/test/scalability/runner/main.go index 2ce9f1d30f..b134845506 100644 --- a/test/scalability/runner/main.go +++ b/test/scalability/runner/main.go @@ -196,6 +196,22 @@ func main() { os.Exit(1) } + err = recorder.WriteSummary(path.Join(*outputDir, "summary.yaml")) + if err != nil { + log.Error(err, "Writing summary") + os.Exit(1) + } + err = recorder.WriteCQCsv(path.Join(*outputDir, "cqStates.csv")) + if err != nil { + log.Error(err, "Writing cq csv") + os.Exit(1) + } + err = recorder.WriteWLCsv(path.Join(*outputDir, "wlStates.csv")) + if err != nil { + log.Error(err, "Writing wl csv") + os.Exit(1) + } + if *minimalKueuePath == "" { c, err := client.New(cfg, client.Options{Scheme: scheme}) if err != nil { diff --git a/test/scalability/runner/recorder/recorder.go b/test/scalability/runner/recorder/recorder.go index 442f9454ca..065addc4ac 100644 --- a/test/scalability/runner/recorder/recorder.go +++ b/test/scalability/runner/recorder/recorder.go @@ -18,27 +18,124 @@ package recorder import ( "context" + "encoding/csv" + "os" + "strconv" "sync/atomic" "time" apimeta "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/yaml" kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" + "sigs.k8s.io/kueue/test/scalability/runner/generator" ) -type CQStatus struct { - Name string +type CQEvent struct { + Time time.Time + Name string + ClassName string + Cohort string + UID types.UID + + CPUReservation int64 + CPUUsage int64 + CPUQuota int64 PendingWorkloads int32 ReservingWorkloads int32 + AdmittedWorkloads int32 Active bool } -type Store map[string]CQStatus +type CQState struct { + FirstEventTime time.Time + FirstActiveTime time.Time + CPUUsed int64 + CPUMaxUsage int64 + 
LastEvent *CQEvent +} + +var CQStateCsvHeader = []string{ + "name", + "cohort", + "class name", + "CPU quota (mCPU)", + "CPU used (mCPU * ms)", + "CPU max usage (mCPU)", + "monitor time (ms)", +} + +func (cqs *CQState) CsvRecord() []string { + monitoringTimeMs := cqs.LastEvent.Time.Sub(cqs.FirstEventTime).Milliseconds() + return []string{ + cqs.LastEvent.Name, + cqs.LastEvent.Cohort, + cqs.LastEvent.ClassName, + strconv.FormatInt(cqs.LastEvent.CPUQuota, 10), + strconv.FormatInt(cqs.CPUUsed, 10), + strconv.FormatInt(cqs.CPUMaxUsage, 10), + strconv.FormatInt(monitoringTimeMs, 10), + } +} + +type CQStore map[string]*CQState + +type WLEvent struct { + Time time.Time + types.NamespacedName + UID types.UID + ClassName string + Admitted bool + Evicted bool + Finished bool +} + +type WLState struct { + Id int + types.NamespacedName + ClassName string + FirstEventTime time.Time + TimeToAdmitMs int64 + TimeToFinishedMs int64 + EvictionCount int32 + LastEvent *WLEvent +} + +var WLStateCsvHeader = []string{ + "id", + "class name", + "namespace", + "name", + "ms to admitted", + "ms to finish", + "num evictions", +} + +func (wls *WLState) CsvRecord() []string { + return []string{ + strconv.Itoa(wls.Id), + wls.ClassName, + wls.Namespace, + wls.Name, + strconv.FormatInt(wls.TimeToAdmitMs, 10), + strconv.FormatInt(wls.TimeToFinishedMs, 10), + strconv.FormatInt(int64(wls.EvictionCount), 10), + } +} + +type WLStore map[types.UID]*WLState + +type Store struct { + CQ CQStore + WL WLStore +} type Recorder struct { maxRecording time.Duration running atomic.Bool - evChan chan CQStatus + cqEvChan chan *CQEvent + wlEvChan chan *WLEvent Store Store } @@ -47,17 +144,67 @@ func New(maxRecording time.Duration) *Recorder { return &Recorder{ maxRecording: maxRecording, running: atomic.Bool{}, - evChan: make(chan CQStatus, 10), - Store: map[string]CQStatus{}, + cqEvChan: make(chan *CQEvent, 10), + wlEvChan: make(chan *WLEvent, 10), + Store: Store{ + CQ: make(CQStore), + WL: make(WLStore), + }, + } +} + +func (r *Recorder) recordCQEvent(ev *CQEvent) { + state, found := r.Store.CQ[ev.Name] + if !found { + state = &CQState{ + FirstEventTime: ev.Time, + LastEvent: ev, + } + r.Store.CQ[ev.Name] = state + } else { + if state.LastEvent.CPUUsage > 0 { + state.CPUUsed += state.LastEvent.CPUUsage * ev.Time.Sub(state.LastEvent.Time).Milliseconds() + } + state.LastEvent = ev } + + if ev.Active && state.FirstActiveTime.IsZero() { + state.FirstActiveTime = ev.Time + } + state.CPUMaxUsage = max(state.CPUMaxUsage, ev.CPUUsage) } -func (r *Recorder) record(ev CQStatus) { - r.Store[ev.Name] = ev +func (r *Recorder) recordWLEvent(ev *WLEvent) { + state, found := r.Store.WL[ev.UID] + if !found { + state = &WLState{ + Id: len(r.Store.WL), + NamespacedName: ev.NamespacedName, + ClassName: ev.ClassName, + FirstEventTime: ev.Time, + LastEvent: &WLEvent{}, + } + r.Store.WL[ev.UID] = state + } + + if ev.Admitted && !state.LastEvent.Admitted { + state.TimeToAdmitMs = ev.Time.Sub(state.FirstEventTime).Milliseconds() + } + + if ev.Evicted && !state.LastEvent.Evicted { + state.EvictionCount++ + } + + if ev.Finished && !state.LastEvent.Finished { + state.TimeToFinishedMs = ev.Time.Sub(state.FirstEventTime).Milliseconds() + } + + state.LastEvent = ev } func (r *Recorder) expectMoreEvents() bool { - for _, s := range r.Store { + for _, cqStatus := range r.Store.CQ { + s := cqStatus.LastEvent if (s.PendingWorkloads > 0 || s.ReservingWorkloads > 0) && s.Active { return true } @@ -65,6 +212,176 @@ func (r *Recorder) expectMoreEvents() bool { return false } 
+type CQGroupSummary struct { + CPUUsed int64 `json:"cpuUsed"` + CPUAverageUsage int64 `json:"cpuAverageUsage"` + NominalQuota int64 `json:"nominalQuota"` + FirstEventTime time.Time `json:"firstEventTime"` + LastEventTime time.Time `json:"lastEventTime"` +} + +func (qgs *CQGroupSummary) AddQueueSummary(qs *CQState) { + qgs.CPUUsed += qs.CPUUsed + qgs.NominalQuota += qs.LastEvent.CPUQuota + if qs.FirstEventTime.Before(qgs.FirstEventTime) { + qgs.FirstEventTime = qs.FirstEventTime + } + if qs.LastEvent.Time.After(qgs.LastEventTime) { + qgs.LastEventTime = qs.LastEvent.Time + } +} + +func (qgs *CQGroupSummary) refreshAverage() { + monitoringTime := qgs.LastEventTime.Sub(qgs.FirstEventTime).Milliseconds() + if monitoringTime > 0 { + qgs.CPUAverageUsage = qgs.CPUUsed / monitoringTime + } +} + +func newCQGroupSummary(qs *CQState) *CQGroupSummary { + ret := &CQGroupSummary{ + CPUUsed: qs.CPUUsed, + CPUAverageUsage: 0, + NominalQuota: qs.LastEvent.CPUQuota, + FirstEventTime: qs.FirstEventTime, + LastEventTime: qs.LastEvent.Time, + } + return ret +} + +type WorkloadsClassSummary struct { + Count int32 `json:"count"` + totalTimeToAdmissionMs int64 `json:"-"` + totalTimeToFinishMs int64 `json:"-"` + TotalEvictions int32 `json:"totalEvictions"` + AverageTimeToAdmissionMs int64 `json:"averageTimeToAdmissionMs"` + AverageTimeToFinishMs int64 `json:"averageTimeToFinishMs"` +} + +func (wcs *WorkloadsClassSummary) refreshAverage() { + if wcs == nil || wcs.Count == 0 { + return + } + wcs.AverageTimeToAdmissionMs = wcs.totalTimeToAdmissionMs / int64(wcs.Count) + wcs.AverageTimeToFinishMs = wcs.totalTimeToFinishMs / int64(wcs.Count) +} + +type Summary struct { + ClusterQueueClasses map[string]*CQGroupSummary `json:"clusterQueueClasses"` + WorkloadClasses map[string]*WorkloadsClassSummary `json:"workloadClasses"` +} + +func (r *Recorder) WriteSummary(path string) error { + summary := Summary{ + ClusterQueueClasses: map[string]*CQGroupSummary{}, + WorkloadClasses: map[string]*WorkloadsClassSummary{}, + } + + for _, cqState := range r.Store.CQ { + if cqState.LastEvent == nil { + continue + } + if groupSummary, found := summary.ClusterQueueClasses[cqState.LastEvent.ClassName]; found { + groupSummary.AddQueueSummary(cqState) + } else { + summary.ClusterQueueClasses[cqState.LastEvent.ClassName] = newCQGroupSummary(cqState) + } + } + + for _, group := range summary.ClusterQueueClasses { + group.refreshAverage() + } + + for _, wlState := range r.Store.WL { + if class, found := summary.WorkloadClasses[wlState.ClassName]; !found { + summary.WorkloadClasses[wlState.ClassName] = &WorkloadsClassSummary{ + Count: 1, + totalTimeToAdmissionMs: wlState.TimeToAdmitMs, + totalTimeToFinishMs: wlState.TimeToFinishedMs, + TotalEvictions: wlState.EvictionCount, + } + } else { + class.Count++ + class.totalTimeToAdmissionMs += wlState.TimeToAdmitMs + class.totalTimeToFinishMs += wlState.TimeToFinishedMs + class.TotalEvictions += wlState.EvictionCount + } + } + + for _, class := range summary.WorkloadClasses { + class.refreshAverage() + } + + bytes, err := yaml.Marshal(summary) + if err != nil { + return err + } + + return os.WriteFile(path, bytes, 0666) +} + +func (r *Recorder) WriteCQCsv(path string) (err error) { + var f *os.File + f, err = os.Create(path) + if err != nil { + return err + } + defer f.Close() + cWriter := csv.NewWriter(f) + + defer func() { + cWriter.Flush() + if err == nil { + err = cWriter.Error() + } + }() + + err = cWriter.Write(CQStateCsvHeader) + if err != nil { + return err + } + + for _, cqs := range 
r.Store.CQ { + err = cWriter.Write(cqs.CsvRecord()) + if err != nil { + return err + } + } + + return err +} + +func (r *Recorder) WriteWLCsv(path string) (err error) { + var f *os.File + f, err = os.Create(path) + if err != nil { + return err + } + defer f.Close() + cWriter := csv.NewWriter(f) + + defer func() { + cWriter.Flush() + if err == nil { + err = cWriter.Error() + } + }() + + err = cWriter.Write(WLStateCsvHeader) + if err != nil { + return err + } + + for _, ev := range r.Store.WL { + err = cWriter.Write(ev.CsvRecord()) + if err != nil { + return err + } + } + + return err +} + func (r *Recorder) Run(ctx context.Context, genDone <-chan struct{}) error { r.running.Store(true) defer r.running.Store(false) @@ -83,24 +400,66 @@ func (r *Recorder) Run(ctx context.Context, genDone <-chan struct{}) error { select { case <-ctx.Done(): return ctx.Err() - case ev := <-r.evChan: - r.record(ev) + case ev := <-r.cqEvChan: + r.recordCQEvent(ev) if generateDone.Load() && !r.expectMoreEvents() { return nil } + case ev := <-r.wlEvChan: + r.recordWLEvent(ev) } } } -func (r *Recorder) RecordCQStatus(cq *kueue.ClusterQueue) { +func (r *Recorder) RecordWorkloadState(wl *kueue.Workload) { if !r.running.Load() { return } + r.wlEvChan <- &WLEvent{ + Time: time.Now(), + NamespacedName: types.NamespacedName{ + Namespace: wl.Namespace, + Name: wl.Name, + }, + UID: wl.UID, + ClassName: wl.Labels[generator.ClassLabel], + Admitted: apimeta.IsStatusConditionTrue(wl.Status.Conditions, kueue.WorkloadAdmitted), + Evicted: apimeta.IsStatusConditionTrue(wl.Status.Conditions, kueue.WorkloadEvicted), + Finished: apimeta.IsStatusConditionTrue(wl.Status.Conditions, kueue.WorkloadFinished), + } +} + +func (r *Recorder) RecordCQState(cq *kueue.ClusterQueue) { + if !r.running.Load() { + return + } + + var cpuReserved, cpuUsed, cpuQuota int64 + if len(cq.Status.FlavorsReservation) > 0 && len(cq.Status.FlavorsReservation[0].Resources) > 0 { + cpuReserved = cq.Status.FlavorsReservation[0].Resources[0].Total.MilliValue() + } + + if len(cq.Status.FlavorsUsage) > 0 && len(cq.Status.FlavorsUsage[0].Resources) > 0 { + cpuUsed = cq.Status.FlavorsUsage[0].Resources[0].Total.MilliValue() + } + + if len(cq.Spec.ResourceGroups) > 0 && len(cq.Spec.ResourceGroups[0].Flavors) > 0 && len(cq.Spec.ResourceGroups[0].Flavors[0].Resources) > 0 { + cpuQuota = cq.Spec.ResourceGroups[0].Flavors[0].Resources[0].NominalQuota.MilliValue() + } + + r.cqEvChan <- &CQEvent{ + Time: time.Now(), + Name: cq.Name, + ClassName: cq.Labels[generator.ClassLabel], + Cohort: cq.Spec.Cohort, + UID: cq.UID, - r.evChan <- CQStatus{ - Name: cq.Name, + CPUReservation: cpuReserved, + CPUUsage: cpuUsed, + CPUQuota: cpuQuota, PendingWorkloads: cq.Status.PendingWorkloads, ReservingWorkloads: cq.Status.ReservingWorkloads, + AdmittedWorkloads: cq.Status.AdmittedWorkloads, Active: apimeta.IsStatusConditionTrue(cq.Status.Conditions, kueue.AdmissionCheckActive), } } From 34dc915d93ca1b206672dfe96943765a1ac15067 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Mon, 22 Apr 2024 16:00:28 +0800 Subject: [PATCH 10/49] Clickable headers in documentation (#2006) --- site/layouts/_default/_markup/render-heading.html | 6 ++++++ site/layouts/partials/anchor.html | 4 ++++ 2 files changed, 10 insertions(+) create mode 100644 site/layouts/_default/_markup/render-heading.html create mode 100644 site/layouts/partials/anchor.html diff --git a/site/layouts/_default/_markup/render-heading.html b/site/layouts/_default/_markup/render-heading.html new file mode 100644 index 0000000000..e322af4159 
--- /dev/null +++ b/site/layouts/_default/_markup/render-heading.html @@ -0,0 +1,6 @@ + +{{ .Text | safeHTML }} + + {{ partial "anchor.html" . }} + + diff --git a/site/layouts/partials/anchor.html b/site/layouts/partials/anchor.html new file mode 100644 index 0000000000..83e19c7441 --- /dev/null +++ b/site/layouts/partials/anchor.html @@ -0,0 +1,4 @@ + + + + \ No newline at end of file From 5762eee822b610d518b539423859f6fcff436476 Mon Sep 17 00:00:00 2001 From: Oleksandr Redko Date: Mon, 22 Apr 2024 11:34:36 +0300 Subject: [PATCH 11/49] [jobframework] Fix logging of error message (#1944) --- .golangci.yaml | 1 + pkg/controller/jobframework/reconciler.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.golangci.yaml b/.golangci.yaml index b4ba30a602..6c0fdd3289 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -29,6 +29,7 @@ linters: - gocritic - goimports - govet + - loggercheck - misspell - unconvert diff --git a/pkg/controller/jobframework/reconciler.go b/pkg/controller/jobframework/reconciler.go index 26b67f5e88..e61257e4ac 100644 --- a/pkg/controller/jobframework/reconciler.go +++ b/pkg/controller/jobframework/reconciler.go @@ -402,7 +402,7 @@ func (r *JobReconciler) ReconcileGenericJob(ctx context.Context, req ctrl.Reques // Mark the workload as finished with failure since the is no point to retry. errUpdateStatus := workload.UpdateStatus(ctx, r.client, wl, kueue.WorkloadFinished, metav1.ConditionTrue, FailedToStartFinishedReason, err.Error(), constants.JobControllerName) if errUpdateStatus != nil { - log.Error(errUpdateStatus, "Updating workload status, on start failure %s", err.Error()) + log.Error(errUpdateStatus, "Updating workload status, on start failure", "err", err) } return ctrl.Result{}, errUpdateStatus } From 9723760039ed23338d4115b0843e0fc8b892426b Mon Sep 17 00:00:00 2001 From: Aldo Culquicondor <1299064+alculquicondor@users.noreply.github.com> Date: Mon, 22 Apr 2024 05:05:52 -0400 Subject: [PATCH 12/49] Rule s2-b for flat fair preemption (#2002) * Rule s2-b for flat fair preemption Change-Id: Iac87a154d8fe2e65b7e6a2097037f509a7d46b44 * Simplified algorithm and extra test cases Change-Id: I84df52813e3f2990f4a1699ee66f7ac35d7caefc * review Change-Id: Ifd8d09dd2768d68c67e20ce2f3280128d6f63057 --- pkg/queue/cluster_queue.go | 2 +- pkg/scheduler/preemption/preemption.go | 80 ++++++++++++++------- pkg/scheduler/preemption/preemption_test.go | 77 ++++++++++++++++++++ pkg/util/heap/heap.go | 4 +- 4 files changed, 134 insertions(+), 29 deletions(-) diff --git a/pkg/queue/cluster_queue.go b/pkg/queue/cluster_queue.go index c25018f156..d252e07591 100644 --- a/pkg/queue/cluster_queue.go +++ b/pkg/queue/cluster_queue.go @@ -95,7 +95,7 @@ func newClusterQueue(cq *kueue.ClusterQueue, wo workload.Ordering) (*ClusterQueu func newClusterQueueImpl(wo workload.Ordering, clock clock.Clock) *ClusterQueue { lessFunc := queueOrderingFunc(wo) return &ClusterQueue{ - heap: heap.New(workloadKey, lessFunc), + heap: *heap.New(workloadKey, lessFunc), inadmissibleWorkloads: make(map[string]*workload.Info), queueInadmissibleCycle: -1, lessFunc: lessFunc, diff --git a/pkg/scheduler/preemption/preemption.go b/pkg/scheduler/preemption/preemption.go index 5605859dc2..52681a4fad 100644 --- a/pkg/scheduler/preemption/preemption.go +++ b/pkg/scheduler/preemption/preemption.go @@ -256,35 +256,13 @@ func restoreSnapshot(snapshot *cache.Snapshot, targets []*workload.Info) { } func fairPreemptions(wl *workload.Info, assignment flavorassigner.Assignment, snapshot *cache.Snapshot, resPerFlv 
resourcesPerFlavor, candidates []*workload.Info, allowBorrowingBelowPriority *int32) []*workload.Info { - cqHeap := heap.New( - func(c *candidateCQ) string { - return c.cq.Name - }, - func(c1, c2 *candidateCQ) bool { - return c1.share > c2.share - }, - ) - for _, cand := range candidates { - candCQ := cqHeap.GetByKey(cand.ClusterQueue) - if candCQ == nil { - cq := snapshot.ClusterQueues[cand.ClusterQueue] - share, _ := cq.DominantResourceShare() - candCQ = &candidateCQ{ - cq: cq, - share: share, - workloads: []*workload.Info{cand}, - } - _ = cqHeap.PushIfNotPresent(candCQ) - } else { - candCQ.workloads = append(candCQ.workloads, cand) - } - } - + cqHeap := cqHeapFromCandidates(candidates, false, snapshot) nominatedCQ := snapshot.ClusterQueues[wl.ClusterQueue] newNominatedShareValue, _ := nominatedCQ.DominantResourceShareWith(wl) wlReq := totalRequestsForAssignment(wl, assignment) var targets []*workload.Info fits := false + var retryCandidates []*workload.Info for cqHeap.Len() > 0 && !fits { candCQ := cqHeap.Pop() @@ -323,12 +301,35 @@ func fairPreemptions(wl *workload.Info, assignment flavorassigner.Assignment, sn } // Might need to pick a different CQ due to changing values. break + } else { + retryCandidates = append(retryCandidates, candCQ.workloads[i]) } } } if !fits { - restoreSnapshot(snapshot, targets) - return nil + // Try rule S2-b in https://sigs.k8s.io/kueue/keps/1714-fair-sharing#choosing-workloads-from-clusterqueues-for-preemption + // if rule S2-a was not enough. + cqHeap = cqHeapFromCandidates(retryCandidates, true, snapshot) + + for cqHeap.Len() > 0 && !fits { + candCQ := cqHeap.Pop() + if newNominatedShareValue < candCQ.share { + // The criteria doesn't depend on the preempted workload, so just preempt the first candidate. + candWl := candCQ.workloads[0] + snapshot.RemoveWorkload(candWl) + targets = append(targets, candWl) + if workloadFits(wlReq, nominatedCQ, true) { + fits = true + } + // No requeueing because there doesn't seem to be an scenario where + // it's possible to apply rule S2-b more than once in a CQ. 
+ } + } + + if !fits { + restoreSnapshot(snapshot, targets) + return nil + } } targets = fillBackWorkloads(targets, wlReq, nominatedCQ, snapshot, true) restoreSnapshot(snapshot, targets) @@ -341,6 +342,33 @@ type candidateCQ struct { share int } +func cqHeapFromCandidates(candidates []*workload.Info, firstOnly bool, snapshot *cache.Snapshot) *heap.Heap[candidateCQ] { + cqHeap := heap.New( + func(c *candidateCQ) string { + return c.cq.Name + }, + func(c1, c2 *candidateCQ) bool { + return c1.share > c2.share + }, + ) + for _, cand := range candidates { + candCQ := cqHeap.GetByKey(cand.ClusterQueue) + if candCQ == nil { + cq := snapshot.ClusterQueues[cand.ClusterQueue] + share, _ := cq.DominantResourceShare() + candCQ = &candidateCQ{ + cq: cq, + share: share, + workloads: []*workload.Info{cand}, + } + cqHeap.PushOrUpdate(candCQ) + } else if !firstOnly { + candCQ.workloads = append(candCQ.workloads, cand) + } + } + return cqHeap +} + type resourcesPerFlavor map[kueue.ResourceFlavorReference]sets.Set[corev1.ResourceName] func resourcesRequiringPreemption(assignment flavorassigner.Assignment) resourcesPerFlavor { diff --git a/pkg/scheduler/preemption/preemption_test.go b/pkg/scheduler/preemption/preemption_test.go index 9351d2b4c7..421b2c1461 100644 --- a/pkg/scheduler/preemption/preemption_test.go +++ b/pkg/scheduler/preemption/preemption_test.go @@ -1522,6 +1522,83 @@ func TestFairPreemptions(t *testing.T) { targetCQ: "a", wantPreempted: sets.New("/a_low", "/b1"), }, + "preempt huge workload if there is no other option, as long as the target CQ gets a lower share": { + admitted: []kueue.Workload{ + *utiltesting.MakeWorkload("b1", "").Request(corev1.ResourceCPU, "9").SimpleReserveQuota("b", "default", now).Obj(), + }, + incoming: utiltesting.MakeWorkload("a_incoming", "").Request(corev1.ResourceCPU, "2").Obj(), + targetCQ: "a", + wantPreempted: sets.New("/b1"), + }, + "can't preempt huge workload if the incoming is also huge": { + admitted: []kueue.Workload{ + *utiltesting.MakeWorkload("a1", "").Request(corev1.ResourceCPU, "2").SimpleReserveQuota("a", "default", now).Obj(), + *utiltesting.MakeWorkload("b1", "").Request(corev1.ResourceCPU, "7").SimpleReserveQuota("b", "default", now).Obj(), + }, + incoming: utiltesting.MakeWorkload("a_incoming", "").Request(corev1.ResourceCPU, "5").Obj(), + targetCQ: "a", + }, + "can't preempt 2 smaller workloads if the incoming is huge": { + admitted: []kueue.Workload{ + *utiltesting.MakeWorkload("b1", "").Request(corev1.ResourceCPU, "2").SimpleReserveQuota("b", "default", now).Obj(), + *utiltesting.MakeWorkload("b2", "").Request(corev1.ResourceCPU, "2").SimpleReserveQuota("b", "default", now).Obj(), + *utiltesting.MakeWorkload("b3", "").Request(corev1.ResourceCPU, "3").SimpleReserveQuota("b", "default", now).Obj(), + }, + incoming: utiltesting.MakeWorkload("a_incoming", "").Request(corev1.ResourceCPU, "6").Obj(), + targetCQ: "a", + }, + "preempt from target and others even if over nominal": { + admitted: []kueue.Workload{ + *utiltesting.MakeWorkload("a1_low", "").Priority(-1).Request(corev1.ResourceCPU, "2").SimpleReserveQuota("b", "default", now).Obj(), + *utiltesting.MakeWorkload("a2_low", "").Priority(-1).Request(corev1.ResourceCPU, "1").SimpleReserveQuota("b", "default", now).Obj(), + *utiltesting.MakeWorkload("b1", "").Request(corev1.ResourceCPU, "3").SimpleReserveQuota("b", "default", now).Obj(), + *utiltesting.MakeWorkload("b2", "").Request(corev1.ResourceCPU, "3").SimpleReserveQuota("b", "default", now).Obj(), + }, + incoming: 
utiltesting.MakeWorkload("a_incoming", "").Request(corev1.ResourceCPU, "4").Obj(), + targetCQ: "a", + wantPreempted: sets.New("/a1_low", "/b1"), + }, + "prefer to preempt workloads that don't make the target CQ have the biggest share": { + admitted: []kueue.Workload{ + *utiltesting.MakeWorkload("b1", "").Request(corev1.ResourceCPU, "2").SimpleReserveQuota("b", "default", now).Obj(), + *utiltesting.MakeWorkload("b2", "").Request(corev1.ResourceCPU, "1").SimpleReserveQuota("b", "default", now).Obj(), + *utiltesting.MakeWorkload("b3", "").Request(corev1.ResourceCPU, "2").SimpleReserveQuota("b", "default", now).Obj(), + *utiltesting.MakeWorkload("c1", "").Request(corev1.ResourceCPU, "1").SimpleReserveQuota("c", "default", now).Obj(), + }, + incoming: utiltesting.MakeWorkload("a_incoming", "").Request(corev1.ResourceCPU, "3.5").Obj(), + targetCQ: "a", + // It would have been possible to preempt "/b1" under rule S2-b, but S2-a was possible first. + wantPreempted: sets.New("/b2"), + }, + "preempt from different cluster queues if the end result has a smaller max share": { + admitted: []kueue.Workload{ + *utiltesting.MakeWorkload("b1", "").Request(corev1.ResourceCPU, "2").SimpleReserveQuota("b", "default", now).Obj(), + *utiltesting.MakeWorkload("b2", "").Request(corev1.ResourceCPU, "2.5").SimpleReserveQuota("b", "default", now).Obj(), + *utiltesting.MakeWorkload("c1", "").Request(corev1.ResourceCPU, "2").SimpleReserveQuota("c", "default", now).Obj(), + *utiltesting.MakeWorkload("c2", "").Request(corev1.ResourceCPU, "2.5").SimpleReserveQuota("c", "default", now).Obj(), + }, + incoming: utiltesting.MakeWorkload("a_incoming", "").Request(corev1.ResourceCPU, "3.5").Obj(), + targetCQ: "a", + wantPreempted: sets.New("/b1", "/c1"), + }, + "scenario above does not flap": { + admitted: []kueue.Workload{ + *utiltesting.MakeWorkload("a1", "").Request(corev1.ResourceCPU, "3.5").SimpleReserveQuota("a", "default", now).Obj(), + *utiltesting.MakeWorkload("b2", "").Request(corev1.ResourceCPU, "2.5").SimpleReserveQuota("b", "default", now).Obj(), + *utiltesting.MakeWorkload("c2", "").Request(corev1.ResourceCPU, "2.5").SimpleReserveQuota("c", "default", now).Obj(), + }, + incoming: utiltesting.MakeWorkload("b_incoming", "").Request(corev1.ResourceCPU, "2").Obj(), + targetCQ: "b", + }, + "cannot preempt if it would make the candidate CQ go under nominal after preempting one element": { + admitted: []kueue.Workload{ + *utiltesting.MakeWorkload("b1", "").Request(corev1.ResourceCPU, "3").SimpleReserveQuota("b", "default", now).Obj(), + *utiltesting.MakeWorkload("b2", "").Request(corev1.ResourceCPU, "3").SimpleReserveQuota("b", "default", now).Obj(), + *utiltesting.MakeWorkload("c1", "").Request(corev1.ResourceCPU, "3").SimpleReserveQuota("c", "default", now).Obj(), + }, + incoming: utiltesting.MakeWorkload("a_incoming", "").Request(corev1.ResourceCPU, "4").Obj(), + targetCQ: "a", + }, "workloads under priority threshold can always be preempted": { admitted: []kueue.Workload{ *unitWl.Clone().Name("a1").SimpleReserveQuota("a", "default", now).Obj(), diff --git a/pkg/util/heap/heap.go b/pkg/util/heap/heap.go index ebad36dc6b..d993434325 100644 --- a/pkg/util/heap/heap.go +++ b/pkg/util/heap/heap.go @@ -171,8 +171,8 @@ func (h *Heap[T]) List() []*T { } // New returns a Heap which can be used to queue up items to process. 
-func New[T any](keyFn keyFunc[T], lessFn lessFunc[T]) Heap[T] { - return Heap[T]{ +func New[T any](keyFn keyFunc[T], lessFn lessFunc[T]) *Heap[T] { + return &Heap[T]{ data: data[T]{ items: make(map[string]*heapItem[T]), keyFunc: keyFn, From 7880819ae316be7bd9d906c75b7eeb15cd26f133 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Apr 2024 03:28:45 -0700 Subject: [PATCH 13/49] Bump github.com/onsi/gomega from 1.32.0 to 1.33.0 (#2028) Bumps [github.com/onsi/gomega](https://github.com/onsi/gomega) from 1.32.0 to 1.33.0. - [Release notes](https://github.com/onsi/gomega/releases) - [Changelog](https://github.com/onsi/gomega/blob/master/CHANGELOG.md) - [Commits](https://github.com/onsi/gomega/compare/v1.32.0...v1.33.0) --- updated-dependencies: - dependency-name: github.com/onsi/gomega dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index ee0d082ba9..c5622297bb 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ require ( github.com/kubeflow/mpi-operator v0.5.0 github.com/kubeflow/training-operator v1.7.0 github.com/onsi/ginkgo/v2 v2.17.1 - github.com/onsi/gomega v1.32.0 + github.com/onsi/gomega v1.33.0 github.com/open-policy-agent/cert-controller v0.10.1 github.com/prometheus/client_golang v1.18.0 github.com/prometheus/client_model v0.6.1 diff --git a/go.sum b/go.sum index 8dd1eabf2a..954c1391b5 100644 --- a/go.sum +++ b/go.sum @@ -153,8 +153,8 @@ github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= github.com/onsi/ginkgo/v2 v2.17.1 h1:V++EzdbhI4ZV4ev0UTIj0PzhzOcReJFyJaLjtSF55M8= github.com/onsi/ginkgo/v2 v2.17.1/go.mod h1:llBI3WDLL9Z6taip6f33H76YcWtJv+7R3HigUjbIBOs= -github.com/onsi/gomega v1.32.0 h1:JRYU78fJ1LPxlckP6Txi/EYqJvjtMrDC04/MM5XRHPk= -github.com/onsi/gomega v1.32.0/go.mod h1:a4x4gW6Pz2yK1MAmvluYme5lvYTn61afQ2ETw/8n4Lg= +github.com/onsi/gomega v1.33.0 h1:snPCflnZrpMsy94p4lXVEkHo12lmPnc3vY5XBbreexE= +github.com/onsi/gomega v1.33.0/go.mod h1:+925n5YtiFsLzzafLUHzVMBpvvRAzrydIBiSIxjX3wY= github.com/open-policy-agent/cert-controller v0.10.1 h1:RXSYoyn8FdCenWecRP//UV5nbVfmstNpj4kHQFkvPK4= github.com/open-policy-agent/cert-controller v0.10.1/go.mod h1:4uRbBLY5DsPOog+a9pqk3JLxuuhrWsbUedQW65HcLTI= github.com/open-policy-agent/frameworks/constraint v0.0.0-20230822235116-f0b62fe1e4c4 h1:5dum5SLEz+95JDLkMls7Z7IDPjvSq3UhJSFe4f5einQ= From 63f46ac9e56a9a988c978945629b135dcdfdc8e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Wo=C5=BAniak?= Date: Mon, 22 Apr 2024 18:52:31 +0200 Subject: [PATCH 14/49] Make the defaults for PodsReady backoff more practical (#2025) --- apis/config/v1beta1/configuration_types.go | 10 +++---- .../README.md | 26 ++++++++-------- pkg/controller/core/core.go | 30 +++++++++++++++++-- pkg/controller/core/workload_controller.go | 28 ++++++++++------- .../core/workload_controller_test.go | 7 +++-- .../scheduler/podsready/suite_test.go | 3 +- 6 files changed, 69 insertions(+), 35 deletions(-) diff --git a/apis/config/v1beta1/configuration_types.go b/apis/config/v1beta1/configuration_types.go index f21f2dd79d..88f36ddaf5 100644 --- a/apis/config/v1beta1/configuration_types.go +++ b/apis/config/v1beta1/configuration_types.go @@ -238,12 +238,12 @@ 
type RequeuingStrategy struct { // Once the number is reached, the workload is deactivated (`.spec.activate`=`false`). // When it is null, the workloads will repeatedly and endless re-queueing. // - // Every backoff duration is about "1.41284738^(n-1)+Rand" where the "n" represents the "workloadStatus.requeueState.count", - // and the "Rand" represents the random jitter. During this time, the workload is taken as an inadmissible and + // Every backoff duration is about "10s*2^(n-1)+Rand" where: + // - "n" represents the "workloadStatus.requeueState.count", + // - "Rand" represents the random jitter. + // During this time, the workload is taken as an inadmissible and // other workloads will have a chance to be admitted. - // For example, when the "waitForPodsReady.timeout" is the default, the workload deactivation time is as follows: - // {backoffLimitCount, workloadDeactivationSeconds} - // ~= {1, 601}, {2, 902}, ...,{5, 1811}, ...,{10, 3374}, ...,{20, 8730}, ...,{30, 86400(=24 hours)}, ... + // By default, the consecutive requeue delays are around: (10s, 20s, 40s, ...). // // Defaults to null. // +optional diff --git a/keps/1282-pods-ready-requeue-strategy/README.md b/keps/1282-pods-ready-requeue-strategy/README.md index df4af8a1b2..b025b3bb9f 100644 --- a/keps/1282-pods-ready-requeue-strategy/README.md +++ b/keps/1282-pods-ready-requeue-strategy/README.md @@ -143,12 +143,12 @@ type RequeuingStrategy struct { // Once the number is reached, the workload is deactivated (`.spec.activate`=`false`). // When it is null, the workloads will repeatedly and endless re-queueing. // - // Every backoff duration is about "1.41284738^(n-1)+Rand" where the "n" represents the "workloadStatus.requeueState.count", - // and the "Rand" represents the random jitter. During this time, the workload is taken as an inadmissible and + // Every backoff duration is about "10s*2^(n-1)+Rand" where: + // - "n" represents the "workloadStatus.requeueState.count", + // - "Rand" represents the random jitter. + // During this time, the workload is taken as an inadmissible and // other workloads will have a chance to be admitted. - // For example, when the "waitForPodsReady.timeout" is the default, the workload deactivation time is as follows: - // {backoffLimitCount, workloadDeactivationSeconds} - // ~= {1, 601}, {2, 902}, ...,{5, 1811}, ...,{10, 3374}, ...,{20, 8730}, ...,{30, 86400(=24 hours)}, ... + // By default, the consecutive requeue delays are around: (10s, 20s, 40s, ...). // // Defaults to null. // +optional @@ -222,16 +222,16 @@ the queueManager holds the evicted workloads as inadmissible workloads while exp Duration this time, other workloads will have a chance to be admitted. The queueManager calculates an exponential backoff duration by [the Step function](https://pkg.go.dev/k8s.io/apimachinery/pkg/util/wait@v0.29.1#Backoff.Step) -according to the $1.41284738^{(n-1)}+Rand$ where the $n$ represents the `workloadStatus.requeueState.count`, and the $Rand$ represents the random jitter. +according to the $10s*2^{(n-1)}+Rand$ where the $n$ represents the `workloadStatus.requeueState.count`, and the $Rand$ represents the random jitter. 
-Considering the `.waitForPodsReady.timeout` (default: 300 seconds), -this duration indicates that an evicted workload with `PodsReadyTimeout` reason is continued re-queuing -for the following period where the $t$ represents `.waitForPodsReady.timeout`: +It will spend awaiting to be requeued after eviction: +$$\sum_{k=1}^{n}(10s*2^{(k-1)} + Rand)$$ -$$t(n+1) + \sum_{k=1}^{n}(1.41284738^{(k-1)} + Rand)$$ - -Given that the `backoffLimitCount` equals `30` and the `waitForPodsReady.timeout` equals `300` (default), -the result equals 24 hours (+ $Rand$ seconds). +Assuming `backoffLimitCount` equals 10, and the workload is requeued 10 times +after failing to have all pods ready, then the total time awaiting for requeue +will take (neglecting the jitter): `10s+20s+40s +...+7680s=2h 8min`. +Also, considering `.waitForPodsReady.timeout=300s` (default), +the workload will spend `50min` total waiting for pods ready. #### Evaluation diff --git a/pkg/controller/core/core.go b/pkg/controller/core/core.go index b29eb39aa7..94beae3232 100644 --- a/pkg/controller/core/core.go +++ b/pkg/controller/core/core.go @@ -28,11 +28,26 @@ import ( "sigs.k8s.io/kueue/pkg/queue" ) -const updateChBuffer = 10 +const ( + updateChBuffer = 10 + defaultRequeuingBaseDelaySeconds = 10 +) + +type ControllerOptions struct { + requeuingBaseDelaySeconds int32 +} + +type ControllerOption func(*ControllerOptions) + +func WithControllerRequeuingBaseDelaySeconds(value int32) ControllerOption { + return func(o *ControllerOptions) { + o.requeuingBaseDelaySeconds = value + } +} // SetupControllers sets up the core controllers. It returns the name of the // controller that failed to create and an error, if any. -func SetupControllers(mgr ctrl.Manager, qManager *queue.Manager, cc *cache.Cache, cfg *configapi.Configuration) (string, error) { +func SetupControllers(mgr ctrl.Manager, qManager *queue.Manager, cc *cache.Cache, cfg *configapi.Configuration, controllerOpts ...ControllerOption) (string, error) { rfRec := NewResourceFlavorReconciler(mgr.GetClient(), qManager, cc) if err := rfRec.SetupWithManager(mgr, cfg); err != nil { return "ResourceFlavor", err @@ -63,11 +78,20 @@ func SetupControllers(mgr ctrl.Manager, qManager *queue.Manager, cc *cache.Cache if err := cqRec.SetupWithManager(mgr, cfg); err != nil { return "ClusterQueue", err } + ctrlOpts := ControllerOptions{ + requeuingBaseDelaySeconds: defaultRequeuingBaseDelaySeconds, + } + for _, opt := range controllerOpts { + opt(&ctrlOpts) + } + if err := NewWorkloadReconciler(mgr.GetClient(), qManager, cc, mgr.GetEventRecorderFor(constants.WorkloadControllerName), WithWorkloadUpdateWatchers(qRec, cqRec), WithPodsReadyTimeout(podsReadyTimeout(cfg)), - WithRequeuingBackoffLimitCount(requeuingBackoffLimitCount(cfg))).SetupWithManager(mgr, cfg); err != nil { + WithRequeuingBackoffLimitCount(requeuingBackoffLimitCount(cfg)), + WithRequeuingBaseDelaySeconds(ctrlOpts.requeuingBaseDelaySeconds), + ).SetupWithManager(mgr, cfg); err != nil { return "Workload", err } return "", nil diff --git a/pkg/controller/core/workload_controller.go b/pkg/controller/core/workload_controller.go index 0a964ce7b1..47dbfa62e2 100644 --- a/pkg/controller/core/workload_controller.go +++ b/pkg/controller/core/workload_controller.go @@ -72,6 +72,7 @@ type options struct { watchers []WorkloadUpdateWatcher podsReadyTimeout *time.Duration requeuingBackoffLimitCount *int32 + requeuingBaseDelaySeconds int32 } // Option configures the reconciler. 
@@ -93,6 +94,14 @@ func WithRequeuingBackoffLimitCount(value *int32) Option { } } +// WithRequeuingBaseDelaySeconds indicates the base delay for the computation +// of the requeue delay. +func WithRequeuingBaseDelaySeconds(value int32) Option { + return func(o *options) { + o.requeuingBaseDelaySeconds = value + } +} + // WithWorkloadUpdateWatchers allows to specify the workload update watchers func WithWorkloadUpdateWatchers(value ...WorkloadUpdateWatcher) Option { return func(o *options) { @@ -115,6 +124,7 @@ type WorkloadReconciler struct { watchers []WorkloadUpdateWatcher podsReadyTimeout *time.Duration requeuingBackoffLimitCount *int32 + requeuingBaseDelaySeconds int32 recorder record.EventRecorder } @@ -132,6 +142,7 @@ func NewWorkloadReconciler(client client.Client, queues *queue.Manager, cache *c watchers: options.watchers, podsReadyTimeout: options.podsReadyTimeout, requeuingBackoffLimitCount: options.requeuingBackoffLimitCount, + requeuingBaseDelaySeconds: options.requeuingBaseDelaySeconds, recorder: recorder, } } @@ -389,17 +400,14 @@ func (r *WorkloadReconciler) triggerDeactivationOrBackoffRequeue(ctx context.Con "Deactivated Workload %q by reached re-queue backoffLimitCount", klog.KObj(wl)) return true, nil } - // Every backoff duration is about "1.41284738^(n-1)+Rand" where the "n" represents the "requeuingCount", - // and the "Rand" represents the random jitter. During this time, the workload is taken as an inadmissible and - // other workloads will have a chance to be admitted. - // Considering the ".waitForPodsReady.timeout", - // this indicates that an evicted workload with PodsReadyTimeout reason is continued re-queuing for - // the "t(n+1) + SUM[k=1,n](1.41284738^(k-1) + Rand)" seconds where the "t" represents "waitForPodsReady.timeout". - // Given that the "backoffLimitCount" equals "30" and the "waitForPodsReady.timeout" equals "300" (default), - // the result equals 24 hours (+Rand seconds). + // Every backoff duration is about "10s*2^(n-1)+Rand" where: + // - "n" represents the "requeuingCount", + // - "Rand" represents the random jitter. + // During this time, the workload is taken as an inadmissible and other + // workloads will have a chance to be admitted. backoff := &wait.Backoff{ - Duration: 1 * time.Second, - Factor: 1.41284738, + Duration: time.Duration(r.requeuingBaseDelaySeconds) * time.Second, + Factor: 2, Jitter: 0.0001, Steps: int(requeuingCount), } diff --git a/pkg/controller/core/workload_controller_test.go b/pkg/controller/core/workload_controller_test.go index 1bd9f421dc..8e16f77cd4 100644 --- a/pkg/controller/core/workload_controller_test.go +++ b/pkg/controller/core/workload_controller_test.go @@ -508,6 +508,7 @@ func TestReconcile(t *testing.T) { reconcilerOpts: []Option{ WithPodsReadyTimeout(ptr.To(3 * time.Second)), WithRequeuingBackoffLimitCount(ptr.To[int32](100)), + WithRequeuingBaseDelaySeconds(10), }, workload: utiltesting.MakeWorkload("wl", "ns"). ReserveQuota(utiltesting.MakeAdmission("q1").Obj()). @@ -523,7 +524,7 @@ func TestReconcile(t *testing.T) { Message: "Admitted by ClusterQueue q1", }). Admitted(true). - RequeueState(ptr.To[int32](29), nil). + RequeueState(ptr.To[int32](3), nil). Generation(1). Obj(), wantWorkload: utiltesting.MakeWorkload("wl", "ns"). @@ -541,8 +542,8 @@ func TestReconcile(t *testing.T) { Message: "Exceeded the PodsReady timeout ns/wl", ObservedGeneration: 1, }). 
- // 1.41284738^(30-1) = 22530.0558 - RequeueState(ptr.To[int32](30), ptr.To(metav1.NewTime(testStartTime.Add(22530*time.Second).Truncate(time.Second)))). + // 10s * 2^(4-1) = 80s + RequeueState(ptr.To[int32](4), ptr.To(metav1.NewTime(testStartTime.Add(80*time.Second).Truncate(time.Second)))). Obj(), }, "deactivated workload": { diff --git a/test/integration/scheduler/podsready/suite_test.go b/test/integration/scheduler/podsready/suite_test.go index 13c25ce7fe..bc2daabd4b 100644 --- a/test/integration/scheduler/podsready/suite_test.go +++ b/test/integration/scheduler/podsready/suite_test.go @@ -87,7 +87,8 @@ func managerAndSchedulerSetupWithTimeoutAdmission( queue.WithPodsReadyRequeuingTimestamp(requeuingTimestamp), ) - failedCtrl, err := core.SetupControllers(mgr, queues, cCache, cfg) + failedCtrl, err := core.SetupControllers(mgr, queues, cCache, cfg, + core.WithControllerRequeuingBaseDelaySeconds(1)) gomega.Expect(err).ToNot(gomega.HaveOccurred(), "controller", failedCtrl) failedWebhook, err := webhooks.Setup(mgr) From b2bb2bfbfb90c318fe50c5311e8334b0b00c093a Mon Sep 17 00:00:00 2001 From: Aldo Culquicondor <1299064+alculquicondor@users.noreply.github.com> Date: Mon, 22 Apr 2024 13:45:19 -0400 Subject: [PATCH 15/49] Fix generation of API reference (#2034) Change-Id: I58e0036c9dba6215e156a927b45554d22b28201a --- Makefile | 2 +- {site => hack}/genref/config.yaml | 0 {site => hack}/genref/markdown/members.tpl | 0 {site => hack}/genref/markdown/pkg.tpl | 0 {site => hack}/genref/markdown/type.tpl | 0 .../en/docs/reference/kueue-config.v1beta1.md | 27 +++++-- .../en/docs/reference/kueue.v1beta1.md | 75 ++++++++++++++++++- 7 files changed, 96 insertions(+), 8 deletions(-) rename {site => hack}/genref/config.yaml (100%) rename {site => hack}/genref/markdown/members.tpl (100%) rename {site => hack}/genref/markdown/pkg.tpl (100%) rename {site => hack}/genref/markdown/type.tpl (100%) diff --git a/Makefile b/Makefile index 240beee7f8..83ce334469 100644 --- a/Makefile +++ b/Makefile @@ -498,4 +498,4 @@ cluster-autoscaler-crd: .PHONY: generate-apiref generate-apiref: genref - cd $(PROJECT_DIR)/site/genref/ && $(GENREF) -o $(PROJECT_DIR)/site/content/en/docs/reference + cd $(PROJECT_DIR)/hack/genref/ && $(GENREF) -o $(PROJECT_DIR)/site/content/en/docs/reference diff --git a/site/genref/config.yaml b/hack/genref/config.yaml similarity index 100% rename from site/genref/config.yaml rename to hack/genref/config.yaml diff --git a/site/genref/markdown/members.tpl b/hack/genref/markdown/members.tpl similarity index 100% rename from site/genref/markdown/members.tpl rename to hack/genref/markdown/members.tpl diff --git a/site/genref/markdown/pkg.tpl b/hack/genref/markdown/pkg.tpl similarity index 100% rename from site/genref/markdown/pkg.tpl rename to hack/genref/markdown/pkg.tpl diff --git a/site/genref/markdown/type.tpl b/hack/genref/markdown/type.tpl similarity index 100% rename from site/genref/markdown/type.tpl rename to hack/genref/markdown/type.tpl diff --git a/site/content/en/docs/reference/kueue-config.v1beta1.md b/site/content/en/docs/reference/kueue-config.v1beta1.md index a3016ae344..55fb12778a 100644 --- a/site/content/en/docs/reference/kueue-config.v1beta1.md +++ b/site/content/en/docs/reference/kueue-config.v1beta1.md @@ -436,6 +436,21 @@ Possible options:

PodOptions defines kueue controller behaviour for pod objects

+labelKeysToCopy [Required]
+[]string + + +

labelKeysToCopy is a list of label keys that should be copied from the job into the +workload object. It is not required for the job to have all the labels from this +list. If a job does not have some label with the given key from this list, the +constructed workload object will be created without this label. In the case +of creating a workload from a composable job (pod group), if multiple objects +have labels with some key from the list, the values of these labels must +match or otherwise the workload creation would fail. The labels are copied only +during the workload creation and are not updated even if the labels of the +underlying job are changed.
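As a minimal illustration (not taken from this patch), the field could be populated programmatically as in the Go sketch below. The `team` and `cost-center` keys are hypothetical examples, and the sketch assumes the `Integrations.LabelKeysToCopy` field of the v1beta1 configuration API.

```go
package main

import (
	"fmt"

	configapi "sigs.k8s.io/kueue/apis/config/v1beta1"
)

func main() {
	// Copy the example labels "team" and "cost-center" from Jobs onto the
	// Workloads that Kueue creates for them.
	cfg := configapi.Configuration{
		Integrations: &configapi.Integrations{
			Frameworks:      []string{"batch/job"},
			LabelKeysToCopy: []string{"team", "cost-center"},
		},
	}
	fmt.Println(cfg.Integrations.LabelKeysToCopy)
}
```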

+ + @@ -622,12 +637,14 @@ that was evicted due to Pod readiness. The possible values are:

BackoffLimitCount defines the maximum number of re-queuing retries. Once the number is reached, the workload is deactivated (.spec.activate=false). When it is null, the workloads will be repeatedly and endlessly re-queued.

-

Every backoff duration is about "1.41284738^(n-1)+Rand" where the "n" represents the "workloadStatus.requeueState.count", -and the "Rand" represents the random jitter. During this time, the workload is taken as an inadmissible and +

Every backoff duration is about "10s*2^(n-1)+Rand" where:

+
    +
  • "n" represents the "workloadStatus.requeueState.count",
  • +
  • "Rand" represents the random jitter. +During this time, the workload is taken as an inadmissible and other workloads will have a chance to be admitted. -For example, when the "waitForPodsReady.timeout" is the default, the workload deactivation time is as follows: -{backoffLimitCount, workloadDeactivationSeconds} -~= {1, 601}, {2, 902}, ...,{5, 1811}, ...,{10, 3374}, ...,{20, 8730}, ...,{30, 86400(=24 hours)}, ...

    +By default, the consecutive requeue delays are around: (10s, 20s, 40s, ...).
  • +

Defaults to null.
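As an illustrative sketch only (not part of the patch), the delay sequence above can be reproduced with the same `wait.Backoff` parameters that the workload controller configures: a 10s base delay, factor 2 and a small jitter. The choice of five steps below is an arbitrary example.

```go
package main

import (
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

func main() {
	backoff := wait.Backoff{
		Duration: 10 * time.Second, // default requeuing base delay
		Factor:   2,
		Jitter:   0.0001,
		Steps:    5, // example: five consecutive requeues
	}
	for i := 1; backoff.Steps > 0; i++ {
		// Step() returns the current delay (with jitter applied) and then
		// doubles the base duration for the next call.
		fmt.Printf("requeue #%d waits ~%v\n", i, backoff.Step())
	}
}
```

This prints roughly 10s, 20s, 40s, 80s and 160s, matching the "10s*2^(n-1)+Rand" formula described above.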

diff --git a/site/content/en/docs/reference/kueue.v1beta1.md b/site/content/en/docs/reference/kueue.v1beta1.md index c6ccfa3969..069a5cbff7 100644 --- a/site/content/en/docs/reference/kueue.v1beta1.md +++ b/site/content/en/docs/reference/kueue.v1beta1.md @@ -437,6 +437,66 @@ current state.

+## `AdmissionCheckStrategyRule` {#kueue-x-k8s-io-v1beta1-AdmissionCheckStrategyRule} + + +**Appears in:** + +- [AdmissionChecksStrategy](#kueue-x-k8s-io-v1beta1-AdmissionChecksStrategy) + + +

AdmissionCheckStrategyRule defines rules for a single AdmissionCheck

+ + + + + + + + + + + + + + +
FieldDescription
name [Required]
+string +
+

name is an AdmissionCheck's name.

+
onFlavors
+[]ResourceFlavorReference +
+

onFlavors is a list of ResourceFlavors' names that this AdmissionCheck should run for. +If empty, the AdmissionCheck will run for all workloads submitted to the ClusterQueue.

+
+ +## `AdmissionChecksStrategy` {#kueue-x-k8s-io-v1beta1-AdmissionChecksStrategy} + + +**Appears in:** + +- [ClusterQueueSpec](#kueue-x-k8s-io-v1beta1-ClusterQueueSpec) + + +

AdmissionChecksStrategy defines a strategy for an AdmissionCheck.

+ + + + + + + + + + + +
FieldDescription
admissionChecks [Required]
+[]AdmissionCheckStrategyRule +
+

admissionChecks is a list of strategies for AdmissionChecks
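Purely as an illustrative sketch (names such as `prov-check`, `on-demand` and `team-a-cq` are made up, not taken from this patch), the new strategy can be attached to a ClusterQueue spec roughly as follows:

```go
package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
)

func main() {
	// A ClusterQueue that runs the "prov-check" AdmissionCheck only for
	// workloads assigned the "on-demand" flavor. admissionChecksStrategy
	// cannot be combined with the plain admissionChecks list.
	cq := kueue.ClusterQueue{
		ObjectMeta: metav1.ObjectMeta{Name: "team-a-cq"},
		Spec: kueue.ClusterQueueSpec{
			AdmissionChecksStrategy: &kueue.AdmissionChecksStrategy{
				AdmissionChecks: []kueue.AdmissionCheckStrategyRule{
					{
						Name:      "prov-check",
						OnFlavors: []kueue.ResourceFlavorReference{"on-demand"},
					},
				},
			},
		},
	}
	fmt.Println(cq.Name, cq.Spec.AdmissionChecksStrategy.AdmissionChecks[0].Name)
}
```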

+
+ ## `BorrowWithinCohort` {#kueue-x-k8s-io-v1beta1-BorrowWithinCohort} @@ -748,7 +808,7 @@ reclaim its nominal quota. and there are admitted Workloads in the ClusterQueue with lower priority.

The preemption algorithm tries to find a minimal set of Workloads to -preempt to accommodate the pending Workload, preempting Workloads with +preempt to accomomdate the pending Workload, preempting Workloads with lower priority first.

@@ -756,7 +816,16 @@ lower priority first.

[]string -

admissionChecks lists the AdmissionChecks required by this ClusterQueue

+

admissionChecks lists the AdmissionChecks required by this ClusterQueue. +Cannot be used along with AdmissionCheckStrategy.

+ + +admissionChecksStrategy
+AdmissionChecksStrategy + + +

admissionCheckStrategy defines a list of strategies to determine which ResourceFlavors require AdmissionChecks. +This property cannot be used in conjunction with the 'admissionChecks' property.

stopPolicy
@@ -1448,6 +1517,8 @@ this time would be reset to null.

**Appears in:** +- [AdmissionCheckStrategyRule](#kueue-x-k8s-io-v1beta1-AdmissionCheckStrategyRule) + - [FlavorQuotas](#kueue-x-k8s-io-v1beta1-FlavorQuotas) - [FlavorUsage](#kueue-x-k8s-io-v1beta1-FlavorUsage) From a19e8b7163360c749508374dcfb4404703638fa1 Mon Sep 17 00:00:00 2001 From: Mykhailo Bobrovskyi Date: Tue, 23 Apr 2024 09:19:00 +0300 Subject: [PATCH 16/49] Allow run test-multikueue-e2e for mac os (#1971) * [multikueue] Fixed multikueue e2e tests for Mac OS * [multikueue] Fixed imports * [multikueue] Recreate the client to force disconnect * [multikueue] Recreate the client after connect * [metrics] Fixed timeout error. * [metrics] Moved cluster server replacements to multikueue-e2e-test.sh. * [metrics] Fixed multikueue-e2e-test.sh. * [metrics] Using WaitForKueueAvailability instead time.Sleep. * [metrics] Fixed timeout error. * [metrics] Fixed multikueue-e2e-test. * [metrics] Fixed imports. * [metrics] Fixed timeouts. * [multikueue] Fixed timeout error. * [multikueue] Added code explanation. * [metrics] Optimization. * [multikueue] Put DeleteCluster to Eventually. --- hack/multikueue-e2e-test.sh | 26 +++++--------- hack/multikueue/worker-cluster.kind.yaml | 2 -- test/e2e/multikueue/e2e_test.go | 43 ++++++++++++++++++++---- test/e2e/multikueue/suite_test.go | 10 ++++-- 4 files changed, 52 insertions(+), 29 deletions(-) diff --git a/hack/multikueue-e2e-test.sh b/hack/multikueue-e2e-test.sh index 7607bc8493..14470fb173 100755 --- a/hack/multikueue-e2e-test.sh +++ b/hack/multikueue-e2e-test.sh @@ -50,21 +50,9 @@ function startup { mkdir -p "$ARTIFACTS" fi - cluster_create "$MANAGER_KIND_CLUSTER_NAME" "$SOURCE_DIR"/multikueue/manager-cluster.kind.yaml - - # NOTE: for local setup, make sure that your firewall allows tcp from manager to the GW ip - # eg. ufw `sudo ufw allow from 172.18.0.0/16 proto tcp to 172.18.0.1` - # - # eg. 
iptables `sudo iptables --append INPUT --protocol tcp --src 172.18.0.0/16 --dst 172.18.0.1 --jump ACCEPT - # sudo iptables --append OUTPUT --protocol tcp --src 172.18.0.1 --dst 172.18.0./0/16 --jump ACCEPT` - - # have the worker forward the api to the docker gateway address instead of lo - export GW="$(docker inspect "${MANAGER_KIND_CLUSTER_NAME}"-control-plane -f '{{.NetworkSettings.Networks.kind.Gateway}}')" - $YQ e '.networking.apiServerAddress=env(GW)' "$SOURCE_DIR/multikueue/worker-cluster.kind.yaml" > "$ARTIFACTS"/worker-cluster.yaml - - cluster_create $WORKER1_KIND_CLUSTER_NAME $ARTIFACTS/worker-cluster.yaml - cluster_create $WORKER2_KIND_CLUSTER_NAME $ARTIFACTS/worker-cluster.yaml - + cluster_create "$MANAGER_KIND_CLUSTER_NAME" "$SOURCE_DIR/multikueue/manager-cluster.kind.yaml" + cluster_create $WORKER1_KIND_CLUSTER_NAME "$SOURCE_DIR/multikueue/worker-cluster.kind.yaml" + cluster_create $WORKER2_KIND_CLUSTER_NAME "$SOURCE_DIR/multikueue/worker-cluster.kind.yaml" fi } @@ -96,19 +84,21 @@ function kueue_deploy { function prepare_secrets { kubectl config use-context kind-${WORKER1_KIND_CLUSTER_NAME} source ${SOURCE_DIR}/create-multikueue-kubeconfig.sh ${ARTIFACTS}/worker1.kubeconfig + $YQ e ".clusters[0].cluster.server = \"https://${WORKER1_KIND_CLUSTER_NAME}-control-plane:6443\"" ${ARTIFACTS}/worker1.kubeconfig > ${ARTIFACTS}/worker1.kubeconfig.internal kubectl config use-context kind-${WORKER2_KIND_CLUSTER_NAME} source ${SOURCE_DIR}/create-multikueue-kubeconfig.sh ${ARTIFACTS}/worker2.kubeconfig + $YQ e ".clusters[0].cluster.server = \"https://${WORKER2_KIND_CLUSTER_NAME}-control-plane:6443\"" ${ARTIFACTS}/worker2.kubeconfig > ${ARTIFACTS}/worker2.kubeconfig.internal kubectl config use-context kind-${MANAGER_KIND_CLUSTER_NAME} - kubectl create secret generic multikueue1 -n kueue-system --from-file=kubeconfig=${ARTIFACTS}/worker1.kubeconfig - kubectl create secret generic multikueue2 -n kueue-system --from-file=kubeconfig=${ARTIFACTS}/worker2.kubeconfig + kubectl create secret generic multikueue1 -n kueue-system --from-file=kubeconfig=${ARTIFACTS}/worker1.kubeconfig.internal + kubectl create secret generic multikueue2 -n kueue-system --from-file=kubeconfig=${ARTIFACTS}/worker2.kubeconfig.internal } trap cleanup EXIT startup kind_load -kueue_deploy +kueue_deploy prepare_secrets $GINKGO $GINKGO_ARGS --junit-report=junit.xml --output-dir=$ARTIFACTS -v ./test/e2e/multikueue/... diff --git a/hack/multikueue/worker-cluster.kind.yaml b/hack/multikueue/worker-cluster.kind.yaml index 7d613955fa..c7892087f9 100644 --- a/hack/multikueue/worker-cluster.kind.yaml +++ b/hack/multikueue/worker-cluster.kind.yaml @@ -1,7 +1,5 @@ kind: Cluster apiVersion: kind.x-k8s.io/v1alpha4 -networking: - apiServerAddress: "FILLED_AT_RUNTIME" nodes: - role: control-plane kubeadmConfigPatches: diff --git a/test/e2e/multikueue/e2e_test.go b/test/e2e/multikueue/e2e_test.go index 397192e3f2..c2a9c1c675 100644 --- a/test/e2e/multikueue/e2e_test.go +++ b/test/e2e/multikueue/e2e_test.go @@ -17,6 +17,7 @@ limitations under the License. package mke2e import ( + "fmt" "os/exec" "github.com/google/go-cmp/cmp/cmpopts" @@ -345,15 +346,32 @@ var _ = ginkgo.Describe("MultiKueue", func() { }) ginkgo.When("The connection to a worker cluster is unreliable", func() { ginkgo.It("Should update the cluster status to reflect the connection state", func() { + worker1Cq2 := utiltesting.MakeClusterQueue("q2"). + ResourceGroup( + *utiltesting.MakeFlavorQuotas(worker1Flavor.Name). + Resource(corev1.ResourceCPU, "2"). 
+ Resource(corev1.ResourceMemory, "1G"). + Obj(), + ). + Obj() + gomega.Expect(k8sWorker1Client.Create(ctx, worker1Cq2)).Should(gomega.Succeed()) + + worker1Container := fmt.Sprintf("%s-control-plane", worker1ClusterName) + worker1ClusterKey := client.ObjectKeyFromObject(workerCluster1) + ginkgo.By("Disconnecting worker1 container from the kind network", func() { - cmd := exec.Command("docker", "network", "disconnect", "kind", "kind-worker1-control-plane") + cmd := exec.Command("docker", "network", "disconnect", "kind", worker1Container) output, err := cmd.CombinedOutput() gomega.Expect(err).NotTo(gomega.HaveOccurred(), "%s: %s", err, output) - }) - worker1ClusterKey := client.ObjectKeyFromObject(workerCluster1) + podList := &corev1.PodList{} + podListOptions := client.InNamespace("kueue-system") + gomega.Eventually(func(g gomega.Gomega) error { + return k8sWorker1Client.List(ctx, podList, podListOptions) + }, util.LongTimeout, util.Interval).ShouldNot(gomega.Succeed()) + }) - ginkgo.By("Waiting for the cluster do become inactive", func() { + ginkgo.By("Waiting for the cluster to become inactive", func() { readClient := &kueuealpha.MultiKueueCluster{} gomega.Eventually(func(g gomega.Gomega) { g.Expect(k8sManagerClient.Get(ctx, worker1ClusterKey, readClient)).To(gomega.Succeed()) @@ -364,13 +382,26 @@ var _ = ginkgo.Describe("MultiKueue", func() { Reason: "ClientConnectionFailed", }, util.IgnoreConditionTimestampsAndObservedGeneration, util.IgnoreConditionMessage))) - }, util.Timeout, util.Interval).Should(gomega.Succeed()) + }, util.LongTimeout, util.Interval).Should(gomega.Succeed()) }) ginkgo.By("Reconnecting worker1 container to the kind network", func() { - cmd := exec.Command("docker", "network", "connect", "kind", "kind-worker1-control-plane") + cmd := exec.Command("docker", "network", "connect", "kind", worker1Container) output, err := cmd.CombinedOutput() gomega.Expect(err).NotTo(gomega.HaveOccurred(), "%s: %s", err, output) + gomega.Eventually(func() error { + return util.DeleteClusterQueue(ctx, k8sWorker1Client, worker1Cq2) + }, util.LongTimeout, util.Interval).ShouldNot(gomega.HaveOccurred()) + + // After reconnecting the container to the network, when we try to get pods, + // we get it with the previous values (as before disconnect). Therefore, it + // takes some time for the cluster to restore them, and we got actually values. + // To be sure that the leader of kueue-control-manager successfully recovered + // we can check it by removing already created Cluster Queue. 
+ var cq kueue.ClusterQueue + gomega.Eventually(func() error { + return k8sWorker1Client.Get(ctx, client.ObjectKeyFromObject(worker1Cq2), &cq) + }, util.LongTimeout, util.Interval).Should(utiltesting.BeNotFoundError()) }) ginkgo.By("Waiting for the cluster do become active", func() { diff --git a/test/e2e/multikueue/suite_test.go b/test/e2e/multikueue/suite_test.go index 4356089313..14b28841df 100644 --- a/test/e2e/multikueue/suite_test.go +++ b/test/e2e/multikueue/suite_test.go @@ -31,6 +31,10 @@ import ( ) var ( + managerClusterName string + worker1ClusterName string + worker2ClusterName string + k8sManagerClient client.Client k8sWorker1Client client.Client k8sWorker2Client client.Client @@ -49,13 +53,13 @@ func TestAPIs(t *testing.T) { } var _ = ginkgo.BeforeSuite(func() { - managerClusterName := os.Getenv("MANAGER_KIND_CLUSTER_NAME") + managerClusterName = os.Getenv("MANAGER_KIND_CLUSTER_NAME") gomega.Expect(managerClusterName).NotTo(gomega.BeEmpty(), "MANAGER_KIND_CLUSTER_NAME should not be empty") - worker1ClusterName := os.Getenv("WORKER1_KIND_CLUSTER_NAME") + worker1ClusterName = os.Getenv("WORKER1_KIND_CLUSTER_NAME") gomega.Expect(worker1ClusterName).NotTo(gomega.BeEmpty(), "WORKER1_KIND_CLUSTER_NAME should not be empty") - worker2ClusterName := os.Getenv("WORKER2_KIND_CLUSTER_NAME") + worker2ClusterName = os.Getenv("WORKER2_KIND_CLUSTER_NAME") gomega.Expect(worker2ClusterName).NotTo(gomega.BeEmpty(), "WORKER2_KIND_CLUSTER_NAME should not be empty") k8sManagerClient = util.CreateClientUsingCluster("kind-" + managerClusterName) From 92baacd06e54f57de85a15590a1780eb84455941 Mon Sep 17 00:00:00 2001 From: Mykhailo Bobrovskyi Date: Tue, 23 Apr 2024 09:48:55 +0300 Subject: [PATCH 17/49] [metrics] Add quota_reserved_wait_time_seconds (#1977) * [metrics] Created QuotaReservedWorkloadsTotal and quotaReservedWaitTime metrics. * [metrics] Added integration tests. * [metrics] Fixed imports. * [metrics] Added new metric doc. * [metrics] Added new metric doc. * [metrics] Added new metric doc. * [metrics] Revert new empty lines. * [metrics] Revert formatting changes. * [metrics] Added "Should admit workloads with admission checks" test. * [metrics] Added ExpectAdmittedWorkloadsTotalMetric on rejected workload test. * [metrics] Improving debuggability * [metrics] Renamed quota_reserved_wait_time_seconds to quota_reserved_to_admission_wait_time_seconds. * [metrics] Added buckets for quotaReservedWaitTime and admissionWaitTime. * [metrics] Added generateExponentialBuckets test. * [metrics] Added WorkloadRequeued condition. * [metrics] Change explanation for WorkloadRequeued. * [metrics] Remove extra argument (LastTransitionTime) on SetRequeuedCondition. * [metrics] Added QueuedWaitTime helper. * [metrics] Rename test doc. 
--- apis/kueue/v1beta1/workload_types.go | 3 + pkg/controller/core/workload_controller.go | 9 +- .../jobs/job/job_controller_test.go | 6 + .../jobs/pod/pod_controller_test.go | 7 ++ .../raycluster/raycluster_controller_test.go | 6 + pkg/metrics/metrics.go | 48 +++++++- pkg/metrics/metrics_test.go | 9 ++ pkg/scheduler/scheduler.go | 15 ++- pkg/workload/workload.go | 37 +++++- site/content/en/docs/reference/metrics.md | 5 +- .../core/workload_controller_test.go | 49 +++++++- .../scheduler/podsready/scheduler_test.go | 5 + test/integration/scheduler/scheduler_test.go | 112 ++++++++++++++++-- test/util/util.go | 19 ++- 14 files changed, 296 insertions(+), 34 deletions(-) diff --git a/apis/kueue/v1beta1/workload_types.go b/apis/kueue/v1beta1/workload_types.go index 698787d8c1..fd96a53704 100644 --- a/apis/kueue/v1beta1/workload_types.go +++ b/apis/kueue/v1beta1/workload_types.go @@ -302,6 +302,9 @@ const ( // more detailed information. The more detailed reasons should be prefixed // by one of the "base" reasons. WorkloadPreempted = "Preempted" + + // WorkloadRequeued means that the Workload was requeued due to eviction. + WorkloadRequeued = "Requeued" ) const ( diff --git a/pkg/controller/core/workload_controller.go b/pkg/controller/core/workload_controller.go index 47dbfa62e2..19f97cb456 100644 --- a/pkg/controller/core/workload_controller.go +++ b/pkg/controller/core/workload_controller.go @@ -51,6 +51,7 @@ import ( "sigs.k8s.io/kueue/pkg/cache" "sigs.k8s.io/kueue/pkg/constants" "sigs.k8s.io/kueue/pkg/controller/core/indexer" + "sigs.k8s.io/kueue/pkg/metrics" "sigs.k8s.io/kueue/pkg/queue" utilac "sigs.k8s.io/kueue/pkg/util/admissioncheck" utilslices "sigs.k8s.io/kueue/pkg/util/slices" @@ -192,8 +193,12 @@ func (r *WorkloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c return ctrl.Result{}, err } if workload.IsAdmitted(&wl) { - c := apimeta.FindStatusCondition(wl.Status.Conditions, kueue.WorkloadQuotaReserved) - r.recorder.Eventf(&wl, corev1.EventTypeNormal, "Admitted", "Admitted by ClusterQueue %v, wait time since reservation was %.0fs", wl.Status.Admission.ClusterQueue, time.Since(c.LastTransitionTime.Time).Seconds()) + queuedWaitTime := workload.QueuedWaitTime(&wl) + quotaReservedCondition := apimeta.FindStatusCondition(wl.Status.Conditions, kueue.WorkloadQuotaReserved) + quotaReservedWaitTime := time.Since(quotaReservedCondition.LastTransitionTime.Time) + r.recorder.Eventf(&wl, corev1.EventTypeNormal, "Admitted", "Admitted by ClusterQueue %v, wait time since reservation was %.0fs", wl.Status.Admission.ClusterQueue, quotaReservedWaitTime.Seconds()) + metrics.AdmittedWorkload(kueue.ClusterQueueReference(cqName), queuedWaitTime) + metrics.AdmissionChecksWaitTime(kueue.ClusterQueueReference(cqName), quotaReservedWaitTime) } return ctrl.Result{}, nil } diff --git a/pkg/controller/jobs/job/job_controller_test.go b/pkg/controller/jobs/job/job_controller_test.go index 52414d2b85..a6eee4360e 100644 --- a/pkg/controller/jobs/job/job_controller_test.go +++ b/pkg/controller/jobs/job/job_controller_test.go @@ -633,6 +633,12 @@ func TestReconciler(t *testing.T) { Reason: "Pending", Message: "The workload is deactivated", }). + Condition(metav1.Condition{ + Type: kueue.WorkloadRequeued, + Status: metav1.ConditionTrue, + Reason: "Pending", + Message: "The workload is deactivated", + }). 
Condition(metav1.Condition{ Type: kueue.WorkloadEvicted, Status: metav1.ConditionTrue, diff --git a/pkg/controller/jobs/pod/pod_controller_test.go b/pkg/controller/jobs/pod/pod_controller_test.go index 89d3891d03..ea08fd4174 100644 --- a/pkg/controller/jobs/pod/pod_controller_test.go +++ b/pkg/controller/jobs/pod/pod_controller_test.go @@ -1651,6 +1651,13 @@ func TestReconciler(t *testing.T) { Reason: "Pending", Message: "Preempted to accommodate a higher priority Workload", }). + SetOrReplaceCondition(metav1.Condition{ + Type: kueue.WorkloadRequeued, + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.Now(), + Reason: "Pending", + Message: "Preempted to accommodate a higher priority Workload", + }). Obj(), }, workloadCmpOpts: defaultWorkloadCmpOpts, diff --git a/pkg/controller/jobs/raycluster/raycluster_controller_test.go b/pkg/controller/jobs/raycluster/raycluster_controller_test.go index 0fec379850..cfb180779f 100644 --- a/pkg/controller/jobs/raycluster/raycluster_controller_test.go +++ b/pkg/controller/jobs/raycluster/raycluster_controller_test.go @@ -325,6 +325,12 @@ func TestReconciler(t *testing.T) { Message: "The workload has no reservation", ObservedGeneration: 1, }). + Condition(metav1.Condition{ + Type: kueue.WorkloadRequeued, + Status: metav1.ConditionTrue, + Reason: "Pending", + ObservedGeneration: 1, + }). Obj(), }, }, diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 74ded34f31..015e476eb0 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -91,6 +91,23 @@ The label 'result' can have the following values: }, []string{"cluster_queue", "status"}, ) + QuotaReservedWorkloadsTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Subsystem: constants.KueueName, + Name: "quota_reserved_workloads_total", + Help: "The total number of quota reserved workloads per 'cluster_queue'", + }, []string{"cluster_queue"}, + ) + + quotaReservedWaitTime = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Subsystem: constants.KueueName, + Name: "quota_reserved_wait_time_seconds", + Help: "The time between a workload was created or requeued until it got quota reservation, per 'cluster_queue'", + Buckets: generateExponentialBuckets(14), + }, []string{"cluster_queue"}, + ) + AdmittedWorkloadsTotal = prometheus.NewCounterVec( prometheus.CounterOpts{ Subsystem: constants.KueueName, @@ -103,7 +120,17 @@ The label 'result' can have the following values: prometheus.HistogramOpts{ Subsystem: constants.KueueName, Name: "admission_wait_time_seconds", - Help: "The time between a Workload was created until it was admitted, per 'cluster_queue'", + Help: "The time between a workload was created or requeued until admission, per 'cluster_queue'", + Buckets: generateExponentialBuckets(14), + }, []string{"cluster_queue"}, + ) + + admissionChecksWaitTime = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Subsystem: constants.KueueName, + Name: "admission_checks_wait_time_seconds", + Help: "The time from when a workload got the quota reservation until admission, per 'cluster_queue'", + Buckets: generateExponentialBuckets(14), }, []string{"cluster_queue"}, ) @@ -176,16 +203,29 @@ For a ClusterQueue, the metric only reports a value of 1 for one of the statuses ) ) +func generateExponentialBuckets(count int) []float64 { + return append([]float64{1}, prometheus.ExponentialBuckets(2.5, 2, count-1)...) 
+} + func AdmissionAttempt(result AdmissionResult, duration time.Duration) { AdmissionAttemptsTotal.WithLabelValues(string(result)).Inc() admissionAttemptDuration.WithLabelValues(string(result)).Observe(duration.Seconds()) } +func QuotaReservedWorkload(cqName kueue.ClusterQueueReference, waitTime time.Duration) { + QuotaReservedWorkloadsTotal.WithLabelValues(string(cqName)).Inc() + quotaReservedWaitTime.WithLabelValues(string(cqName)).Observe(waitTime.Seconds()) +} + func AdmittedWorkload(cqName kueue.ClusterQueueReference, waitTime time.Duration) { AdmittedWorkloadsTotal.WithLabelValues(string(cqName)).Inc() admissionWaitTime.WithLabelValues(string(cqName)).Observe(waitTime.Seconds()) } +func AdmissionChecksWaitTime(cqName kueue.ClusterQueueReference, waitTime time.Duration) { + admissionChecksWaitTime.WithLabelValues(string(cqName)).Observe(waitTime.Seconds()) +} + func ReportPendingWorkloads(cqName string, active, inadmissible int) { PendingWorkloads.WithLabelValues(cqName, PendingStatusActive).Set(float64(active)) PendingWorkloads.WithLabelValues(cqName, PendingStatusInadmissible).Set(float64(inadmissible)) @@ -194,8 +234,11 @@ func ReportPendingWorkloads(cqName string, active, inadmissible int) { func ClearQueueSystemMetrics(cqName string) { PendingWorkloads.DeleteLabelValues(cqName, PendingStatusActive) PendingWorkloads.DeleteLabelValues(cqName, PendingStatusInadmissible) + QuotaReservedWorkloadsTotal.DeleteLabelValues(cqName) + quotaReservedWaitTime.DeleteLabelValues(cqName) AdmittedWorkloadsTotal.DeleteLabelValues(cqName) admissionWaitTime.DeleteLabelValues(cqName) + admissionChecksWaitTime.DeleteLabelValues(cqName) } func ReportClusterQueueStatus(cqName string, cqStatus ClusterQueueStatus) { @@ -295,8 +338,11 @@ func Register() { PendingWorkloads, ReservingActiveWorkloads, AdmittedActiveWorkloads, + QuotaReservedWorkloadsTotal, + quotaReservedWaitTime, AdmittedWorkloadsTotal, admissionWaitTime, + admissionChecksWaitTime, ClusterQueueResourceUsage, ClusterQueueResourceReservations, ClusterQueueResourceNominalQuota, diff --git a/pkg/metrics/metrics_test.go b/pkg/metrics/metrics_test.go index 4cdc4dd001..d133f1c79b 100644 --- a/pkg/metrics/metrics_test.go +++ b/pkg/metrics/metrics_test.go @@ -19,6 +19,7 @@ package metrics import ( "testing" + "github.com/google/go-cmp/cmp" "github.com/prometheus/client_golang/prometheus" "sigs.k8s.io/kueue/pkg/features" @@ -37,6 +38,14 @@ func expectFilteredMetricsCount(t *testing.T, vec *prometheus.GaugeVec, count in } } +func TestGenerateExponentialBuckets(t *testing.T) { + expect := []float64{1, 2.5, 5, 10, 20, 40, 80, 160, 320, 640, 1280, 2560, 5120, 10240} + result := generateExponentialBuckets(14) + if diff := cmp.Diff(result, expect); len(diff) != 0 { + t.Errorf("Unexpected buckets (-want,+got):\n%s", diff) + } +} + func TestReportAndCleanupClusterQueueMetrics(t *testing.T) { defer features.SetFeatureGateDuringTest(t, features.LendingLimit, true)() ReportClusterQueueQuotas("cohort", "queue", "flavor", "res", 5, 10, 3) diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index 65c87b6408..4665047079 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -27,7 +27,6 @@ import ( "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" - apimeta "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" @@ -528,16 +527,16 @@ func (s *Scheduler) admit(ctx context.Context, e *entry, cq 
*cache.ClusterQueue) s.admissionRoutineWrapper.Run(func() { err := s.applyAdmission(ctx, newWorkload) if err == nil { - waitStarted := e.Obj.CreationTimestamp.Time - if c := apimeta.FindStatusCondition(e.Obj.Status.Conditions, kueue.WorkloadEvicted); c != nil { - waitStarted = c.LastTransitionTime.Time - } - waitTime := time.Since(waitStarted) + waitTime := workload.QueuedWaitTime(newWorkload) s.recorder.Eventf(newWorkload, corev1.EventTypeNormal, "QuotaReserved", "Quota reserved in ClusterQueue %v, wait time since queued was %.0fs", admission.ClusterQueue, waitTime.Seconds()) + metrics.QuotaReservedWorkload(admission.ClusterQueue, waitTime) if workload.IsAdmitted(newWorkload) { - s.recorder.Eventf(newWorkload, corev1.EventTypeNormal, "Admitted", "Admitted by ClusterQueue %v, wait time since reservation was 0s ", admission.ClusterQueue) + s.recorder.Eventf(newWorkload, corev1.EventTypeNormal, "Admitted", "Admitted by ClusterQueue %v, wait time since reservation was 0s", admission.ClusterQueue) + metrics.AdmittedWorkload(admission.ClusterQueue, waitTime) + if len(newWorkload.Status.AdmissionChecks) > 0 { + metrics.AdmissionChecksWaitTime(admission.ClusterQueue, 0) + } } - metrics.AdmittedWorkload(admission.ClusterQueue, waitTime) log.V(2).Info("Workload successfully admitted and assigned flavors", "assignments", admission.PodSetAssignments) return } diff --git a/pkg/workload/workload.go b/pkg/workload/workload.go index de377d9d28..d72f8a0fb9 100644 --- a/pkg/workload/workload.go +++ b/pkg/workload/workload.go @@ -21,6 +21,7 @@ import ( "fmt" "maps" "strings" + "time" "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" @@ -43,7 +44,13 @@ import ( ) var ( - admissionManagedConditions = []string{kueue.WorkloadQuotaReserved, kueue.WorkloadEvicted, kueue.WorkloadAdmitted, kueue.WorkloadPreempted} + admissionManagedConditions = []string{ + kueue.WorkloadQuotaReserved, + kueue.WorkloadEvicted, + kueue.WorkloadAdmitted, + kueue.WorkloadPreempted, + kueue.WorkloadRequeued, + } ) type AssignmentClusterQueueState struct { @@ -340,10 +347,14 @@ func UpdateStatus(ctx context.Context, return c.Status().Patch(ctx, newWl, client.Apply, client.FieldOwner(managerPrefix+"-"+condition.Type)) } -// UnsetQuotaReservationWithCondition sets the QuotaReserved condition to false and clears -// the admission. +// UnsetQuotaReservationWithCondition sets the QuotaReserved condition to false, clears +// the admission and set the WorkloadRequeued status. // Returns whether any change was done. 
func UnsetQuotaReservationWithCondition(wl *kueue.Workload, reason, message string) bool { + if HasQuotaReservation(wl) { + SetRequeuedCondition(wl, reason, message) + } + condition := metav1.Condition{ Type: kueue.WorkloadQuotaReserved, Status: metav1.ConditionFalse, @@ -365,6 +376,26 @@ func UnsetQuotaReservationWithCondition(wl *kueue.Workload, reason, message stri return changed } +// SetRequeuedCondition sets the WorkloadRequeued condition to true +func SetRequeuedCondition(wl *kueue.Workload, reason string, message string) { + condition := metav1.Condition{ + Type: kueue.WorkloadRequeued, + Status: metav1.ConditionTrue, + Reason: reason, + Message: api.TruncateConditionMessage(message), + ObservedGeneration: wl.Generation, + } + apimeta.SetStatusCondition(&wl.Status.Conditions, condition) +} + +func QueuedWaitTime(wl *kueue.Workload) time.Duration { + queuedTime := wl.CreationTimestamp.Time + if c := apimeta.FindStatusCondition(wl.Status.Conditions, kueue.WorkloadRequeued); c != nil { + queuedTime = c.LastTransitionTime.Time + } + return time.Since(queuedTime) +} + // BaseSSAWorkload creates a new object based on the input workload that // only contains the fields necessary to identify the original object. // The object can be used in as a base for Server-Side-Apply. diff --git a/site/content/en/docs/reference/metrics.md b/site/content/en/docs/reference/metrics.md index 4412eb1125..c35b76e251 100644 --- a/site/content/en/docs/reference/metrics.md +++ b/site/content/en/docs/reference/metrics.md @@ -25,8 +25,11 @@ Use the following metrics to monitor the status of your ClusterQueues: | Metric name | Type | Description | Labels | | ----------- | ---- | ----------- | ------ | | `kueue_pending_workloads` | Gauge | The number of pending workloads. | `cluster_queue`: the name of the ClusterQueue
`status`: possible values are `active` or `inadmissible` | +| `kueue_quota_reserved_workloads_total` | Counter | The total number of quota reserved workloads. | `cluster_queue`: the name of the ClusterQueue | +| `kueue_quota_reserved_wait_time_seconds` | Histogram | The time between a workload was created or requeued until it got quota reservation. | `cluster_queue`: the name of the ClusterQueue | | `kueue_admitted_workloads_total` | Counter | The total number of admitted workloads. | `cluster_queue`: the name of the ClusterQueue | -| `kueue_admission_wait_time_seconds` | Histogram | The time between a Workload was created until it was admitted. | `cluster_queue`: the name of the ClusterQueue | +| `kueue_admission_wait_time_seconds` | Histogram | The time between a workload was created or requeued until admission. | `cluster_queue`: the name of the ClusterQueue | +| `kueue_admission_checks_wait_time_seconds` | Histogram | The time from when a workload got the quota reservation until admission. | `cluster_queue`: the name of the ClusterQueue | | `kueue_admitted_active_workloads` | Gauge | The number of admitted Workloads that are active (unsuspended and not finished) | `cluster_queue`: the name of the ClusterQueue | | `kueue_cluster_queue_status` | Gauge | Reports the status of the ClusterQueue | `cluster_queue`: The name of the ClusterQueue
`status`: Possible values are `pending`, `active` or `terminated`. For a ClusterQueue, the metric only reports a value of 1 for one of the statuses. | diff --git a/test/integration/controller/core/workload_controller_test.go b/test/integration/controller/core/workload_controller_test.go index fdd34cf610..e8cdf361b1 100644 --- a/test/integration/controller/core/workload_controller_test.go +++ b/test/integration/controller/core/workload_controller_test.go @@ -86,7 +86,14 @@ var _ = ginkgo.Describe("Workload controller", ginkgo.Ordered, ginkgo.ContinueOn gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(wl), &updatedQueueWorkload)).To(gomega.Succeed()) return len(updatedQueueWorkload.Status.Conditions) }, util.Timeout, util.Interval).Should(gomega.BeComparableTo(1)) - gomega.Expect(updatedQueueWorkload.Status.Conditions[0].Message).To(gomega.BeComparableTo(message)) + gomega.Expect(updatedQueueWorkload.Status.Conditions[0]).To( + gomega.BeComparableTo(metav1.Condition{ + Type: kueue.WorkloadQuotaReserved, + Status: metav1.ConditionFalse, + Reason: "Inadmissible", + Message: message, + }, util.IgnoreConditionTimestampsAndObservedGeneration), + ) }) }) @@ -102,7 +109,14 @@ var _ = ginkgo.Describe("Workload controller", ginkgo.Ordered, ginkgo.ContinueOn gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(wl), &updatedQueueWorkload)).To(gomega.Succeed()) return len(updatedQueueWorkload.Status.Conditions) }, util.Timeout, util.Interval).Should(gomega.BeComparableTo(1)) - gomega.Expect(updatedQueueWorkload.Status.Conditions[0].Message).To(gomega.BeComparableTo(message)) + gomega.Expect(updatedQueueWorkload.Status.Conditions[0]).To( + gomega.BeComparableTo(metav1.Condition{ + Type: kueue.WorkloadQuotaReserved, + Status: metav1.ConditionFalse, + Reason: "Inadmissible", + Message: message, + }, util.IgnoreConditionTimestampsAndObservedGeneration), + ) }) }) @@ -122,7 +136,14 @@ var _ = ginkgo.Describe("Workload controller", ginkgo.Ordered, ginkgo.ContinueOn gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(wl), &updatedQueueWorkload)).To(gomega.Succeed()) return updatedQueueWorkload.Status.Conditions }, util.Timeout, util.Interval).ShouldNot(gomega.BeNil()) - gomega.Expect(updatedQueueWorkload.Status.Conditions[0].Message).To(gomega.BeComparableTo(message)) + gomega.Expect(updatedQueueWorkload.Status.Conditions[0]).To( + gomega.BeComparableTo(metav1.Condition{ + Type: kueue.WorkloadQuotaReserved, + Status: metav1.ConditionFalse, + Reason: "Inadmissible", + Message: message, + }, util.IgnoreConditionTimestampsAndObservedGeneration), + ) }) }) @@ -273,6 +294,8 @@ var _ = ginkgo.Describe("Workload controller", ginkgo.Ordered, ginkgo.ContinueOn Reason: "AdmissionChecksRejected", Message: "Admission checks [check1] are rejected", }, util.IgnoreConditionTimestampsAndObservedGeneration)) + + util.ExpectAdmittedWorkloadsTotalMetric(clusterQueue, 0) }) }) @@ -319,6 +342,8 @@ var _ = ginkgo.Describe("Workload controller", ginkgo.Ordered, ginkgo.ContinueOn Message: "The workload is admitted", }, util.IgnoreConditionTimestampsAndObservedGeneration))) }, util.Timeout, util.Interval).Should(gomega.Succeed()) + + util.ExpectAdmittedWorkloadsTotalMetric(clusterQueue, 1) }) ginkgo.By("setting a rejected check conditions the workload should be evicted and admitted condition kept", func() { @@ -347,6 +372,12 @@ var _ = ginkgo.Describe("Workload controller", ginkgo.Ordered, ginkgo.ContinueOn Reason: "Admitted", Message: "The workload is admitted", }, 
util.IgnoreConditionTimestampsAndObservedGeneration), + gomega.BeComparableTo(metav1.Condition{ + Type: kueue.WorkloadQuotaReserved, + Status: metav1.ConditionTrue, + Reason: "QuotaReserved", + Message: "Quota reserved in ClusterQueue cluster-queue", + }, util.IgnoreConditionTimestampsAndObservedGeneration), )) }, util.Timeout, util.Interval).Should(gomega.Succeed()) }) @@ -368,6 +399,18 @@ var _ = ginkgo.Describe("Workload controller", ginkgo.Ordered, ginkgo.ContinueOn Reason: "NoReservationNoChecks", Message: "The workload has no reservation and not all checks ready", }, util.IgnoreConditionTimestampsAndObservedGeneration), + gomega.BeComparableTo(metav1.Condition{ + Type: kueue.WorkloadQuotaReserved, + Status: metav1.ConditionFalse, + Reason: "Pending", + Message: "By test", + }, util.IgnoreConditionTimestampsAndObservedGeneration), + gomega.BeComparableTo(metav1.Condition{ + Type: kueue.WorkloadRequeued, + Status: metav1.ConditionTrue, + Reason: "Pending", + Message: "By test", + }, util.IgnoreConditionTimestampsAndObservedGeneration), )) }, util.Timeout, util.Interval).Should(gomega.Succeed()) }) diff --git a/test/integration/scheduler/podsready/scheduler_test.go b/test/integration/scheduler/podsready/scheduler_test.go index 4216b2f3de..d3d3010c76 100644 --- a/test/integration/scheduler/podsready/scheduler_test.go +++ b/test/integration/scheduler/podsready/scheduler_test.go @@ -254,6 +254,7 @@ var _ = ginkgo.Describe("SchedulerWithWaitForPodsReady", func() { gomega.Expect(k8sClient.Create(ctx, prodWl)).Should(gomega.Succeed()) ginkgo.By("checking the 'prod' workload is admitted") util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, prodWl) + util.ExpectQuotaReservedWorkloadsTotalMetric(prodClusterQ, 1) util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 1) ginkgo.By("exceed the timeout for the 'prod' workload") time.Sleep(podsReadyTimeout) @@ -263,10 +264,12 @@ var _ = ginkgo.Describe("SchedulerWithWaitForPodsReady", func() { ginkgo.By("verify the 'prod' workload gets re-admitted twice") util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, prodWl) + util.ExpectQuotaReservedWorkloadsTotalMetric(prodClusterQ, 2) util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 2) time.Sleep(podsReadyTimeout) util.FinishEvictionForWorkloads(ctx, k8sClient, prodWl) util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, prodWl) + util.ExpectQuotaReservedWorkloadsTotalMetric(prodClusterQ, 3) util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 3) time.Sleep(podsReadyTimeout) ginkgo.By("evicted re-admitted workload should have 2 in the re-queue count") @@ -626,6 +629,7 @@ var _ = ginkgo.Describe("SchedulerWithWaitForPodsReadyNonblockingMode", func() { gomega.Expect(k8sClient.Create(ctx, prodWl)).Should(gomega.Succeed()) ginkgo.By("checking the 'prod' workload is admitted") util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, prodWl) + util.ExpectQuotaReservedWorkloadsTotalMetric(prodClusterQ, 1) util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 1) ginkgo.By("exceed the timeout for the 'prod' workload") time.Sleep(podsReadyTimeout) @@ -634,6 +638,7 @@ var _ = ginkgo.Describe("SchedulerWithWaitForPodsReadyNonblockingMode", func() { ginkgo.By("verify the 'prod' workload gets re-admitted once") util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, prodWl) + util.ExpectQuotaReservedWorkloadsTotalMetric(prodClusterQ, 2) util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 2) 
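// Both totals above are cumulative counters, so the same 'prod' workload increments them again each time it is re-admitted after a PodsReady eviction; a single re-admission is why they read 2 here even though only one workload was created.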
time.Sleep(podsReadyTimeout) util.ExpectWorkloadToHaveRequeueCount(ctx, k8sClient, client.ObjectKeyFromObject(prodWl), ptr.To[int32](2)) diff --git a/test/integration/scheduler/scheduler_test.go b/test/integration/scheduler/scheduler_test.go index e901643db8..631c3e7399 100644 --- a/test/integration/scheduler/scheduler_test.go +++ b/test/integration/scheduler/scheduler_test.go @@ -81,17 +81,21 @@ var _ = ginkgo.Describe("Scheduler", func() { ginkgo.When("Scheduling workloads on clusterQueues", func() { var ( - prodClusterQ *kueue.ClusterQueue - devClusterQ *kueue.ClusterQueue - podsCountClusterQ *kueue.ClusterQueue - podsCountOnlyClusterQ *kueue.ClusterQueue - preemptionClusterQ *kueue.ClusterQueue - prodQueue *kueue.LocalQueue - devQueue *kueue.LocalQueue - podsCountQueue *kueue.LocalQueue - podsCountOnlyQueue *kueue.LocalQueue - preemptionQueue *kueue.LocalQueue - cqsStopPolicy *kueue.StopPolicy + admissionCheck1 *kueue.AdmissionCheck + admissionCheck2 *kueue.AdmissionCheck + prodClusterQ *kueue.ClusterQueue + devClusterQ *kueue.ClusterQueue + podsCountClusterQ *kueue.ClusterQueue + podsCountOnlyClusterQ *kueue.ClusterQueue + preemptionClusterQ *kueue.ClusterQueue + admissionCheckClusterQ *kueue.ClusterQueue + prodQueue *kueue.LocalQueue + devQueue *kueue.LocalQueue + podsCountQueue *kueue.LocalQueue + podsCountOnlyQueue *kueue.LocalQueue + preemptionQueue *kueue.LocalQueue + admissionCheckQueue *kueue.LocalQueue + cqsStopPolicy *kueue.StopPolicy ) ginkgo.JustBeforeEach(func() { @@ -100,6 +104,14 @@ var _ = ginkgo.Describe("Scheduler", func() { gomega.Expect(k8sClient.Create(ctx, spotUntaintedFlavor)).To(gomega.Succeed()) cqsStopPolicy := ptr.Deref(cqsStopPolicy, kueue.None) + admissionCheck1 = testing.MakeAdmissionCheck("check1").ControllerName("ctrl").Obj() + gomega.Expect(k8sClient.Create(ctx, admissionCheck1)).Should(gomega.Succeed()) + util.SetAdmissionCheckActive(ctx, k8sClient, admissionCheck1, metav1.ConditionTrue) + + admissionCheck2 = testing.MakeAdmissionCheck("check2").ControllerName("ctrl").Obj() + gomega.Expect(k8sClient.Create(ctx, admissionCheck2)).Should(gomega.Succeed()) + util.SetAdmissionCheckActive(ctx, k8sClient, admissionCheck2, metav1.ConditionTrue) + prodClusterQ = testing.MakeClusterQueue("prod-cq"). ResourceGroup( *testing.MakeFlavorQuotas("spot-tainted").Resource(corev1.ResourceCPU, "5", "5").Obj(), @@ -151,6 +163,15 @@ var _ = ginkgo.Describe("Scheduler", func() { Obj() gomega.Expect(k8sClient.Create(ctx, preemptionClusterQ)).Should(gomega.Succeed()) + admissionCheckClusterQ = testing.MakeClusterQueue("admission-check-cq"). + ResourceGroup( + *testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5").Obj(), + ). + AdmissionChecks("check1", "check2"). + StopPolicy(cqsStopPolicy). 
+ Obj() + gomega.Expect(k8sClient.Create(ctx, admissionCheckClusterQ)).Should(gomega.Succeed()) + prodQueue = testing.MakeLocalQueue("prod-queue", ns.Name).ClusterQueue(prodClusterQ.Name).Obj() gomega.Expect(k8sClient.Create(ctx, prodQueue)).Should(gomega.Succeed()) @@ -165,6 +186,9 @@ var _ = ginkgo.Describe("Scheduler", func() { preemptionQueue = testing.MakeLocalQueue("preemption-queue", ns.Name).ClusterQueue(preemptionClusterQ.Name).Obj() gomega.Expect(k8sClient.Create(ctx, preemptionQueue)).Should(gomega.Succeed()) + + admissionCheckQueue = testing.MakeLocalQueue("admission-check-queue", ns.Name).ClusterQueue(admissionCheckClusterQ.Name).Obj() + gomega.Expect(k8sClient.Create(ctx, admissionCheckQueue)).Should(gomega.Succeed()) }) ginkgo.JustAfterEach(func() { @@ -174,6 +198,9 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, podsCountClusterQ, true) util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, podsCountOnlyClusterQ, true) util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, preemptionClusterQ, true) + util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, admissionCheckClusterQ, true) + util.ExpectAdmissionCheckToBeDeleted(ctx, k8sClient, admissionCheck2, true) + util.ExpectAdmissionCheckToBeDeleted(ctx, k8sClient, admissionCheck1, true) util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, onDemandFlavor, true) util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, spotTaintedFlavor, true) util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, spotUntaintedFlavor, true) @@ -187,6 +214,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, prodWl1, prodWl1Admission) util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 0) util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 1) + util.ExpectQuotaReservedWorkloadsTotalMetric(prodClusterQ, 1) util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 1) ginkgo.By("checking a second no-fit workload does not get admitted") @@ -202,6 +230,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, devWl, spotUntaintedFlavorAdmission) util.ExpectPendingWorkloadsMetric(devClusterQ, 0, 0) util.ExpectReservingActiveWorkloadsMetric(devClusterQ, 1) + util.ExpectQuotaReservedWorkloadsTotalMetric(devClusterQ, 1) util.ExpectAdmittedWorkloadsTotalMetric(devClusterQ, 1) ginkgo.By("checking the second workload gets admitted when the first workload finishes") @@ -210,6 +239,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, prodWl2, prodWl2Admission) util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 0) util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 1) + util.ExpectQuotaReservedWorkloadsTotalMetric(prodClusterQ, 2) util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 2) }) @@ -231,6 +261,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl1, wl1Admission) util.ExpectPendingWorkloadsMetric(podsCountClusterQ, 0, 0) util.ExpectReservingActiveWorkloadsMetric(podsCountClusterQ, 1) + util.ExpectQuotaReservedWorkloadsTotalMetric(podsCountClusterQ, 1) util.ExpectAdmittedWorkloadsTotalMetric(podsCountClusterQ, 1) }) @@ -258,6 +289,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadsToBePending(ctx, k8sClient, wl2) util.ExpectPendingWorkloadsMetric(podsCountClusterQ, 0, 1) util.ExpectReservingActiveWorkloadsMetric(podsCountClusterQ, 2) + util.ExpectQuotaReservedWorkloadsTotalMetric(podsCountClusterQ, 2) 
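// Unlike the reserving-active gauge checked above, this total and the admitted total below are counters: they record how many reservations and admissions have ever happened in the ClusterQueue, so they never decrease when workloads finish.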
util.ExpectAdmittedWorkloadsTotalMetric(podsCountClusterQ, 2) }) @@ -269,6 +301,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, podsCountClusterQ.Name, wl2, wl3) util.ExpectPendingWorkloadsMetric(podsCountClusterQ, 0, 0) util.ExpectReservingActiveWorkloadsMetric(podsCountClusterQ, 2) + util.ExpectQuotaReservedWorkloadsTotalMetric(podsCountClusterQ, 3) util.ExpectAdmittedWorkloadsTotalMetric(podsCountClusterQ, 3) }) }) @@ -289,6 +322,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl1, wl1Admission) util.ExpectPendingWorkloadsMetric(podsCountOnlyClusterQ, 0, 0) util.ExpectReservingActiveWorkloadsMetric(podsCountOnlyClusterQ, 1) + util.ExpectQuotaReservedWorkloadsTotalMetric(podsCountOnlyClusterQ, 1) util.ExpectAdmittedWorkloadsTotalMetric(podsCountOnlyClusterQ, 1) }) @@ -314,6 +348,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadsToBePending(ctx, k8sClient, wl2) util.ExpectPendingWorkloadsMetric(podsCountOnlyClusterQ, 0, 1) util.ExpectReservingActiveWorkloadsMetric(podsCountOnlyClusterQ, 2) + util.ExpectQuotaReservedWorkloadsTotalMetric(podsCountOnlyClusterQ, 2) util.ExpectAdmittedWorkloadsTotalMetric(podsCountOnlyClusterQ, 2) }) @@ -325,6 +360,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, podsCountOnlyClusterQ.Name, wl2, wl3) util.ExpectPendingWorkloadsMetric(podsCountOnlyClusterQ, 0, 0) util.ExpectReservingActiveWorkloadsMetric(podsCountOnlyClusterQ, 2) + util.ExpectQuotaReservedWorkloadsTotalMetric(podsCountOnlyClusterQ, 3) util.ExpectAdmittedWorkloadsTotalMetric(podsCountOnlyClusterQ, 3) }) }) @@ -378,6 +414,22 @@ var _ = ginkgo.Describe("Scheduler", func() { }) }) + ginkgo.It("Should admit workloads with admission checks", func() { + wl1 := testing.MakeWorkload("admission-check-wl1", ns.Name). + Queue(admissionCheckQueue.Name). + Request(corev1.ResourceCPU, "2"). 
+ Obj() + + ginkgo.By("checking the first workload gets created and gets quota reserved", func() { + gomega.Expect(k8sClient.Create(ctx, wl1)).Should(gomega.Succeed()) + util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl1, nil) + util.ExpectPendingWorkloadsMetric(admissionCheckClusterQ, 0, 0) + util.ExpectReservingActiveWorkloadsMetric(admissionCheckClusterQ, 1) + util.ExpectQuotaReservedWorkloadsTotalMetric(admissionCheckClusterQ, 1) + util.ExpectAdmittedWorkloadsTotalMetric(admissionCheckClusterQ, 0) + }) + }) + ginkgo.When("Hold at startup", func() { ginkgo.BeforeEach(func() { cqsStopPolicy = ptr.To(kueue.Hold) @@ -412,6 +464,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 3) util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 2) + util.ExpectQuotaReservedWorkloadsTotalMetric(prodClusterQ, 2) util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 2) ginkgo.By("after the high priority workloads finish, only the mid priority workloads should be admitted") @@ -420,6 +473,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, wlMid1, wlMid2) util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 1) util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 2) + util.ExpectQuotaReservedWorkloadsTotalMetric(prodClusterQ, 4) util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 4) }) }) @@ -432,6 +486,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, bigWl) util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 0) util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 1) + util.ExpectQuotaReservedWorkloadsTotalMetric(prodClusterQ, 1) util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 1) smallWl1 := testing.MakeWorkload("small-wl-1", ns.Name).Queue(prodQueue.Name).Request(corev1.ResourceCPU, "2.5").Obj() @@ -450,6 +505,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, smallWl1, smallWl2) util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 0) util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 2) + util.ExpectQuotaReservedWorkloadsTotalMetric(prodClusterQ, 3) util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 3) }) @@ -515,6 +571,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl1, expectWl1Admission) util.ExpectPendingWorkloadsMetric(cq, 0, 0) util.ExpectReservingActiveWorkloadsMetric(cq, 1) + util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 1) util.ExpectAdmittedWorkloadsTotalMetric(cq, 1) ginkgo.By("Second big workload is pending") @@ -523,6 +580,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadsToBePending(ctx, k8sClient, wl2) util.ExpectPendingWorkloadsMetric(cq, 0, 1) util.ExpectReservingActiveWorkloadsMetric(cq, 1) + util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 1) util.ExpectAdmittedWorkloadsTotalMetric(cq, 1) ginkgo.By("Third small workload starts") @@ -532,6 +590,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl3, expectWl3Admission) util.ExpectPendingWorkloadsMetric(cq, 0, 1) util.ExpectReservingActiveWorkloadsMetric(cq, 2) + util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 2) util.ExpectAdmittedWorkloadsTotalMetric(cq, 2) ginkgo.By("Second big workload starts after the first one is deleted") @@ -540,6 +599,7 @@ var _ = ginkgo.Describe("Scheduler", func() { 
util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl2, expectWl2Admission) util.ExpectPendingWorkloadsMetric(cq, 0, 0) util.ExpectReservingActiveWorkloadsMetric(cq, 2) + util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 3) util.ExpectAdmittedWorkloadsTotalMetric(cq, 3) }) @@ -563,6 +623,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl1, expectAdmission) util.ExpectPendingWorkloadsMetric(fooCQ, 0, 0) util.ExpectReservingActiveWorkloadsMetric(fooCQ, 1) + util.ExpectQuotaReservedWorkloadsTotalMetric(fooCQ, 1) util.ExpectAdmittedWorkloadsTotalMetric(fooCQ, 1) ginkgo.By("Second big workload is pending") @@ -571,6 +632,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadsToBePending(ctx, k8sClient, wl2) util.ExpectPendingWorkloadsMetric(cq, 0, 1) util.ExpectReservingActiveWorkloadsMetric(cq, 0) + util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 0) util.ExpectAdmittedWorkloadsTotalMetric(cq, 0) ginkgo.By("Third small workload starts") @@ -580,6 +642,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl3, expectAdmission) util.ExpectPendingWorkloadsMetric(fooCQ, 0, 0) util.ExpectReservingActiveWorkloadsMetric(fooCQ, 2) + util.ExpectQuotaReservedWorkloadsTotalMetric(fooCQ, 2) util.ExpectAdmittedWorkloadsTotalMetric(fooCQ, 2) ginkgo.By("Second big workload starts after the first one is deleted") @@ -588,6 +651,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl2, expectAdmission) util.ExpectPendingWorkloadsMetric(cq, 0, 0) util.ExpectReservingActiveWorkloadsMetric(cq, 1) + util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 1) util.ExpectAdmittedWorkloadsTotalMetric(cq, 1) }) }) @@ -621,6 +685,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadsToBePending(ctx, k8sClient, wl) util.ExpectPendingWorkloadsMetric(cq, 0, 1) util.ExpectReservingActiveWorkloadsMetric(cq, 0) + util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 0) util.ExpectAdmittedWorkloadsTotalMetric(cq, 0) util.ExpectAdmissionAttemptsMetric(1, 0) @@ -645,6 +710,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl, expectAdmission) util.ExpectPendingWorkloadsMetric(cq, 0, 0) util.ExpectReservingActiveWorkloadsMetric(cq, 1) + util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 1) util.ExpectAdmittedWorkloadsTotalMetric(cq, 1) util.ExpectAdmissionAttemptsMetric(1, 1) }) @@ -703,6 +769,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadsToBePending(ctx, k8sClient, wl1, wl2) util.ExpectPendingWorkloadsMetric(cq, 0, 2) util.ExpectReservingActiveWorkloadsMetric(cq, 0) + util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 0) util.ExpectAdmittedWorkloadsTotalMetric(cq, 0) ginkgo.By("checking the first workload gets admitted after updating the namespace labels to match CQ selector") @@ -710,6 +777,7 @@ var _ = ginkgo.Describe("Scheduler", func() { gomega.Expect(k8sClient.Update(ctx, ns)).Should(gomega.Succeed()) util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, cq.Name, wl1) util.ExpectReservingActiveWorkloadsMetric(cq, 1) + util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 1) util.ExpectAdmittedWorkloadsTotalMetric(cq, 1) util.ExpectPendingWorkloadsMetric(cq, 0, 1) }) @@ -744,6 +812,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadsToBeFrozen(ctx, k8sClient, fooCQ.Name, wl) util.ExpectPendingWorkloadsMetric(fooCQ, 0, 1) util.ExpectReservingActiveWorkloadsMetric(fooCQ, 0) 
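// The workload stays frozen while fooCQ references a flavor that does not exist yet (it is created just below), so nothing is reserved or admitted and both totals remain at zero.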
+ util.ExpectQuotaReservedWorkloadsTotalMetric(fooCQ, 0) util.ExpectAdmittedWorkloadsTotalMetric(fooCQ, 0) ginkgo.By("Creating foo flavor") @@ -756,6 +825,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, fooCQ.Name, wl) util.ExpectPendingWorkloadsMetric(fooCQ, 0, 0) util.ExpectReservingActiveWorkloadsMetric(fooCQ, 1) + util.ExpectQuotaReservedWorkloadsTotalMetric(fooCQ, 1) util.ExpectAdmittedWorkloadsTotalMetric(fooCQ, 1) }) }) @@ -800,6 +870,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl1, expectAdmission) util.ExpectPendingWorkloadsMetric(cq, 0, 0) util.ExpectReservingActiveWorkloadsMetric(cq, 1) + util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 1) util.ExpectAdmittedWorkloadsTotalMetric(cq, 1) ginkgo.By("checking a second workload without toleration doesn't start") @@ -808,6 +879,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadsToBePending(ctx, k8sClient, wl2) util.ExpectPendingWorkloadsMetric(cq, 0, 1) util.ExpectReservingActiveWorkloadsMetric(cq, 1) + util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 1) util.ExpectAdmittedWorkloadsTotalMetric(cq, 1) ginkgo.By("checking a third workload with toleration starts") @@ -818,6 +890,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl3, expectAdmission) util.ExpectPendingWorkloadsMetric(cq, 0, 1) util.ExpectReservingActiveWorkloadsMetric(cq, 2) + util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 2) util.ExpectAdmittedWorkloadsTotalMetric(cq, 2) }) }) @@ -857,6 +930,7 @@ var _ = ginkgo.Describe("Scheduler", func() { expectAdmission := testing.MakeAdmission(cq.Name).Assignment(corev1.ResourceCPU, "spot-untainted", "1").Obj() util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl1, expectAdmission) util.ExpectReservingActiveWorkloadsMetric(cq, 1) + util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 1) util.ExpectAdmittedWorkloadsTotalMetric(cq, 1) util.ExpectPendingWorkloadsMetric(cq, 0, 0) @@ -870,6 +944,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl2, expectAdmission) util.ExpectPendingWorkloadsMetric(cq, 0, 0) util.ExpectReservingActiveWorkloadsMetric(cq, 2) + util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 2) util.ExpectAdmittedWorkloadsTotalMetric(cq, 2) }) }) @@ -971,6 +1046,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectPendingWorkloadsMetric(prodCQ, 0, 1) util.ExpectReservingActiveWorkloadsMetric(prodCQ, 0) util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 0) + util.ExpectQuotaReservedWorkloadsTotalMetric(prodCQ, 0) ginkgo.By("checking the workload gets admitted when a fallback ClusterQueue gets added") fallbackClusterQueue := testing.MakeClusterQueue("fallback-cq"). 
@@ -988,6 +1064,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl, expectAdmission) util.ExpectPendingWorkloadsMetric(prodCQ, 0, 0) util.ExpectReservingActiveWorkloadsMetric(prodCQ, 1) + util.ExpectQuotaReservedWorkloadsTotalMetric(prodCQ, 1) util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 1) }) @@ -1020,7 +1097,9 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectPendingWorkloadsMetric(devCQ, 0, 1) util.ExpectReservingActiveWorkloadsMetric(prodCQ, 0) util.ExpectReservingActiveWorkloadsMetric(devCQ, 0) + util.ExpectQuotaReservedWorkloadsTotalMetric(prodCQ, 0) util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 0) + util.ExpectQuotaReservedWorkloadsTotalMetric(devCQ, 0) util.ExpectAdmittedWorkloadsTotalMetric(devCQ, 0) // Delay cluster queue creation to make sure workloads are in the same @@ -1040,7 +1119,9 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectPendingWorkloadsMetric(devCQ, 0, 0) util.ExpectReservingActiveWorkloadsMetric(prodCQ, 1) util.ExpectReservingActiveWorkloadsMetric(devCQ, 1) + util.ExpectQuotaReservedWorkloadsTotalMetric(prodCQ, 1) util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 1) + util.ExpectQuotaReservedWorkloadsTotalMetric(devCQ, 1) util.ExpectAdmittedWorkloadsTotalMetric(devCQ, 1) }) @@ -1130,6 +1211,7 @@ var _ = ginkgo.Describe("Scheduler", func() { testing.MakeAdmission(prodCQ.Name).Assignment(corev1.ResourceCPU, "spot-untainted", "1").Obj()) util.ExpectPendingWorkloadsMetric(prodCQ, 0, 0) util.ExpectReservingActiveWorkloadsMetric(prodCQ, 3) + util.ExpectQuotaReservedWorkloadsTotalMetric(prodCQ, 3) util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 3) }) @@ -1162,6 +1244,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl1, prodWl1Admission) util.ExpectPendingWorkloadsMetric(prodCQ, 0, 0) util.ExpectReservingActiveWorkloadsMetric(prodCQ, 1) + util.ExpectQuotaReservedWorkloadsTotalMetric(prodCQ, 1) util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 1) ginkgo.By("Creating another workload") @@ -1171,6 +1254,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl2, prodWl2Admission) util.ExpectPendingWorkloadsMetric(devCQ, 0, 0) util.ExpectReservingActiveWorkloadsMetric(devCQ, 1) + util.ExpectQuotaReservedWorkloadsTotalMetric(devCQ, 1) util.ExpectAdmittedWorkloadsTotalMetric(devCQ, 1) }) @@ -1258,6 +1342,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadsToBePending(ctx, k8sClient, wl) util.ExpectPendingWorkloadsMetric(prodCQ, 0, 1) util.ExpectReservingActiveWorkloadsMetric(prodCQ, 0) + util.ExpectQuotaReservedWorkloadsTotalMetric(prodCQ, 0) util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 0) ginkgo.By("checking the workload gets admitted when another ClusterQueue gets added") @@ -1275,6 +1360,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl, expectAdmission) util.ExpectPendingWorkloadsMetric(prodCQ, 0, 0) util.ExpectReservingActiveWorkloadsMetric(prodCQ, 1) + util.ExpectQuotaReservedWorkloadsTotalMetric(prodCQ, 1) util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 1) }) @@ -1309,7 +1395,9 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectPendingWorkloadsMetric(devCQ, 0, 0) util.ExpectReservingActiveWorkloadsMetric(prodCQ, 1) util.ExpectReservingActiveWorkloadsMetric(devCQ, 0) + util.ExpectQuotaReservedWorkloadsTotalMetric(prodCQ, 1) util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 1) + 
util.ExpectQuotaReservedWorkloadsTotalMetric(devCQ, 0) util.ExpectAdmittedWorkloadsTotalMetric(devCQ, 0) // Update lending limit of cluster queue @@ -1331,7 +1419,9 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectPendingWorkloadsMetric(devCQ, 0, 0) util.ExpectReservingActiveWorkloadsMetric(prodCQ, 2) util.ExpectReservingActiveWorkloadsMetric(devCQ, 0) + util.ExpectQuotaReservedWorkloadsTotalMetric(prodCQ, 2) util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 2) + util.ExpectQuotaReservedWorkloadsTotalMetric(devCQ, 0) util.ExpectAdmittedWorkloadsTotalMetric(devCQ, 0) }) }) diff --git a/test/util/util.go b/test/util/util.go index bf919c0714..a19d899f35 100644 --- a/test/util/util.go +++ b/test/util/util.go @@ -441,11 +441,20 @@ func ExpectReservingActiveWorkloadsMetric(cq *kueue.ClusterQueue, v int) { func ExpectAdmittedWorkloadsTotalMetric(cq *kueue.ClusterQueue, v int) { metric := metrics.AdmittedWorkloadsTotal.WithLabelValues(cq.Name) - gomega.EventuallyWithOffset(1, func() int { - v, err := testutil.GetCounterMetricValue(metric) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) - return int(v) - }, Timeout, Interval).Should(gomega.Equal(v)) + gomega.EventuallyWithOffset(1, func(g gomega.Gomega) { + count, err := testutil.GetCounterMetricValue(metric) + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(int(count)).Should(gomega.Equal(v)) + }, Timeout, Interval).Should(gomega.Succeed()) +} + +func ExpectQuotaReservedWorkloadsTotalMetric(cq *kueue.ClusterQueue, v int) { + metric := metrics.QuotaReservedWorkloadsTotal.WithLabelValues(cq.Name) + gomega.EventuallyWithOffset(1, func(g gomega.Gomega) { + count, err := testutil.GetCounterMetricValue(metric) + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(int(count)).Should(gomega.Equal(v)) + }, Timeout, Interval).Should(gomega.Succeed()) } func ExpectClusterQueueStatusMetric(cq *kueue.ClusterQueue, status metrics.ClusterQueueStatus) { From e8fc9b7cf6bf9d964c6e5363aba711a5d841f5cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominik=20Paj=C4=85k?= Date: Tue, 23 Apr 2024 19:40:40 +0200 Subject: [PATCH 18/49] Propagate provisioning status of a ProvReq into the Workload status (#2007) * Copying the prov req status message into workload * Integration test for ETA message propagation * Refactor * Update the message after successfull provisoning. * Try to update even if a previous updated happened. 
* Comment explaining when the update happens * PR comments --- .../provisioning/controller.go | 54 ++++++++++++------ .../provisioning/controller_test.go | 57 +++++++++++++++++++ .../provisioning/provisioning_test.go | 32 +++++++++++ 3 files changed, 126 insertions(+), 17 deletions(-) diff --git a/pkg/controller/admissionchecks/provisioning/controller.go b/pkg/controller/admissionchecks/provisioning/controller.go index 19b22b95e2..d2a828c142 100644 --- a/pkg/controller/admissionchecks/provisioning/controller.go +++ b/pkg/controller/admissionchecks/provisioning/controller.go @@ -462,6 +462,22 @@ func passProvReqParams(wl *kueue.Workload, req *autoscaling.ProvisioningRequest) } } +func updateCheckMessage(checkState *kueue.AdmissionCheckState, message string) bool { + if message == "" || checkState.Message == message { + return false + } + checkState.Message = message + return true +} + +func updateCheckState(checkState *kueue.AdmissionCheckState, state kueue.CheckState) bool { + if checkState.State == state { + return false + } + checkState.State = state + return true +} + func (c *Controller) syncCheckStates(ctx context.Context, wl *kueue.Workload, checks []string, activeOrLastPRForChecks map[string]*autoscaling.ProvisioningRequest) error { log := ctrl.LoggerFrom(ctx) checksMap := slices.ToRefMap(wl.Status.AdmissionChecks, func(c *kueue.AdmissionCheckState) string { return c.Name }) @@ -472,15 +488,11 @@ func (c *Controller) syncCheckStates(ctx context.Context, wl *kueue.Workload, ch checkState := *checksMap[check] if prc, err := c.helper.ConfigForAdmissionCheck(ctx, check); err != nil { // the check is not active - if checkState.State != kueue.CheckStatePending || checkState.Message != CheckInactiveMessage { - updated = true - checkState.State = kueue.CheckStatePending - checkState.Message = CheckInactiveMessage - } + updated = updateCheckState(&checkState, kueue.CheckStatePending) || updated + updated = updateCheckMessage(&checkState, CheckInactiveMessage) || updated } else if !c.reqIsNeeded(ctx, wl, prc) { - if checkState.State != kueue.CheckStateReady { + if updateCheckState(&checkState, kueue.CheckStateReady) { updated = true - checkState.State = kueue.CheckStateReady checkState.Message = NoRequestNeeded checkState.PodSetUpdates = nil } @@ -492,7 +504,13 @@ func (c *Controller) syncCheckStates(ctx context.Context, wl *kueue.Workload, ch prFailed := apimeta.IsStatusConditionTrue(pr.Status.Conditions, autoscaling.Failed) prProvisioned := apimeta.IsStatusConditionTrue(pr.Status.Conditions, autoscaling.Provisioned) - log.V(3).Info("Synchronizing admission check state based on provisioning request", "wl", klog.KObj(wl), "check", check, "prName", pr.Name, "failed", prFailed, "accepted", prProvisioned) + prAccepted := apimeta.IsStatusConditionTrue(pr.Status.Conditions, autoscaling.Accepted) + log.V(3).Info("Synchronizing admission check state based on provisioning request", "wl", klog.KObj(wl), + "check", check, + "prName", pr.Name, + "failed", prFailed, + "provisioned", prProvisioned, + "accepted", prAccepted) switch { case prFailed: @@ -500,9 +518,8 @@ func (c *Controller) syncCheckStates(ctx context.Context, wl *kueue.Workload, ch if attempt := getAttempt(ctx, pr, wl.Name, check); attempt <= MaxRetries { // it is going to be retried message := fmt.Sprintf("Retrying after failure: %s", apimeta.FindStatusCondition(pr.Status.Conditions, autoscaling.Failed).Message) - updated = updated || checkState.State != kueue.CheckStatePending || checkState.Message != message - checkState.State = 
kueue.CheckStatePending - checkState.Message = message + updated = updateCheckState(&checkState, kueue.CheckStatePending) || updated + updated = updateCheckMessage(&checkState, message) || updated } else { updated = true checkState.State = kueue.CheckStateRejected @@ -510,17 +527,20 @@ func (c *Controller) syncCheckStates(ctx context.Context, wl *kueue.Workload, ch } } case prProvisioned: - if checkState.State != kueue.CheckStateReady { + if updateCheckState(&checkState, kueue.CheckStateReady) { updated = true - checkState.State = kueue.CheckStateReady // add the pod podSetUpdates checkState.PodSetUpdates = podSetUpdates(wl, pr) + updateCheckMessage(&checkState, apimeta.FindStatusCondition(pr.Status.Conditions, autoscaling.Provisioned).Message) } + case prAccepted: + // we propagate the message from the provisioning request status into the workload + // this happens for provisioned = false (ETA updates) and also for provisioned = true + // to change to the "successfully provisioned" message after provisioning + updated = updateCheckMessage(&checkState, apimeta.FindStatusCondition(pr.Status.Conditions, autoscaling.Provisioned).Message) || updated + updated = updateCheckState(&checkState, kueue.CheckStatePending) || updated default: - if checkState.State != kueue.CheckStatePending { - updated = true - checkState.State = kueue.CheckStatePending - } + updated = updateCheckState(&checkState, kueue.CheckStatePending) || updated } } diff --git a/pkg/controller/admissionchecks/provisioning/controller_test.go b/pkg/controller/admissionchecks/provisioning/controller_test.go index f386fa2c2d..1ceccd38b4 100644 --- a/pkg/controller/admissionchecks/provisioning/controller_test.go +++ b/pkg/controller/admissionchecks/provisioning/controller_test.go @@ -64,6 +64,14 @@ var ( } ) +func requestWithConditions(r *autoscaling.ProvisioningRequest, conditions []metav1.Condition) *autoscaling.ProvisioningRequest { + r = r.DeepCopy() + for _, condition := range conditions { + apimeta.SetStatusCondition(&r.Status.Conditions, condition) + } + return r +} + func requestWithCondition(r *autoscaling.ProvisioningRequest, conditionType string, status metav1.ConditionStatus) *autoscaling.ProvisioningRequest { r = r.DeepCopy() apimeta.SetStatusCondition(&r.Status.Conditions, metav1.Condition{ @@ -647,6 +655,55 @@ func TestReconcile(t *testing.T) { GetProvisioningRequestName("wl", "check2", 1), }, }, + "workloads status gets updated based on the provisioning request": { + workload: baseWorkload.DeepCopy(), + checks: []kueue.AdmissionCheck{*baseCheck.DeepCopy()}, + flavors: []kueue.ResourceFlavor{*baseFlavor1.DeepCopy(), *baseFlavor2.DeepCopy()}, + configs: []kueue.ProvisioningRequestConfig{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "config1", + }, + Spec: kueue.ProvisioningRequestConfigSpec{ + ProvisioningClassName: "class1", + Parameters: map[string]kueue.Parameter{ + "p1": "v1", + }, + }, + }, + }, + templates: []corev1.PodTemplate{*baseTemplate1.DeepCopy(), *baseTemplate2.DeepCopy()}, + requests: []autoscaling.ProvisioningRequest{ + *requestWithConditions(baseRequest, + []metav1.Condition{ + { + Type: autoscaling.Failed, + Status: metav1.ConditionFalse, + }, + { + Type: autoscaling.Provisioned, + Status: metav1.ConditionFalse, + Message: "Provisioning Request wasn't provisioned. 
ETA: 2024-02-22T10:36:40Z", + }, + { + Type: autoscaling.Accepted, + Status: metav1.ConditionTrue, + }, + }), + }, + wantWorkloads: map[string]*kueue.Workload{ + baseWorkload.Name: (&utiltesting.WorkloadWrapper{Workload: *baseWorkload.DeepCopy()}). + AdmissionChecks(kueue.AdmissionCheckState{ + Name: "check1", + State: kueue.CheckStatePending, + Message: "Provisioning Request wasn't provisioned. ETA: 2024-02-22T10:36:40Z", + }, kueue.AdmissionCheckState{ + Name: "not-provisioning", + State: kueue.CheckStatePending, + }). + Obj(), + }, + }, } for name, tc := range cases { diff --git a/test/integration/controller/admissionchecks/provisioning/provisioning_test.go b/test/integration/controller/admissionchecks/provisioning/provisioning_test.go index cfae97d28b..554ed4b055 100644 --- a/test/integration/controller/admissionchecks/provisioning/provisioning_test.go +++ b/test/integration/controller/admissionchecks/provisioning/provisioning_test.go @@ -317,6 +317,38 @@ var _ = ginkgo.Describe("Provisioning", ginkgo.Ordered, ginkgo.ContinueOnFailure Namespace: wlKey.Namespace, Name: provisioning.GetProvisioningRequestName(wlKey.Name, ac.Name, 1), } + ginkgo.By("Setting the provision request as Not Provisioned and providing ETA", func() { + createdRequest := &autoscaling.ProvisioningRequest{} + gomega.Eventually(func() error { + err := k8sClient.Get(ctx, provReqKey, createdRequest) + if err != nil { + return err + } + apimeta.SetStatusCondition(&createdRequest.Status.Conditions, metav1.Condition{ + Type: autoscaling.Accepted, + Status: metav1.ConditionTrue, + Reason: "Reason", + }) + apimeta.SetStatusCondition(&createdRequest.Status.Conditions, metav1.Condition{ + Type: autoscaling.Provisioned, + Status: metav1.ConditionFalse, + Reason: "Reason", + Message: "Not provisioned, ETA: 2024-02-22T10:36:40Z.", + }) + return k8sClient.Status().Update(ctx, createdRequest) + }, util.Timeout, util.Interval).Should(gomega.Succeed()) + }) + ginkgo.By("Checking that the ETA is propagated to workload", func() { + updatedWl := &kueue.Workload{} + gomega.Eventually(func(g gomega.Gomega) { + g.Expect(k8sClient.Get(ctx, wlKey, updatedWl)).To(gomega.Succeed()) + state := workload.FindAdmissionCheck(updatedWl.Status.AdmissionChecks, ac.Name) + g.Expect(state).NotTo(gomega.BeNil()) + g.Expect(state.State).To(gomega.Equal(kueue.CheckStatePending)) + g.Expect(state.Message).To(gomega.Equal("Not provisioned, ETA: 2024-02-22T10:36:40Z.")) + }, util.Timeout, util.Interval).Should(gomega.Succeed()) + }) + ginkgo.By("Setting the provision request as Provisioned", func() { createdRequest := &autoscaling.ProvisioningRequest{} gomega.Eventually(func() error { From 2991eccbadf7cdefa8f2d5f5146fd53bfbb9c18a Mon Sep 17 00:00:00 2001 From: Aldo Culquicondor <1299064+alculquicondor@users.noreply.github.com> Date: Wed, 24 Apr 2024 03:39:15 -0400 Subject: [PATCH 19/49] Make dominant resource share flavor aware (#2037) Change-Id: I21da836d55a63d788931e6212e0be30d6f78497b --- keps/1714-fair-sharing/README.md | 10 +- pkg/cache/clusterqueue.go | 50 ++-- pkg/cache/clusterqueue_test.go | 261 +++++++++++++----- .../flavorassigner/flavorassigner.go | 16 ++ pkg/scheduler/preemption/preemption.go | 24 +- pkg/scheduler/scheduler.go | 4 +- pkg/workload/workload.go | 21 +- pkg/workload/workload_test.go | 66 ++++- 8 files changed, 314 insertions(+), 138 deletions(-) diff --git a/keps/1714-fair-sharing/README.md b/keps/1714-fair-sharing/README.md index db47815e86..908b119a0b 100644 --- a/keps/1714-fair-sharing/README.md +++ 
b/keps/1714-fair-sharing/README.md @@ -206,10 +206,12 @@ The value function is a variation of DRF (see [1](https://amplab.cs.berkeley.edu/wp-content/uploads/2011/06/Dominant-Resource-Fairness-Fair-Allocation-of-Multiple-Resource-Types.pdf), [2](https://dash.harvard.edu/bitstream/handle/1/11956916/Parkes_BeyondDominant.pdf;jsessionid=AC0D06C2CC07C693BD42008D7AE25D99?sequence=1)): -For a given resource r provided by a ClusterQueue or cohort c, we calculate T_r as the total -requests consumed by the Workloads for that resource in that CQ or cohort, independent of the -flavor, that are above the nominal quota. The value for a resource is the ratio of T_r and the -total nominal quotas (or lendingLimits, if defined) in the hierarchy of the parent of C. +For a given resource _r_ provided by a ClusterQueue or cohort _c_, we calculate $T_r$ as the +total requests consumed by the Workloads for resource _r_ in that CQ or cohort, +that are above the nominal quota, added up for all flavors. +The value for a resource is the ratio of $T_r$ and the total nominal quotas +(or lendingLimits, if defined) for the resource _r_, added up for all flavors, +in the hierarchy of the parent of _c_. Note that the share value for a suborganization (a node in the tree) is independent of the share value for its children. In other words, the calculation of the share value only diff --git a/pkg/cache/clusterqueue.go b/pkg/cache/clusterqueue.go index 42661dcbe3..142ee5ad73 100644 --- a/pkg/cache/clusterqueue.go +++ b/pkg/cache/clusterqueue.go @@ -128,8 +128,7 @@ type ResourceQuota struct { LendingLimit *int64 } -type ResourceQuantities map[corev1.ResourceName]int64 -type FlavorResourceQuantities map[kueue.ResourceFlavorReference]ResourceQuantities +type FlavorResourceQuantities map[kueue.ResourceFlavorReference]workload.Requests type queue struct { key string @@ -684,40 +683,53 @@ func (c *ClusterQueue) UsedCohortQuota(fName kueue.ResourceFlavorReference, rNam return cohortUsage } -// DominantResourceShare returns a value from 0 to 100 representing the maximum of the ratios +// DominantResourceShare returns a value from 0 to 1000 representing the maximum of the ratios // of usage above nominal quota to the lendable resources in the cohort, among all the resources // provided by the ClusterQueue. // If zero, it means that the usage of the ClusterQueue is below the nominal quota. // The function also returns the resource name that yielded this value. 
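// For example, with a single flavor whose example.com/gpu usage is 7 against a nominal quota of 5 and a cohort with 10 lendable GPUs, the share is (7-5)*1000/10 = 200.
// If the ClusterQueue borrows nothing, the share is 0 regardless of how much of its own nominal quota is in use.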
func (c *ClusterQueue) DominantResourceShare() (int, corev1.ResourceName) { - return c.dominantResourceShare(nil, 1) + return c.dominantResourceShare(nil, 0) } -func (c *ClusterQueue) DominantResourceShareWith(w *workload.Info) (int, corev1.ResourceName) { - return c.dominantResourceShare(w, 1) +func (c *ClusterQueue) DominantResourceShareWith(wlReq FlavorResourceQuantities) (int, corev1.ResourceName) { + return c.dominantResourceShare(wlReq, 1) } func (c *ClusterQueue) DominantResourceShareWithout(w *workload.Info) (int, corev1.ResourceName) { - return c.dominantResourceShare(w, -1) + return c.dominantResourceShare(w.FlavorResourceUsage(), -1) } -func (c *ClusterQueue) dominantResourceShare(w *workload.Info, m int64) (int, corev1.ResourceName) { +func (c *ClusterQueue) dominantResourceShare(wlReq FlavorResourceQuantities, m int64) (int, corev1.ResourceName) { if c.Cohort == nil { return 0, "" } + + borrowing := make(map[corev1.ResourceName]int64) + for _, rg := range c.ResourceGroups { + for _, flv := range rg.Flavors { + for rName, quotas := range flv.Resources { + b := c.Usage[flv.Name][rName] + m*wlReq[flv.Name][rName] - quotas.Nominal + if b > 0 { + borrowing[rName] += b + } + } + } + } + if len(borrowing) == 0 { + return 0, "" + } + var drs int64 = -1 var dRes corev1.ResourceName - wUsage := w.ResourceUsage() - for rName, rStats := range c.ResourceStats { - var ratio int64 - if c.Cohort.ResourceStats[rName].Lendable > 0 { - ratio = max(rStats.Usage+wUsage[rName]*m-rStats.Nominal, 0) * 100 / - c.Cohort.ResourceStats[rName].Lendable - } - // Use alphabetical order to get a deterministic resource name. - if ratio > drs || (ratio == drs && rName < dRes) { - drs = ratio - dRes = rName + for rName, b := range borrowing { + if lendable := c.Cohort.ResourceStats[rName].Lendable; lendable > 0 { + ratio := b * 1000 / lendable + // Use alphabetical order to get a deterministic resource name. 
+ if ratio > drs || (ratio == drs && rName < dRes) { + drs = ratio + dRes = rName + } } } return int(drs), dRes diff --git a/pkg/cache/clusterqueue_test.go b/pkg/cache/clusterqueue_test.go index bc153e431f..9ad9d5bd8b 100644 --- a/pkg/cache/clusterqueue_test.go +++ b/pkg/cache/clusterqueue_test.go @@ -22,12 +22,12 @@ import ( "github.com/google/go-cmp/cmp" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" "sigs.k8s.io/kueue/pkg/features" "sigs.k8s.io/kueue/pkg/metrics" utiltesting "sigs.k8s.io/kueue/pkg/util/testing" - "sigs.k8s.io/kueue/pkg/workload" ) func TestClusterQueueUpdateWithFlavors(t *testing.T) { @@ -759,38 +759,60 @@ func TestClusterQueueUpdateWithAdmissionCheck(t *testing.T) { func TestDominantResourceShare(t *testing.T) { cases := map[string]struct { cq ClusterQueue - workload *workload.Info + flvResQ FlavorResourceQuantities wantDRValue int wantDRName corev1.ResourceName }{ "no cohort": { cq: ClusterQueue{ - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 2_000, - Lendable: 2_000, - Usage: 1_000, + Usage: FlavorResourceQuantities{ + "default": { + corev1.ResourceCPU: 1_000, + "example.com/gpu": 2, }, - "example.com/gpu": { - Nominal: 5, - Lendable: 5, - Usage: 2_000, + }, + ResourceGroups: []ResourceGroup{ + { + Flavors: []FlavorQuotas{ + { + Name: "default", + Resources: map[corev1.ResourceName]*ResourceQuota{ + corev1.ResourceCPU: { + Nominal: 2_000, + }, + "example.com/gpu": { + Nominal: 5, + }, + }, + }, + }, }, }, }, }, "usage below nominal": { cq: ClusterQueue{ - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 2_000, - Lendable: 2_000, - Usage: 1_000, + Usage: FlavorResourceQuantities{ + "default": { + corev1.ResourceCPU: 1_000, + "example.com/gpu": 2, }, - "example.com/gpu": { - Nominal: 5, - Lendable: 5, - Usage: 2, + }, + ResourceGroups: []ResourceGroup{ + { + Flavors: []FlavorQuotas{ + { + Name: "default", + Resources: map[corev1.ResourceName]*ResourceQuota{ + corev1.ResourceCPU: { + Nominal: 2_000, + }, + "example.com/gpu": { + Nominal: 5, + }, + }, + }, + }, }, }, Cohort: &Cohort{ @@ -808,20 +830,30 @@ func TestDominantResourceShare(t *testing.T) { }, }, }, - wantDRName: corev1.ResourceCPU, // due to alphabetical order. 
}, "usage above nominal": { cq: ClusterQueue{ - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 2_000, - Lendable: 2_000, - Usage: 3_000, + Usage: FlavorResourceQuantities{ + "default": { + corev1.ResourceCPU: 3_000, + "example.com/gpu": 7, }, - "example.com/gpu": { - Nominal: 5, - Lendable: 5, - Usage: 7, + }, + ResourceGroups: []ResourceGroup{ + { + Flavors: []FlavorQuotas{ + { + Name: "default", + Resources: map[corev1.ResourceName]*ResourceQuota{ + corev1.ResourceCPU: { + Nominal: 2_000, + }, + "example.com/gpu": { + Nominal: 5, + }, + }, + }, + }, }, }, Cohort: &Cohort{ @@ -840,20 +872,31 @@ func TestDominantResourceShare(t *testing.T) { }, }, wantDRName: "example.com/gpu", - wantDRValue: 20, // (7-5)/10 + wantDRValue: 200, // (7-5)*1000/10 }, "one resource above nominal": { cq: ClusterQueue{ - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 2_000, - Lendable: 2_000, - Usage: 3_000, + Usage: FlavorResourceQuantities{ + "default": { + corev1.ResourceCPU: 3_000, + "example.com/gpu": 3, }, - "example.com/gpu": { - Nominal: 5, - Lendable: 5, - Usage: 3, + }, + ResourceGroups: []ResourceGroup{ + { + Flavors: []FlavorQuotas{ + { + Name: "default", + Resources: map[corev1.ResourceName]*ResourceQuota{ + corev1.ResourceCPU: { + Nominal: 2_000, + }, + "example.com/gpu": { + Nominal: 5, + }, + }, + }, + }, }, }, Cohort: &Cohort{ @@ -872,20 +915,31 @@ func TestDominantResourceShare(t *testing.T) { }, }, wantDRName: corev1.ResourceCPU, - wantDRValue: 10, // (3-2)/10 + wantDRValue: 100, // (3-2)*1000/10 }, "usage with workload above nominal": { cq: ClusterQueue{ - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 2_000, - Lendable: 2_000, - Usage: 1_000, + Usage: FlavorResourceQuantities{ + "default": { + corev1.ResourceCPU: 1_000, + "example.com/gpu": 2, }, - "example.com/gpu": { - Nominal: 5, - Lendable: 5, - Usage: 2, + }, + ResourceGroups: []ResourceGroup{ + { + Flavors: []FlavorQuotas{ + { + Name: "default", + Resources: map[corev1.ResourceName]*ResourceQuota{ + corev1.ResourceCPU: { + Nominal: 2_000, + }, + "example.com/gpu": { + Nominal: 5, + }, + }, + }, + }, }, }, Cohort: &Cohort{ @@ -903,28 +957,39 @@ func TestDominantResourceShare(t *testing.T) { }, }, }, - workload: &workload.Info{ - TotalRequests: []workload.PodSetResources{{ - Requests: workload.Requests{ - corev1.ResourceCPU: 4_000, - "example.com/gpu": 4, - }, - }}, + flvResQ: FlavorResourceQuantities{ + "default": { + corev1.ResourceCPU: 4_000, + "example.com/gpu": 4, + }, }, wantDRName: corev1.ResourceCPU, - wantDRValue: 30, // (1+4-2)/10 + wantDRValue: 300, // (1+4-2)*1000/10 }, "A resource with zero lendable": { cq: ClusterQueue{ - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 2_000, - Lendable: 2_000, - Usage: 1_000, + Usage: FlavorResourceQuantities{ + "default": { + corev1.ResourceCPU: 1_000, + "example.com/gpu": 1, }, - "example.com/gpu": { - Nominal: 2_000, - Usage: 1_000, + }, + ResourceGroups: []ResourceGroup{ + { + Flavors: []FlavorQuotas{ + { + Name: "default", + Resources: map[corev1.ResourceName]*ResourceQuota{ + corev1.ResourceCPU: { + Nominal: 2_000, + }, + "example.com/gpu": { + Nominal: 2, + LendingLimit: ptr.To[int64](0), + }, + }, + }, + }, }, }, Cohort: &Cohort{ @@ -941,21 +1006,69 @@ func TestDominantResourceShare(t *testing.T) { }, }, }, - workload: &workload.Info{ - TotalRequests: []workload.PodSetResources{{ - Requests: workload.Requests{ - corev1.ResourceCPU: 4_000, - "example.com/gpu": 4, + flvResQ: FlavorResourceQuantities{ + "default": 
{ + corev1.ResourceCPU: 4_000, + "example.com/gpu": 4, + }, + }, + wantDRName: corev1.ResourceCPU, + wantDRValue: 300, // (1+4-2)*1000/10 + }, + "multiple flavors": { + cq: ClusterQueue{ + Usage: FlavorResourceQuantities{ + "on-demand": { + corev1.ResourceCPU: 15_000, + }, + "spot": { + corev1.ResourceCPU: 5_000, + }, + }, + ResourceGroups: []ResourceGroup{ + { + Flavors: []FlavorQuotas{ + { + Name: "on-demand", + Resources: map[corev1.ResourceName]*ResourceQuota{ + corev1.ResourceCPU: { + Nominal: 20_000, + }, + }, + }, + { + Name: "spot", + Resources: map[corev1.ResourceName]*ResourceQuota{ + corev1.ResourceCPU: { + Nominal: 80_000, + }, + }, + }, + }, + }, + }, + Cohort: &Cohort{ + ResourceStats: ResourceStats{ + corev1.ResourceCPU: { + Nominal: 200_000, + Lendable: 200_000, + Usage: 20_000, + }, }, - }}, + }, + }, + flvResQ: FlavorResourceQuantities{ + "on-demand": { + corev1.ResourceCPU: 10_000, + }, }, wantDRName: corev1.ResourceCPU, - wantDRValue: 30, // (1+4-2)/10 + wantDRValue: 25, // ((15+10-20)+0)*1000/200 (spot under nominal) }, } for name, tc := range cases { t.Run(name, func(t *testing.T) { - drValue, drName := tc.cq.DominantResourceShareWith(tc.workload) + drValue, drName := tc.cq.DominantResourceShareWith(tc.flvResQ) if drValue != tc.wantDRValue { t.Errorf("DominantResourceShare(_) returned value %d, want %d", drValue, tc.wantDRValue) } diff --git a/pkg/scheduler/flavorassigner/flavorassigner.go b/pkg/scheduler/flavorassigner/flavorassigner.go index c36ffc2e2a..7ffe9e0bd5 100644 --- a/pkg/scheduler/flavorassigner/flavorassigner.go +++ b/pkg/scheduler/flavorassigner/flavorassigner.go @@ -105,6 +105,22 @@ func (a *Assignment) ToAPI() []kueue.PodSetAssignment { return psFlavors } +func (a *Assignment) TotalRequestsFor(wl *workload.Info) cache.FlavorResourceQuantities { + usage := make(cache.FlavorResourceQuantities) + for i, ps := range wl.TotalRequests { + for res, q := range ps.Requests { + flv := a.PodSets[i].Flavors[res].Name + resUsage := usage[flv] + if resUsage == nil { + resUsage = make(map[corev1.ResourceName]int64) + usage[flv] = resUsage + } + resUsage[res] += q + } + } + return usage +} + type Status struct { reasons []string err error diff --git a/pkg/scheduler/preemption/preemption.go b/pkg/scheduler/preemption/preemption.go index 52681a4fad..e5670de8b3 100644 --- a/pkg/scheduler/preemption/preemption.go +++ b/pkg/scheduler/preemption/preemption.go @@ -92,7 +92,7 @@ func (p *Preemptor) GetTargets(wl workload.Info, assignment flavorassigner.Assig sort.Slice(candidates, candidatesOrdering(candidates, cq.Name, time.Now())) sameQueueCandidates := candidatesOnlyFromQueue(candidates, wl.ClusterQueue) - wlReq := totalRequestsForAssignment(&wl, assignment) + wlReq := assignment.TotalRequestsFor(&wl) // To avoid flapping, Kueue only allows preemption of workloads from the same // queue if borrowing. 
Preemption of workloads from queues can happen only @@ -258,8 +258,8 @@ func restoreSnapshot(snapshot *cache.Snapshot, targets []*workload.Info) { func fairPreemptions(wl *workload.Info, assignment flavorassigner.Assignment, snapshot *cache.Snapshot, resPerFlv resourcesPerFlavor, candidates []*workload.Info, allowBorrowingBelowPriority *int32) []*workload.Info { cqHeap := cqHeapFromCandidates(candidates, false, snapshot) nominatedCQ := snapshot.ClusterQueues[wl.ClusterQueue] - newNominatedShareValue, _ := nominatedCQ.DominantResourceShareWith(wl) - wlReq := totalRequestsForAssignment(wl, assignment) + wlReq := assignment.TotalRequestsFor(wl) + newNominatedShareValue, _ := nominatedCQ.DominantResourceShareWith(wlReq) var targets []*workload.Info fits := false var retryCandidates []*workload.Info @@ -274,7 +274,7 @@ func fairPreemptions(wl *workload.Info, assignment flavorassigner.Assignment, sn fits = true break } - newNominatedShareValue, _ = nominatedCQ.DominantResourceShareWith(wl) + newNominatedShareValue, _ = nominatedCQ.DominantResourceShareWith(wlReq) candCQ.workloads = candCQ.workloads[1:] if len(candCQ.workloads) > 0 { candCQ.share, _ = candCQ.cq.DominantResourceShare() @@ -469,22 +469,6 @@ func workloadUsesResources(wl *workload.Info, resPerFlv resourcesPerFlavor) bool return false } -func totalRequestsForAssignment(wl *workload.Info, assignment flavorassigner.Assignment) cache.FlavorResourceQuantities { - usage := make(cache.FlavorResourceQuantities) - for i, ps := range wl.TotalRequests { - for res, q := range ps.Requests { - flv := assignment.PodSets[i].Flavors[res].Name - resUsage := usage[flv] - if resUsage == nil { - resUsage = make(map[corev1.ResourceName]int64) - usage[flv] = resUsage - } - resUsage[res] += q - } - } - return usage -} - // workloadFits determines if the workload requests would fit given the // requestable resources and simulated usage of the ClusterQueue and its cohort, // if it belongs to one. diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index 4665047079..d14dbc38e0 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -158,7 +158,7 @@ func (cu *cohortsUsage) add(cohort string, assignment cache.FlavorResourceQuanti } func (cu *cohortsUsage) totalUsageForCommonFlavorResources(cohort string, assignment cache.FlavorResourceQuantities) cache.FlavorResourceQuantities { - return utilmaps.Intersect((*cu)[cohort], assignment, func(a, b cache.ResourceQuantities) cache.ResourceQuantities { + return utilmaps.Intersect((*cu)[cohort], assignment, func(a, b workload.Requests) workload.Requests { return utilmaps.Intersect(a, b, func(a, b int64) int64 { return a + b }) }) } @@ -357,7 +357,7 @@ func (s *Scheduler) nominate(ctx context.Context, workloads []workload.Info, sna e.inadmissibleMsg = e.assignment.Message() e.Info.LastAssignment = &e.assignment.LastState if s.enableFairSharing { - e.dominantResourceShare, e.dominantResourceName = cq.DominantResourceShareWith(&w) + e.dominantResourceShare, e.dominantResourceName = cq.DominantResourceShareWith(e.assignment.TotalRequestsFor(&w)) } } entries = append(entries, e) diff --git a/pkg/workload/workload.go b/pkg/workload/workload.go index d72f8a0fb9..cfce1ea7c8 100644 --- a/pkg/workload/workload.go +++ b/pkg/workload/workload.go @@ -159,18 +159,25 @@ func (i *Info) CanBePartiallyAdmitted() bool { } // ResourceUsage returns the total resource usage for the workload, -// per resource. 
-func (i *Info) ResourceUsage() Requests { +// per flavor (if assigned, otherwise flavor shows as empty string), per resource. +func (i *Info) FlavorResourceUsage() map[kueue.ResourceFlavorReference]Requests { if i == nil || len(i.TotalRequests) == 0 { return nil } - req := maps.Clone(i.TotalRequests[0].Requests) - for j := 1; j < len(i.TotalRequests); j++ { - for rName, rVal := range i.TotalRequests[j].Requests { - req[rName] += rVal + total := make(map[kueue.ResourceFlavorReference]Requests) + for _, psReqs := range i.TotalRequests { + for res, q := range psReqs.Requests { + flv := psReqs.Flavors[res] + if requests, found := total[flv]; found { + requests[res] += q + } else { + total[flv] = Requests{ + res: q, + } + } } } - return req + return total } func CanBePartiallyAdmitted(wl *kueue.Workload) bool { diff --git a/pkg/workload/workload_test.go b/pkg/workload/workload_test.go index 78a6bc0f54..5dbebf62d5 100644 --- a/pkg/workload/workload_test.go +++ b/pkg/workload/workload_test.go @@ -591,13 +591,13 @@ func TestIsEvictedByPodsReadyTimeout(t *testing.T) { } } -func TestResourceUsage(t *testing.T) { +func TestFlavorResourceUsage(t *testing.T) { cases := map[string]struct { info *Info - want Requests + want map[kueue.ResourceFlavorReference]Requests }{ "nil": {}, - "one podset": { + "one podset, no flavors": { info: &Info{ TotalRequests: []PodSetResources{{ Requests: Requests{ @@ -606,12 +606,36 @@ func TestResourceUsage(t *testing.T) { }, }}, }, - want: Requests{ - corev1.ResourceCPU: 1_000, - "example.com/gpu": 3, + want: map[kueue.ResourceFlavorReference]Requests{ + "": { + corev1.ResourceCPU: 1_000, + "example.com/gpu": 3, + }, }, }, - "multiple podsets": { + "one podset, multiple flavors": { + info: &Info{ + TotalRequests: []PodSetResources{{ + Requests: Requests{ + corev1.ResourceCPU: 1_000, + "example.com/gpu": 3, + }, + Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ + corev1.ResourceCPU: "default", + "example.com/gpu": "gpu", + }, + }}, + }, + want: map[kueue.ResourceFlavorReference]Requests{ + "default": { + corev1.ResourceCPU: 1_000, + }, + "gpu": { + "example.com/gpu": 3, + }, + }, + }, + "multiple podsets, multiple flavors": { info: &Info{ TotalRequests: []PodSetResources{ { @@ -619,30 +643,48 @@ func TestResourceUsage(t *testing.T) { corev1.ResourceCPU: 1_000, "example.com/gpu": 3, }, + Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ + corev1.ResourceCPU: "default", + "example.com/gpu": "model_a", + }, }, { Requests: Requests{ corev1.ResourceCPU: 2_000, corev1.ResourceMemory: 2 * utiltesting.Gi, }, + Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ + corev1.ResourceCPU: "default", + corev1.ResourceMemory: "default", + }, }, { Requests: Requests{ "example.com/gpu": 1, }, + Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ + "example.com/gpu": "model_b", + }, }, }, }, - want: Requests{ - corev1.ResourceCPU: 3_000, - corev1.ResourceMemory: 2 * utiltesting.Gi, - "example.com/gpu": 4, + want: map[kueue.ResourceFlavorReference]Requests{ + "default": { + corev1.ResourceCPU: 3_000, + corev1.ResourceMemory: 2 * utiltesting.Gi, + }, + "model_a": { + "example.com/gpu": 3, + }, + "model_b": { + "example.com/gpu": 1, + }, }, }, } for name, tc := range cases { t.Run(name, func(t *testing.T) { - got := tc.info.ResourceUsage() + got := tc.info.FlavorResourceUsage() if diff := cmp.Diff(tc.want, got); diff != "" { t.Errorf("info.ResourceUsage() returned (-want,+got):\n%s", diff) } From 36b340e396341c905444576bf797a221cb5f92ac Mon 
Sep 17 00:00:00 2001 From: Aldo Culquicondor <1299064+alculquicondor@users.noreply.github.com> Date: Wed, 24 Apr 2024 03:39:25 -0400 Subject: [PATCH 20/49] Fix number of pod reconcilers in default chart values (#2046) Change-Id: Ibeaf93a0f897524860e7046586249528161fd65a --- charts/kueue/values.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/kueue/values.yaml b/charts/kueue/values.yaml index 02ade6c8c9..e592129ba0 100644 --- a/charts/kueue/values.yaml +++ b/charts/kueue/values.yaml @@ -68,7 +68,7 @@ managerConfig: controller: groupKindConcurrency: Job.batch: 5 - Pod.: 5 + Pod: 5 Workload.kueue.x-k8s.io: 5 LocalQueue.kueue.x-k8s.io: 1 ClusterQueue.kueue.x-k8s.io: 1 @@ -92,7 +92,7 @@ managerConfig: - "ray.io/raycluster" - "jobset.x-k8s.io/jobset" - "kubeflow.org/mxjob" - - "kubeflow.org/paddlejob" + - "kubeflow.org/paddlejob" - "kubeflow.org/pytorchjob" - "kubeflow.org/tfjob" - "kubeflow.org/xgboostjob" From 9b25d998b4cc317ab9539e55497be49fa19588d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Wo=C5=BAniak?= Date: Wed, 24 Apr 2024 11:01:23 +0200 Subject: [PATCH 21/49] [WaitForPodsReady] Make requeue base delay confiurable (#2040) * Make requeuing backoff base configurable * Add a validation test * Pass config for waitForPodsReady --- apis/config/v1beta1/configuration_types.go | 10 ++- apis/config/v1beta1/defaults.go | 13 +++- apis/config/v1beta1/defaults_test.go | 12 ++- apis/config/v1beta1/zz_generated.deepcopy.go | 5 ++ .../README.md | 16 +++- pkg/config/config_test.go | 9 ++- pkg/config/validation.go | 4 + pkg/config/validation_test.go | 17 +++++ pkg/controller/core/core.go | 48 ++++-------- pkg/controller/core/workload_controller.go | 75 +++++++------------ .../core/workload_controller_test.go | 30 ++++---- .../en/docs/reference/kueue-config.v1beta1.md | 12 ++- .../scheduler/podsready/suite_test.go | 8 +- 13 files changed, 144 insertions(+), 115 deletions(-) diff --git a/apis/config/v1beta1/configuration_types.go b/apis/config/v1beta1/configuration_types.go index 88f36ddaf5..3c8d27939e 100644 --- a/apis/config/v1beta1/configuration_types.go +++ b/apis/config/v1beta1/configuration_types.go @@ -238,7 +238,8 @@ type RequeuingStrategy struct { // Once the number is reached, the workload is deactivated (`.spec.activate`=`false`). // When it is null, the workloads will repeatedly and endless re-queueing. // - // Every backoff duration is about "10s*2^(n-1)+Rand" where: + // Every backoff duration is about "b*2^(n-1)+Rand" where: + // - "b" represents the base set by "BackoffBaseSeconds" parameter, // - "n" represents the "workloadStatus.requeueState.count", // - "Rand" represents the random jitter. // During this time, the workload is taken as an inadmissible and @@ -248,6 +249,13 @@ type RequeuingStrategy struct { // Defaults to null. // +optional BackoffLimitCount *int32 `json:"backoffLimitCount,omitempty"` + + // BackoffBaseSeconds defines the base for the exponential backoff for + // re-queuing an evicted workload. + // + // Defaults to 10. 
+ // +optional + BackoffBaseSeconds *int32 `json:"backoffBaseSeconds,omitempty"` } type RequeuingTimestamp string diff --git a/apis/config/v1beta1/defaults.go b/apis/config/v1beta1/defaults.go index 8f11d2cd87..059530eb0f 100644 --- a/apis/config/v1beta1/defaults.go +++ b/apis/config/v1beta1/defaults.go @@ -47,6 +47,7 @@ const ( DefaultMultiKueueGCInterval = time.Minute DefaultMultiKueueOrigin = "multikueue" DefaultMultiKueueWorkerLostTimeout = 15 * time.Minute + DefaultRequeuingBackoffBaseSeconds = 10 ) func getOperatorNamespace() string { @@ -121,10 +122,14 @@ func SetDefaults_Configuration(cfg *Configuration) { } cfg.WaitForPodsReady.BlockAdmission = &defaultBlockAdmission } - if cfg.WaitForPodsReady.RequeuingStrategy == nil || cfg.WaitForPodsReady.RequeuingStrategy.Timestamp == nil { - cfg.WaitForPodsReady.RequeuingStrategy = &RequeuingStrategy{ - Timestamp: ptr.To(EvictionTimestamp), - } + if cfg.WaitForPodsReady.RequeuingStrategy == nil { + cfg.WaitForPodsReady.RequeuingStrategy = &RequeuingStrategy{} + } + if cfg.WaitForPodsReady.RequeuingStrategy.Timestamp == nil { + cfg.WaitForPodsReady.RequeuingStrategy.Timestamp = ptr.To(EvictionTimestamp) + } + if cfg.WaitForPodsReady.RequeuingStrategy.BackoffBaseSeconds == nil { + cfg.WaitForPodsReady.RequeuingStrategy.BackoffBaseSeconds = ptr.To[int32](DefaultRequeuingBackoffBaseSeconds) } } if cfg.Integrations == nil { diff --git a/apis/config/v1beta1/defaults_test.go b/apis/config/v1beta1/defaults_test.go index 29cd949c9e..3385303c99 100644 --- a/apis/config/v1beta1/defaults_test.go +++ b/apis/config/v1beta1/defaults_test.go @@ -364,7 +364,8 @@ func TestSetDefaults_Configuration(t *testing.T) { BlockAdmission: ptr.To(true), Timeout: &podsReadyTimeoutTimeout, RequeuingStrategy: &RequeuingStrategy{ - Timestamp: ptr.To(EvictionTimestamp), + Timestamp: ptr.To(EvictionTimestamp), + BackoffBaseSeconds: ptr.To[int32](DefaultRequeuingBackoffBaseSeconds), }, }, Namespace: ptr.To(DefaultNamespace), @@ -393,7 +394,8 @@ func TestSetDefaults_Configuration(t *testing.T) { BlockAdmission: ptr.To(false), Timeout: &podsReadyTimeoutTimeout, RequeuingStrategy: &RequeuingStrategy{ - Timestamp: ptr.To(EvictionTimestamp), + Timestamp: ptr.To(EvictionTimestamp), + BackoffBaseSeconds: ptr.To[int32](DefaultRequeuingBackoffBaseSeconds), }, }, Namespace: ptr.To(DefaultNamespace), @@ -413,7 +415,8 @@ func TestSetDefaults_Configuration(t *testing.T) { Enable: true, Timeout: &podsReadyTimeoutOverwrite, RequeuingStrategy: &RequeuingStrategy{ - Timestamp: ptr.To(CreationTimestamp), + Timestamp: ptr.To(CreationTimestamp), + BackoffBaseSeconds: ptr.To[int32](63), }, }, InternalCertManagement: &InternalCertManagement{ @@ -426,7 +429,8 @@ func TestSetDefaults_Configuration(t *testing.T) { BlockAdmission: ptr.To(true), Timeout: &podsReadyTimeoutOverwrite, RequeuingStrategy: &RequeuingStrategy{ - Timestamp: ptr.To(CreationTimestamp), + Timestamp: ptr.To(CreationTimestamp), + BackoffBaseSeconds: ptr.To[int32](63), }, }, Namespace: ptr.To(DefaultNamespace), diff --git a/apis/config/v1beta1/zz_generated.deepcopy.go b/apis/config/v1beta1/zz_generated.deepcopy.go index f53a7f8fa7..1a19f9cc08 100644 --- a/apis/config/v1beta1/zz_generated.deepcopy.go +++ b/apis/config/v1beta1/zz_generated.deepcopy.go @@ -380,6 +380,11 @@ func (in *RequeuingStrategy) DeepCopyInto(out *RequeuingStrategy) { *out = new(int32) **out = **in } + if in.BackoffBaseSeconds != nil { + in, out := &in.BackoffBaseSeconds, &out.BackoffBaseSeconds + *out = new(int32) + **out = **in + } } // DeepCopy is an 
autogenerated deepcopy function, copying the receiver, creating a new RequeuingStrategy. diff --git a/keps/1282-pods-ready-requeue-strategy/README.md b/keps/1282-pods-ready-requeue-strategy/README.md index b025b3bb9f..60c6944aaa 100644 --- a/keps/1282-pods-ready-requeue-strategy/README.md +++ b/keps/1282-pods-ready-requeue-strategy/README.md @@ -153,6 +153,13 @@ type RequeuingStrategy struct { // Defaults to null. // +optional BackoffLimitCount *int32 `json:"backoffLimitCount,omitempty"` + + // BackoffBaseSeconds defines the base for the exponential backoff for + // re-queuing an evicted workload. + // + // Defaults to 10. + // +optional + BackoffBaseSeconds *int32 `json:"backoffBaseSeconds,omitempty"` } type RequeuingTimestamp string @@ -222,12 +229,15 @@ the queueManager holds the evicted workloads as inadmissible workloads while exp Duration this time, other workloads will have a chance to be admitted. The queueManager calculates an exponential backoff duration by [the Step function](https://pkg.go.dev/k8s.io/apimachinery/pkg/util/wait@v0.29.1#Backoff.Step) -according to the $10s*2^{(n-1)}+Rand$ where the $n$ represents the `workloadStatus.requeueState.count`, and the $Rand$ represents the random jitter. +according to the $b*2^{(n-1)}+Rand$ where: +- $b$ represents the base delay, configured by `baseDelaySeconds` +- $n$ represents the `workloadStatus.requeueState.count`, +- $Rand$ represents the random jitter. It will spend awaiting to be requeued after eviction: -$$\sum_{k=1}^{n}(10s*2^{(k-1)} + Rand)$$ +$$\sum_{k=1}^{n}(b*2^{(k-1)} + Rand)$$ -Assuming `backoffLimitCount` equals 10, and the workload is requeued 10 times +Assuming `backoffLimitCount` equals 10, and `baseDelaySeconds` equals 10 (default) the workload is requeued 10 times after failing to have all pods ready, then the total time awaiting for requeue will take (neglecting the jitter): `10s+20s+40s +...+7680s=2h 8min`. 
Also, considering `.waitForPodsReady.timeout=300s` (default), diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 4caa3d90ea..7a12c73d14 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -218,7 +218,7 @@ clientConnection: apiVersion: config.kueue.x-k8s.io/v1beta1 kind: Configuration integrations: - frameworks: + frameworks: - batch/job `), os.FileMode(0600)); err != nil { t.Fatal(err) @@ -239,7 +239,7 @@ queueVisibility: apiVersion: config.kueue.x-k8s.io/v1beta1 kind: Configuration integrations: - frameworks: + frameworks: - pod podOptions: namespaceSelector: @@ -543,8 +543,9 @@ multiKueue: BlockAdmission: ptr.To(true), Timeout: &metav1.Duration{Duration: 5 * time.Minute}, RequeuingStrategy: &configapi.RequeuingStrategy{ - Timestamp: ptr.To(configapi.CreationTimestamp), - BackoffLimitCount: ptr.To[int32](10), + Timestamp: ptr.To(configapi.CreationTimestamp), + BackoffLimitCount: ptr.To[int32](10), + BackoffBaseSeconds: ptr.To[int32](10), }, }, ClientConnection: defaultClientConnection, diff --git a/pkg/config/validation.go b/pkg/config/validation.go index 6be5d650ac..275170690e 100644 --- a/pkg/config/validation.go +++ b/pkg/config/validation.go @@ -72,6 +72,10 @@ func validateWaitForPodsReady(c *configapi.Configuration) field.ErrorList { allErrs = append(allErrs, field.Invalid(requeuingStrategyPath.Child("backoffLimitCount"), *strategy.BackoffLimitCount, constants.IsNegativeErrorMsg)) } + if strategy.BackoffBaseSeconds != nil && *strategy.BackoffBaseSeconds < 0 { + allErrs = append(allErrs, field.Invalid(requeuingStrategyPath.Child("backoffBaseSeconds"), + *strategy.BackoffBaseSeconds, constants.IsNegativeErrorMsg)) + } } return allErrs } diff --git a/pkg/config/validation_test.go b/pkg/config/validation_test.go index f23b647bfc..c85322f8b8 100644 --- a/pkg/config/validation_test.go +++ b/pkg/config/validation_test.go @@ -271,6 +271,23 @@ func TestValidate(t *testing.T) { }, }, }, + "negative waitForPodsReady.requeuingStrategy.backoffBaseSeconds": { + cfg: &configapi.Configuration{ + Integrations: defaultIntegrations, + WaitForPodsReady: &configapi.WaitForPodsReady{ + Enable: true, + RequeuingStrategy: &configapi.RequeuingStrategy{ + BackoffBaseSeconds: ptr.To[int32](-1), + }, + }, + }, + wantErr: field.ErrorList{ + &field.Error{ + Type: field.ErrorTypeInvalid, + Field: "waitForPodsReady.requeuingStrategy.backoffBaseSeconds", + }, + }, + }, } for name, tc := range testCases { diff --git a/pkg/controller/core/core.go b/pkg/controller/core/core.go index 94beae3232..4f1ac3f5b4 100644 --- a/pkg/controller/core/core.go +++ b/pkg/controller/core/core.go @@ -23,31 +23,17 @@ import ( configapi "sigs.k8s.io/kueue/apis/config/v1beta1" "sigs.k8s.io/kueue/pkg/cache" - "sigs.k8s.io/kueue/pkg/config" "sigs.k8s.io/kueue/pkg/constants" "sigs.k8s.io/kueue/pkg/queue" ) const ( - updateChBuffer = 10 - defaultRequeuingBaseDelaySeconds = 10 + updateChBuffer = 10 ) -type ControllerOptions struct { - requeuingBaseDelaySeconds int32 -} - -type ControllerOption func(*ControllerOptions) - -func WithControllerRequeuingBaseDelaySeconds(value int32) ControllerOption { - return func(o *ControllerOptions) { - o.requeuingBaseDelaySeconds = value - } -} - // SetupControllers sets up the core controllers. It returns the name of the // controller that failed to create and an error, if any. 
-func SetupControllers(mgr ctrl.Manager, qManager *queue.Manager, cc *cache.Cache, cfg *configapi.Configuration, controllerOpts ...ControllerOption) (string, error) { +func SetupControllers(mgr ctrl.Manager, qManager *queue.Manager, cc *cache.Cache, cfg *configapi.Configuration) (string, error) { rfRec := NewResourceFlavorReconciler(mgr.GetClient(), qManager, cc) if err := rfRec.SetupWithManager(mgr, cfg); err != nil { return "ResourceFlavor", err @@ -78,37 +64,29 @@ func SetupControllers(mgr ctrl.Manager, qManager *queue.Manager, cc *cache.Cache if err := cqRec.SetupWithManager(mgr, cfg); err != nil { return "ClusterQueue", err } - ctrlOpts := ControllerOptions{ - requeuingBaseDelaySeconds: defaultRequeuingBaseDelaySeconds, - } - for _, opt := range controllerOpts { - opt(&ctrlOpts) - } if err := NewWorkloadReconciler(mgr.GetClient(), qManager, cc, mgr.GetEventRecorderFor(constants.WorkloadControllerName), WithWorkloadUpdateWatchers(qRec, cqRec), - WithPodsReadyTimeout(podsReadyTimeout(cfg)), - WithRequeuingBackoffLimitCount(requeuingBackoffLimitCount(cfg)), - WithRequeuingBaseDelaySeconds(ctrlOpts.requeuingBaseDelaySeconds), + WithWaitForPodsReady(waitForPodsReady(cfg.WaitForPodsReady)), ).SetupWithManager(mgr, cfg); err != nil { return "Workload", err } return "", nil } -func podsReadyTimeout(cfg *configapi.Configuration) *time.Duration { - if config.WaitForPodsReadyIsEnabled(cfg) && cfg.WaitForPodsReady.Timeout != nil { - return &cfg.WaitForPodsReady.Timeout.Duration +func waitForPodsReady(cfg *configapi.WaitForPodsReady) *waitForPodsReadyConfig { + if cfg == nil || !cfg.Enable { + return nil } - return nil -} - -func requeuingBackoffLimitCount(cfg *configapi.Configuration) *int32 { - if config.WaitForPodsReadyIsEnabled(cfg) && cfg.WaitForPodsReady.RequeuingStrategy != nil { - return cfg.WaitForPodsReady.RequeuingStrategy.BackoffLimitCount + result := waitForPodsReadyConfig{ + timeout: cfg.Timeout.Duration, + } + if cfg.RequeuingStrategy != nil { + result.requeuingBackoffBaseSeconds = *cfg.RequeuingStrategy.BackoffBaseSeconds + result.requeuingBackoffLimitCount = cfg.RequeuingStrategy.BackoffLimitCount } - return nil + return &result } func queueVisibilityUpdateInterval(cfg *configapi.Configuration) time.Duration { diff --git a/pkg/controller/core/workload_controller.go b/pkg/controller/core/workload_controller.go index 19f97cb456..f67b0b8dae 100644 --- a/pkg/controller/core/workload_controller.go +++ b/pkg/controller/core/workload_controller.go @@ -69,37 +69,24 @@ var ( realClock = clock.RealClock{} ) +type waitForPodsReadyConfig struct { + timeout time.Duration + requeuingBackoffLimitCount *int32 + requeuingBackoffBaseSeconds int32 +} + type options struct { - watchers []WorkloadUpdateWatcher - podsReadyTimeout *time.Duration - requeuingBackoffLimitCount *int32 - requeuingBaseDelaySeconds int32 + watchers []WorkloadUpdateWatcher + waitForPodsReadyConfig *waitForPodsReadyConfig } // Option configures the reconciler. type Option func(*options) -// WithPodsReadyTimeout indicates if the controller should interrupt startup -// of a workload if it exceeds the timeout to reach the PodsReady=True condition. -func WithPodsReadyTimeout(value *time.Duration) Option { - return func(o *options) { - o.podsReadyTimeout = value - } -} - -// WithRequeuingBackoffLimitCount indicates if the controller should deactivate a workload -// if it reaches the limitation. 
-func WithRequeuingBackoffLimitCount(value *int32) Option { - return func(o *options) { - o.requeuingBackoffLimitCount = value - } -} - -// WithRequeuingBaseDelaySeconds indicates the base delay for the computation -// of the requeue delay. -func WithRequeuingBaseDelaySeconds(value int32) Option { +// WithWaitForPodsReady indicates the configuration for the WaitForPodsReady feature. +func WithWaitForPodsReady(value *waitForPodsReadyConfig) Option { return func(o *options) { - o.requeuingBaseDelaySeconds = value + o.waitForPodsReadyConfig = value } } @@ -118,15 +105,13 @@ type WorkloadUpdateWatcher interface { // WorkloadReconciler reconciles a Workload object type WorkloadReconciler struct { - log logr.Logger - queues *queue.Manager - cache *cache.Cache - client client.Client - watchers []WorkloadUpdateWatcher - podsReadyTimeout *time.Duration - requeuingBackoffLimitCount *int32 - requeuingBaseDelaySeconds int32 - recorder record.EventRecorder + log logr.Logger + queues *queue.Manager + cache *cache.Cache + client client.Client + watchers []WorkloadUpdateWatcher + waitForPodsReady *waitForPodsReadyConfig + recorder record.EventRecorder } func NewWorkloadReconciler(client client.Client, queues *queue.Manager, cache *cache.Cache, recorder record.EventRecorder, opts ...Option) *WorkloadReconciler { @@ -136,15 +121,13 @@ func NewWorkloadReconciler(client client.Client, queues *queue.Manager, cache *c } return &WorkloadReconciler{ - log: ctrl.Log.WithName("workload-reconciler"), - client: client, - queues: queues, - cache: cache, - watchers: options.watchers, - podsReadyTimeout: options.podsReadyTimeout, - requeuingBackoffLimitCount: options.requeuingBackoffLimitCount, - requeuingBaseDelaySeconds: options.requeuingBaseDelaySeconds, - recorder: recorder, + log: ctrl.Log.WithName("workload-reconciler"), + client: client, + queues: queues, + cache: cache, + watchers: options.watchers, + waitForPodsReady: options.waitForPodsReadyConfig, + recorder: recorder, } } @@ -396,7 +379,7 @@ func (r *WorkloadReconciler) triggerDeactivationOrBackoffRequeue(ctx context.Con } // If requeuingBackoffLimitCount equals to null, the workloads is repeatedly and endless re-queued. requeuingCount := ptr.Deref(wl.Status.RequeueState.Count, 0) + 1 - if r.requeuingBackoffLimitCount != nil && requeuingCount > *r.requeuingBackoffLimitCount { + if r.waitForPodsReady.requeuingBackoffLimitCount != nil && requeuingCount > *r.waitForPodsReady.requeuingBackoffLimitCount { wl.Spec.Active = ptr.To(false) if err := r.client.Update(ctx, wl); err != nil { return false, err @@ -411,7 +394,7 @@ func (r *WorkloadReconciler) triggerDeactivationOrBackoffRequeue(ctx context.Con // During this time, the workload is taken as an inadmissible and other // workloads will have a chance to be admitted. backoff := &wait.Backoff{ - Duration: time.Duration(r.requeuingBaseDelaySeconds) * time.Second, + Duration: time.Duration(r.waitForPodsReady.requeuingBackoffBaseSeconds) * time.Second, Factor: 2, Jitter: 0.0001, Steps: int(requeuingCount), @@ -645,7 +628,7 @@ func (r *WorkloadReconciler) SetupWithManager(mgr ctrl.Manager, cfg *config.Conf // specified timeout counted since max of the LastTransitionTime's for the // Admitted and PodsReady conditions. 
func (r *WorkloadReconciler) admittedNotReadyWorkload(wl *kueue.Workload, clock clock.Clock) (bool, time.Duration) { - if r.podsReadyTimeout == nil { + if r.waitForPodsReady == nil { // the timeout is not configured for the workload controller return false, 0 } @@ -663,7 +646,7 @@ func (r *WorkloadReconciler) admittedNotReadyWorkload(wl *kueue.Workload, clock if podsReadyCond != nil && podsReadyCond.Status == metav1.ConditionFalse && podsReadyCond.LastTransitionTime.After(admittedCond.LastTransitionTime.Time) { elapsedTime = clock.Since(podsReadyCond.LastTransitionTime.Time) } - waitFor := *r.podsReadyTimeout - elapsedTime + waitFor := r.waitForPodsReady.timeout - elapsedTime if waitFor < 0 { waitFor = 0 } diff --git a/pkg/controller/core/workload_controller_test.go b/pkg/controller/core/workload_controller_test.go index 8e16f77cd4..6df7f8edde 100644 --- a/pkg/controller/core/workload_controller_test.go +++ b/pkg/controller/core/workload_controller_test.go @@ -48,7 +48,7 @@ func TestAdmittedNotReadyWorkload(t *testing.T) { testCases := map[string]struct { workload kueue.Workload - podsReadyTimeout *time.Duration + waitForPodsReady *waitForPodsReadyConfig wantCountingTowardsTimeout bool wantRecheckAfter time.Duration }{ @@ -68,7 +68,7 @@ func TestAdmittedNotReadyWorkload(t *testing.T) { }, }, }, - podsReadyTimeout: ptr.To(5 * time.Minute), + waitForPodsReady: &waitForPodsReadyConfig{timeout: 5 * time.Minute}, wantCountingTowardsTimeout: true, wantRecheckAfter: 4 * time.Minute, }, @@ -99,7 +99,7 @@ func TestAdmittedNotReadyWorkload(t *testing.T) { }, }, }, - podsReadyTimeout: ptr.To(5 * time.Minute), + waitForPodsReady: &waitForPodsReadyConfig{timeout: 5 * time.Minute}, wantCountingTowardsTimeout: true, }, "workload with Admitted=True, PodsReady=False; counting since PodsReady.LastTransitionTime": { @@ -120,7 +120,7 @@ func TestAdmittedNotReadyWorkload(t *testing.T) { }, }, }, - podsReadyTimeout: ptr.To(5 * time.Minute), + waitForPodsReady: &waitForPodsReadyConfig{timeout: 5 * time.Minute}, wantCountingTowardsTimeout: true, wantRecheckAfter: 5 * time.Minute, }, @@ -137,7 +137,7 @@ func TestAdmittedNotReadyWorkload(t *testing.T) { }, }, }, - podsReadyTimeout: ptr.To(5 * time.Minute), + waitForPodsReady: &waitForPodsReadyConfig{timeout: 5 * time.Minute}, }, "workload with Admitted=False, not counting": { workload: kueue.Workload{ @@ -152,7 +152,7 @@ func TestAdmittedNotReadyWorkload(t *testing.T) { }, }, }, - podsReadyTimeout: ptr.To(5 * time.Minute), + waitForPodsReady: &waitForPodsReadyConfig{timeout: 5 * time.Minute}, }, "workload with Admitted=True, PodsReady=True; not counting": { workload: kueue.Workload{ @@ -172,13 +172,13 @@ func TestAdmittedNotReadyWorkload(t *testing.T) { }, }, }, - podsReadyTimeout: ptr.To(5 * time.Minute), + waitForPodsReady: &waitForPodsReadyConfig{timeout: 5 * time.Minute}, }, } for name, tc := range testCases { t.Run(name, func(t *testing.T) { - wRec := WorkloadReconciler{podsReadyTimeout: tc.podsReadyTimeout} + wRec := WorkloadReconciler{waitForPodsReady: tc.waitForPodsReady} countingTowardsTimeout, recheckAfter := wRec.admittedNotReadyWorkload(&tc.workload, fakeClock) if tc.wantCountingTowardsTimeout != countingTowardsTimeout { @@ -506,9 +506,11 @@ func TestReconcile(t *testing.T) { }, "increment re-queue count": { reconcilerOpts: []Option{ - WithPodsReadyTimeout(ptr.To(3 * time.Second)), - WithRequeuingBackoffLimitCount(ptr.To[int32](100)), - WithRequeuingBaseDelaySeconds(10), + WithWaitForPodsReady(&waitForPodsReadyConfig{ + timeout: 3 * time.Second, + 
requeuingBackoffLimitCount: ptr.To[int32](100), + requeuingBackoffBaseSeconds: 10, + }), }, workload: utiltesting.MakeWorkload("wl", "ns"). ReserveQuota(utiltesting.MakeAdmission("q1").Obj()). @@ -548,8 +550,10 @@ func TestReconcile(t *testing.T) { }, "deactivated workload": { reconcilerOpts: []Option{ - WithPodsReadyTimeout(ptr.To(3 * time.Second)), - WithRequeuingBackoffLimitCount(ptr.To[int32](1)), + WithWaitForPodsReady(&waitForPodsReadyConfig{ + timeout: 3 * time.Second, + requeuingBackoffLimitCount: ptr.To[int32](1), + }), }, workload: utiltesting.MakeWorkload("wl", "ns"). ReserveQuota(utiltesting.MakeAdmission("q1").Obj()). diff --git a/site/content/en/docs/reference/kueue-config.v1beta1.md b/site/content/en/docs/reference/kueue-config.v1beta1.md index 55fb12778a..040c2fecf9 100644 --- a/site/content/en/docs/reference/kueue-config.v1beta1.md +++ b/site/content/en/docs/reference/kueue-config.v1beta1.md @@ -637,8 +637,9 @@ that was evicted due to Pod readiness. The possible values are:

 BackoffLimitCount defines the maximum number of re-queuing retries.
 Once the number is reached, the workload is deactivated (.spec.activate=false).
 When it is null, the workloads will repeatedly and endless re-queueing.
-Every backoff duration is about "10s*2^(n-1)+Rand" where:
+Every backoff duration is about "b*2^(n-1)+Rand" where:
+• "b" represents the base set by "BackoffBaseSeconds" parameter,
 • "n" represents the "workloadStatus.requeueState.count",
 • "Rand" represents the random jitter. During this time, the workload is taken as an inadmissible and
   other workloads will have a chance to be admitted.
@@ -648,6 +649,15 @@ By default, the consecutive requeue delays are around: (10s, 20s, 40s, ...).
 Defaults to null.
+backoffBaseSeconds
+int32
+BackoffBaseSeconds defines the base for the exponential backoff for
+re-queuing an evicted workload.
+Defaults to 10.

    + + diff --git a/test/integration/scheduler/podsready/suite_test.go b/test/integration/scheduler/podsready/suite_test.go index bc2daabd4b..45dd9a5c07 100644 --- a/test/integration/scheduler/podsready/suite_test.go +++ b/test/integration/scheduler/podsready/suite_test.go @@ -71,8 +71,9 @@ func managerAndSchedulerSetupWithTimeoutAdmission( BlockAdmission: &blockAdmission, Timeout: &metav1.Duration{Duration: value}, RequeuingStrategy: &config.RequeuingStrategy{ - Timestamp: ptr.To(requeuingTimestamp), - BackoffLimitCount: requeuingBackoffLimitCount, + Timestamp: ptr.To(requeuingTimestamp), + BackoffLimitCount: requeuingBackoffLimitCount, + BackoffBaseSeconds: ptr.To[int32](1), }, }, } @@ -87,8 +88,7 @@ func managerAndSchedulerSetupWithTimeoutAdmission( queue.WithPodsReadyRequeuingTimestamp(requeuingTimestamp), ) - failedCtrl, err := core.SetupControllers(mgr, queues, cCache, cfg, - core.WithControllerRequeuingBaseDelaySeconds(1)) + failedCtrl, err := core.SetupControllers(mgr, queues, cCache, cfg) gomega.Expect(err).ToNot(gomega.HaveOccurred(), "controller", failedCtrl) failedWebhook, err := webhooks.Setup(mgr) From a0290b3f67770b34b6169bcd10e74e07ad831945 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Wo=C5=BAniak?= Date: Wed, 24 Apr 2024 12:01:10 +0200 Subject: [PATCH 22/49] Use clock in workload controller consistently (#2044) # Conflicts: # pkg/controller/core/workload_controller.go # pkg/controller/core/workload_controller_test.go --- pkg/controller/core/core.go | 1 + pkg/controller/core/workload_controller.go | 15 +++++++++------ pkg/controller/core/workload_controller_test.go | 14 +++++++------- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/pkg/controller/core/core.go b/pkg/controller/core/core.go index 4f1ac3f5b4..15096169bb 100644 --- a/pkg/controller/core/core.go +++ b/pkg/controller/core/core.go @@ -85,6 +85,7 @@ func waitForPodsReady(cfg *configapi.WaitForPodsReady) *waitForPodsReadyConfig { if cfg.RequeuingStrategy != nil { result.requeuingBackoffBaseSeconds = *cfg.RequeuingStrategy.BackoffBaseSeconds result.requeuingBackoffLimitCount = cfg.RequeuingStrategy.BackoffLimitCount + result.requeuingBackoffJitter = 0.0001 } return &result } diff --git a/pkg/controller/core/workload_controller.go b/pkg/controller/core/workload_controller.go index f67b0b8dae..49c784a184 100644 --- a/pkg/controller/core/workload_controller.go +++ b/pkg/controller/core/workload_controller.go @@ -73,6 +73,7 @@ type waitForPodsReadyConfig struct { timeout time.Duration requeuingBackoffLimitCount *int32 requeuingBackoffBaseSeconds int32 + requeuingBackoffJitter float64 } type options struct { @@ -112,6 +113,7 @@ type WorkloadReconciler struct { watchers []WorkloadUpdateWatcher waitForPodsReady *waitForPodsReadyConfig recorder record.EventRecorder + clock clock.Clock } func NewWorkloadReconciler(client client.Client, queues *queue.Manager, cache *cache.Cache, recorder record.EventRecorder, opts ...Option) *WorkloadReconciler { @@ -128,6 +130,7 @@ func NewWorkloadReconciler(client client.Client, queues *queue.Manager, cache *c watchers: options.watchers, waitForPodsReady: options.waitForPodsReadyConfig, recorder: recorder, + clock: realClock, } } @@ -352,7 +355,7 @@ func (r *WorkloadReconciler) reconcileNotReadyTimeout(ctx context.Context, req c // the workload has already been evicted by the PodsReadyTimeout or been deactivated. 
return ctrl.Result{}, nil } - countingTowardsTimeout, recheckAfter := r.admittedNotReadyWorkload(wl, realClock) + countingTowardsTimeout, recheckAfter := r.admittedNotReadyWorkload(wl) if !countingTowardsTimeout { return ctrl.Result{}, nil } @@ -396,14 +399,14 @@ func (r *WorkloadReconciler) triggerDeactivationOrBackoffRequeue(ctx context.Con backoff := &wait.Backoff{ Duration: time.Duration(r.waitForPodsReady.requeuingBackoffBaseSeconds) * time.Second, Factor: 2, - Jitter: 0.0001, + Jitter: r.waitForPodsReady.requeuingBackoffJitter, Steps: int(requeuingCount), } var waitDuration time.Duration for backoff.Steps > 0 { waitDuration = backoff.Step() } - wl.Status.RequeueState.RequeueAt = ptr.To(metav1.NewTime(time.Now().Add(waitDuration))) + wl.Status.RequeueState.RequeueAt = ptr.To(metav1.NewTime(r.clock.Now().Add(waitDuration))) wl.Status.RequeueState.Count = &requeuingCount return false, nil } @@ -627,7 +630,7 @@ func (r *WorkloadReconciler) SetupWithManager(mgr ctrl.Manager, cfg *config.Conf // True (False or not set). The second value is the remaining time to exceed the // specified timeout counted since max of the LastTransitionTime's for the // Admitted and PodsReady conditions. -func (r *WorkloadReconciler) admittedNotReadyWorkload(wl *kueue.Workload, clock clock.Clock) (bool, time.Duration) { +func (r *WorkloadReconciler) admittedNotReadyWorkload(wl *kueue.Workload) (bool, time.Duration) { if r.waitForPodsReady == nil { // the timeout is not configured for the workload controller return false, 0 @@ -642,9 +645,9 @@ func (r *WorkloadReconciler) admittedNotReadyWorkload(wl *kueue.Workload, clock return false, 0 } admittedCond := apimeta.FindStatusCondition(wl.Status.Conditions, kueue.WorkloadAdmitted) - elapsedTime := clock.Since(admittedCond.LastTransitionTime.Time) + elapsedTime := r.clock.Since(admittedCond.LastTransitionTime.Time) if podsReadyCond != nil && podsReadyCond.Status == metav1.ConditionFalse && podsReadyCond.LastTransitionTime.After(admittedCond.LastTransitionTime.Time) { - elapsedTime = clock.Since(podsReadyCond.LastTransitionTime.Time) + elapsedTime = r.clock.Since(podsReadyCond.LastTransitionTime.Time) } waitFor := r.waitForPodsReady.timeout - elapsedTime if waitFor < 0 { diff --git a/pkg/controller/core/workload_controller_test.go b/pkg/controller/core/workload_controller_test.go index 6df7f8edde..e776911b78 100644 --- a/pkg/controller/core/workload_controller_test.go +++ b/pkg/controller/core/workload_controller_test.go @@ -178,8 +178,8 @@ func TestAdmittedNotReadyWorkload(t *testing.T) { for name, tc := range testCases { t.Run(name, func(t *testing.T) { - wRec := WorkloadReconciler{waitForPodsReady: tc.waitForPodsReady} - countingTowardsTimeout, recheckAfter := wRec.admittedNotReadyWorkload(&tc.workload, fakeClock) + wRec := WorkloadReconciler{waitForPodsReady: tc.waitForPodsReady, clock: fakeClock} + countingTowardsTimeout, recheckAfter := wRec.admittedNotReadyWorkload(&tc.workload) if tc.wantCountingTowardsTimeout != countingTowardsTimeout { t.Errorf("Unexpected countingTowardsTimeout, want=%v, got=%v", tc.wantCountingTowardsTimeout, countingTowardsTimeout) @@ -510,6 +510,7 @@ func TestReconcile(t *testing.T) { timeout: 3 * time.Second, requeuingBackoffLimitCount: ptr.To[int32](100), requeuingBackoffBaseSeconds: 10, + requeuingBackoffJitter: 0, }), }, workload: utiltesting.MakeWorkload("wl", "ns"). 
@@ -553,6 +554,7 @@ func TestReconcile(t *testing.T) { WithWaitForPodsReady(&waitForPodsReadyConfig{ timeout: 3 * time.Second, requeuingBackoffLimitCount: ptr.To[int32](1), + requeuingBackoffJitter: 0, }), }, workload: utiltesting.MakeWorkload("wl", "ns"). @@ -599,6 +601,8 @@ func TestReconcile(t *testing.T) { cqCache := cache.New(cl) qManager := queue.NewManager(cl, cqCache) reconciler := NewWorkloadReconciler(cl, qManager, cqCache, recorder, tc.reconcilerOpts...) + // use a fake clock with jitter = 0 to be able to assert on the requeueAt. + reconciler.clock = testingclock.NewFakeClock(testStartTime) ctxWithLogger, _ := utiltesting.ContextWithLog(t) ctx, ctxCancel := context.WithCancel(ctxWithLogger) @@ -638,11 +642,7 @@ func TestReconcile(t *testing.T) { if requeueState := tc.wantWorkload.Status.RequeueState; requeueState != nil && requeueState.RequeueAt != nil { gotRequeueState := gotWorkload.Status.RequeueState if gotRequeueState != nil && gotRequeueState.RequeueAt != nil { - // We verify the got requeueAt if the got requeueAt is after the desired requeueAt - // since the requeueAt is included in positive seconds of random jitter. - // Additionally, we need to verify the requeueAt by "Equal" function - // as the "After" function evaluates the nanoseconds despite the metav1.Time is seconds level precision. - if !gotRequeueState.RequeueAt.After(requeueState.RequeueAt.Time) && !gotRequeueState.RequeueAt.Equal(requeueState.RequeueAt) { + if !gotRequeueState.RequeueAt.Equal(requeueState.RequeueAt) { t.Errorf("Unexpected requeueState.requeueAt; gotRequeueAt %v needs to be after requeueAt %v", requeueState.RequeueAt, gotRequeueState.RequeueAt) } } else { From ae3e551b3e0aacf39a075eedad267be3e2e87e64 Mon Sep 17 00:00:00 2001 From: Ryo Tozawa Date: Wed, 24 Apr 2024 20:58:09 +0900 Subject: [PATCH 23/49] add: custom annotations on service and deployment (#2030) Signed-off-by: tozastation --- charts/kueue/templates/manager/auth_proxy_service.yaml | 4 ++++ charts/kueue/templates/manager/manager.yaml | 3 +++ charts/kueue/values.yaml | 2 ++ 3 files changed, 9 insertions(+) diff --git a/charts/kueue/templates/manager/auth_proxy_service.yaml b/charts/kueue/templates/manager/auth_proxy_service.yaml index 21352fcc4f..02942cab0b 100644 --- a/charts/kueue/templates/manager/auth_proxy_service.yaml +++ b/charts/kueue/templates/manager/auth_proxy_service.yaml @@ -5,6 +5,10 @@ metadata: namespace: '{{ .Release.Namespace }}' labels: {{- include "kueue.labels" . | nindent 4 }} + {{- if .Values.metricsService.annotations }} + annotations: + {{- toYaml .Values.metricsService.annotations | nindent 4 }} + {{- end }} spec: type: {{ .Values.metricsService.type }} selector: diff --git a/charts/kueue/templates/manager/manager.yaml b/charts/kueue/templates/manager/manager.yaml index 4c2277cdaf..4eaa06c8bc 100644 --- a/charts/kueue/templates/manager/manager.yaml +++ b/charts/kueue/templates/manager/manager.yaml @@ -16,6 +16,9 @@ spec: {{- include "kueue.selectorLabels" . 
| nindent 8 }} annotations: kubectl.kubernetes.io/default-container: manager + {{- if .Values.controllerManager.manager.podAnnotations }} + {{- toYaml .Values.controllerManager.manager.podAnnotations | nindent 8 }} + {{- end }} spec: containers: - args: diff --git a/charts/kueue/values.yaml b/charts/kueue/values.yaml index e592129ba0..a382870214 100644 --- a/charts/kueue/values.yaml +++ b/charts/kueue/values.yaml @@ -24,6 +24,7 @@ controllerManager: repository: gcr.io/k8s-staging-kueue/kueue # This should be set to 'IfNotPresent' for released version pullPolicy: Always + podAnnotations: {} resources: limits: cpu: 500m @@ -111,6 +112,7 @@ metricsService: protocol: TCP targetPort: https type: ClusterIP + annotations: {} webhookService: ipDualStack: enabled: false From 2bff8c345c9b69cd0da6d4f97cbc0a15c4c6435c Mon Sep 17 00:00:00 2001 From: Mykhailo Bobrovskyi Date: Wed, 24 Apr 2024 17:43:55 +0300 Subject: [PATCH 24/49] [multikueue] Increase timeout for "Waiting for the cluster do become active" test. (#2049) --- test/e2e/multikueue/e2e_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/multikueue/e2e_test.go b/test/e2e/multikueue/e2e_test.go index c2a9c1c675..e24fc09f03 100644 --- a/test/e2e/multikueue/e2e_test.go +++ b/test/e2e/multikueue/e2e_test.go @@ -416,7 +416,7 @@ var _ = ginkgo.Describe("MultiKueue", func() { Message: "Connected", }, util.IgnoreConditionTimestampsAndObservedGeneration))) - }, util.Timeout, util.Interval).Should(gomega.Succeed()) + }, util.LongTimeout, util.Interval).Should(gomega.Succeed()) }) }) }) From 78ccc865b71960d14e45c8db299f8210443a2148 Mon Sep 17 00:00:00 2001 From: Traian Schiau <55734665+trasc@users.noreply.github.com> Date: Wed, 24 Apr 2024 18:00:34 +0300 Subject: [PATCH 25/49] Scalability scrape (#2018) * [sclability] Scrape metrics * Review remarks. 
* Review Remarks * Review Remarks --- Makefile | 12 +- test/scalability/README.md | 18 +++ test/scalability/minimalkueue/main.go | 10 ++ test/scalability/runner/main.go | 66 +++++++++-- test/scalability/runner/scraper/scraper.go | 124 +++++++++++++++++++++ 5 files changed, 216 insertions(+), 14 deletions(-) create mode 100644 test/scalability/runner/scraper/scraper.go diff --git a/Makefile b/Makefile index 83ce334469..691c5c011f 100644 --- a/Makefile +++ b/Makefile @@ -230,6 +230,14 @@ ifdef SCALABILITY_KUEUE_LOGS SCALABILITY_EXTRA_ARGS += --withLogs=true --logToFile=true endif +ifdef SCALABILITY_SCRAPE_INTERVAL +SCALABILITY_SCRAPE_ARGS += --metricsScrapeInterval=$(SCALABILITY_SCRAPE_INTERVAL) +endif + +ifdef SCALABILITY_SCRAPE_URL +SCALABILITY_SCRAPE_ARGS += --metricsScrapeURL=$(SCALABILITY_SCRAPE_URL) +endif + SCALABILITY_GENERATOR_CONFIG ?= $(PROJECT_DIR)/test/scalability/default_generator_config.yaml SCALABILITY_RUN_DIR := $(ARTIFACTS)/run-scalability @@ -241,7 +249,7 @@ run-scalability: envtest scalability-runner minimalkueue --o $(SCALABILITY_RUN_DIR) \ --crds=$(PROJECT_DIR)/config/components/crd/bases \ --generatorConfig=$(SCALABILITY_GENERATOR_CONFIG) \ - --minimalKueue=$(ARTIFACTS)/minimalkueue $(SCALABILITY_EXTRA_ARGS) + --minimalKueue=$(ARTIFACTS)/minimalkueue $(SCALABILITY_EXTRA_ARGS) $(SCALABILITY_SCRAPE_ARGS) .PHONY: test-scalability test-scalability: gotestsum run-scalability @@ -257,7 +265,7 @@ run-scalability-in-cluster: envtest scalability-runner $(SCALABILITY_RUNNER) \ --o $(ARTIFACTS)/run-scalability-in-cluster \ --generatorConfig=$(SCALABILITY_GENERATOR_CONFIG) \ - --qps=1000 --burst=2000 --timeout=15m + --qps=1000 --burst=2000 --timeout=15m $(SCALABILITY_SCRAPE_ARGS) .PHONY: ci-lint ci-lint: golangci-lint diff --git a/test/scalability/README.md b/test/scalability/README.md index 492a7edc47..817d9a167e 100644 --- a/test/scalability/README.md +++ b/test/scalability/README.md @@ -37,6 +37,8 @@ Will run a scalability scenario against an existing cluster (connectable by the The generation config to be used can be set in `SCALABILITY_GENERATOR_CONFIG` by default using `$(PROJECT_DIR)/test/scalability/default_generator_config.yaml` +Setting `SCALABILITY_SCRAPE_INTERVAL` to an interval value and `SCALABILITY_SCRAPE_URL` to an URL exposing kueue's metrics will cause the scalability runner to scrape that URL every interval and store the results in `$(PROJECT_DIR)/bin/run-scalability-in-cluster/metricsDump.tgz`. + Check [installation guide](https://kueue.sigs.k8s.io/docs/installation) for cluster and [observability](https://kueue.sigs.k8s.io/docs/installation/#add-metrics-scraping-for-prometheus-operator). ## Run with minimalkueue @@ -55,6 +57,8 @@ Setting `SCALABILITY_CPU_PROFILE=1` will generate a cpuprofile of minimalkueue i Setting `SCALABILITY_KUEUE_LOGS=1` will save the logs of minimalkueue in `$(PROJECT_DIR)/bin/run-scalability/minimalkueue.out.log` and `$(PROJECT_DIR)/bin/run-scalability/minimalkueue.err.log` +Setting `SCALABILITY_SCRAPE_INTERVAL` to an interval value (e.g. `1s`) will expose the metrics of `minimalkueue` and have them collected by the scalability runner in `$(PROJECT_DIR)/bin/run-scalability/metricsDump.tgz` every interval. 
+ ## Run scalability test ```bash @@ -62,3 +66,17 @@ make test-scalability ``` Runs the scalability with minimalkueue and checks the results against `$(PROJECT_DIR)/test/scalability/default_rangespec.yaml` + +## Scrape result + +The scrape result `metricsDump.tgz` contains a set of `.prometheus` files, where `ts` is the millisecond representation of the epoch time at the moment each scrape was stared and can be used during the import in a visualization tool. + +If an instance of [VictoriaMetrics](https://docs.victoriametrics.com/) listening at `http://localhost:8428` is used, a metrics dump can be imported like: + +```bash + TMPDIR=$(mktemp -d) + tar -xf ./bin/run-scalability/metricsDump.tgz -C $TMPDIR + for file in ${TMPDIR}/*.prometheus; do timestamp=$(basename "$file" .prometheus); curl -vX POST -T "$file" http://localhost:8428/api/v1/import/prometheus?timestamp="$timestamp"; done + rm -r $TMPDIR + +``` diff --git a/test/scalability/minimalkueue/main.go b/test/scalability/minimalkueue/main.go index 6beaebcc85..34ef89188a 100644 --- a/test/scalability/minimalkueue/main.go +++ b/test/scalability/minimalkueue/main.go @@ -19,6 +19,7 @@ package main import ( "context" "flag" + "fmt" "os" "os/signal" "runtime/pprof" @@ -41,12 +42,15 @@ import ( "sigs.k8s.io/kueue/pkg/constants" "sigs.k8s.io/kueue/pkg/controller/core" "sigs.k8s.io/kueue/pkg/controller/core/indexer" + "sigs.k8s.io/kueue/pkg/metrics" "sigs.k8s.io/kueue/pkg/queue" "sigs.k8s.io/kueue/pkg/scheduler" ) var ( cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`") + + metricsPort = flag.Int("metricsPort", 0, "metrics serving port") ) var ( @@ -119,6 +123,12 @@ func mainWithExitCode() int { BindAddress: "0", }, } + + if *metricsPort > 0 { + options.Metrics.BindAddress = fmt.Sprintf(":%d", *metricsPort) + metrics.Register() + } + mgr, err := ctrl.NewManager(kubeConfig, options) if err != nil { log.Error(err, "Unable to create manager") diff --git a/test/scalability/runner/main.go b/test/scalability/runner/main.go index b134845506..08fcf6c7dc 100644 --- a/test/scalability/runner/main.go +++ b/test/scalability/runner/main.go @@ -24,6 +24,7 @@ import ( "os/exec" "os/signal" "path" + "strconv" "sync" "syscall" "time" @@ -34,6 +35,7 @@ import ( utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" + "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" crconfig "sigs.k8s.io/controller-runtime/pkg/config" @@ -49,6 +51,7 @@ import ( "sigs.k8s.io/kueue/test/scalability/runner/controller" "sigs.k8s.io/kueue/test/scalability/runner/generator" "sigs.k8s.io/kueue/test/scalability/runner/recorder" + "sigs.k8s.io/kueue/test/scalability/runner/scraper" "sigs.k8s.io/kueue/test/scalability/runner/stats" ) @@ -60,6 +63,10 @@ var ( qps = flag.Float64("qps", 0, "qps used by the runner clients, use default if 0") burst = flag.Int("burst", 0, "qps used by the runner clients, use default if 0") + // metrics scarping + metricsScrapeInterval = flag.Duration("metricsScrapeInterval", 0, "the duration between two metrics scraping, if 0 the metrics scraping is disabled") + metricsScrapeURL = flag.String("metricsScrapeURL", "", "the URL to scrape metrics from, ignored when minimal kueue is used") + // related to minimalkueue minimalKueuePath = flag.String("minimalKueue", "", "path to minimalkueue, run in the hosts default cluster if empty") withCpuProfile = flag.Bool("withCPUProfile", false, "generate a CPU profile for 
minimalkueue") @@ -131,9 +138,17 @@ func main() { os.Exit(1) } + metricsPort := 0 + if *metricsScrapeInterval != 0 { + metricsPort, err = scraper.GetFreePort() + if err != nil { + log.Error(err, "getting a free port, metrics scraping disabled") + } + metricsScrapeURL = ptr.To(fmt.Sprintf("http://localhost:%d/metrics", metricsPort)) + } + // start the minimal kueue manager process - wg.Add(1) - err = runCommand(ctx, *outputDir, *minimalKueuePath, "kubeconfig", *withCpuProfile, *withLogs, *logToFile, *logLevel, errCh, wg) + err = runCommand(ctx, *outputDir, *minimalKueuePath, "kubeconfig", *withCpuProfile, *withLogs, *logToFile, *logLevel, errCh, wg, metricsPort) if err != nil { log.Error(err, "MinimalKueue start") os.Exit(1) @@ -156,24 +171,31 @@ func main() { } generationDoneCh := make(chan struct{}) - wg.Add(1) err := runGenerator(ctx, cfg, *generatorConfig, errCh, wg, generationDoneCh) if err != nil { log.Error(err, "Generator start") os.Exit(1) } - wg.Add(1) recorder, err := startRecorder(ctx, errCh, wg, generationDoneCh, *timeout) if err != nil { log.Error(err, "Recorder start") os.Exit(1) } - wg.Add(1) + if *metricsScrapeInterval != 0 && *metricsScrapeURL != "" { + dumpTar := path.Join(*outputDir, "metricsDump.tgz") + err := runScraper(ctx, *metricsScrapeInterval, dumpTar, *metricsScrapeURL, errCh, wg) + if err != nil { + log.Error(err, "Scraper start") + os.Exit(1) + } + + } + err = runManager(ctx, cfg, errCh, wg, recorder) if err != nil { - log.Error(err, "manager start") + log.Error(err, "Failed to start manager") os.Exit(1) } @@ -222,8 +244,7 @@ func main() { } } -func runCommand(ctx context.Context, workDir, cmdPath, kubeconfig string, withCPUProf, withLogs, logToFile bool, logLevel int, errCh chan<- error, wg *sync.WaitGroup) error { - defer wg.Done() +func runCommand(ctx context.Context, workDir, cmdPath, kubeconfig string, withCPUProf, withLogs, logToFile bool, logLevel int, errCh chan<- error, wg *sync.WaitGroup, metricsPort int) error { log := ctrl.LoggerFrom(ctx).WithName("Run command") cmd := exec.CommandContext(ctx, cmdPath, "--kubeconfig", path.Join(workDir, kubeconfig)) @@ -260,6 +281,10 @@ func runCommand(ctx context.Context, workDir, cmdPath, kubeconfig string, withCP cmd.Stderr = errWriter } + if metricsPort != 0 { + cmd.Args = append(cmd.Args, "--metricsPort", strconv.Itoa(metricsPort)) + } + log.Info("Starting process", "path", cmd.Path, "args", cmd.Args) err := cmd.Start() if err != nil { @@ -305,8 +330,6 @@ func runCommand(ctx context.Context, workDir, cmdPath, kubeconfig string, withCP } func runGenerator(ctx context.Context, cfg *rest.Config, generatorConfig string, errCh chan<- error, wg *sync.WaitGroup, genDone chan<- struct{}) error { - defer wg.Done() - log := ctrl.LoggerFrom(ctx).WithName("Run generator") c, err := client.New(cfg, client.Options{Scheme: scheme}) if err != nil { @@ -341,7 +364,6 @@ func runGenerator(ctx context.Context, cfg *rest.Config, generatorConfig string, } func startRecorder(ctx context.Context, errCh chan<- error, wg *sync.WaitGroup, genDone <-chan struct{}, recordTimeout time.Duration) (*recorder.Recorder, error) { - defer wg.Done() log := ctrl.LoggerFrom(ctx).WithName("Start recorder") recorder := recorder.New(recordTimeout) wg.Add(1) @@ -361,7 +383,6 @@ func startRecorder(ctx context.Context, errCh chan<- error, wg *sync.WaitGroup, } func runManager(ctx context.Context, cfg *rest.Config, errCh chan<- error, wg *sync.WaitGroup, r *recorder.Recorder) error { - defer wg.Done() log := ctrl.LoggerFrom(ctx).WithName("Run manager") 
options := ctrl.Options{ @@ -401,3 +422,24 @@ func runManager(ctx context.Context, cfg *rest.Config, errCh chan<- error, wg *s log.Info("Manager started") return nil } + +func runScraper(ctx context.Context, interval time.Duration, output, url string, errCh chan<- error, wg *sync.WaitGroup) error { + log := ctrl.LoggerFrom(ctx).WithName("Run metrics scraper") + + s := scraper.NewScraper(interval, url, "%d.prometheus") + + wg.Add(1) + go func() { + defer wg.Done() + err := s.Run(ctx, output) + if err != nil { + log.Error(err, "Running the scraper") + errCh <- err + return + } + log.Info("Scrape done") + }() + + log.Info("Scrape started") + return nil +} diff --git a/test/scalability/runner/scraper/scraper.go b/test/scalability/runner/scraper/scraper.go new file mode 100644 index 0000000000..298aa7e722 --- /dev/null +++ b/test/scalability/runner/scraper/scraper.go @@ -0,0 +1,124 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package scraper + +import ( + "archive/tar" + "bytes" + "compress/gzip" + "context" + "errors" + "fmt" + "io" + "net" + "net/http" + "os" + "time" +) + +func GetFreePort() (int, error) { + l, err := net.Listen("tcp", ":0") + if err != nil { + return 0, err + } + defer l.Close() + l.Close() + if taddr, isTcp := l.Addr().(*net.TCPAddr); isTcp { + return taddr.Port, nil + } + return 0, errors.New("cannot get a free tcp address") +} + +type Scraper struct { + interval time.Duration + url string + fileNameFormat string + c http.Client +} + +func NewScraper(interval time.Duration, url, fileNameFormat string) *Scraper { + return &Scraper{ + interval: interval, + url: url, + fileNameFormat: fileNameFormat, + c: http.Client{}, + } +} + +func (s *Scraper) doScrape(ctx context.Context, tw *tar.Writer) error { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, s.url, nil) + if err != nil { + return err + } + + start := time.Now() + + resp, err := s.c.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + + tmp := bytes.NewBuffer(nil) + contentLen, err := io.Copy(tmp, resp.Body) + if err != nil { + return err + } + + hdr := &tar.Header{ + Name: fmt.Sprintf(s.fileNameFormat, start.UnixMilli()), + Size: contentLen, + Mode: 0666, + ModTime: start, + } + + err = tw.WriteHeader(hdr) + if err != nil { + return err + } + + _, err = io.Copy(tw, tmp) + return err +} + +func (s *Scraper) Run(ctx context.Context, output string) error { + ticker := time.NewTicker(s.interval) + defer ticker.Stop() + + out, err := os.Create(output) + if err != nil { + return err + } + + defer out.Close() + gw := gzip.NewWriter(out) + defer gw.Close() + tw := tar.NewWriter(gw) + defer tw.Close() + + for { + select { + case <-ctx.Done(): + return nil + case <-ticker.C: + err := s.doScrape(ctx, tw) + if err != nil { + return err + } + } + } +} From 13750338549f986efda121583e6261afedca8cc6 Mon Sep 17 00:00:00 2001 From: jiangjiang <86391540+googs1025@users.noreply.github.com> Date: Wed, 24 Apr 2024 23:33:15 +0800 Subject: [PATCH 26/49] docs: web url (#2055) --- 
site/content/en/docs/tasks/run/jobsets.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/content/en/docs/tasks/run/jobsets.md b/site/content/en/docs/tasks/run/jobsets.md index 5d57953578..f9ab64b9a0 100644 --- a/site/content/en/docs/tasks/run/jobsets.md +++ b/site/content/en/docs/tasks/run/jobsets.md @@ -7,7 +7,7 @@ description: > Run a Kueue scheduled JobSet. --- -This document explains how you can use Kueue’s scheduling and resource management functionality when running [JobSet Operator](https://github.com/kubernetes-sigs/jobset) [JobSets](https://github.com/kubernetes-sigs/jobset/blob/main/docs/concepts/README.md). +This document explains how you can use Kueue’s scheduling and resource management functionality when running [JobSet Operator](https://github.com/kubernetes-sigs/jobset) [JobSet](https://jobset.sigs.k8s.io/docs/concepts/). This guide is for [batch users](/docs/tasks#batch-user) that have a basic understanding of Kueue. For more information, see [Kueue's overview](/docs/overview). From 9890f41dfbdaef632ffcc17cfb99e61ae00d9ca7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Wo=C5=BAniak?= Date: Wed, 24 Apr 2024 17:59:57 +0200 Subject: [PATCH 27/49] Add a note on PodsReady timeout requeuing (#2053) Co-authored-by: Yuki Iwai --- .../en/docs/tasks/manage/setup_sequential_admission.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/site/content/en/docs/tasks/manage/setup_sequential_admission.md b/site/content/en/docs/tasks/manage/setup_sequential_admission.md index b2d63cc24a..70cc1f3333 100644 --- a/site/content/en/docs/tasks/manage/setup_sequential_admission.md +++ b/site/content/en/docs/tasks/manage/setup_sequential_admission.md @@ -47,6 +47,7 @@ fields: requeuingStrategy: timestamp: Eviction | Creation backoffLimitCount: 5 + backoffBaseSeconds: 10 ``` {{% alert title="Note" color="primary" %}} @@ -93,6 +94,14 @@ If you don't specify any value for `backoffLimitCount`, a Workload is repeatedly and endlessly re-queued to the queue based on the `timestamp`. Once the number of re-queues reaches the limit, Kueue [deactivates the Workload](/docs/concepts/workload/#active). +{{% alert title="Note" color="primary" %}} +_The `backoffBaseSeconds` is available in Kueue v0.7.0 and later_ +{{% /alert %}} +The time to re-queue a workload after each consecutive timeout is increased +exponentially, with the exponent of 2. The first delay is determined by the +`backoffBaseSeconds` parameter (defaulting to 10). So, after the consecutive timeouts +the evicted workload is re-queued after approximately `10, 20, 40, ...` seconds. + ## Example In this example we demonstrate the impact of enabling `waitForPodsReady` in Kueue. 
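To see the delays this configuration produces, the controller's backoff computation can be reproduced with a small standalone Go sketch. It is illustrative only and not part of any patch in this series; it assumes `k8s.io/apimachinery` is available and mirrors the `wait.Backoff` loop used in `triggerDeactivationOrBackoffRequeue` above. The `requeueDelay` helper is hypothetical, introduced only for this illustration.

```go
package main

import (
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

// requeueDelay approximates the delay applied before re-queuing a workload
// after its n-th PodsReady timeout: roughly baseSeconds*2^(n-1) seconds plus
// a small random jitter. Illustrative sketch, not Kueue code.
func requeueDelay(baseSeconds, requeueCount int32) time.Duration {
	backoff := wait.Backoff{
		Duration: time.Duration(baseSeconds) * time.Second,
		Factor:   2,
		Jitter:   0.0001,
		Steps:    int(requeueCount),
	}
	var d time.Duration
	// Step() advances the backoff; the last returned value corresponds to
	// the current requeue count.
	for backoff.Steps > 0 {
		d = backoff.Step()
	}
	return d
}

func main() {
	for n := int32(1); n <= 5; n++ {
		fmt.Printf("requeue #%d after ~%v\n", n, requeueDelay(10, n))
	}
}
```

With the default `backoffBaseSeconds: 10`, this prints delays of roughly 10s, 20s, 40s, 80s, and 160s, matching the sequence described in the documentation change above.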
From 9ea94ac20f4a4b5546ab899c60f3627b23bd0a74 Mon Sep 17 00:00:00 2001 From: jiangjiang <86391540+googs1025@users.noreply.github.com> Date: Thu, 25 Apr 2024 12:09:41 +0800 Subject: [PATCH 28/49] cleanup useless comments, and fix receiver names are different (#2060) --- .../admissionchecks/multikueue/workload.go | 40 +++++++++---------- pkg/controller/jobframework/reconciler.go | 2 +- pkg/queue/cluster_queue.go | 10 ++--- pkg/util/testingjobs/jobset/wrappers.go | 3 +- pkg/util/testingjobs/mxjob/wrappers.go | 2 +- pkg/util/testingjobs/pod/wrappers.go | 2 +- pkg/util/testingjobs/raycluster/wrappers.go | 2 +- 7 files changed, 31 insertions(+), 30 deletions(-) diff --git a/pkg/controller/admissionchecks/multikueue/workload.go b/pkg/controller/admissionchecks/multikueue/workload.go index 35caa886ec..dd2d4f0f64 100644 --- a/pkg/controller/admissionchecks/multikueue/workload.go +++ b/pkg/controller/admissionchecks/multikueue/workload.go @@ -152,11 +152,11 @@ func (g *wlGroup) RemoveRemoteObjects(ctx context.Context, cluster string) error return nil } -func (a *wlReconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) { +func (w *wlReconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) { log := ctrl.LoggerFrom(ctx) log.V(2).Info("Reconcile Workload") wl := &kueue.Workload{} - if err := a.client.Get(ctx, req.NamespacedName, wl); err != nil { + if err := w.client.Get(ctx, req.NamespacedName, wl); err != nil { return reconcile.Result{}, client.IgnoreNotFound(err) } // NOTE: the not found needs to be treated and should result in the deletion of all the remote workloads. @@ -164,7 +164,7 @@ func (a *wlReconciler) Reconcile(ctx context.Context, req reconcile.Request) (re // 1. use a finalizer // 2. try to trigger the remote deletion from an event filter. - mkAc, err := a.multikueueAC(ctx, wl) + mkAc, err := w.multikueueAC(ctx, wl) if err != nil { return reconcile.Result{}, err } @@ -174,7 +174,7 @@ func (a *wlReconciler) Reconcile(ctx context.Context, req reconcile.Request) (re return reconcile.Result{}, nil } - adapter, owner := a.adapter(wl) + adapter, owner := w.adapter(wl) if adapter == nil { // Reject the workload since there is no chance for it to run. 
var rejectionMessage string @@ -183,24 +183,24 @@ func (a *wlReconciler) Reconcile(ctx context.Context, req reconcile.Request) (re } else { rejectionMessage = "No multikueue adapter found" } - return reconcile.Result{}, a.updateACS(ctx, wl, mkAc, kueue.CheckStateRejected, rejectionMessage) + return reconcile.Result{}, w.updateACS(ctx, wl, mkAc, kueue.CheckStateRejected, rejectionMessage) } - managed, unmanagedReason, err := adapter.IsJobManagedByKueue(ctx, a.client, types.NamespacedName{Name: owner.Name, Namespace: wl.Namespace}) + managed, unmanagedReason, err := adapter.IsJobManagedByKueue(ctx, w.client, types.NamespacedName{Name: owner.Name, Namespace: wl.Namespace}) if err != nil { return reconcile.Result{}, err } if !managed { - return reconcile.Result{}, a.updateACS(ctx, wl, mkAc, kueue.CheckStateRejected, fmt.Sprintf("The owner is not managed by Kueue: %s", unmanagedReason)) + return reconcile.Result{}, w.updateACS(ctx, wl, mkAc, kueue.CheckStateRejected, fmt.Sprintf("The owner is not managed by Kueue: %s", unmanagedReason)) } - grp, err := a.readGroup(ctx, wl, mkAc.Name, adapter, owner.Name) + grp, err := w.readGroup(ctx, wl, mkAc.Name, adapter, owner.Name) if err != nil { return reconcile.Result{}, err } - return a.reconcileGroup(ctx, grp) + return w.reconcileGroup(ctx, grp) } func (w *wlReconciler) updateACS(ctx context.Context, wl *kueue.Workload, acs *kueue.AdmissionCheckState, status kueue.CheckState, message string) error { @@ -252,8 +252,8 @@ func (w *wlReconciler) adapter(local *kueue.Workload) (jobAdapter, *metav1.Owner return nil, nil } -func (a *wlReconciler) readGroup(ctx context.Context, local *kueue.Workload, acName string, adapter jobAdapter, controllerName string) (*wlGroup, error) { - rClients, err := a.remoteClientsForAC(ctx, acName) +func (w *wlReconciler) readGroup(ctx context.Context, local *kueue.Workload, acName string, adapter jobAdapter, controllerName string) (*wlGroup, error) { + rClients, err := w.remoteClientsForAC(ctx, acName) if err != nil { return nil, fmt.Errorf("admission check %q: %w", acName, err) } @@ -281,7 +281,7 @@ func (a *wlReconciler) readGroup(ctx context.Context, local *kueue.Workload, acN return &grp, nil } -func (a *wlReconciler) reconcileGroup(ctx context.Context, group *wlGroup) (reconcile.Result, error) { +func (w *wlReconciler) reconcileGroup(ctx context.Context, group *wlGroup) (reconcile.Result, error) { log := ctrl.LoggerFrom(ctx).WithValues("op", "reconcileGroup") log.V(3).Info("Reconcile Workload Group") @@ -298,7 +298,7 @@ func (a *wlReconciler) reconcileGroup(ctx context.Context, group *wlGroup) (reco } if !workload.HasQuotaReservation(group.local) && acs.State == kueue.CheckStateRetry { - errs = append(errs, a.updateACS(ctx, group.local, acs, kueue.CheckStatePending, "Requeued")) + errs = append(errs, w.updateACS(ctx, group.local, acs, kueue.CheckStatePending, "Requeued")) } return reconcile.Result{}, errors.Join(errs...) @@ -309,7 +309,7 @@ func (a *wlReconciler) reconcileGroup(ctx context.Context, group *wlGroup) (reco // it should not be problematic but the "From remote xxxx:" could be lost .... 
if group.jobAdapter != nil { - if err := group.jobAdapter.SyncJob(ctx, a.client, group.remoteClients[remote].client, group.controllerKey, group.local.Name, a.origin); err != nil { + if err := group.jobAdapter.SyncJob(ctx, w.client, group.remoteClients[remote].client, group.controllerKey, group.local.Name, w.origin); err != nil { log.V(2).Error(err, "copying remote controller status", "workerCluster", remote) // we should retry this return reconcile.Result{}, err @@ -326,7 +326,7 @@ func (a *wlReconciler) reconcileGroup(ctx context.Context, group *wlGroup) (reco Reason: remoteFinishedCond.Reason, Message: remoteFinishedCond.Message, }) - return reconcile.Result{}, a.client.Status().Patch(ctx, wlPatch, client.Apply, client.FieldOwner(ControllerName+"-finish"), client.ForceOwnership) + return reconcile.Result{}, w.client.Status().Patch(ctx, wlPatch, client.Apply, client.FieldOwner(ControllerName+"-finish"), client.ForceOwnership) } // 2. delete all workloads that are out of sync or are not in the chosen worker @@ -355,7 +355,7 @@ func (a *wlReconciler) reconcileGroup(ctx context.Context, group *wlGroup) (reco } acs := workload.FindAdmissionCheck(group.local.Status.AdmissionChecks, group.acName) - if err := group.jobAdapter.SyncJob(ctx, a.client, group.remoteClients[reservingRemote].client, group.controllerKey, group.local.Name, a.origin); err != nil { + if err := group.jobAdapter.SyncJob(ctx, w.client, group.remoteClients[reservingRemote].client, group.controllerKey, group.local.Name, w.origin); err != nil { log.V(2).Error(err, "creating remote controller object", "remote", reservingRemote) // We'll retry this in the next reconcile. return reconcile.Result{}, err @@ -374,16 +374,16 @@ func (a *wlReconciler) reconcileGroup(ctx context.Context, group *wlGroup) (reco wlPatch := workload.BaseSSAWorkload(group.local) workload.SetAdmissionCheckState(&wlPatch.Status.AdmissionChecks, *acs) - err := a.client.Status().Patch(ctx, wlPatch, client.Apply, client.FieldOwner(ControllerName), client.ForceOwnership) + err := w.client.Status().Patch(ctx, wlPatch, client.Apply, client.FieldOwner(ControllerName), client.ForceOwnership) if err != nil { return reconcile.Result{}, err } } - return reconcile.Result{RequeueAfter: a.workerLostTimeout}, nil + return reconcile.Result{RequeueAfter: w.workerLostTimeout}, nil } else if acs.State == kueue.CheckStateReady { // If there is no reserving and the AC is ready, the connection with the reserving remote might // be lost, keep the workload admitted for keepReadyTimeout and put it back in the queue after that. 
- remainingWaitTime := a.workerLostTimeout - time.Since(acs.LastTransitionTime.Time) + remainingWaitTime := w.workerLostTimeout - time.Since(acs.LastTransitionTime.Time) if remainingWaitTime > 0 { log.V(3).Info("Reserving remote lost, retry", "retryAfter", remainingWaitTime) return reconcile.Result{RequeueAfter: remainingWaitTime}, nil @@ -393,7 +393,7 @@ func (a *wlReconciler) reconcileGroup(ctx context.Context, group *wlGroup) (reco acs.LastTransitionTime = metav1.NewTime(time.Now()) wlPatch := workload.BaseSSAWorkload(group.local) workload.SetAdmissionCheckState(&wlPatch.Status.AdmissionChecks, *acs) - return reconcile.Result{}, a.client.Status().Patch(ctx, wlPatch, client.Apply, client.FieldOwner(ControllerName), client.ForceOwnership) + return reconcile.Result{}, w.client.Status().Patch(ctx, wlPatch, client.Apply, client.FieldOwner(ControllerName), client.ForceOwnership) } } diff --git a/pkg/controller/jobframework/reconciler.go b/pkg/controller/jobframework/reconciler.go index e61257e4ac..2e412cde88 100644 --- a/pkg/controller/jobframework/reconciler.go +++ b/pkg/controller/jobframework/reconciler.go @@ -142,7 +142,7 @@ func WithManagerName(n string) Option { } } -// WithLabelKeysToCopy +// WithLabelKeysToCopy adds the label keys func WithLabelKeysToCopy(n []string) Option { return func(o *Options) { o.LabelKeysToCopy = n diff --git a/pkg/queue/cluster_queue.go b/pkg/queue/cluster_queue.go index d252e07591..8d5179d5f4 100644 --- a/pkg/queue/cluster_queue.go +++ b/pkg/queue/cluster_queue.go @@ -372,7 +372,7 @@ func (c *ClusterQueue) totalElements() []*workload.Info { return elements } -// Returns true if the queue is active +// Active returns true if the queue is active func (c *ClusterQueue) Active() bool { c.rwm.RLock() defer c.rwm.RUnlock() @@ -388,11 +388,11 @@ func (c *ClusterQueue) Active() bool { // compete with other workloads, until cluster events free up quota. // The workload should not be reinserted if it's already in the ClusterQueue. // Returns true if the workload was inserted. -func (cq *ClusterQueue) RequeueIfNotPresent(wInfo *workload.Info, reason RequeueReason) bool { - if cq.queueingStrategy == kueue.StrictFIFO { - return cq.requeueIfNotPresent(wInfo, reason != RequeueReasonNamespaceMismatch) +func (c *ClusterQueue) RequeueIfNotPresent(wInfo *workload.Info, reason RequeueReason) bool { + if c.queueingStrategy == kueue.StrictFIFO { + return c.requeueIfNotPresent(wInfo, reason != RequeueReasonNamespaceMismatch) } - return cq.requeueIfNotPresent(wInfo, reason == RequeueReasonFailedAfterNomination || reason == RequeueReasonPendingPreemption) + return c.requeueIfNotPresent(wInfo, reason == RequeueReasonFailedAfterNomination || reason == RequeueReasonPendingPreemption) } // queueOrderingFunc returns a function used by the clusterQueue heap algorithm diff --git a/pkg/util/testingjobs/jobset/wrappers.go b/pkg/util/testingjobs/jobset/wrappers.go index 1b4a746f88..f1a167c196 100644 --- a/pkg/util/testingjobs/jobset/wrappers.go +++ b/pkg/util/testingjobs/jobset/wrappers.go @@ -109,7 +109,7 @@ func (j *JobSetWrapper) Label(k, v string) *JobSetWrapper { return j } -// Annotation sets annotations to the JobSet. +// Annotations sets annotations to the JobSet. func (j *JobSetWrapper) Annotations(annotations map[string]string) *JobSetWrapper { j.ObjectMeta.Annotations = annotations return j @@ -162,6 +162,7 @@ func (j *JobSetWrapper) Condition(c metav1.Condition) *JobSetWrapper { return j } +// ManagedBy adds a managedby. 
func (j *JobSetWrapper) ManagedBy(c string) *JobSetWrapper { j.Spec.ManagedBy = &c return j diff --git a/pkg/util/testingjobs/mxjob/wrappers.go b/pkg/util/testingjobs/mxjob/wrappers.go index 5e06fe95bc..22fe790d25 100644 --- a/pkg/util/testingjobs/mxjob/wrappers.go +++ b/pkg/util/testingjobs/mxjob/wrappers.go @@ -185,7 +185,7 @@ func (j *MXJobWrapper) NodeSelector(k, v string) *MXJobWrapper { RoleNodeSelector(kftraining.MXJobReplicaTypeWorker, k, v) } -// NodeSelector updates the nodeSelector of job. +// RoleNodeSelector updates the nodeSelector of job. func (j *MXJobWrapper) RoleNodeSelector(role kftraining.ReplicaType, k, v string) *MXJobWrapper { if j.Spec.MXReplicaSpecs[role].Template.Spec.NodeSelector == nil { j.Spec.MXReplicaSpecs[role].Template.Spec.NodeSelector = make(map[string]string) diff --git a/pkg/util/testingjobs/pod/wrappers.go b/pkg/util/testingjobs/pod/wrappers.go index f62390749c..a6e95ff337 100644 --- a/pkg/util/testingjobs/pod/wrappers.go +++ b/pkg/util/testingjobs/pod/wrappers.go @@ -84,7 +84,7 @@ func (p *PodWrapper) Queue(q string) *PodWrapper { return p.Label(constants.QueueLabel, q) } -// Queue updates the queue name of the Pod +// PriorityClass updates the priority class name of the Pod func (p *PodWrapper) PriorityClass(pc string) *PodWrapper { p.Spec.PriorityClassName = pc return p diff --git a/pkg/util/testingjobs/raycluster/wrappers.go b/pkg/util/testingjobs/raycluster/wrappers.go index 7d55e93bdf..08aa3820d7 100644 --- a/pkg/util/testingjobs/raycluster/wrappers.go +++ b/pkg/util/testingjobs/raycluster/wrappers.go @@ -74,7 +74,7 @@ func MakeCluster(name, ns string) *ClusterWrapper { }} } -// NodeSelector adds a node selector to the job's head. +// NodeSelectorHeadGroup adds a node selector to the job's head. func (j *ClusterWrapper) NodeSelectorHeadGroup(k, v string) *ClusterWrapper { j.Spec.HeadGroupSpec.Template.Spec.NodeSelector[k] = v return j From 36486486c80d31e3435fdb56e8debe801dcf246d Mon Sep 17 00:00:00 2001 From: Traian Schiau <55734665+trasc@users.noreply.github.com> Date: Thu, 25 Apr 2024 16:53:38 +0300 Subject: [PATCH 29/49] Adapt scalability range spec to the CI results (#2043) * Adapt scalability rangespec to the CI results * Round up --- test/scalability/default_rangespec.yaml | 37 ++++++++++++++++++------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/test/scalability/default_rangespec.yaml b/test/scalability/default_rangespec.yaml index cd13b6714b..018c9472c7 100644 --- a/test/scalability/default_rangespec.yaml +++ b/test/scalability/default_rangespec.yaml @@ -1,15 +1,32 @@ -# Until we have a clear picture on how the setup -# performs in CI keep the values "very relaxed" +# The values are computed based on the result of 5 trial runs: +# - #1782760671465705472 +# - #1782764439129296896 +# - #1782768037514973184 +# - #1782772615836864512 +# - #1782775995984515072 cmd: - maxWallMs: 3600_000 #1h - maxUserMs: 3600_000 - maxSysMs: 3600_000 - maxrss: 1024_000 #1000MiB + # Average value 351116.4 (+/- 0.9%), setting at +5% + maxWallMs: 368_000 + + # Average value 111500 (+/- 14%), setting at +20% + maxUserMs: 134_000 + + # Average value 27875 (+/- 16%), setting at +20% + maxSysMs: 34_000 + + # Average value 445012 (+/- 0.3%), setting at +5% + maxrss: 468_000 clusterQueueClassesMinUsage: - cq: 10 #10% + # Average value 58.7 (+/- 1.2%), setting at -5% + cq: 56 #% wlClassesMaxAvgTimeToAdmissionMs: - large: 3600_000 #1h - medium: 3600_000 - small: 3600_000 + # Average value 6666 (+/- 14%), setting at +20% + large: 8_000 + + # Average 
value 76768 (+/- 2%), setting at +5% + medium: 81_000 + + # Average value 215468 (+/- 2%), setting at +5% + small: 227_000 From abc2a8b819f16310f684e470c00ca29ae3d9dbf5 Mon Sep 17 00:00:00 2001 From: Aldo Culquicondor <1299064+alculquicondor@users.noreply.github.com> Date: Thu, 25 Apr 2024 10:21:23 -0400 Subject: [PATCH 30/49] Cleanup CQ and cohort resource stats (#2058) * Cleanup CQ and cohort resource stats Change-Id: I8831ccc0cf566058ac1dbd11591ad3d761121a75 * Remove unused types Change-Id: Ib5afb1319644e05a008c84fbd90de0625c266a60 --- pkg/cache/cache_test.go | 386 ++++++++++++--------------------- pkg/cache/clusterqueue.go | 55 +---- pkg/cache/clusterqueue_test.go | 77 ++----- pkg/cache/snapshot.go | 24 +- pkg/cache/snapshot_test.go | 252 +++++++-------------- 5 files changed, 251 insertions(+), 543 deletions(-) diff --git a/pkg/cache/cache_test.go b/pkg/cache/cache_test.go index 98c9d312cc..77c85dfadb 100644 --- a/pkg/cache/cache_test.go +++ b/pkg/cache/cache_test.go @@ -127,11 +127,8 @@ func TestCacheClusterQueueOperations(t *testing.T) { AdmittedUsage: FlavorResourceQuantities{ "default": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 10_000, - Lendable: 10_000, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 10_000, }, Status: active, Preemption: defaultPreemption, @@ -159,11 +156,8 @@ func TestCacheClusterQueueOperations(t *testing.T) { AdmittedUsage: FlavorResourceQuantities{ "default": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 15_000, - Lendable: 15_000, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 15_000, }, Status: active, Preemption: defaultPreemption, @@ -210,11 +204,8 @@ func TestCacheClusterQueueOperations(t *testing.T) { AdmittedUsage: FlavorResourceQuantities{ "nonexistent-flavor": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 15_000, - Lendable: 15_000, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 15_000, }, Status: pending, Preemption: defaultPreemption, @@ -303,11 +294,8 @@ func TestCacheClusterQueueOperations(t *testing.T) { AdmittedUsage: FlavorResourceQuantities{ "default": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 10_000, - Lendable: 10_000, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 10_000, }, Status: active, Preemption: defaultPreemption, @@ -335,11 +323,8 @@ func TestCacheClusterQueueOperations(t *testing.T) { AdmittedUsage: FlavorResourceQuantities{ "default": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 15_000, - Lendable: 15_000, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 15_000, }, Status: active, Preemption: defaultPreemption, @@ -388,11 +373,8 @@ func TestCacheClusterQueueOperations(t *testing.T) { AdmittedUsage: FlavorResourceQuantities{ "nonexistent-flavor": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 15_000, - Lendable: 15_000, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 15_000, }, Status: pending, Preemption: defaultPreemption, @@ -478,11 +460,8 @@ func TestCacheClusterQueueOperations(t *testing.T) { AdmittedUsage: FlavorResourceQuantities{ "default": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 5_000, - Lendable: 5_000, - }, + Lendable: 
map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 5_000, }, Status: active, Preemption: defaultPreemption, @@ -547,11 +526,8 @@ func TestCacheClusterQueueOperations(t *testing.T) { AdmittedUsage: FlavorResourceQuantities{ "default": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 5_000, - Lendable: 4_000, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 4_000, }, Status: active, Preemption: defaultPreemption, @@ -614,11 +590,8 @@ func TestCacheClusterQueueOperations(t *testing.T) { AdmittedUsage: FlavorResourceQuantities{ "default": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 15_000, - Lendable: 15_000, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 15_000, }, Status: active, Preemption: defaultPreemption, @@ -657,11 +630,8 @@ func TestCacheClusterQueueOperations(t *testing.T) { AdmittedUsage: FlavorResourceQuantities{ "nonexistent-flavor": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 15_000, - Lendable: 15_000, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 15_000, }, Status: pending, Preemption: defaultPreemption, @@ -722,11 +692,8 @@ func TestCacheClusterQueueOperations(t *testing.T) { AdmittedUsage: FlavorResourceQuantities{ "default": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 10_000, - Lendable: 10_000, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 10_000, }, Status: active, Preemption: defaultPreemption, @@ -754,11 +721,8 @@ func TestCacheClusterQueueOperations(t *testing.T) { AdmittedUsage: FlavorResourceQuantities{ "default": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 15_000, - Lendable: 15_000, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 15_000, }, Status: active, Preemption: defaultPreemption, @@ -805,11 +769,8 @@ func TestCacheClusterQueueOperations(t *testing.T) { AdmittedUsage: FlavorResourceQuantities{ "nonexistent-flavor": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 15_000, - Lendable: 15_000, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 15_000, }, Status: active, Preemption: defaultPreemption, @@ -934,10 +895,10 @@ func TestCacheClusterQueueOperations(t *testing.T) { "example.com/gpu": 0, }, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: {}, - corev1.ResourceMemory: {}, - "example.com/gpu": {}, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 0, + corev1.ResourceMemory: 0, + "example.com/gpu": 0, }, Status: pending, Preemption: defaultPreemption, @@ -1108,12 +1069,8 @@ func TestCacheClusterQueueOperations(t *testing.T) { FlavorFungibility: defaultFlavorFungibility, Usage: FlavorResourceQuantities{"f1": {corev1.ResourceCPU: 2000}}, AdmittedUsage: FlavorResourceQuantities{"f1": {corev1.ResourceCPU: 1000}}, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: &QuotaStats{ - Nominal: 10_000, - Lendable: 10_000, - Usage: 2_000, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 10_000, }, Workloads: map[string]*workload.Info{ "ns/reserving": { @@ -1147,6 +1104,113 @@ func TestCacheClusterQueueOperations(t *testing.T) { }, wantCohorts: map[string]sets.Set[string]{}, }, + { + name: "add CQ with multiple resource groups and flavors", + enableLendingLimit: true, + operation: func(cache *Cache) 
error { + cq := utiltesting.MakeClusterQueue("foo"). + ResourceGroup( + kueue.FlavorQuotas{ + Name: "on-demand", + Resources: []kueue.ResourceQuota{ + { + Name: corev1.ResourceCPU, + NominalQuota: resource.MustParse("10"), + LendingLimit: ptr.To(resource.MustParse("8")), + }, + { + Name: corev1.ResourceMemory, + NominalQuota: resource.MustParse("10Gi"), + LendingLimit: ptr.To(resource.MustParse("8Gi")), + }, + }, + }, + kueue.FlavorQuotas{ + Name: "spot", + Resources: []kueue.ResourceQuota{ + { + Name: corev1.ResourceCPU, + NominalQuota: resource.MustParse("20"), + LendingLimit: ptr.To(resource.MustParse("20")), + }, + { + Name: corev1.ResourceMemory, + NominalQuota: resource.MustParse("20Gi"), + LendingLimit: ptr.To(resource.MustParse("20Gi")), + }, + }, + }, + ). + ResourceGroup( + kueue.FlavorQuotas{ + Name: "license", + Resources: []kueue.ResourceQuota{ + { + Name: "license", + NominalQuota: resource.MustParse("8"), + LendingLimit: ptr.To(resource.MustParse("4")), + }, + }, + }, + ). + Obj() + return cache.AddClusterQueue(context.Background(), cq) + }, + wantClusterQueues: map[string]*ClusterQueue{ + "foo": { + Name: "foo", + NamespaceSelector: labels.Everything(), + Status: pending, + Preemption: defaultPreemption, + AllocatableResourceGeneration: 1, + FlavorFungibility: defaultFlavorFungibility, + GuaranteedQuota: FlavorResourceQuantities{ + "on-demand": { + corev1.ResourceCPU: 2_000, + corev1.ResourceMemory: 2 * utiltesting.Gi, + }, + "spot": { + corev1.ResourceCPU: 0, + corev1.ResourceMemory: 0, + }, + "license": { + "license": 4, + }, + }, + Usage: FlavorResourceQuantities{ + "on-demand": { + corev1.ResourceCPU: 0, + corev1.ResourceMemory: 0, + }, + "spot": { + corev1.ResourceCPU: 0, + corev1.ResourceMemory: 0, + }, + "license": { + "license": 0, + }, + }, + AdmittedUsage: FlavorResourceQuantities{ + "on-demand": { + corev1.ResourceCPU: 0, + corev1.ResourceMemory: 0, + }, + "spot": { + corev1.ResourceCPU: 0, + corev1.ResourceMemory: 0, + }, + "license": { + "license": 0, + }, + }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 28_000, + corev1.ResourceMemory: 28 * utiltesting.Gi, + "license": 4, + }, + }, + }, + }, } for _, tc := range cases { @@ -1250,7 +1314,6 @@ func TestCacheWorkloadOperations(t *testing.T) { type result struct { Workloads sets.Set[string] UsedResources FlavorResourceQuantities - ResourceStats ResourceStats } steps := []struct { @@ -1285,11 +1348,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 10}, "spot": {corev1.ResourceCPU: 15}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 25, - }, - }, }, "two": { Workloads: sets.New("/c", "/d"), @@ -1297,11 +1355,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 0}, "spot": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 0, - }, - }, }, }, }, @@ -1324,11 +1377,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 10}, "spot": {corev1.ResourceCPU: 15}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 25, - }, - }, }, "two": { Workloads: sets.New("/c"), @@ -1336,11 +1384,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 0}, "spot": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 0, - }, - }, }, }, }, @@ -1362,11 +1405,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 10}, "spot": 
{corev1.ResourceCPU: 15}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 25, - }, - }, }, "two": { Workloads: sets.New("/c"), @@ -1374,11 +1412,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 0}, "spot": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 0, - }, - }, }, }, }, @@ -1401,11 +1434,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 0}, "spot": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 0, - }, - }, }, "two": { Workloads: sets.New("/a", "/c"), @@ -1413,11 +1441,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 10}, "spot": {corev1.ResourceCPU: 15}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 25, - }, - }, }, }, }, @@ -1440,11 +1463,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 10}, "spot": {corev1.ResourceCPU: 15}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 25, - }, - }, }, "two": { Workloads: sets.New("/c"), @@ -1452,11 +1470,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 0}, "spot": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 0, - }, - }, }, }, }, @@ -1479,11 +1492,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 10}, "spot": {corev1.ResourceCPU: 15}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 25, - }, - }, }, "two": { Workloads: sets.New("/c"), @@ -1491,11 +1499,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 0}, "spot": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 0, - }, - }, }, }, }, @@ -1517,11 +1520,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 10}, "spot": {corev1.ResourceCPU: 15}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 25, - }, - }, }, "two": { Workloads: sets.New("/c", "/d"), @@ -1529,11 +1527,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 0}, "spot": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 0, - }, - }, }, }, }, @@ -1552,11 +1545,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 0}, "spot": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 0, - }, - }, }, "two": { Workloads: sets.New("/c"), @@ -1564,11 +1552,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 0}, "spot": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 0, - }, - }, }, }, }, @@ -1585,11 +1568,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 0}, "spot": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 0, - }, - }, }, "two": { Workloads: sets.New("/c"), @@ -1597,11 +1575,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 0}, "spot": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 0, - }, - }, }, }, }, @@ -1619,11 +1592,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 10}, "spot": {corev1.ResourceCPU: 15}, }, - ResourceStats: 
ResourceStats{ - corev1.ResourceCPU: { - Usage: 25, - }, - }, }, "two": { Workloads: sets.New("/c"), @@ -1631,11 +1599,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 0}, "spot": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 0, - }, - }, }, }, }, @@ -1655,11 +1618,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 10}, "spot": {corev1.ResourceCPU: 15}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 25, - }, - }, }, "two": { Workloads: sets.New("/c"), @@ -1667,11 +1625,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 0}, "spot": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 0, - }, - }, }, }, }, @@ -1690,11 +1643,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 10}, "spot": {corev1.ResourceCPU: 15}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 25, - }, - }, }, "two": { Workloads: sets.New("/c"), @@ -1702,11 +1650,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 0}, "spot": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 0, - }, - }, }, }, }, @@ -1737,11 +1680,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 20}, "spot": {corev1.ResourceCPU: 30}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 50, - }, - }, }, "two": { Workloads: sets.New("/c", "/e"), @@ -1749,11 +1687,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 10}, "spot": {corev1.ResourceCPU: 15}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 25, - }, - }, }, }, wantAssumedWorkloads: map[string]string{ @@ -1780,11 +1713,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 10}, "spot": {corev1.ResourceCPU: 15}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 25, - }, - }, }, "two": { Workloads: sets.New("/c"), @@ -1792,11 +1720,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 0}, "spot": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 0, - }, - }, }, }, wantAssumedWorkloads: map[string]string{}, @@ -1830,11 +1753,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 10}, "spot": {corev1.ResourceCPU: 15}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 25, - }, - }, }, "two": { Workloads: sets.New("/c", "/e"), @@ -1842,11 +1760,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 10}, "spot": {corev1.ResourceCPU: 15}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 25, - }, - }, }, }, wantAssumedWorkloads: map[string]string{ @@ -1872,11 +1785,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 10}, "spot": {corev1.ResourceCPU: 15}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 25, - }, - }, }, "two": { Workloads: sets.New("/c"), @@ -1884,11 +1792,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 0}, "spot": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 0, - }, - }, }, }, }, @@ -1924,11 +1827,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": 
{corev1.ResourceCPU: 20}, "spot": {corev1.ResourceCPU: 30}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 50, - }, - }, }, "two": { Workloads: sets.New("/c", "/e"), @@ -1936,11 +1834,6 @@ func TestCacheWorkloadOperations(t *testing.T) { "on-demand": {corev1.ResourceCPU: 10}, "spot": {corev1.ResourceCPU: 15}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Usage: 25, - }, - }, }, }, wantAssumedWorkloads: map[string]string{ @@ -1967,7 +1860,6 @@ func TestCacheWorkloadOperations(t *testing.T) { gotResult[name] = result{ Workloads: sets.KeySet(cq.Workloads), UsedResources: cq.Usage, - ResourceStats: cq.ResourceStats, } } if diff := cmp.Diff(step.wantResults, gotResult); diff != "" { diff --git a/pkg/cache/clusterqueue.go b/pkg/cache/clusterqueue.go index 142ee5ad73..abccb9f8e3 100644 --- a/pkg/cache/clusterqueue.go +++ b/pkg/cache/clusterqueue.go @@ -40,16 +40,6 @@ var ( errQueueAlreadyExists = errors.New("queue already exists") ) -// QuotaStats holds the nominal quota and usage for a resource. -type QuotaStats struct { - Nominal int64 - Lendable int64 - Usage int64 -} - -// ResourceStats holds QuotaStats for resources. -type ResourceStats map[corev1.ResourceName]*QuotaStats - // ClusterQueue is the internal implementation of kueue.ClusterQueue that // holds admitted workloads. type ClusterQueue struct { @@ -75,8 +65,8 @@ type ClusterQueue struct { // deleted, or the resource groups are changed. AllocatableResourceGeneration int64 - // ResourceStats holds nominal quota and usage for the resources of the ClusterQueue, independent of the flavor. - ResourceStats ResourceStats + // Lendable holds the total lendable quota for the resources of the ClusterQueue, independent of the flavor. + Lendable map[corev1.ResourceName]int64 // The following fields are not populated in a snapshot. @@ -101,7 +91,7 @@ type Cohort struct { // RequestableResources equals to the sum of LendingLimit when feature LendingLimit enabled. RequestableResources FlavorResourceQuantities Usage FlavorResourceQuantities - ResourceStats ResourceStats + Lendable map[corev1.ResourceName]int64 // AllocatableResourceGeneration equals to // the sum of allocatable generation among its members. AllocatableResourceGeneration int64 @@ -265,29 +255,10 @@ func filterFlavorQuantities(orig FlavorResourceQuantities, resourceGroups []kueu return ret } -// resetResourceStatsFromResourceGroups maintains the Usage stats for the given resource groups -// and resets Nominal and Lendable values. They are calculated again in updateResourceGroups. -func (c *ClusterQueue) resetResourceStatsFromResourceGroups(resourceGroups []kueue.ResourceGroup) { - updatedResourceStats := make(ResourceStats, len(resourceGroups)) - for _, rg := range resourceGroups { - for _, res := range rg.CoveredResources { - if oStats := c.ResourceStats[res]; oStats != nil { - updatedResourceStats[res] = &QuotaStats{ - Usage: c.ResourceStats[res].Usage, - // Reset Nominal and Lendable. 
- } - } else { - updatedResourceStats[res] = &QuotaStats{} - } - } - } - c.ResourceStats = updatedResourceStats -} - func (c *ClusterQueue) updateResourceGroups(in []kueue.ResourceGroup) { oldRG := c.ResourceGroups c.ResourceGroups = make([]ResourceGroup, len(in)) - c.resetResourceStatsFromResourceGroups(in) + c.Lendable = make(map[corev1.ResourceName]int64) for i, rgIn := range in { rg := &c.ResourceGroups[i] *rg = ResourceGroup{ @@ -305,15 +276,14 @@ func (c *ClusterQueue) updateResourceGroups(in []kueue.ResourceGroup) { rQuota := ResourceQuota{ Nominal: nominal, } - c.ResourceStats[rIn.Name].Nominal += nominal if rIn.BorrowingLimit != nil { rQuota.BorrowingLimit = ptr.To(workload.ResourceValue(rIn.Name, *rIn.BorrowingLimit)) } if features.Enabled(features.LendingLimit) && rIn.LendingLimit != nil { rQuota.LendingLimit = ptr.To(workload.ResourceValue(rIn.Name, *rIn.LendingLimit)) - c.ResourceStats[rIn.Name].Lendable += *rQuota.LendingLimit + c.Lendable[rIn.Name] += *rQuota.LendingLimit } else { - c.ResourceStats[rIn.Name].Lendable += nominal + c.Lendable[rIn.Name] += nominal } fQuotas.Resources[rIn.Name] = &rQuota } @@ -499,7 +469,6 @@ func (c *ClusterQueue) reportActiveWorkloads() { func (c *ClusterQueue) updateWorkloadUsage(wi *workload.Info, m int64) { admitted := workload.IsAdmitted(wi.Obj) updateFlavorUsage(wi, c.Usage, m) - updateResourceStats(wi, c.ResourceStats, m) if admitted { updateFlavorUsage(wi, c.AdmittedUsage, m) c.admittedWorkloadsCount += int(m) @@ -515,16 +484,6 @@ func (c *ClusterQueue) updateWorkloadUsage(wi *workload.Info, m int64) { } } -func updateResourceStats(wi *workload.Info, rStats ResourceStats, m int64) { - for _, ps := range wi.TotalRequests { - for res, v := range ps.Requests { - if _, exists := rStats[res]; exists { - rStats[res].Usage += v * m - } - } - } -} - func updateFlavorUsage(wi *workload.Info, flvUsage FlavorResourceQuantities, m int64) { for _, ps := range wi.TotalRequests { for wlRes, wlResFlv := range ps.Flavors { @@ -723,7 +682,7 @@ func (c *ClusterQueue) dominantResourceShare(wlReq FlavorResourceQuantities, m i var drs int64 = -1 var dRes corev1.ResourceName for rName, b := range borrowing { - if lendable := c.Cohort.ResourceStats[rName].Lendable; lendable > 0 { + if lendable := c.Cohort.Lendable[rName]; lendable > 0 { ratio := b * 1000 / lendable // Use alphabetical order to get a deterministic resource name. 
if ratio > drs || (ratio == drs && rName < dRes) { diff --git a/pkg/cache/clusterqueue_test.go b/pkg/cache/clusterqueue_test.go index 9ad9d5bd8b..79666a99a9 100644 --- a/pkg/cache/clusterqueue_test.go +++ b/pkg/cache/clusterqueue_test.go @@ -816,17 +816,9 @@ func TestDominantResourceShare(t *testing.T) { }, }, Cohort: &Cohort{ - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 10_000, - Lendable: 10_000, - Usage: 2_000, - }, - "example.com/gpu": { - Nominal: 10, - Lendable: 10, - Usage: 6, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 10_000, + "example.com/gpu": 10, }, }, }, @@ -857,17 +849,9 @@ func TestDominantResourceShare(t *testing.T) { }, }, Cohort: &Cohort{ - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 10_000, - Lendable: 10_000, - Usage: 10_000, - }, - "example.com/gpu": { - Nominal: 10, - Lendable: 10, - Usage: 10, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 10_000, + "example.com/gpu": 10, }, }, }, @@ -900,17 +884,9 @@ func TestDominantResourceShare(t *testing.T) { }, }, Cohort: &Cohort{ - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 10_000, - Lendable: 10_000, - Usage: 10_000, - }, - "example.com/gpu": { - Nominal: 10, - Lendable: 10, - Usage: 10, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 10_000, + "example.com/gpu": 10, }, }, }, @@ -943,17 +919,9 @@ func TestDominantResourceShare(t *testing.T) { }, }, Cohort: &Cohort{ - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 10_000, - Lendable: 10_000, - Usage: 2_000, - }, - "example.com/gpu": { - Nominal: 10, - Lendable: 10, - Usage: 6, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 10_000, + "example.com/gpu": 10, }, }, }, @@ -993,16 +961,9 @@ func TestDominantResourceShare(t *testing.T) { }, }, Cohort: &Cohort{ - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 10_000, - Lendable: 10_000, - Usage: 2_000, - }, - "example.com/gpu": { - Nominal: 10_000, - Usage: 5_000, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 10_000, + "example.com/gpu": 0, }, }, }, @@ -1048,12 +1009,8 @@ func TestDominantResourceShare(t *testing.T) { }, }, Cohort: &Cohort{ - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 200_000, - Lendable: 200_000, - Usage: 20_000, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 200_000, }, }, }, diff --git a/pkg/cache/snapshot.go b/pkg/cache/snapshot.go index de5fa3ec35..c7da94daaa 100644 --- a/pkg/cache/snapshot.go +++ b/pkg/cache/snapshot.go @@ -23,7 +23,6 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/klog/v2" - "k8s.io/utils/ptr" kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" "sigs.k8s.io/kueue/pkg/features" @@ -55,14 +54,12 @@ func (s *Snapshot) AddWorkload(wl *workload.Info) { func (c *ClusterQueue) addOrRemoveWorkload(wl *workload.Info, m int64) { updateFlavorUsage(wl, c.Usage, m) - updateResourceStats(wl, c.ResourceStats, m) if c.Cohort != nil { if features.Enabled(features.LendingLimit) { updateCohortUsage(wl, c, m) } else { updateFlavorUsage(wl, c.Cohort.Usage, m) } - updateResourceStats(wl, c.Cohort.ResourceStats, m) } } @@ -138,7 +135,7 @@ func (c *ClusterQueue) snapshot() *ClusterQueue { FlavorFungibility: c.FlavorFungibility, AllocatableResourceGeneration: c.AllocatableResourceGeneration, Usage: make(FlavorResourceQuantities, len(c.Usage)), - ResourceStats: make(ResourceStats, len(c.ResourceStats)), + Lendable: 
maps.Clone(c.Lendable), Workloads: maps.Clone(c.Workloads), Preemption: c.Preemption, NamespaceSelector: c.NamespaceSelector, @@ -152,9 +149,6 @@ func (c *ClusterQueue) snapshot() *ClusterQueue { if features.Enabled(features.LendingLimit) { cc.GuaranteedQuota = c.GuaranteedQuota } - for rName, rStats := range c.ResourceStats { - cc.ResourceStats[rName] = ptr.To(*rStats) - } return cc } @@ -203,17 +197,11 @@ func (c *ClusterQueue) accumulateResources(cohort *Cohort) { used[res] += val } } - if cohort.ResourceStats == nil { - cohort.ResourceStats = make(ResourceStats, len(c.ResourceStats)) - } - for rName, rStats := range c.ResourceStats { - cohortRStats := cohort.ResourceStats[rName] - if cohortRStats == nil { - cohort.ResourceStats[rName] = ptr.To(*rStats) - continue + if cohort.Lendable == nil { + cohort.Lendable = maps.Clone(c.Lendable) + } else { + for res, v := range c.Lendable { + cohort.Lendable[res] += v } - cohortRStats.Nominal += rStats.Nominal - cohortRStats.Lendable += rStats.Lendable - cohortRStats.Usage += rStats.Usage } } diff --git a/pkg/cache/snapshot_test.go b/pkg/cache/snapshot_test.go index 787c33c6fa..3a869f3aa5 100644 --- a/pkg/cache/snapshot_test.go +++ b/pkg/cache/snapshot_test.go @@ -223,17 +223,9 @@ func TestSnapshot(t *testing.T) { "example.com/gpu": 15, }, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 400_000, - Lendable: 400_000, - Usage: 20_000, - }, - "example.com/gpu": { - Nominal: 50, - Lendable: 50, - Usage: 15, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 400_000, + "example.com/gpu": 50, }, } return Snapshot{ @@ -267,12 +259,8 @@ func TestSnapshot(t *testing.T) { "demand": {corev1.ResourceCPU: 10_000}, "spot": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 300_000, - Lendable: 300_000, - Usage: 10_000, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 300_000, }, Workloads: map[string]*workload.Info{ "/alpha": workload.NewInfo(utiltesting.MakeWorkload("alpha", ""). @@ -322,17 +310,9 @@ func TestSnapshot(t *testing.T) { "example.com/gpu": 15, }, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 100_000, - Lendable: 100_000, - Usage: 10_000, - }, - "example.com/gpu": { - Nominal: 50, - Lendable: 50, - Usage: 15, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 100_000, + "example.com/gpu": 50, }, Workloads: map[string]*workload.Info{ "/beta": workload.NewInfo(utiltesting.MakeWorkload("beta", ""). 
@@ -383,11 +363,8 @@ func TestSnapshot(t *testing.T) { corev1.ResourceCPU: 0, }, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 100_000, - Lendable: 100_000, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 100_000, }, Preemption: defaultPreemption, NamespaceSelector: labels.Everything(), @@ -492,12 +469,8 @@ func TestSnapshot(t *testing.T) { corev1.ResourceCPU: 0, }, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 60_000, - Lendable: 30_000, - Usage: 25_000, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 30_000, }, } return Snapshot{ @@ -539,12 +512,8 @@ func TestSnapshot(t *testing.T) { "arm": {corev1.ResourceCPU: 15_000}, "x86": {corev1.ResourceCPU: 10_000}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 30_000, - Lendable: 15_000, - Usage: 25_000, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 15_000, }, Workloads: map[string]*workload.Info{ "/alpha": workload.NewInfo(utiltesting.MakeWorkload("alpha", ""). @@ -618,11 +587,8 @@ func TestSnapshot(t *testing.T) { "arm": {corev1.ResourceCPU: 0}, "x86": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 30_000, - Lendable: 15_000, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 15_000, }, Preemption: defaultPreemption, NamespaceSelector: labels.Everything(), @@ -771,15 +737,9 @@ func TestSnapshotAddRemoveWorkload(t *testing.T) { "alpha": {corev1.ResourceMemory: 0}, "beta": {corev1.ResourceMemory: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 12_000, - Lendable: 12_000, - }, - corev1.ResourceMemory: { - Nominal: 12 * utiltesting.Gi, - Lendable: 12 * utiltesting.Gi, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 12_000, + corev1.ResourceMemory: 12 * utiltesting.Gi, }, } return Snapshot{ @@ -796,15 +756,9 @@ func TestSnapshotAddRemoveWorkload(t *testing.T) { "alpha": {corev1.ResourceMemory: 0}, "beta": {corev1.ResourceMemory: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 6_000, - Lendable: 6_000, - }, - corev1.ResourceMemory: { - Nominal: 12 * utiltesting.Gi, - Lendable: 12 * utiltesting.Gi, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 6_000, + corev1.ResourceMemory: 12 * utiltesting.Gi, }, }, "c2": { @@ -817,11 +771,8 @@ func TestSnapshotAddRemoveWorkload(t *testing.T) { Usage: FlavorResourceQuantities{ "default": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 6_000, - Lendable: 6_000, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 6_000, }, }, }, @@ -840,17 +791,9 @@ func TestSnapshotAddRemoveWorkload(t *testing.T) { "alpha": {corev1.ResourceMemory: utiltesting.Gi}, "beta": {corev1.ResourceMemory: utiltesting.Gi}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 12_000, - Lendable: 12_000, - Usage: 2_000, - }, - corev1.ResourceMemory: { - Nominal: 12 * utiltesting.Gi, - Lendable: 12 * utiltesting.Gi, - Usage: 2 * utiltesting.Gi, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 12_000, + corev1.ResourceMemory: 12 * utiltesting.Gi, }, } return Snapshot{ @@ -870,16 +813,9 @@ func TestSnapshotAddRemoveWorkload(t *testing.T) { "alpha": {corev1.ResourceMemory: utiltesting.Gi}, "beta": {corev1.ResourceMemory: utiltesting.Gi}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 6_000, - Lendable: 6_000, - }, - 
corev1.ResourceMemory: { - Nominal: 12 * utiltesting.Gi, - Lendable: 12 * utiltesting.Gi, - Usage: 2 * utiltesting.Gi, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 6_000, + corev1.ResourceMemory: 12 * utiltesting.Gi, }, }, "c2": { @@ -895,12 +831,8 @@ func TestSnapshotAddRemoveWorkload(t *testing.T) { Usage: FlavorResourceQuantities{ "default": {corev1.ResourceCPU: 2_000}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 6_000, - Lendable: 6_000, - Usage: 2_000, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 6_000, }, }, }, @@ -919,17 +851,9 @@ func TestSnapshotAddRemoveWorkload(t *testing.T) { "alpha": {corev1.ResourceMemory: 0}, "beta": {corev1.ResourceMemory: utiltesting.Gi}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 12_000, - Lendable: 12_000, - Usage: 3_000, - }, - corev1.ResourceMemory: { - Nominal: 12 * utiltesting.Gi, - Lendable: 12 * utiltesting.Gi, - Usage: utiltesting.Gi, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 12_000, + corev1.ResourceMemory: 12 * utiltesting.Gi, }, } return Snapshot{ @@ -949,17 +873,9 @@ func TestSnapshotAddRemoveWorkload(t *testing.T) { "alpha": {corev1.ResourceMemory: 0}, "beta": {corev1.ResourceMemory: utiltesting.Gi}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 6_000, - Lendable: 6_000, - Usage: 1_000, - }, - corev1.ResourceMemory: { - Nominal: 12 * utiltesting.Gi, - Lendable: 12 * utiltesting.Gi, - Usage: utiltesting.Gi, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 6_000, + corev1.ResourceMemory: 12 * utiltesting.Gi, }, }, "c2": { @@ -975,12 +891,8 @@ func TestSnapshotAddRemoveWorkload(t *testing.T) { Usage: FlavorResourceQuantities{ "default": {corev1.ResourceCPU: 2_000}, }, - ResourceStats: ResourceStats{ - corev1.ResourceCPU: { - Nominal: 6_000, - Lendable: 6_000, - Usage: 2_000, - }, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 6_000, }, }, }, @@ -1095,8 +1007,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) { Usage: FlavorResourceQuantities{ "default": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - "cpu": {Nominal: 20_000, Lendable: 10_000}, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 10_000, }, } return Snapshot{ @@ -1116,8 +1028,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) { corev1.ResourceCPU: 6_000, }, }, - ResourceStats: ResourceStats{ - "cpu": {Nominal: 10_000, Lendable: 4_000}, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 4_000, }, }, "lend-b": { @@ -1135,8 +1047,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) { corev1.ResourceCPU: 4_000, }, }, - ResourceStats: ResourceStats{ - "cpu": {Nominal: 10_000, Lendable: 6_000}, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 6_000, }, }, }, @@ -1153,8 +1065,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) { Usage: FlavorResourceQuantities{ "default": {corev1.ResourceCPU: 1_000}, }, - ResourceStats: ResourceStats{ - "cpu": {Nominal: 20_000, Lendable: 10_000, Usage: 11_000}, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 10_000, }, } return Snapshot{ @@ -1174,8 +1086,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) { corev1.ResourceCPU: 6_000, }, }, - ResourceStats: ResourceStats{ - "cpu": {Nominal: 10_000, Lendable: 4_000, Usage: 7_000}, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 4_000, }, 
}, "lend-b": { @@ -1193,8 +1105,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) { corev1.ResourceCPU: 4_000, }, }, - ResourceStats: ResourceStats{ - "cpu": {Nominal: 10_000, Lendable: 6_000, Usage: 4_000}, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 6_000, }, }, }, @@ -1211,8 +1123,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) { Usage: FlavorResourceQuantities{ "default": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - "cpu": {Nominal: 20_000, Lendable: 10_000, Usage: 10_000}, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 10_000, }, } return Snapshot{ @@ -1232,8 +1144,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) { corev1.ResourceCPU: 6_000, }, }, - ResourceStats: ResourceStats{ - "cpu": {Nominal: 10_000, Lendable: 4_000, Usage: 6_000}, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 4_000, }, }, "lend-b": { @@ -1251,8 +1163,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) { corev1.ResourceCPU: 4_000, }, }, - ResourceStats: ResourceStats{ - "cpu": {Nominal: 10_000, Lendable: 6_000, Usage: 4_000}, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 6_000, }, }, }, @@ -1269,8 +1181,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) { Usage: FlavorResourceQuantities{ "default": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - "cpu": {Nominal: 20_000, Lendable: 10_000, Usage: 5_000}, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 10_000, }, } return Snapshot{ @@ -1290,8 +1202,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) { corev1.ResourceCPU: 6_000, }, }, - ResourceStats: ResourceStats{ - "cpu": {Nominal: 10_000, Lendable: 4_000, Usage: 1_000}, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 4_000, }, }, "lend-b": { @@ -1309,8 +1221,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) { corev1.ResourceCPU: 4_000, }, }, - ResourceStats: ResourceStats{ - "cpu": {Nominal: 10_000, Lendable: 6_000, Usage: 4_000}, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 6_000, }, }, }, @@ -1328,8 +1240,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) { Usage: FlavorResourceQuantities{ "default": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - "cpu": {Nominal: 20_000, Lendable: 10_000, Usage: 1_000}, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 10_000, }, } return Snapshot{ @@ -1349,8 +1261,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) { corev1.ResourceCPU: 6_000, }, }, - ResourceStats: ResourceStats{ - "cpu": {Nominal: 10_000, Lendable: 4_000, Usage: 1_000}, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 4_000, }, }, "lend-b": { @@ -1368,8 +1280,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) { corev1.ResourceCPU: 4_000, }, }, - ResourceStats: ResourceStats{ - "cpu": {Nominal: 10_000, Lendable: 6_000}, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 6_000, }, }, }, @@ -1387,8 +1299,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) { Usage: FlavorResourceQuantities{ "default": {corev1.ResourceCPU: 0}, }, - ResourceStats: ResourceStats{ - "cpu": {Nominal: 20_000, Lendable: 10_000, Usage: 6_000}, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 10_000, }, } return Snapshot{ @@ -1408,8 +1320,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t 
*testing.T) { corev1.ResourceCPU: 6_000, }, }, - ResourceStats: ResourceStats{ - "cpu": {Nominal: 10_000, Lendable: 4_000, Usage: 6_000}, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 4_000, }, }, "lend-b": { @@ -1427,8 +1339,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) { corev1.ResourceCPU: 4_000, }, }, - ResourceStats: ResourceStats{ - "cpu": {Nominal: 10_000, Lendable: 6_000}, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 6_000, }, }, }, @@ -1446,8 +1358,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) { Usage: FlavorResourceQuantities{ "default": {corev1.ResourceCPU: 3_000}, }, - ResourceStats: ResourceStats{ - "cpu": {Nominal: 20_000, Lendable: 10_000, Usage: 9_000}, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 10_000, }, } return Snapshot{ @@ -1467,8 +1379,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) { corev1.ResourceCPU: 6_000, }, }, - ResourceStats: ResourceStats{ - "cpu": {Nominal: 10_000, Lendable: 4_000, Usage: 9_000}, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 4_000, }, }, "lend-b": { @@ -1486,8 +1398,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) { corev1.ResourceCPU: 4_000, }, }, - ResourceStats: ResourceStats{ - "cpu": {Nominal: 10_000, Lendable: 6_000}, + Lendable: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 6_000, }, }, }, From 8b4e4b34573d3257ce5db15425fe1c9aed7b1eee Mon Sep 17 00:00:00 2001 From: Patryk Bundyra <73306396+PBundyra@users.noreply.github.com> Date: Thu, 25 Apr 2024 17:54:18 +0200 Subject: [PATCH 31/49] Add AdmissionCheckStrategy documentation (#1996) * Add AdmissionCheckStrategy documentation * Add an AdmissionCheck section in the ClusterQueue's site * Update site/content/en/docs/concepts/admission_check.md Co-authored-by: Yuki Iwai * Apply suggestions from code review Co-authored-by: Yuki Iwai * Apply suggestions from code review Co-authored-by: Yuki Iwai * Link ClusterQueue documentation, improve the Usage section --------- Co-authored-by: Yuki Iwai --- .../en/docs/concepts/admission_check.md | 44 ++++++++++++++++++- .../content/en/docs/concepts/cluster_queue.md | 4 ++ site/static/examples/admin/minimal-cq.yaml | 11 +++++ 3 files changed, 57 insertions(+), 2 deletions(-) create mode 100644 site/static/examples/admin/minimal-cq.yaml diff --git a/site/content/en/docs/concepts/admission_check.md b/site/content/en/docs/concepts/admission_check.md index 23058cb88d..d6f1d48e92 100644 --- a/site/content/en/docs/concepts/admission_check.md +++ b/site/content/en/docs/concepts/admission_check.md @@ -38,11 +38,51 @@ spec: name: prov-test-config ``` -### ClusterQueue admissionChecks +### Usage -Once defined, an AdmissionCheck can be referenced in the ClusterQueues' spec. All Workloads associated with the queue need to be evaluated by the AdmissionCheck's controller before being admitted. +Once defined, an AdmissionCheck can be referenced in the [ClusterQueue's spec](/docs/concepts/cluster_queue). All Workloads associated with the queue need to be evaluated by the AdmissionCheck's controller before being admitted. Similarly to `ResourceFlavors`, if an `AdmissionCheck` is not found or its controller has not marked it as `Active`, the ClusterQueue will be marked as Inactive. 
+There are two ways of referencing AdmissionChecks in the ClusterQueue's spec:
+
+- `.spec.admissionChecks` - is the list of AdmissionChecks that will be run for all Workloads submitted to the ClusterQueue
+- `.spec.admissionCheckStrategy` - wraps the list of `admissionCheckStrategyRules` that give you more flexibility. It allows you to either run an AdmissionCheck for all Workloads or to associate an AdmissionCheck
+with a specific ResourceFlavor. To specify the ResourceFlavors that an AdmissionCheck should run for, use the `admissionCheckStrategyRule.onFlavors` field; if you want to run an AdmissionCheck for all Workloads, simply leave the field empty.
+
+Only one of the above-mentioned fields can be specified at a time.
+
+See examples below:
+
+Using `.spec.admissionChecks`
+
+```yaml
+apiVersion: kueue.x-k8s.io/v1beta1
+kind: ClusterQueue
+metadata:
+  name: "cluster-queue"
+spec:
+<...>
+  admissionChecks:
+  - sample-prov
+```
+
+Using `.spec.admissionCheckStrategy`
+
+```yaml
+apiVersion: kueue.x-k8s.io/v1beta1
+kind: ClusterQueue
+metadata:
+  name: "cluster-queue"
+spec:
+<...>
+  admissionChecksStrategy:
+    admissionChecks:
+    - name: "sample-prov" # Name of the AdmissionCheck to be run
+      onFlavors: ["default-flavor"] # This AdmissionCheck will only run for Workloads that use default-flavor
+    - name: "sample-prov-2" # This AdmissionCheck will run for all Workloads regardless of a used ResourceFlavor
+```
+
+
 ### AdmissionCheckState
 
 AdmissionCheckState is the way the state of an AdmissionCheck for a specific Workload is tracked.
diff --git a/site/content/en/docs/concepts/cluster_queue.md b/site/content/en/docs/concepts/cluster_queue.md
index 3679d3b09a..0b3e38a1ea 100644
--- a/site/content/en/docs/concepts/cluster_queue.md
+++ b/site/content/en/docs/concepts/cluster_queue.md
@@ -527,6 +527,10 @@ The `HoldAndDrain` will have a similar effect but, in addition, it will trigger
 
 If set to `None` or `spec.stopPolicy` is removed the ClusterQueue will to normal admission behavior.
 
+## AdmissionChecks
+
+AdmissionChecks are a mechanism that allows Kueue to consider additional criteria before admitting a Workload. See [Admission Checks](/docs/concepts/admission_check#usage) for an example ClusterQueue configuration.
+
 ## What's next?
 
 - Create [local queues](/docs/concepts/local_queue)
diff --git a/site/static/examples/admin/minimal-cq.yaml b/site/static/examples/admin/minimal-cq.yaml
new file mode 100644
index 0000000000..4718cc3887
--- /dev/null
+++ b/site/static/examples/admin/minimal-cq.yaml
@@ -0,0 +1,11 @@
+apiVersion: kueue.x-k8s.io/v1beta1
+kind: ClusterQueue
+metadata:
+  name: "cluster-queue-2"
+spec:
+  namespaceSelector: {} # match all.
+ admissionChecksStrategy: + admissionChecks: + - name: "sample-prov" # Name of the AdmissionCheck to be run + onFlavors: ["default-flavor"] # This AdmissionCheck will only run for Workloads that use default-flavor + - name: "sample-prov-2" # This AdmissionCheck will run for all Workloads regardless of a used ResourceFlavor From ab2dac1db7c3f265a56ce0b0d8ff06678fca0b3d Mon Sep 17 00:00:00 2001 From: Patryk Bundyra <73306396+PBundyra@users.noreply.github.com> Date: Thu, 25 Apr 2024 17:54:36 +0200 Subject: [PATCH 32/49] Add the ProvisioningRequest's classname annotation (#2052) * Add the ProvisioningRequest's classname annotation * Update integration tests --- .../admissionchecks/provisioning/constants.go | 7 ++++--- .../admissionchecks/provisioning/controller.go | 6 ++++-- .../admissionchecks/provisioning/controller_test.go | 11 +++++++---- .../provisioning/provisioning_test.go | 12 ++++++++---- 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/pkg/controller/admissionchecks/provisioning/constants.go b/pkg/controller/admissionchecks/provisioning/constants.go index 6b421232a9..cb6f5051e9 100644 --- a/pkg/controller/admissionchecks/provisioning/constants.go +++ b/pkg/controller/admissionchecks/provisioning/constants.go @@ -17,9 +17,10 @@ limitations under the License. package provisioning const ( - ConfigKind = "ProvisioningRequestConfig" - ControllerName = "kueue.x-k8s.io/provisioning-request" - ConsumesAnnotationKey = "cluster-autoscaler.kubernetes.io/consume-provisioning-request" + ConfigKind = "ProvisioningRequestConfig" + ControllerName = "kueue.x-k8s.io/provisioning-request" + ConsumesAnnotationKey = "cluster-autoscaler.kubernetes.io/consume-provisioning-request" + ClassNameAnnotationKey = "cluster-autoscaler.kubernetes.io/provisioning-class-name" CheckInactiveMessage = "the check is not active" NoRequestNeeded = "the provisioning request is not needed" diff --git a/pkg/controller/admissionchecks/provisioning/controller.go b/pkg/controller/admissionchecks/provisioning/controller.go index d2a828c142..6ec7cc7165 100644 --- a/pkg/controller/admissionchecks/provisioning/controller.go +++ b/pkg/controller/admissionchecks/provisioning/controller.go @@ -573,8 +573,10 @@ func podSetUpdates(wl *kueue.Workload, pr *autoscaling.ProvisioningRequest) []ku }) return slices.Map(pr.Spec.PodSets, func(ps *autoscaling.PodSet) kueue.PodSetUpdate { return kueue.PodSetUpdate{ - Name: refMap[ps.PodTemplateRef.Name], - Annotations: map[string]string{ConsumesAnnotationKey: pr.Name}, + Name: refMap[ps.PodTemplateRef.Name], + Annotations: map[string]string{ + ConsumesAnnotationKey: pr.Name, + ClassNameAnnotationKey: pr.Spec.ProvisioningClassName}, } }) } diff --git a/pkg/controller/admissionchecks/provisioning/controller_test.go b/pkg/controller/admissionchecks/provisioning/controller_test.go index 1ceccd38b4..2600a86db8 100644 --- a/pkg/controller/admissionchecks/provisioning/controller_test.go +++ b/pkg/controller/admissionchecks/provisioning/controller_test.go @@ -547,12 +547,15 @@ func TestReconcile(t *testing.T) { State: kueue.CheckStateReady, PodSetUpdates: []kueue.PodSetUpdate{ { - Name: "ps1", - Annotations: map[string]string{"cluster-autoscaler.kubernetes.io/consume-provisioning-request": "wl-check1-1"}, + Name: "ps1", + Annotations: map[string]string{ + "cluster-autoscaler.kubernetes.io/consume-provisioning-request": "wl-check1-1", + "cluster-autoscaler.kubernetes.io/provisioning-class-name": "class1"}, }, { - Name: "ps2", - Annotations: 
map[string]string{"cluster-autoscaler.kubernetes.io/consume-provisioning-request": "wl-check1-1"}, + Name: "ps2", + Annotations: map[string]string{"cluster-autoscaler.kubernetes.io/consume-provisioning-request": "wl-check1-1", + "cluster-autoscaler.kubernetes.io/provisioning-class-name": "class1"}, }, }, }, kueue.AdmissionCheckState{ diff --git a/test/integration/controller/admissionchecks/provisioning/provisioning_test.go b/test/integration/controller/admissionchecks/provisioning/provisioning_test.go index 554ed4b055..34b6e11719 100644 --- a/test/integration/controller/admissionchecks/provisioning/provisioning_test.go +++ b/test/integration/controller/admissionchecks/provisioning/provisioning_test.go @@ -376,13 +376,15 @@ var _ = ginkgo.Describe("Provisioning", ginkgo.Ordered, ginkgo.ContinueOnFailure { Name: "ps1", Annotations: map[string]string{ - provisioning.ConsumesAnnotationKey: provReqKey.Name, + provisioning.ConsumesAnnotationKey: provReqKey.Name, + provisioning.ClassNameAnnotationKey: prc.Spec.ProvisioningClassName, }, }, { Name: "ps2", Annotations: map[string]string{ - provisioning.ConsumesAnnotationKey: provReqKey.Name, + provisioning.ConsumesAnnotationKey: provReqKey.Name, + provisioning.ClassNameAnnotationKey: prc.Spec.ProvisioningClassName, }, }, })) @@ -641,13 +643,15 @@ var _ = ginkgo.Describe("Provisioning", ginkgo.Ordered, ginkgo.ContinueOnFailure { Name: "ps1", Annotations: map[string]string{ - provisioning.ConsumesAnnotationKey: provReqKey.Name, + provisioning.ConsumesAnnotationKey: provReqKey.Name, + provisioning.ClassNameAnnotationKey: prc.Spec.ProvisioningClassName, }, }, { Name: "ps2", Annotations: map[string]string{ - provisioning.ConsumesAnnotationKey: provReqKey.Name, + provisioning.ConsumesAnnotationKey: provReqKey.Name, + provisioning.ClassNameAnnotationKey: prc.Spec.ProvisioningClassName, }, }, })) From 8d5eba2c748778bd3529373f5a047cf1c1083d18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irving=20Mondrag=C3=B3n?= Date: Thu, 25 Apr 2024 20:11:33 +0200 Subject: [PATCH 33/49] Validate admission check applies to all flavors (#2047) * Add admission check to validate that it applies to all flavors * Rename variables and constants * Rename variables and constants * Update unit test * Rename variables and constants * Rename variables and constants --- apis/kueue/v1beta1/admissioncheck_types.go | 3 ++ pkg/cache/admissioncheck.go | 1 + pkg/cache/cache.go | 1 + pkg/cache/clusterqueue.go | 32 +++++++++++++------ pkg/cache/clusterqueue_test.go | 22 +++++++++++++ .../multikueue/admissioncheck.go | 19 +++++++++-- .../multikueue/admissioncheck_test.go | 6 ++++ pkg/util/testing/wrappers.go | 16 ++++++++++ .../integration/multikueue/multikueue_test.go | 6 ++++ 9 files changed, 94 insertions(+), 12 deletions(-) diff --git a/apis/kueue/v1beta1/admissioncheck_types.go b/apis/kueue/v1beta1/admissioncheck_types.go index 0d5268fc9d..d22878ca1c 100644 --- a/apis/kueue/v1beta1/admissioncheck_types.go +++ b/apis/kueue/v1beta1/admissioncheck_types.go @@ -104,6 +104,9 @@ const ( // Having multiple AdmissionChecks managed by the same controller where at least one has this condition // set to true will cause the ClusterQueue to be marked as Inactive. AdmissionChecksSingleInstanceInClusterQueue string = "SingleInstanceInClusterQueue" + + // FlavorIndependentAdmissionCheck indicates if the AdmissionCheck cannot be applied at ResourceFlavor level. 
+ FlavorIndependentAdmissionCheck string = "FlavorIndependent" ) // +genclient diff --git a/pkg/cache/admissioncheck.go b/pkg/cache/admissioncheck.go index 0e8d105390..85bc49a6eb 100644 --- a/pkg/cache/admissioncheck.go +++ b/pkg/cache/admissioncheck.go @@ -20,4 +20,5 @@ type AdmissionCheck struct { Active bool Controller string SingleInstanceInClusterQueue bool + FlavorIndependent bool } diff --git a/pkg/cache/cache.go b/pkg/cache/cache.go index a6ef5912e0..30ebff183f 100644 --- a/pkg/cache/cache.go +++ b/pkg/cache/cache.go @@ -211,6 +211,7 @@ func (c *Cache) AddOrUpdateAdmissionCheck(ac *kueue.AdmissionCheck) sets.Set[str Active: apimeta.IsStatusConditionTrue(ac.Status.Conditions, kueue.AdmissionCheckActive), Controller: ac.Spec.ControllerName, SingleInstanceInClusterQueue: apimeta.IsStatusConditionTrue(ac.Status.Conditions, kueue.AdmissionChecksSingleInstanceInClusterQueue), + FlavorIndependent: apimeta.IsStatusConditionTrue(ac.Status.Conditions, kueue.FlavorIndependentAdmissionCheck), } return c.updateClusterQueues() diff --git a/pkg/cache/clusterqueue.go b/pkg/cache/clusterqueue.go index abccb9f8e3..341465a7fa 100644 --- a/pkg/cache/clusterqueue.go +++ b/pkg/cache/clusterqueue.go @@ -72,13 +72,14 @@ type ClusterQueue struct { AdmittedUsage FlavorResourceQuantities // localQueues by (namespace/name). - localQueues map[string]*queue - podsReadyTracking bool - hasMissingFlavors bool - hasMissingOrInactiveAdmissionChecks bool - hasMultipleSingleInstanceControllersChecks bool - admittedWorkloadsCount int - isStopped bool + localQueues map[string]*queue + podsReadyTracking bool + hasMissingFlavors bool + hasMissingOrInactiveAdmissionChecks bool + hasMultipleSingleInstanceControllersChecks bool + hasFlavorIndependentAdmissionCheckAppliedPerFlavor bool + admittedWorkloadsCount int + isStopped bool } // Cohort is a set of ClusterQueues that can borrow resources from each other. @@ -309,7 +310,7 @@ func (c *ClusterQueue) UpdateRGByResource() { func (c *ClusterQueue) updateQueueStatus() { status := active - if c.hasMissingFlavors || c.hasMissingOrInactiveAdmissionChecks || c.isStopped || c.hasMultipleSingleInstanceControllersChecks { + if c.hasMissingFlavors || c.hasMissingOrInactiveAdmissionChecks || c.isStopped || c.hasMultipleSingleInstanceControllersChecks || c.hasFlavorIndependentAdmissionCheckAppliedPerFlavor { status = pending } if c.Status == terminating { @@ -341,6 +342,10 @@ func (c *ClusterQueue) inactiveReason() (string, string) { reasons = append(reasons, "MultipleSingleInstanceControllerChecks") } + if c.hasFlavorIndependentAdmissionCheckAppliedPerFlavor { + reasons = append(reasons, "FlavorIndependentAdmissionCheckAppliedPerFlavor") + } + if len(reasons) == 0 { return "Unknown", "Can't admit new workloads." } @@ -387,9 +392,10 @@ func (c *ClusterQueue) updateLabelKeys(flavors map[kueue.ResourceFlavorReference // updateWithAdmissionChecks updates a ClusterQueue based on the passed AdmissionChecks set. 
func (c *ClusterQueue) updateWithAdmissionChecks(checks map[string]AdmissionCheck) { hasMissing := false + hasSpecificChecks := false checksPerController := make(map[string]int, len(c.AdmissionChecks)) singleInstanceControllers := sets.New[string]() - for acName := range c.AdmissionChecks { + for acName, flavors := range c.AdmissionChecks { if ac, found := checks[acName]; !found { hasMissing = true } else { @@ -400,6 +406,9 @@ func (c *ClusterQueue) updateWithAdmissionChecks(checks map[string]AdmissionChec if ac.SingleInstanceInClusterQueue { singleInstanceControllers.Insert(ac.Controller) } + if ac.FlavorIndependent && flavors.Len() != 0 { + hasSpecificChecks = true + } } } @@ -421,6 +430,11 @@ func (c *ClusterQueue) updateWithAdmissionChecks(checks map[string]AdmissionChec update = true } + if c.hasFlavorIndependentAdmissionCheckAppliedPerFlavor != hasSpecificChecks { + c.hasFlavorIndependentAdmissionCheckAppliedPerFlavor = hasSpecificChecks + update = true + } + if update { c.updateQueueStatus() } diff --git a/pkg/cache/clusterqueue_test.go b/pkg/cache/clusterqueue_test.go index 79666a99a9..427cfa4b25 100644 --- a/pkg/cache/clusterqueue_test.go +++ b/pkg/cache/clusterqueue_test.go @@ -474,6 +474,12 @@ func TestClusterQueueUpdateWithAdmissionCheck(t *testing.T) { *utiltesting.MakeAdmissionCheckStrategyRule("check3").Obj()). Obj() + cqWithACPerFlavor := utiltesting.MakeClusterQueue("cq3"). + AdmissionCheckStrategy( + *utiltesting.MakeAdmissionCheckStrategyRule("check1", "flavor1", "flavor2", "flavor3").Obj(), + ). + Obj() + testcases := []struct { name string cq *kueue.ClusterQueue @@ -646,6 +652,20 @@ func TestClusterQueueUpdateWithAdmissionCheck(t *testing.T) { wantStatus: pending, wantReason: "MultipleSingleInstanceControllerChecks", }, + { + name: "Active clusterQueue with a FlavorIndependent AC applied per ResourceFlavor", + cq: cqWithACPerFlavor, + cqStatus: pending, + admissionChecks: map[string]AdmissionCheck{ + "check1": { + Active: true, + Controller: "controller1", + FlavorIndependent: true, + }, + }, + wantStatus: pending, + wantReason: "FlavorIndependentAdmissionCheckAppliedPerFlavor", + }, { name: "Terminating clusterQueue updated with valid AC list", cq: cqWithAC, @@ -738,9 +758,11 @@ func TestClusterQueueUpdateWithAdmissionCheck(t *testing.T) { if tc.cqStatus == active { cq.hasMultipleSingleInstanceControllersChecks = false cq.hasMissingOrInactiveAdmissionChecks = false + cq.hasFlavorIndependentAdmissionCheckAppliedPerFlavor = false } else { cq.hasMultipleSingleInstanceControllersChecks = true cq.hasMissingOrInactiveAdmissionChecks = true + cq.hasFlavorIndependentAdmissionCheckAppliedPerFlavor = true } cq.updateWithAdmissionChecks(tc.admissionChecks) diff --git a/pkg/controller/admissionchecks/multikueue/admissioncheck.go b/pkg/controller/admissionchecks/multikueue/admissioncheck.go index c41745eb7d..b1e67906cc 100644 --- a/pkg/controller/admissionchecks/multikueue/admissioncheck.go +++ b/pkg/controller/admissionchecks/multikueue/admissioncheck.go @@ -39,9 +39,11 @@ import ( ) const ( - ControllerName = "kueue.x-k8s.io/multikueue" - SingleInstanceReason = "MultiKueue" - SingleInstanceMessage = "only one multikueue managed admission check can be used in one ClusterQueue" + ControllerName = "kueue.x-k8s.io/multikueue" + SingleInstanceReason = "MultiKueue" + SingleInstanceMessage = "only one multikueue managed admission check can be used in one ClusterQueue" + FlavorIndependentCheckReason = "MultiKueue" + FlavorIndependentCheckMessage = "admission check cannot be applied at 
ResourceFlavor level" ) type multiKueueStoreHelper = admissioncheck.ConfigHelper[*kueuealpha.MultiKueueConfig, kueuealpha.MultiKueueConfig] @@ -140,6 +142,17 @@ func (a *ACReconciler) Reconcile(ctx context.Context, req reconcile.Request) (re needsUpdate = true } + if !apimeta.IsStatusConditionTrue(ac.Status.Conditions, kueue.FlavorIndependentAdmissionCheck) { + apimeta.SetStatusCondition(&ac.Status.Conditions, metav1.Condition{ + Type: kueue.FlavorIndependentAdmissionCheck, + Status: metav1.ConditionTrue, + Reason: FlavorIndependentCheckReason, + Message: FlavorIndependentCheckMessage, + ObservedGeneration: ac.Generation, + }) + needsUpdate = true + } + if needsUpdate { err := a.client.Status().Update(ctx, ac) if err != nil { diff --git a/pkg/controller/admissionchecks/multikueue/admissioncheck_test.go b/pkg/controller/admissionchecks/multikueue/admissioncheck_test.go index 65cd958b38..204984c3a4 100644 --- a/pkg/controller/admissionchecks/multikueue/admissioncheck_test.go +++ b/pkg/controller/admissionchecks/multikueue/admissioncheck_test.go @@ -57,6 +57,7 @@ func TestReconcile(t *testing.T) { ControllerName(ControllerName). Parameters(kueuealpha.GroupVersion.Group, "MultiKueueConfig", "config1"). SingleInstanceInClusterQueue(true, SingleInstanceReason, SingleInstanceMessage, 1). + ApplyToAllFlavors(true, FlavorIndependentCheckReason, FlavorIndependentCheckMessage, 1). Condition(metav1.Condition{ Type: kueue.AdmissionCheckActive, Status: metav1.ConditionFalse, @@ -97,6 +98,7 @@ func TestReconcile(t *testing.T) { ControllerName(ControllerName). Parameters(kueuealpha.GroupVersion.Group, "MultiKueueConfig", "config1"). SingleInstanceInClusterQueue(true, SingleInstanceReason, SingleInstanceMessage, 1). + ApplyToAllFlavors(true, FlavorIndependentCheckReason, FlavorIndependentCheckMessage, 1). Condition(metav1.Condition{ Type: kueue.AdmissionCheckActive, Status: metav1.ConditionFalse, @@ -129,6 +131,7 @@ func TestReconcile(t *testing.T) { ControllerName(ControllerName). Parameters(kueuealpha.GroupVersion.Group, "MultiKueueConfig", "config1"). SingleInstanceInClusterQueue(true, SingleInstanceReason, SingleInstanceMessage, 1). + ApplyToAllFlavors(true, FlavorIndependentCheckReason, FlavorIndependentCheckMessage, 1). Condition(metav1.Condition{ Type: kueue.AdmissionCheckActive, Status: metav1.ConditionFalse, @@ -164,6 +167,7 @@ func TestReconcile(t *testing.T) { ControllerName(ControllerName). Parameters(kueuealpha.GroupVersion.Group, "MultiKueueConfig", "config1"). SingleInstanceInClusterQueue(true, SingleInstanceReason, SingleInstanceMessage, 1). + ApplyToAllFlavors(true, FlavorIndependentCheckReason, FlavorIndependentCheckMessage, 1). Condition(metav1.Condition{ Type: kueue.AdmissionCheckActive, Status: metav1.ConditionFalse, @@ -199,6 +203,7 @@ func TestReconcile(t *testing.T) { ControllerName(ControllerName). Parameters(kueuealpha.GroupVersion.Group, "MultiKueueConfig", "config1"). SingleInstanceInClusterQueue(true, SingleInstanceReason, SingleInstanceMessage, 1). + ApplyToAllFlavors(true, FlavorIndependentCheckReason, FlavorIndependentCheckMessage, 1). Condition(metav1.Condition{ Type: kueue.AdmissionCheckActive, Status: metav1.ConditionTrue, @@ -231,6 +236,7 @@ func TestReconcile(t *testing.T) { ControllerName(ControllerName). Parameters(kueuealpha.GroupVersion.Group, "MultiKueueConfig", "config1"). SingleInstanceInClusterQueue(true, SingleInstanceReason, SingleInstanceMessage, 1). + ApplyToAllFlavors(true, FlavorIndependentCheckReason, FlavorIndependentCheckMessage, 1). 
Condition(metav1.Condition{ Type: kueue.AdmissionCheckActive, Status: metav1.ConditionTrue, diff --git a/pkg/util/testing/wrappers.go b/pkg/util/testing/wrappers.go index 036b0af2b2..6efa611e7e 100644 --- a/pkg/util/testing/wrappers.go +++ b/pkg/util/testing/wrappers.go @@ -918,6 +918,22 @@ func (ac *AdmissionCheckWrapper) SingleInstanceInClusterQueue(singleInstance boo return ac } +func (ac *AdmissionCheckWrapper) ApplyToAllFlavors(applyToAllFlavors bool, reason, message string, observedGeneration int64) *AdmissionCheckWrapper { + cond := metav1.Condition{ + Type: kueue.FlavorIndependentAdmissionCheck, + Status: metav1.ConditionTrue, + Reason: reason, + Message: message, + ObservedGeneration: observedGeneration, + } + if !applyToAllFlavors { + cond.Status = metav1.ConditionFalse + } + + apimeta.SetStatusCondition(&ac.Status.Conditions, cond) + return ac +} + func (ac *AdmissionCheckWrapper) Obj() *kueue.AdmissionCheck { return &ac.AdmissionCheck } diff --git a/test/integration/multikueue/multikueue_test.go b/test/integration/multikueue/multikueue_test.go index 6ac25042f1..980620202f 100644 --- a/test/integration/multikueue/multikueue_test.go +++ b/test/integration/multikueue/multikueue_test.go @@ -199,6 +199,12 @@ var _ = ginkgo.Describe("Multikueue", func() { Reason: multikueue.SingleInstanceReason, Message: multikueue.SingleInstanceMessage, }, util.IgnoreConditionTimestampsAndObservedGeneration), + gomega.BeComparableTo(metav1.Condition{ + Type: kueue.FlavorIndependentAdmissionCheck, + Status: metav1.ConditionTrue, + Reason: multikueue.FlavorIndependentCheckReason, + Message: multikueue.FlavorIndependentCheckMessage, + }, util.IgnoreConditionTimestampsAndObservedGeneration), )) }, util.Timeout, util.Interval).Should(gomega.Succeed()) }) From 3d764d5086ea156ae11f3d4b8bf60eef9fff7e3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Wo=C5=BAniak?= Date: Thu, 25 Apr 2024 20:11:42 +0200 Subject: [PATCH 34/49] Improve logging of workload status (#2062) --- pkg/controller/core/workload_controller.go | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pkg/controller/core/workload_controller.go b/pkg/controller/core/workload_controller.go index 49c784a184..28fe8cd4b2 100644 --- a/pkg/controller/core/workload_controller.go +++ b/pkg/controller/core/workload_controller.go @@ -60,9 +60,10 @@ import ( const ( // statuses for logging purposes - pending = "pending" - admitted = "admitted" - finished = "finished" + pending = "pending" + quotaReserved = "quotaReserved" + admitted = "admitted" + finished = "finished" ) var ( @@ -539,12 +540,12 @@ func (r *WorkloadReconciler) Update(e event.UpdateEvent) bool { log.V(2).Info("Queue for updated workload didn't exist; ignoring for now") } - case prevStatus == pending && status == admitted: + case prevStatus == pending && (status == quotaReserved || status == admitted): r.queues.DeleteWorkload(oldWl) if !r.cache.AddOrUpdateWorkload(wlCopy) { log.V(2).Info("ClusterQueue for workload didn't exist; ignored for now") } - case prevStatus == admitted && status == pending: + case (prevStatus == quotaReserved || prevStatus == admitted) && status == pending: // trigger the move of associated inadmissibleWorkloads, if there are any. 
r.queues.QueueAssociatedInadmissibleWorkloadsAfter(ctx, wl, func() { // Delete the workload from cache while holding the queues lock @@ -660,9 +661,12 @@ func workloadStatus(w *kueue.Workload) string { if apimeta.IsStatusConditionTrue(w.Status.Conditions, kueue.WorkloadFinished) { return finished } - if workload.HasQuotaReservation(w) { + if workload.IsAdmitted(w) { return admitted } + if workload.HasQuotaReservation(w) { + return quotaReserved + } return pending } From ce4be24ea11edc26fa207b9247ffd23e62bbd633 Mon Sep 17 00:00:00 2001 From: vladikkuzn <51460778+vladikkuzn@users.noreply.github.com> Date: Thu, 25 Apr 2024 22:47:11 +0300 Subject: [PATCH 35/49] * Enable gci (#2069) * Add make lint-fix for dev use --- .golangci.yaml | 18 ++++++++++++++++++ Makefile | 4 ++++ 2 files changed, 22 insertions(+) diff --git a/.golangci.yaml b/.golangci.yaml index 6c0fdd3289..227d26731f 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -19,6 +19,24 @@ linters-settings: govet: enable: - nilness + gci: + # Section configuration to compare against. + # Section names are case-insensitive and may contain parameters in (). + # The default order of sections is `standard > default > custom > blank > dot > alias`, + # If `custom-order` is `true`, it follows the order of `sections` option. + # Default: ["standard", "default"] + sections: + - standard # Standard section: captures all standard packages. + - default # Default section: contains all imports that could not be matched to another section type. + - prefix(sigs.k8s.io/kueue) # Custom section: groups all imports with the specified Prefix. + # Skip generated files. + # Default: true + skip-generated: true + # Enable custom order of sections. + # If `true`, make the section order the same as the order of `sections`. + # Default: false + custom-order: true + # Settings for enabling and disabling linters linters: diff --git a/Makefile b/Makefile index 691c5c011f..3b8ab0b7b0 100644 --- a/Makefile +++ b/Makefile @@ -271,6 +271,10 @@ run-scalability-in-cluster: envtest scalability-runner ci-lint: golangci-lint $(GOLANGCI_LINT) run --timeout 15m0s +.PHONY: lint-fix +lint-fix: golangci-lint + $(GOLANGCI_LINT) run --fix --timeout 15m0s + .PHONY: verify verify: gomod-verify ci-lint fmt-verify shell-lint toc-verify manifests generate update-helm generate-apiref prepare-release-branch git --no-pager diff --exit-code config/components apis charts/kueue/templates client-go site/ From e9915ae16962c0a26b5fcaf60e658f3f3a6290ce Mon Sep 17 00:00:00 2001 From: Traian Schiau <55734665+trasc@users.noreply.github.com> Date: Fri, 26 Apr 2024 10:37:08 +0300 Subject: [PATCH 36/49] [Scalability] rename to performance/scheduler (#2065) * Move `test/performance` to `test/performance/e2e` * Move `test/scalability` to `test/performance/scheduler`. * Rename scalability make targets and artifacts dirs. * Add temporary test-scalability alias. 
* Review --- Makefile | 38 ++++++++++--------- test/performance/{ => e2e}/jobs/.env.example | 0 test/performance/{ => e2e}/jobs/.gitignore | 0 test/performance/{ => e2e}/jobs/README.md | 2 +- test/performance/{ => e2e}/jobs/config.yaml | 0 test/performance/{ => e2e}/jobs/job.yaml | 0 .../{ => e2e}/jobs/local-queue.yaml | 0 .../jobs/prerequisites/cluster-queue.template | 0 .../jobs/prerequisites/resource-flavor.yaml | 0 test/performance/{ => e2e}/jobs/run-test.sh | 0 .../{ => e2e}/podgroups/.gitignore | 0 .../performance/{ => e2e}/podgroups/README.md | 2 +- .../{ => e2e}/podgroups/manifest.diff | 0 .../{ => e2e}/podgroups/run-test.sh | 0 .../podgroups/templates/cluster-queue.yaml | 0 .../podgroups/templates/local-queue.yaml | 0 .../{ => e2e}/podgroups/templates/pod.yaml | 0 .../podgroups/templates/resource-flavor.yaml | 0 .../{ => e2e}/podgroups/test-config.yaml | 0 .../scheduler}/README.md | 32 ++++++++-------- .../scheduler}/checker/checker_test.go | 4 +- .../scheduler}/default_generator_config.yaml | 0 .../scheduler}/default_rangespec.yaml | 0 .../scheduler}/minimalkueue/main.go | 0 .../runner/controller/controller.go | 4 +- .../scheduler}/runner/generator/generator.go | 6 +-- .../runner/generator/generator_test.go | 0 .../scheduler}/runner/main.go | 10 ++--- .../scheduler}/runner/recorder/recorder.go | 2 +- .../scheduler}/runner/scraper/scraper.go | 0 .../scheduler}/runner/stats/stats.go | 0 31 files changed, 52 insertions(+), 48 deletions(-) rename test/performance/{ => e2e}/jobs/.env.example (100%) rename test/performance/{ => e2e}/jobs/.gitignore (100%) rename test/performance/{ => e2e}/jobs/README.md (98%) rename test/performance/{ => e2e}/jobs/config.yaml (100%) rename test/performance/{ => e2e}/jobs/job.yaml (100%) rename test/performance/{ => e2e}/jobs/local-queue.yaml (100%) rename test/performance/{ => e2e}/jobs/prerequisites/cluster-queue.template (100%) rename test/performance/{ => e2e}/jobs/prerequisites/resource-flavor.yaml (100%) rename test/performance/{ => e2e}/jobs/run-test.sh (100%) rename test/performance/{ => e2e}/podgroups/.gitignore (100%) rename test/performance/{ => e2e}/podgroups/README.md (97%) rename test/performance/{ => e2e}/podgroups/manifest.diff (100%) rename test/performance/{ => e2e}/podgroups/run-test.sh (100%) rename test/performance/{ => e2e}/podgroups/templates/cluster-queue.yaml (100%) rename test/performance/{ => e2e}/podgroups/templates/local-queue.yaml (100%) rename test/performance/{ => e2e}/podgroups/templates/pod.yaml (100%) rename test/performance/{ => e2e}/podgroups/templates/resource-flavor.yaml (100%) rename test/performance/{ => e2e}/podgroups/test-config.yaml (100%) rename test/{scalability => performance/scheduler}/README.md (61%) rename test/{scalability => performance/scheduler}/checker/checker_test.go (96%) rename test/{scalability => performance/scheduler}/default_generator_config.yaml (100%) rename test/{scalability => performance/scheduler}/default_rangespec.yaml (100%) rename test/{scalability => performance/scheduler}/minimalkueue/main.go (100%) rename test/{scalability => performance/scheduler}/runner/controller/controller.go (97%) rename test/{scalability => performance/scheduler}/runner/generator/generator.go (97%) rename test/{scalability => performance/scheduler}/runner/generator/generator_test.go (100%) rename test/{scalability => performance/scheduler}/runner/main.go (97%) rename test/{scalability => performance/scheduler}/runner/recorder/recorder.go (99%) rename test/{scalability => 
performance/scheduler}/runner/scraper/scraper.go (100%) rename test/{scalability => performance/scheduler}/runner/stats/stats.go (100%) diff --git a/Makefile b/Makefile index 3b8ab0b7b0..5e07776412 100644 --- a/Makefile +++ b/Makefile @@ -213,14 +213,14 @@ run-test-multikueue-e2e-%: FORCE @echo Running multikueue e2e for k8s ${K8S_VERSION} E2E_KIND_VERSION="kindest/node:v$(K8S_VERSION)" KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) CREATE_KIND_CLUSTER=$(CREATE_KIND_CLUSTER) ARTIFACTS="$(ARTIFACTS)/$@" IMAGE_TAG=$(IMAGE_TAG) GINKGO_ARGS="$(GINKGO_ARGS)" JOBSET_VERSION=$(JOBSET_VERSION) ./hack/multikueue-e2e-test.sh -SCALABILITY_RUNNER := $(ARTIFACTS)/scalability-runner -.PHONY: scalability-runner -scalability-runner: - $(GO_BUILD_ENV) $(GO_CMD) build -ldflags="$(LD_FLAGS)" -o $(SCALABILITY_RUNNER) test/scalability/runner/main.go +SCALABILITY_RUNNER := $(ARTIFACTS)/performance-scheduler-runner +.PHONY: performance-scheduler-runner +performance-scheduler-runner: + $(GO_BUILD_ENV) $(GO_CMD) build -ldflags="$(LD_FLAGS)" -o $(SCALABILITY_RUNNER) test/performance/scheduler/runner/main.go .PHONY: minimalkueue minimalkueue: - $(GO_BUILD_ENV) $(GO_CMD) build -ldflags="$(LD_FLAGS)" -o $(ARTIFACTS)/minimalkueue test/scalability/minimalkueue/main.go + $(GO_BUILD_ENV) $(GO_CMD) build -ldflags="$(LD_FLAGS)" -o $(ARTIFACTS)/minimalkueue test/performance/scheduler/minimalkueue/main.go ifdef SCALABILITY_CPU_PROFILE SCALABILITY_EXTRA_ARGS += --withCPUProfile=true @@ -238,11 +238,11 @@ ifdef SCALABILITY_SCRAPE_URL SCALABILITY_SCRAPE_ARGS += --metricsScrapeURL=$(SCALABILITY_SCRAPE_URL) endif -SCALABILITY_GENERATOR_CONFIG ?= $(PROJECT_DIR)/test/scalability/default_generator_config.yaml +SCALABILITY_GENERATOR_CONFIG ?= $(PROJECT_DIR)/test/performance/scheduler/default_generator_config.yaml -SCALABILITY_RUN_DIR := $(ARTIFACTS)/run-scalability -.PHONY: run-scalability -run-scalability: envtest scalability-runner minimalkueue +SCALABILITY_RUN_DIR := $(ARTIFACTS)/run-performance-scheduler +.PHONY: run-performance-scheduler +run-performance-scheduler: envtest performance-scheduler-runner minimalkueue mkdir -p $(SCALABILITY_RUN_DIR) KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" \ $(SCALABILITY_RUNNER) \ @@ -251,19 +251,23 @@ run-scalability: envtest scalability-runner minimalkueue --generatorConfig=$(SCALABILITY_GENERATOR_CONFIG) \ --minimalKueue=$(ARTIFACTS)/minimalkueue $(SCALABILITY_EXTRA_ARGS) $(SCALABILITY_SCRAPE_ARGS) -.PHONY: test-scalability -test-scalability: gotestsum run-scalability - $(GOTESTSUM) --junitfile $(ARTIFACTS)/junit.xml -- $(GO_TEST_FLAGS) ./test/scalability/checker \ +.PHONY: test-performance-scheduler +test-performance-scheduler: gotestsum run-performance-scheduler + $(GOTESTSUM) --junitfile $(ARTIFACTS)/junit.xml -- $(GO_TEST_FLAGS) ./test/performance/scheduler/checker \ --summary=$(SCALABILITY_RUN_DIR)/summary.yaml \ --cmdStats=$(SCALABILITY_RUN_DIR)/minimalkueue.stats.yaml \ - --range=$(PROJECT_DIR)/test/scalability/default_rangespec.yaml + --range=$(PROJECT_DIR)/test/performance/scheduler/default_rangespec.yaml + +# drop this once is no longer used by CI +.PHONY: test-scalability +test-scalability: test-performance-scheduler -.PHONY: run-scalability-in-cluster -run-scalability-in-cluster: envtest scalability-runner - mkdir -p $(ARTIFACTS)/run-scalability-in-cluster +.PHONY: run-performance-scheduler-in-cluster +run-performance-scheduler-in-cluster: envtest performance-scheduler-runner + mkdir -p $(ARTIFACTS)/run-performance-scheduler-in-cluster 
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" \ $(SCALABILITY_RUNNER) \ - --o $(ARTIFACTS)/run-scalability-in-cluster \ + --o $(ARTIFACTS)/run-performance-scheduler-in-cluster \ --generatorConfig=$(SCALABILITY_GENERATOR_CONFIG) \ --qps=1000 --burst=2000 --timeout=15m $(SCALABILITY_SCRAPE_ARGS) diff --git a/test/performance/jobs/.env.example b/test/performance/e2e/jobs/.env.example similarity index 100% rename from test/performance/jobs/.env.example rename to test/performance/e2e/jobs/.env.example diff --git a/test/performance/jobs/.gitignore b/test/performance/e2e/jobs/.gitignore similarity index 100% rename from test/performance/jobs/.gitignore rename to test/performance/e2e/jobs/.gitignore diff --git a/test/performance/jobs/README.md b/test/performance/e2e/jobs/README.md similarity index 98% rename from test/performance/jobs/README.md rename to test/performance/e2e/jobs/README.md index 6fa1581041..37da22f0f1 100644 --- a/test/performance/jobs/README.md +++ b/test/performance/e2e/jobs/README.md @@ -1,4 +1,4 @@ -# Kueue Performance Testing +# Kueue e2e Performance Testing ## Measurements diff --git a/test/performance/jobs/config.yaml b/test/performance/e2e/jobs/config.yaml similarity index 100% rename from test/performance/jobs/config.yaml rename to test/performance/e2e/jobs/config.yaml diff --git a/test/performance/jobs/job.yaml b/test/performance/e2e/jobs/job.yaml similarity index 100% rename from test/performance/jobs/job.yaml rename to test/performance/e2e/jobs/job.yaml diff --git a/test/performance/jobs/local-queue.yaml b/test/performance/e2e/jobs/local-queue.yaml similarity index 100% rename from test/performance/jobs/local-queue.yaml rename to test/performance/e2e/jobs/local-queue.yaml diff --git a/test/performance/jobs/prerequisites/cluster-queue.template b/test/performance/e2e/jobs/prerequisites/cluster-queue.template similarity index 100% rename from test/performance/jobs/prerequisites/cluster-queue.template rename to test/performance/e2e/jobs/prerequisites/cluster-queue.template diff --git a/test/performance/jobs/prerequisites/resource-flavor.yaml b/test/performance/e2e/jobs/prerequisites/resource-flavor.yaml similarity index 100% rename from test/performance/jobs/prerequisites/resource-flavor.yaml rename to test/performance/e2e/jobs/prerequisites/resource-flavor.yaml diff --git a/test/performance/jobs/run-test.sh b/test/performance/e2e/jobs/run-test.sh similarity index 100% rename from test/performance/jobs/run-test.sh rename to test/performance/e2e/jobs/run-test.sh diff --git a/test/performance/podgroups/.gitignore b/test/performance/e2e/podgroups/.gitignore similarity index 100% rename from test/performance/podgroups/.gitignore rename to test/performance/e2e/podgroups/.gitignore diff --git a/test/performance/podgroups/README.md b/test/performance/e2e/podgroups/README.md similarity index 97% rename from test/performance/podgroups/README.md rename to test/performance/e2e/podgroups/README.md index 9bd75d3787..6831196fb7 100644 --- a/test/performance/podgroups/README.md +++ b/test/performance/e2e/podgroups/README.md @@ -1,4 +1,4 @@ -# Kueue Pod Integration Performance Testing +# Kueue Pod Integration e2e Performance Testing ## Introduction A minimal setup for performance testing Plain Pods integration using clusterloader2. 
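The renamed targets keep the existing `SCALABILITY_*` knobs described in the Makefile and README changes above. A minimal sketch of how they might be invoked; the profile flag, scrape interval, and metrics URL values are illustrative assumptions, not values taken from the patch:

```bash
# Local run against envtest + minimalkueue, with a CPU profile and periodic metrics scraping
# (knob and target names come from this patch; the concrete values are illustrative).
SCALABILITY_CPU_PROFILE=1 SCALABILITY_SCRAPE_INTERVAL=1s make run-performance-scheduler

# Run against an existing cluster, scraping an already-exposed Kueue metrics endpoint
# (the URL below is an assumption for the example).
SCALABILITY_SCRAPE_INTERVAL=10s SCALABILITY_SCRAPE_URL=http://localhost:8080/metrics \
  make run-performance-scheduler-in-cluster
```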
diff --git a/test/performance/podgroups/manifest.diff b/test/performance/e2e/podgroups/manifest.diff similarity index 100% rename from test/performance/podgroups/manifest.diff rename to test/performance/e2e/podgroups/manifest.diff diff --git a/test/performance/podgroups/run-test.sh b/test/performance/e2e/podgroups/run-test.sh similarity index 100% rename from test/performance/podgroups/run-test.sh rename to test/performance/e2e/podgroups/run-test.sh diff --git a/test/performance/podgroups/templates/cluster-queue.yaml b/test/performance/e2e/podgroups/templates/cluster-queue.yaml similarity index 100% rename from test/performance/podgroups/templates/cluster-queue.yaml rename to test/performance/e2e/podgroups/templates/cluster-queue.yaml diff --git a/test/performance/podgroups/templates/local-queue.yaml b/test/performance/e2e/podgroups/templates/local-queue.yaml similarity index 100% rename from test/performance/podgroups/templates/local-queue.yaml rename to test/performance/e2e/podgroups/templates/local-queue.yaml diff --git a/test/performance/podgroups/templates/pod.yaml b/test/performance/e2e/podgroups/templates/pod.yaml similarity index 100% rename from test/performance/podgroups/templates/pod.yaml rename to test/performance/e2e/podgroups/templates/pod.yaml diff --git a/test/performance/podgroups/templates/resource-flavor.yaml b/test/performance/e2e/podgroups/templates/resource-flavor.yaml similarity index 100% rename from test/performance/podgroups/templates/resource-flavor.yaml rename to test/performance/e2e/podgroups/templates/resource-flavor.yaml diff --git a/test/performance/podgroups/test-config.yaml b/test/performance/e2e/podgroups/test-config.yaml similarity index 100% rename from test/performance/podgroups/test-config.yaml rename to test/performance/e2e/podgroups/test-config.yaml diff --git a/test/scalability/README.md b/test/performance/scheduler/README.md similarity index 61% rename from test/scalability/README.md rename to test/performance/scheduler/README.md index 817d9a167e..7daef3844f 100644 --- a/test/scalability/README.md +++ b/test/performance/scheduler/README.md @@ -23,49 +23,49 @@ It is designed to offer the Kueue scheduling capabilities without any additional ## Checker -Checks the results of a scalability against a set of expected value defined as [default_rangespec](./default_rangespec.yaml). +Checks the results of a performance-scheduler against a set of expected value defined as [default_rangespec](./default_rangespec.yaml). # Usage ## Run in an existing cluster ```bash -make run-scalability-in-cluster +make run-performance-scheduler-in-cluster ``` -Will run a scalability scenario against an existing cluster (connectable by the host's default kubeconfig), and store the resulting artifacts are stored in `$(PROJECT_DIR)/bin/run-scalability-in-cluster`. +Will run a performance-scheduler scenario against an existing cluster (connectable by the host's default kubeconfig), and store the resulting artifacts are stored in `$(PROJECT_DIR)/bin/run-performance-scheduler-in-cluster`. 
-The generation config to be used can be set in `SCALABILITY_GENERATOR_CONFIG` by default using `$(PROJECT_DIR)/test/scalability/default_generator_config.yaml` +The generation config to be used can be set in `SCALABILITY_GENERATOR_CONFIG` by default using `$(PROJECT_DIR)/test/performance/scheduler/default_generator_config.yaml` -Setting `SCALABILITY_SCRAPE_INTERVAL` to an interval value and `SCALABILITY_SCRAPE_URL` to an URL exposing kueue's metrics will cause the scalability runner to scrape that URL every interval and store the results in `$(PROJECT_DIR)/bin/run-scalability-in-cluster/metricsDump.tgz`. +Setting `SCALABILITY_SCRAPE_INTERVAL` to an interval value and `SCALABILITY_SCRAPE_URL` to an URL exposing kueue's metrics will cause the scalability runner to scrape that URL every interval and store the results in `$(PROJECT_DIR)/bin/run-performance-scheduler-in-cluster/metricsDump.tgz`. Check [installation guide](https://kueue.sigs.k8s.io/docs/installation) for cluster and [observability](https://kueue.sigs.k8s.io/docs/installation/#add-metrics-scraping-for-prometheus-operator). ## Run with minimalkueue ```bash -make run-scalability +make run-performance-scheduler ``` -Will run a scalability scenario against an [envtest](https://book.kubebuilder.io/reference/envtest.html) environment +Will run a performance-scheduler scenario against an [envtest](https://book.kubebuilder.io/reference/envtest.html) environment and an instance of minimalkueue. -The resulting artifacts are stored in `$(PROJECT_DIR)/bin/run-scalability`. +The resulting artifacts are stored in `$(PROJECT_DIR)/bin/run-performance-scheduler`. -The generation config to be used can be set in `SCALABILITY_GENERATOR_CONFIG` by default using `$(PROJECT_DIR)/test/scalability/default_generator_config.yaml` +The generation config to be used can be set in `SCALABILITY_GENERATOR_CONFIG` by default using `$(PROJECT_DIR)/test/performance/scheduler/default_generator_config.yaml` -Setting `SCALABILITY_CPU_PROFILE=1` will generate a cpuprofile of minimalkueue in `$(PROJECT_DIR)/bin/run-scalability/minimalkueue.cpu.prof` +Setting `SCALABILITY_CPU_PROFILE=1` will generate a cpuprofile of minimalkueue in `$(PROJECT_DIR)/bin/run-performance-scheduler/minimalkueue.cpu.prof` -Setting `SCALABILITY_KUEUE_LOGS=1` will save the logs of minimalkueue in `$(PROJECT_DIR)/bin/run-scalability/minimalkueue.out.log` and `$(PROJECT_DIR)/bin/run-scalability/minimalkueue.err.log` +Setting `SCALABILITY_KUEUE_LOGS=1` will save the logs of minimalkueue in `$(PROJECT_DIR)/bin/run-performance-scheduler/minimalkueue.out.log` and `$(PROJECT_DIR)/bin/run-performance-scheduler/minimalkueue.err.log` -Setting `SCALABILITY_SCRAPE_INTERVAL` to an interval value (e.g. `1s`) will expose the metrics of `minimalkueue` and have them collected by the scalability runner in `$(PROJECT_DIR)/bin/run-scalability/metricsDump.tgz` every interval. +Setting `SCALABILITY_SCRAPE_INTERVAL` to an interval value (e.g. `1s`) will expose the metrics of `minimalkueue` and have them collected by the scalability runner in `$(PROJECT_DIR)/bin/run-performance-scheduler/metricsDump.tgz` every interval. 
-## Run scalability test +## Run performance-scheduler test ```bash -make test-scalability +make test-performance-scheduler ``` -Runs the scalability with minimalkueue and checks the results against `$(PROJECT_DIR)/test/scalability/default_rangespec.yaml` +Runs the performance-scheduler with minimalkueue and checks the results against `$(PROJECT_DIR)/test/performance-scheduler/default_rangespec.yaml` ## Scrape result @@ -75,7 +75,7 @@ If an instance of [VictoriaMetrics](https://docs.victoriametrics.com/) listening ```bash TMPDIR=$(mktemp -d) - tar -xf ./bin/run-scalability/metricsDump.tgz -C $TMPDIR + tar -xf ./bin/run-performance-scheduler/metricsDump.tgz -C $TMPDIR for file in ${TMPDIR}/*.prometheus; do timestamp=$(basename "$file" .prometheus); curl -vX POST -T "$file" http://localhost:8428/api/v1/import/prometheus?timestamp="$timestamp"; done rm -r $TMPDIR diff --git a/test/scalability/checker/checker_test.go b/test/performance/scheduler/checker/checker_test.go similarity index 96% rename from test/scalability/checker/checker_test.go rename to test/performance/scheduler/checker/checker_test.go index 09a0d40d91..86feffc9f3 100644 --- a/test/scalability/checker/checker_test.go +++ b/test/performance/scheduler/checker/checker_test.go @@ -23,8 +23,8 @@ import ( "sigs.k8s.io/yaml" - "sigs.k8s.io/kueue/test/scalability/runner/recorder" - "sigs.k8s.io/kueue/test/scalability/runner/stats" + "sigs.k8s.io/kueue/test/performance/scheduler/runner/recorder" + "sigs.k8s.io/kueue/test/performance/scheduler/runner/stats" ) var ( diff --git a/test/scalability/default_generator_config.yaml b/test/performance/scheduler/default_generator_config.yaml similarity index 100% rename from test/scalability/default_generator_config.yaml rename to test/performance/scheduler/default_generator_config.yaml diff --git a/test/scalability/default_rangespec.yaml b/test/performance/scheduler/default_rangespec.yaml similarity index 100% rename from test/scalability/default_rangespec.yaml rename to test/performance/scheduler/default_rangespec.yaml diff --git a/test/scalability/minimalkueue/main.go b/test/performance/scheduler/minimalkueue/main.go similarity index 100% rename from test/scalability/minimalkueue/main.go rename to test/performance/scheduler/minimalkueue/main.go diff --git a/test/scalability/runner/controller/controller.go b/test/performance/scheduler/runner/controller/controller.go similarity index 97% rename from test/scalability/runner/controller/controller.go rename to test/performance/scheduler/runner/controller/controller.go index 3e6cab6ca2..10b1492440 100644 --- a/test/scalability/runner/controller/controller.go +++ b/test/performance/scheduler/runner/controller/controller.go @@ -38,8 +38,8 @@ import ( kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" "sigs.k8s.io/kueue/pkg/constants" "sigs.k8s.io/kueue/pkg/workload" - "sigs.k8s.io/kueue/test/scalability/runner/generator" - "sigs.k8s.io/kueue/test/scalability/runner/recorder" + "sigs.k8s.io/kueue/test/performance/scheduler/runner/generator" + "sigs.k8s.io/kueue/test/performance/scheduler/runner/recorder" ) type reconciler struct { diff --git a/test/scalability/runner/generator/generator.go b/test/performance/scheduler/runner/generator/generator.go similarity index 97% rename from test/scalability/runner/generator/generator.go rename to test/performance/scheduler/runner/generator/generator.go index b0914ee5e4..2450eae89b 100644 --- a/test/scalability/runner/generator/generator.go +++ b/test/performance/scheduler/runner/generator/generator.go @@ -35,9 +35,9 @@ 
import ( const ( resourceFlavorName = "rf" - RunningTimeLabel = "kueue.x-k8s.io/scalability-running-ms" - ClassLabel = "kueue.x-k8s.io/scalability-class" - CleanupLabel = "kueue.x-k8s.io/scalability-cleanup" + RunningTimeLabel = "kueue.x-k8s.io/performance-scheduler-running-ms" + ClassLabel = "kueue.x-k8s.io/performance-scheduler-class" + CleanupLabel = "kueue.x-k8s.io/performance-scheduler-cleanup" ) type WorkloadTemplate struct { diff --git a/test/scalability/runner/generator/generator_test.go b/test/performance/scheduler/runner/generator/generator_test.go similarity index 100% rename from test/scalability/runner/generator/generator_test.go rename to test/performance/scheduler/runner/generator/generator_test.go diff --git a/test/scalability/runner/main.go b/test/performance/scheduler/runner/main.go similarity index 97% rename from test/scalability/runner/main.go rename to test/performance/scheduler/runner/main.go index 08fcf6c7dc..608eec6f91 100644 --- a/test/scalability/runner/main.go +++ b/test/performance/scheduler/runner/main.go @@ -48,11 +48,11 @@ import ( kueuealpha "sigs.k8s.io/kueue/apis/kueue/v1alpha1" kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" utiltesting "sigs.k8s.io/kueue/pkg/util/testing" - "sigs.k8s.io/kueue/test/scalability/runner/controller" - "sigs.k8s.io/kueue/test/scalability/runner/generator" - "sigs.k8s.io/kueue/test/scalability/runner/recorder" - "sigs.k8s.io/kueue/test/scalability/runner/scraper" - "sigs.k8s.io/kueue/test/scalability/runner/stats" + "sigs.k8s.io/kueue/test/performance/scheduler/runner/controller" + "sigs.k8s.io/kueue/test/performance/scheduler/runner/generator" + "sigs.k8s.io/kueue/test/performance/scheduler/runner/recorder" + "sigs.k8s.io/kueue/test/performance/scheduler/runner/scraper" + "sigs.k8s.io/kueue/test/performance/scheduler/runner/stats" ) var ( diff --git a/test/scalability/runner/recorder/recorder.go b/test/performance/scheduler/runner/recorder/recorder.go similarity index 99% rename from test/scalability/runner/recorder/recorder.go rename to test/performance/scheduler/runner/recorder/recorder.go index 065addc4ac..bbc0d41cda 100644 --- a/test/scalability/runner/recorder/recorder.go +++ b/test/performance/scheduler/runner/recorder/recorder.go @@ -29,7 +29,7 @@ import ( "sigs.k8s.io/yaml" kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" - "sigs.k8s.io/kueue/test/scalability/runner/generator" + "sigs.k8s.io/kueue/test/performance/scheduler/runner/generator" ) type CQEvent struct { diff --git a/test/scalability/runner/scraper/scraper.go b/test/performance/scheduler/runner/scraper/scraper.go similarity index 100% rename from test/scalability/runner/scraper/scraper.go rename to test/performance/scheduler/runner/scraper/scraper.go diff --git a/test/scalability/runner/stats/stats.go b/test/performance/scheduler/runner/stats/stats.go similarity index 100% rename from test/scalability/runner/stats/stats.go rename to test/performance/scheduler/runner/stats/stats.go From 944eea2f863a30c5077d8935d7489a42e8000ffa Mon Sep 17 00:00:00 2001 From: Tomas Tormo Date: Fri, 26 Apr 2024 09:56:24 +0200 Subject: [PATCH 37/49] Conditions creation code cleanup (#2071) --- pkg/workload/workload.go | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/pkg/workload/workload.go b/pkg/workload/workload.go index cfce1ea7c8..ea93fcbf92 100644 --- a/pkg/workload/workload.go +++ b/pkg/workload/workload.go @@ -365,7 +365,6 @@ func UnsetQuotaReservationWithCondition(wl *kueue.Workload, reason, message stri condition := metav1.Condition{ Type: 
kueue.WorkloadQuotaReserved, Status: metav1.ConditionFalse, - LastTransitionTime: metav1.Now(), Reason: reason, Message: api.TruncateConditionMessage(message), ObservedGeneration: wl.Generation, @@ -429,12 +428,12 @@ func BaseSSAWorkload(w *kueue.Workload) *kueue.Workload { // The WorkloadAdmitted and WorkloadEvicted are added or updated if necessary. func SetQuotaReservation(w *kueue.Workload, admission *kueue.Admission) { w.Status.Admission = admission + message := fmt.Sprintf("Quota reserved in ClusterQueue %s", w.Status.Admission.ClusterQueue) admittedCond := metav1.Condition{ Type: kueue.WorkloadQuotaReserved, Status: metav1.ConditionTrue, - LastTransitionTime: metav1.Now(), Reason: "QuotaReserved", - Message: fmt.Sprintf("Quota reserved in ClusterQueue %s", w.Status.Admission.ClusterQueue), + Message: api.TruncateConditionMessage(message), ObservedGeneration: w.Generation, } apimeta.SetStatusCondition(&w.Status.Conditions, admittedCond) @@ -443,14 +442,14 @@ func SetQuotaReservation(w *kueue.Workload, admission *kueue.Admission) { if evictedCond := apimeta.FindStatusCondition(w.Status.Conditions, kueue.WorkloadEvicted); evictedCond != nil { evictedCond.Status = metav1.ConditionFalse evictedCond.Reason = "QuotaReserved" - evictedCond.Message = "Previously: " + evictedCond.Message + evictedCond.Message = api.TruncateConditionMessage("Previously: " + evictedCond.Message) evictedCond.LastTransitionTime = metav1.Now() } // reset Preempted condition if present. if preemptedCond := apimeta.FindStatusCondition(w.Status.Conditions, kueue.WorkloadPreempted); preemptedCond != nil { preemptedCond.Status = metav1.ConditionFalse preemptedCond.Reason = "QuotaReserved" - preemptedCond.Message = "Previously: " + preemptedCond.Message + preemptedCond.Message = api.TruncateConditionMessage("Previously: " + preemptedCond.Message) preemptedCond.LastTransitionTime = metav1.Now() } } @@ -460,7 +459,7 @@ func SetPreemptedCondition(w *kueue.Workload, reason string, message string) { Type: kueue.WorkloadPreempted, Status: metav1.ConditionTrue, Reason: reason, - Message: message, + Message: api.TruncateConditionMessage(message), } apimeta.SetStatusCondition(&w.Status.Conditions, condition) } @@ -469,9 +468,8 @@ func SetEvictedCondition(w *kueue.Workload, reason string, message string) { condition := metav1.Condition{ Type: kueue.WorkloadEvicted, Status: metav1.ConditionTrue, - LastTransitionTime: metav1.Now(), Reason: reason, - Message: message, + Message: api.TruncateConditionMessage(message), ObservedGeneration: w.Generation, } apimeta.SetStatusCondition(&w.Status.Conditions, condition) From 62e0a81bb42b058f36462db192f5ddd52e1d1901 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Wo=C5=BAniak?= Date: Fri, 26 Apr 2024 12:11:29 +0200 Subject: [PATCH 38/49] Adjust the flaky test for preemption (#2061) --- pkg/controller/core/workload_controller.go | 23 +++++++++++++--------- pkg/queue/manager.go | 6 +++--- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/pkg/controller/core/workload_controller.go b/pkg/controller/core/workload_controller.go index 28fe8cd4b2..f7d09da624 100644 --- a/pkg/controller/core/workload_controller.go +++ b/pkg/controller/core/workload_controller.go @@ -546,6 +546,11 @@ func (r *WorkloadReconciler) Update(e event.UpdateEvent) bool { log.V(2).Info("ClusterQueue for workload didn't exist; ignored for now") } case (prevStatus == quotaReserved || prevStatus == admitted) && status == pending: + var backoff time.Duration + if wlCopy.Status.RequeueState != nil && 
wlCopy.Status.RequeueState.RequeueAt != nil { + backoff = time.Until(wl.Status.RequeueState.RequeueAt.Time) + } + immediate := backoff <= 0 // trigger the move of associated inadmissibleWorkloads, if there are any. r.queues.QueueAssociatedInadmissibleWorkloadsAfter(ctx, wl, func() { // Delete the workload from cache while holding the queues lock @@ -554,16 +559,16 @@ func (r *WorkloadReconciler) Update(e event.UpdateEvent) bool { if err := r.cache.DeleteWorkload(wl); err != nil { log.Error(err, "Failed to delete workload from cache") } - }) - var backoff time.Duration - if wlCopy.Status.RequeueState != nil && wlCopy.Status.RequeueState.RequeueAt != nil { - backoff = time.Until(wl.Status.RequeueState.RequeueAt.Time) - } - if backoff <= 0 { - if !r.queues.AddOrUpdateWorkload(wlCopy) { - log.V(2).Info("Queue for workload didn't exist; ignored for now") + // Here we don't take the lock as it is already taken by the wrapping + // function. + if immediate { + if !r.queues.AddOrUpdateWorkloadWithoutLock(wlCopy) { + log.V(2).Info("Queue for workload didn't exist; ignored for now") + } } - } else { + }) + + if !immediate { log.V(3).Info("Workload to be requeued after backoff", "backoff", backoff, "requeueAt", wl.Status.RequeueState.RequeueAt.Time) time.AfterFunc(backoff, func() { updatedWl := kueue.Workload{} diff --git a/pkg/queue/manager.go b/pkg/queue/manager.go index c465cfcdf7..631dc492e5 100644 --- a/pkg/queue/manager.go +++ b/pkg/queue/manager.go @@ -293,10 +293,10 @@ func (m *Manager) ClusterQueueForWorkload(wl *kueue.Workload) (string, bool) { func (m *Manager) AddOrUpdateWorkload(w *kueue.Workload) bool { m.Lock() defer m.Unlock() - return m.addOrUpdateWorkload(w) + return m.AddOrUpdateWorkloadWithoutLock(w) } -func (m *Manager) addOrUpdateWorkload(w *kueue.Workload) bool { +func (m *Manager) AddOrUpdateWorkloadWithoutLock(w *kueue.Workload) bool { qKey := workload.QueueKey(w) q := m.localQueues[qKey] if q == nil { @@ -453,7 +453,7 @@ func (m *Manager) UpdateWorkload(oldW, w *kueue.Workload) bool { if oldW.Spec.QueueName != w.Spec.QueueName { m.deleteWorkloadFromQueueAndClusterQueue(w, workload.QueueKey(oldW)) } - return m.addOrUpdateWorkload(w) + return m.AddOrUpdateWorkloadWithoutLock(w) } // CleanUpOnContext tracks the context. When closed, it wakes routines waiting From 0446cb5e54655d3057e2e96a50de1f9da03b0a0e Mon Sep 17 00:00:00 2001 From: Traian Schiau <55734665+trasc@users.noreply.github.com> Date: Fri, 26 Apr 2024 13:42:54 +0300 Subject: [PATCH 39/49] [makefile] Drop old `test-scalability` make target. 
(#2078) --- Makefile | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Makefile b/Makefile index 5e07776412..9f22c73847 100644 --- a/Makefile +++ b/Makefile @@ -258,10 +258,6 @@ test-performance-scheduler: gotestsum run-performance-scheduler --cmdStats=$(SCALABILITY_RUN_DIR)/minimalkueue.stats.yaml \ --range=$(PROJECT_DIR)/test/performance/scheduler/default_rangespec.yaml -# drop this once is no longer used by CI -.PHONY: test-scalability -test-scalability: test-performance-scheduler - .PHONY: run-performance-scheduler-in-cluster run-performance-scheduler-in-cluster: envtest performance-scheduler-runner mkdir -p $(ARTIFACTS)/run-performance-scheduler-in-cluster From 9d77f2fe59729435e05c1be29cb6373a2eabfa26 Mon Sep 17 00:00:00 2001 From: vladikkuzn <51460778+vladikkuzn@users.noreply.github.com> Date: Fri, 26 Apr 2024 18:03:32 +0300 Subject: [PATCH 40/49] Include a Failed condition in Workloads (#2026) * * Replace workload finished reason with succeeded and failed reasons * * Replace workload finished interface `Finished() (metav1.Condition, bool)` with `Finished() (reason, message string, finished bool)` * * Update comments for WorkloadFinishedReason in workload_types.go * Update Finished() (message string, success, finished bool) to return success instead of reason * * Copy message from underlying job condition --- apis/kueue/v1beta1/workload_types.go | 14 +++++++++ pkg/controller/core/workload_controller.go | 2 +- .../core/workload_controller_test.go | 2 +- pkg/controller/jobframework/interface.go | 3 +- pkg/controller/jobframework/reconciler.go | 14 +++++---- pkg/controller/jobs/job/job_controller.go | 17 ++++------- .../jobs/job/job_controller_test.go | 14 +++++++-- .../jobs/jobset/jobset_controller.go | 24 ++++----------- .../kubeflowjob/kubeflowjob_controller.go | 23 ++++++-------- .../jobs/mpijob/mpijob_controller.go | 18 ++++------- pkg/controller/jobs/pod/pod_controller.go | 30 +++++++++---------- .../jobs/pod/pod_controller_test.go | 22 +++++++++----- .../jobs/raycluster/raycluster_controller.go | 13 ++++---- .../jobs/rayjob/rayjob_controller.go | 16 +++++----- pkg/util/testingjobs/pod/wrappers.go | 6 ++++ test/e2e/multikueue/e2e_test.go | 13 ++++---- test/e2e/singlecluster/e2e_test.go | 2 +- test/e2e/singlecluster/jobset_test.go | 7 +++-- .../core/workload_controller_test.go | 4 +-- .../jobs/job/job_controller_test.go | 4 +-- .../integration/multikueue/multikueue_test.go | 11 +++---- 21 files changed, 131 insertions(+), 128 deletions(-) diff --git a/apis/kueue/v1beta1/workload_types.go b/apis/kueue/v1beta1/workload_types.go index fd96a53704..74470ec182 100644 --- a/apis/kueue/v1beta1/workload_types.go +++ b/apis/kueue/v1beta1/workload_types.go @@ -329,6 +329,20 @@ const ( WorkloadEvictedByDeactivation = "InactiveWorkload" ) +const ( + // WorkloadFinishedReasonSucceeded indicates that the workload's job finished successfully. + WorkloadFinishedReasonSucceeded = "Succeeded" + + // WorkloadFinishedReasonFailed indicates that the workload's job finished with an error. + WorkloadFinishedReasonFailed = "Failed" + + // WorkloadFinishedReasonAdmissionChecksRejected indicates that the workload was rejected by admission checks. + WorkloadFinishedReasonAdmissionChecksRejected = "AdmissionChecksRejected" + + // WorkloadFinishedReasonOutOfSync indicates that the prebuilt workload is not in sync with its parent job. 
+ WorkloadFinishedReasonOutOfSync = "OutOfSync" +) + // +genclient // +kubebuilder:object:root=true // +kubebuilder:storageversion diff --git a/pkg/controller/core/workload_controller.go b/pkg/controller/core/workload_controller.go index f7d09da624..7cf592752c 100644 --- a/pkg/controller/core/workload_controller.go +++ b/pkg/controller/core/workload_controller.go @@ -207,7 +207,7 @@ func (r *WorkloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c log.V(3).Info("Workload has Rejected admission checks, Finish with failure") err := workload.UpdateStatus(ctx, r.client, &wl, kueue.WorkloadFinished, metav1.ConditionTrue, - "AdmissionChecksRejected", + kueue.WorkloadFinishedReasonAdmissionChecksRejected, fmt.Sprintf("Admission checks %v are rejected", rejectedChecks), constants.KueueName) if err == nil { diff --git a/pkg/controller/core/workload_controller_test.go b/pkg/controller/core/workload_controller_test.go index e776911b78..cd28f01638 100644 --- a/pkg/controller/core/workload_controller_test.go +++ b/pkg/controller/core/workload_controller_test.go @@ -466,7 +466,7 @@ func TestReconcile(t *testing.T) { Condition(metav1.Condition{ Type: "Finished", Status: "True", - Reason: "AdmissionChecksRejected", + Reason: kueue.WorkloadFinishedReasonAdmissionChecksRejected, Message: "Admission checks [check] are rejected", }). Obj(), diff --git a/pkg/controller/jobframework/interface.go b/pkg/controller/jobframework/interface.go index f264e7db5a..2408a054ea 100644 --- a/pkg/controller/jobframework/interface.go +++ b/pkg/controller/jobframework/interface.go @@ -16,7 +16,6 @@ package jobframework import ( "context" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" @@ -44,7 +43,7 @@ type GenericJob interface { // Finished means whether the job is completed/failed or not, // condition represents the workload finished condition. // Observed generation of the workload is set by the jobframework. - Finished() (condition metav1.Condition, finished bool) + Finished() (message string, success, finished bool) // PodSets will build workload podSets corresponding to the job. PodSets() []kueue.PodSet // IsActive returns true if there are any running pods. diff --git a/pkg/controller/jobframework/reconciler.go b/pkg/controller/jobframework/reconciler.go index 2e412cde88..c18d1604de 100644 --- a/pkg/controller/jobframework/reconciler.go +++ b/pkg/controller/jobframework/reconciler.go @@ -256,7 +256,7 @@ func (r *JobReconciler) ReconcileGenericJob(ctx context.Context, req ctrl.Reques // if this is a non-standalone job, suspend the job if its parent workload is not found or not admitted. if !isStandaloneJob { - _, finished := job.Finished() + _, _, finished := job.Finished() if !finished && !job.IsSuspended() { if parentWorkload, err := r.getParentWorkload(ctx, job, object); err != nil { log.Error(err, "couldn't get the parent job workload") @@ -307,9 +307,13 @@ func (r *JobReconciler) ReconcileGenericJob(ctx context.Context, req ctrl.Reques } // 2. handle job is finished. 
- if condition, finished := job.Finished(); finished { + if message, success, finished := job.Finished(); finished { if wl != nil && !apimeta.IsStatusConditionTrue(wl.Status.Conditions, kueue.WorkloadFinished) { - err := workload.UpdateStatus(ctx, r.client, wl, condition.Type, condition.Status, condition.Reason, condition.Message, constants.JobControllerName) + reason := kueue.WorkloadFinishedReasonSucceeded + if !success { + reason = kueue.WorkloadFinishedReasonFailed + } + err := workload.UpdateStatus(ctx, r.client, wl, kueue.WorkloadFinished, metav1.ConditionTrue, reason, message, constants.JobControllerName) if err != nil && !apierrors.IsNotFound(err) { return ctrl.Result{}, err } @@ -541,7 +545,7 @@ func (r *JobReconciler) ensureOneWorkload(ctx context.Context, job GenericJob, o w = toDelete[0] } - if _, finished := job.Finished(); !finished { + if _, _, finished := job.Finished(); !finished { var msg string if w == nil { msg = "Missing Workload; unable to restore pod templates" @@ -636,7 +640,7 @@ func (r *JobReconciler) ensurePrebuiltWorkloadInSync(ctx context.Context, wl *ku err := workload.UpdateStatus(ctx, r.client, wl, kueue.WorkloadFinished, metav1.ConditionTrue, - "OutOfSync", + kueue.WorkloadFinishedReasonOutOfSync, "The prebuilt workload is out of sync with its user job", constants.JobControllerName) return false, err diff --git a/pkg/controller/jobs/job/job_controller.go b/pkg/controller/jobs/job/job_controller.go index b8a0d1ab53..5c05d70300 100644 --- a/pkg/controller/jobs/job/job_controller.go +++ b/pkg/controller/jobs/job/job_controller.go @@ -277,30 +277,23 @@ func (j *Job) RestorePodSetsInfo(podSetsInfo []podset.PodSetInfo) bool { return changed } -func (j *Job) Finished() (metav1.Condition, bool) { +func (j *Job) Finished() (message string, success, finished bool) { var conditionType batchv1.JobConditionType - var finished bool - for _, c := range j.Status.Conditions { if (c.Type == batchv1.JobComplete || c.Type == batchv1.JobFailed) && c.Status == corev1.ConditionTrue { conditionType = c.Type finished = true + message = c.Message break } } - condition := metav1.Condition{ - Type: kueue.WorkloadFinished, - Status: metav1.ConditionTrue, - Reason: "JobFinished", - Message: "Job finished successfully", - ObservedGeneration: j.Generation, - } + success = true if conditionType == batchv1.JobFailed { - condition.Message = "Job failed" + success = false } - return condition, finished + return message, success, finished } func (j *Job) PodsReady() bool { diff --git a/pkg/controller/jobs/job/job_controller_test.go b/pkg/controller/jobs/job/job_controller_test.go index a6eee4360e..b8ae8ff329 100644 --- a/pkg/controller/jobs/job/job_controller_test.go +++ b/pkg/controller/jobs/job/job_controller_test.go @@ -1700,7 +1700,11 @@ func TestReconciler(t *testing.T) { }, "when job completes, workload is marked as finished": { job: *baseJobWrapper.Clone(). - Condition(batchv1.JobCondition{Type: batchv1.JobComplete, Status: corev1.ConditionTrue}). + Condition(batchv1.JobCondition{ + Type: batchv1.JobComplete, + Status: corev1.ConditionTrue, + Message: "Job finished successfully", + }). Obj(), workloads: []kueue.Workload{ *baseWorkloadWrapper.Clone(). @@ -1709,7 +1713,11 @@ func TestReconciler(t *testing.T) { Obj(), }, wantJob: *baseJobWrapper.Clone(). - Condition(batchv1.JobCondition{Type: batchv1.JobComplete, Status: corev1.ConditionTrue}). + Condition(batchv1.JobCondition{ + Type: batchv1.JobComplete, + Status: corev1.ConditionTrue, + Message: "Job finished successfully", + }). 
Obj(), wantWorkloads: []kueue.Workload{ *baseWorkloadWrapper.Clone(). @@ -1717,7 +1725,7 @@ func TestReconciler(t *testing.T) { Condition(metav1.Condition{ Type: kueue.WorkloadFinished, Status: metav1.ConditionTrue, - Reason: "JobFinished", + Reason: kueue.WorkloadFinishedReasonSucceeded, Message: "Job finished successfully", ObservedGeneration: 1, }). diff --git a/pkg/controller/jobs/jobset/jobset_controller.go b/pkg/controller/jobs/jobset/jobset_controller.go index 6011ad56b7..959fd8c9cd 100644 --- a/pkg/controller/jobs/jobset/jobset_controller.go +++ b/pkg/controller/jobs/jobset/jobset_controller.go @@ -146,26 +146,14 @@ func (j *JobSet) RestorePodSetsInfo(podSetsInfo []podset.PodSetInfo) bool { return changed } -func (j *JobSet) Finished() (metav1.Condition, bool) { - if apimeta.IsStatusConditionTrue(j.Status.Conditions, string(jobsetapi.JobSetCompleted)) { - condition := metav1.Condition{ - Type: kueue.WorkloadFinished, - Status: metav1.ConditionTrue, - Reason: "JobSetFinished", - Message: "JobSet finished successfully", - } - return condition, true +func (j *JobSet) Finished() (message string, success, finished bool) { + if c := apimeta.FindStatusCondition(j.Status.Conditions, string(jobsetapi.JobSetCompleted)); c != nil && c.Status == metav1.ConditionTrue { + return c.Message, true, true } - if apimeta.IsStatusConditionTrue(j.Status.Conditions, string(jobsetapi.JobSetFailed)) { - condition := metav1.Condition{ - Type: kueue.WorkloadFinished, - Status: metav1.ConditionTrue, - Reason: "JobSetFinished", - Message: "JobSet failed", - } - return condition, true + if c := apimeta.FindStatusCondition(j.Status.Conditions, string(jobsetapi.JobSetFailed)); c != nil && c.Status == metav1.ConditionTrue { + return c.Message, false, true } - return metav1.Condition{}, false + return message, success, false } func (j *JobSet) PodsReady() bool { diff --git a/pkg/controller/jobs/kubeflow/kubeflowjob/kubeflowjob_controller.go b/pkg/controller/jobs/kubeflow/kubeflowjob/kubeflowjob_controller.go index fd6bdcf1df..94cad2ee78 100644 --- a/pkg/controller/jobs/kubeflow/kubeflowjob/kubeflowjob_controller.go +++ b/pkg/controller/jobs/kubeflow/kubeflowjob/kubeflowjob_controller.go @@ -21,7 +21,6 @@ import ( kftraining "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" @@ -82,30 +81,26 @@ func (j *KubeflowJob) RestorePodSetsInfo(podSetsInfo []podset.PodSetInfo) bool { return changed } -func (j *KubeflowJob) Finished() (metav1.Condition, bool) { - var conditionType kftraining.JobConditionType - var finished bool +func (j *KubeflowJob) Finished() (message string, success, finished bool) { if j.KFJobControl.JobStatus() == nil { - return metav1.Condition{}, false + return message, finished, false } + var conditionType kftraining.JobConditionType for _, c := range j.KFJobControl.JobStatus().Conditions { if (c.Type == kftraining.JobSucceeded || c.Type == kftraining.JobFailed) && c.Status == corev1.ConditionTrue { conditionType = c.Type finished = true + message = c.Message break } } - message := "Job finished successfully" + + success = true if conditionType == kftraining.JobFailed { - message = "Job failed" + success = false } - condition := metav1.Condition{ - Type: kueue.WorkloadFinished, - Status: metav1.ConditionTrue, - Reason: "JobFinished", - Message: message, - } - return condition, finished + + return message, 
success, finished } func (j *KubeflowJob) PodSets() []kueue.PodSet { diff --git a/pkg/controller/jobs/mpijob/mpijob_controller.go b/pkg/controller/jobs/mpijob/mpijob_controller.go index acc36b2416..cb5df91772 100644 --- a/pkg/controller/jobs/mpijob/mpijob_controller.go +++ b/pkg/controller/jobs/mpijob/mpijob_controller.go @@ -148,29 +148,23 @@ func (j *MPIJob) RestorePodSetsInfo(podSetsInfo []podset.PodSetInfo) bool { return changed } -func (j *MPIJob) Finished() (metav1.Condition, bool) { +func (j *MPIJob) Finished() (message string, success, finished bool) { var conditionType kubeflow.JobConditionType - var finished bool for _, c := range j.Status.Conditions { if (c.Type == kubeflow.JobSucceeded || c.Type == kubeflow.JobFailed) && c.Status == corev1.ConditionTrue { conditionType = c.Type finished = true + message = c.Message break } } - message := "Job finished successfully" + success = true if conditionType == kubeflow.JobFailed { - message = "Job failed" + success = false } - condition := metav1.Condition{ - Type: kueue.WorkloadFinished, - Status: metav1.ConditionTrue, - Reason: "JobFinished", - Message: message, - // ObservedGeneration is added via Update status by the job framework - } - return condition, finished + + return message, success, finished } // PriorityClass calculates the priorityClass name needed for workload according to the following priorities: diff --git a/pkg/controller/jobs/pod/pod_controller.go b/pkg/controller/jobs/pod/pod_controller.go index eecf864c69..764e6402c7 100644 --- a/pkg/controller/jobs/pod/pod_controller.go +++ b/pkg/controller/jobs/pod/pod_controller.go @@ -316,25 +316,24 @@ func (p *Pod) RestorePodSetsInfo(_ []podset.PodSetInfo) bool { // Finished means whether the job is completed/failed or not, // condition represents the workload finished condition. -func (p *Pod) Finished() (metav1.Condition, bool) { - finished := true - - condition := metav1.Condition{ - Type: kueue.WorkloadFinished, - Status: metav1.ConditionTrue, - Reason: "JobFinished", - Message: "Job finished successfully", - } +func (p *Pod) Finished() (message string, success, finished bool) { + finished = true + success = true if !p.isGroup { ph := p.pod.Status.Phase finished = ph == corev1.PodSucceeded || ph == corev1.PodFailed if ph == corev1.PodFailed { - condition.Message = "Job failed" + message = p.pod.Status.Message + success = false + } + + if ph == corev1.PodSucceeded { + message = p.pod.Status.Message } - return condition, finished + return message, success, finished } isActive := false succeededCount := 0 @@ -342,7 +341,8 @@ func (p *Pod) Finished() (metav1.Condition, bool) { groupTotalCount, err := p.groupTotalCount() if err != nil { ctrl.Log.V(2).Error(err, "failed to check if pod group is finished") - return metav1.Condition{}, false + message = "failed to check if pod group is finished" + return message, success, false } for _, pod := range p.list.Items { if pod.Status.Phase == corev1.PodSucceeded { @@ -357,12 +357,12 @@ func (p *Pod) Finished() (metav1.Condition, bool) { unretriableGroup := p.isUnretriableGroup() if succeededCount == groupTotalCount || (!isActive && unretriableGroup) { - condition.Message = fmt.Sprintf("Pods succeeded: %d/%d.", succeededCount, groupTotalCount) + message = fmt.Sprintf("Pods succeeded: %d/%d.", succeededCount, groupTotalCount) } else { - return metav1.Condition{}, false + return message, success, false } - return condition, finished + return message, success, finished } // PodSets will build workload podSets corresponding to the job. 
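The integrations above all move from building a `metav1.Condition` inside `Finished()` to returning the terminal condition's message plus a success flag, leaving it to the jobframework reconciler to pick `WorkloadFinishedReasonSucceeded` or `WorkloadFinishedReasonFailed` and to set the `Finished` condition itself. A minimal sketch of what the revised `GenericJob.Finished` contract expects from an integration, using a hypothetical `CustomJob` wrapper around a batch/v1 Job (not part of this patch):

```go
package example

import (
	batchv1 "k8s.io/api/batch/v1"
	corev1 "k8s.io/api/core/v1"
)

// CustomJob is a hypothetical type used only to illustrate the revised
// GenericJob.Finished signature; the real integrations live under
// pkg/controller/jobs.
type CustomJob struct {
	batchv1.Job
}

// Finished reports whether the job reached a terminal state, whether it
// succeeded, and the message to copy into the Workload's Finished condition.
// The jobframework reconciler derives the condition reason from `success`.
func (j *CustomJob) Finished() (message string, success, finished bool) {
	for _, c := range j.Status.Conditions {
		if c.Status != corev1.ConditionTrue {
			continue
		}
		switch c.Type {
		case batchv1.JobComplete:
			return c.Message, true, true
		case batchv1.JobFailed:
			return c.Message, false, true
		}
	}
	return "", false, false
}
```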
diff --git a/pkg/controller/jobs/pod/pod_controller_test.go b/pkg/controller/jobs/pod/pod_controller_test.go index ea08fd4174..5214f07b06 100644 --- a/pkg/controller/jobs/pod/pod_controller_test.go +++ b/pkg/controller/jobs/pod/pod_controller_test.go @@ -364,11 +364,13 @@ func TestReconciler(t *testing.T) { Label("kueue.x-k8s.io/managed", "true"). KueueFinalizer(). StatusPhase(corev1.PodSucceeded). + StatusMessage("Job finished successfully"). Obj()}, wantPods: []corev1.Pod{*basePodWrapper. Clone(). Label("kueue.x-k8s.io/managed", "true"). StatusPhase(corev1.PodSucceeded). + StatusMessage("Job finished successfully"). Obj()}, workloads: []kueue.Workload{ *utiltesting.MakeWorkload("unit-test", "ns").Finalizers(kueue.ResourceInUseFinalizerName). @@ -387,7 +389,7 @@ func TestReconciler(t *testing.T) { Condition(metav1.Condition{ Type: "Finished", Status: "True", - Reason: "JobFinished", + Reason: kueue.WorkloadFinishedReasonSucceeded, Message: "Job finished successfully", }). Obj(), @@ -416,11 +418,13 @@ func TestReconciler(t *testing.T) { Clone(). Label("kueue.x-k8s.io/managed", "true"). StatusPhase(corev1.PodSucceeded). + StatusMessage("Job finished successfully"). Obj()}, wantPods: []corev1.Pod{*basePodWrapper. Clone(). Label("kueue.x-k8s.io/managed", "true"). StatusPhase(corev1.PodSucceeded). + StatusMessage("Job finished successfully"). Obj()}, workloads: []kueue.Workload{ *utiltesting.MakeWorkload("unit-test", "ns").Finalizers(kueue.ResourceInUseFinalizerName). @@ -439,7 +443,7 @@ func TestReconciler(t *testing.T) { Condition(metav1.Condition{ Type: "Finished", Status: "True", - Reason: "JobFinished", + Reason: kueue.WorkloadFinishedReasonSucceeded, Message: "Job finished successfully", }). Obj(), @@ -906,7 +910,7 @@ func TestReconciler(t *testing.T) { Condition(metav1.Condition{ Type: "Finished", Status: "True", - Reason: "JobFinished", + Reason: kueue.WorkloadFinishedReasonSucceeded, Message: "Pods succeeded: 2/2.", }). Obj(), @@ -1074,7 +1078,7 @@ func TestReconciler(t *testing.T) { Condition(metav1.Condition{ Type: "Finished", Status: "True", - Reason: "JobFinished", + Reason: kueue.WorkloadFinishedReasonSucceeded, Message: "Pods succeeded: 1/2. Pods failed: 1/2", }). Obj(), @@ -1093,7 +1097,7 @@ func TestReconciler(t *testing.T) { Condition(metav1.Condition{ Type: "Finished", Status: "True", - Reason: "JobFinished", + Reason: kueue.WorkloadFinishedReasonSucceeded, Message: "Pods succeeded: 1/2. Pods failed: 1/2", }). Obj(), @@ -1497,7 +1501,7 @@ func TestReconciler(t *testing.T) { Condition(metav1.Condition{ Type: "Finished", Status: "True", - Reason: "JobFinished", + Reason: kueue.WorkloadFinishedReasonSucceeded, Message: "Pods succeeded: 2/2.", }). Obj(), @@ -2117,7 +2121,7 @@ func TestReconciler(t *testing.T) { metav1.Condition{ Type: kueue.WorkloadFinished, Status: metav1.ConditionTrue, - Reason: "JobFinished", + Reason: kueue.WorkloadFinishedReasonSucceeded, Message: "Pods succeeded: 1/3.", }, ). @@ -3614,6 +3618,7 @@ func TestReconciler_ErrorFinalizingPod(t *testing.T) { Label("kueue.x-k8s.io/managed", "true"). KueueFinalizer(). StatusPhase(corev1.PodSucceeded). + StatusMessage("Job finished successfully"). Obj() wl := *utiltesting.MakeWorkload("unit-test", "ns").Finalizers(kueue.ResourceInUseFinalizerName). @@ -3684,6 +3689,7 @@ func TestReconciler_ErrorFinalizingPod(t *testing.T) { Clone(). Label("kueue.x-k8s.io/managed", "true"). StatusPhase(corev1.PodSucceeded). + StatusMessage("Job finished successfully"). 
Obj() if diff := cmp.Diff(wantPod, gotPod, podCmpOpts...); diff != "" { t.Errorf("Pod after second reconcile (-want,+got):\n%s", diff) @@ -3704,7 +3710,7 @@ func TestReconciler_ErrorFinalizingPod(t *testing.T) { metav1.Condition{ Type: kueue.WorkloadFinished, Status: metav1.ConditionTrue, - Reason: "JobFinished", + Reason: kueue.WorkloadFinishedReasonSucceeded, Message: "Job finished successfully", }, ). diff --git a/pkg/controller/jobs/raycluster/raycluster_controller.go b/pkg/controller/jobs/raycluster/raycluster_controller.go index 2a44cc8e17..a3f74f1abf 100644 --- a/pkg/controller/jobs/raycluster/raycluster_controller.go +++ b/pkg/controller/jobs/raycluster/raycluster_controller.go @@ -160,16 +160,13 @@ func (j *RayCluster) RestorePodSetsInfo(podSetsInfo []podset.PodSetInfo) bool { return changed } -func (j *RayCluster) Finished() (metav1.Condition, bool) { - condition := metav1.Condition{ - Type: kueue.WorkloadFinished, - Status: metav1.ConditionFalse, - Reason: string(j.Status.State), - Message: j.Status.Reason, - ObservedGeneration: j.Generation, +func (j *RayCluster) Finished() (message string, success, finished bool) { + success = true + if j.Status.State == rayv1.Failed { + success = false } // Technically a RayCluster is never "finished" - return condition, false + return j.Status.Reason, success, false } func (j *RayCluster) PodsReady() bool { diff --git a/pkg/controller/jobs/rayjob/rayjob_controller.go b/pkg/controller/jobs/rayjob/rayjob_controller.go index 7037f0fb94..cce1dc0cb8 100644 --- a/pkg/controller/jobs/rayjob/rayjob_controller.go +++ b/pkg/controller/jobs/rayjob/rayjob_controller.go @@ -160,16 +160,14 @@ func (j *RayJob) RestorePodSetsInfo(podSetsInfo []podset.PodSetInfo) bool { return changed } -func (j *RayJob) Finished() (metav1.Condition, bool) { - condition := metav1.Condition{ - Type: kueue.WorkloadFinished, - Status: metav1.ConditionTrue, - Reason: string(j.Status.JobStatus), - Message: j.Status.Message, - // ObservedGeneration is added via Update status by the job framework +func (j *RayJob) Finished() (message string, success, finished bool) { + success = true + if j.Status.JobStatus == rayv1.JobStatusFailed { + success = false } - - return condition, j.Status.JobStatus == rayv1.JobStatusFailed || j.Status.JobStatus == rayv1.JobStatusSucceeded + message = j.Status.Message + finished = j.Status.JobStatus == rayv1.JobStatusFailed || j.Status.JobStatus == rayv1.JobStatusSucceeded + return message, success, finished } func (j *RayJob) PodsReady() bool { diff --git a/pkg/util/testingjobs/pod/wrappers.go b/pkg/util/testingjobs/pod/wrappers.go index a6e95ff337..4471d6d7d1 100644 --- a/pkg/util/testingjobs/pod/wrappers.go +++ b/pkg/util/testingjobs/pod/wrappers.go @@ -204,6 +204,12 @@ func (p *PodWrapper) StatusPhase(ph corev1.PodPhase) *PodWrapper { return p } +// StatusMessage updates status message of the Pod. 
+func (p *PodWrapper) StatusMessage(msg string) *PodWrapper { + p.Pod.Status.Message = msg + return p +} + // CreationTimestamp sets a creation timestamp for the pod object func (p *PodWrapper) CreationTimestamp(t time.Time) *PodWrapper { timestamp := metav1.NewTime(t).Rfc3339Copy() diff --git a/test/e2e/multikueue/e2e_test.go b/test/e2e/multikueue/e2e_test.go index e24fc09f03..550c030a12 100644 --- a/test/e2e/multikueue/e2e_test.go +++ b/test/e2e/multikueue/e2e_test.go @@ -218,11 +218,10 @@ var _ = ginkgo.Describe("MultiKueue", func() { g.Expect(k8sManagerClient.Get(ctx, wlLookupKey, createdLeaderWorkload)).To(gomega.Succeed()) g.Expect(apimeta.FindStatusCondition(createdLeaderWorkload.Status.Conditions, kueue.WorkloadFinished)).To(gomega.BeComparableTo(&metav1.Condition{ - Type: kueue.WorkloadFinished, - Status: metav1.ConditionTrue, - Reason: "JobFinished", - Message: `Job finished successfully`, - }, util.IgnoreConditionTimestampsAndObservedGeneration)) + Type: kueue.WorkloadFinished, + Status: metav1.ConditionTrue, + Reason: kueue.WorkloadFinishedReasonSucceeded, + }, util.IgnoreConditionMessage, util.IgnoreConditionTimestampsAndObservedGeneration)) }, util.LongTimeout, util.Interval).Should(gomega.Succeed()) }) @@ -314,8 +313,8 @@ var _ = ginkgo.Describe("MultiKueue", func() { g.Expect(apimeta.FindStatusCondition(createdLeaderWorkload.Status.Conditions, kueue.WorkloadFinished)).To(gomega.BeComparableTo(&metav1.Condition{ Type: kueue.WorkloadFinished, Status: metav1.ConditionTrue, - Reason: "JobSetFinished", - Message: "JobSet finished successfully", + Reason: kueue.WorkloadFinishedReasonSucceeded, + Message: "jobset completed successfully", }, util.IgnoreConditionTimestampsAndObservedGeneration)) }, util.LongTimeout, util.Interval).Should(gomega.Succeed()) }) diff --git a/test/e2e/singlecluster/e2e_test.go b/test/e2e/singlecluster/e2e_test.go index 26af1cfc8b..029a53551e 100644 --- a/test/e2e/singlecluster/e2e_test.go +++ b/test/e2e/singlecluster/e2e_test.go @@ -207,7 +207,7 @@ var _ = ginkgo.Describe("Kueue", func() { gomega.BeComparableTo(metav1.Condition{ Type: kueue.WorkloadFinished, Status: metav1.ConditionTrue, - Reason: "JobFinished", + Reason: kueue.WorkloadFinishedReasonFailed, }, util.IgnoreConditionMessage, util.IgnoreConditionTimestampsAndObservedGeneration))) }, util.LongTimeout, util.Interval).Should(gomega.Succeed()) }) diff --git a/test/e2e/singlecluster/jobset_test.go b/test/e2e/singlecluster/jobset_test.go index aed06ee6fd..59197fa358 100644 --- a/test/e2e/singlecluster/jobset_test.go +++ b/test/e2e/singlecluster/jobset_test.go @@ -23,6 +23,7 @@ import ( apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/jobset/pkg/constants" kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" workloadjobset "sigs.k8s.io/kueue/pkg/controller/jobs/jobset" @@ -33,7 +34,7 @@ import ( // +kubebuilder:docs-gen:collapse=Imports -var _ = ginkgo.Describe("Kueue", func() { +var _ = ginkgo.Describe("Jobset", func() { var ns *corev1.Namespace ginkgo.BeforeEach(func() { @@ -104,8 +105,8 @@ var _ = ginkgo.Describe("Kueue", func() { g.Expect(apimeta.FindStatusCondition(createdLeaderWorkload.Status.Conditions, kueue.WorkloadFinished)).To(gomega.BeComparableTo(&metav1.Condition{ Type: kueue.WorkloadFinished, Status: metav1.ConditionTrue, - Reason: "JobSetFinished", - Message: "JobSet finished successfully", + Reason: kueue.WorkloadFinishedReasonSucceeded, + Message: constants.AllJobsCompletedMessage, }, 
util.IgnoreConditionTimestampsAndObservedGeneration)) }, util.LongTimeout, util.Interval).Should(gomega.Succeed()) }) diff --git a/test/integration/controller/core/workload_controller_test.go b/test/integration/controller/core/workload_controller_test.go index e8cdf361b1..046efb6226 100644 --- a/test/integration/controller/core/workload_controller_test.go +++ b/test/integration/controller/core/workload_controller_test.go @@ -291,7 +291,7 @@ var _ = ginkgo.Describe("Workload controller", ginkgo.Ordered, ginkgo.ContinueOn }, util.Timeout, util.Interval).Should(gomega.BeComparableTo(&metav1.Condition{ Type: kueue.WorkloadFinished, Status: metav1.ConditionTrue, - Reason: "AdmissionChecksRejected", + Reason: kueue.WorkloadFinishedReasonAdmissionChecksRejected, Message: "Admission checks [check1] are rejected", }, util.IgnoreConditionTimestampsAndObservedGeneration)) @@ -390,7 +390,7 @@ var _ = ginkgo.Describe("Workload controller", ginkgo.Ordered, ginkgo.ContinueOn gomega.BeComparableTo(metav1.Condition{ Type: kueue.WorkloadFinished, Status: metav1.ConditionTrue, - Reason: "AdmissionChecksRejected", + Reason: kueue.WorkloadFinishedReasonAdmissionChecksRejected, Message: "Admission checks [check1] are rejected", }, util.IgnoreConditionTimestampsAndObservedGeneration), gomega.BeComparableTo(metav1.Condition{ diff --git a/test/integration/controller/jobs/job/job_controller_test.go b/test/integration/controller/jobs/job/job_controller_test.go index bbef383ba8..417a902da0 100644 --- a/test/integration/controller/jobs/job/job_controller_test.go +++ b/test/integration/controller/jobs/job/job_controller_test.go @@ -504,7 +504,7 @@ var _ = ginkgo.Describe("Job controller", ginkgo.Ordered, ginkgo.ContinueOnFailu LastProbeTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "ByTest", - Message: "by test", + Message: "Job finished successfully", }, } g.Expect(k8sClient.Status().Update(ctx, &createdJob)).To(gomega.Succeed()) @@ -519,7 +519,7 @@ var _ = ginkgo.Describe("Job controller", ginkgo.Ordered, ginkgo.ContinueOnFailu gomega.BeComparableTo(metav1.Condition{ Type: kueue.WorkloadFinished, Status: metav1.ConditionTrue, - Reason: "JobFinished", + Reason: kueue.WorkloadFinishedReasonSucceeded, Message: "Job finished successfully", }, util.IgnoreConditionTimestampsAndObservedGeneration))) }, util.Timeout, util.Interval).Should(gomega.Succeed()) diff --git a/test/integration/multikueue/multikueue_test.go b/test/integration/multikueue/multikueue_test.go index 980620202f..9e558543d2 100644 --- a/test/integration/multikueue/multikueue_test.go +++ b/test/integration/multikueue/multikueue_test.go @@ -374,6 +374,7 @@ var _ = ginkgo.Describe("Multikueue", func() { Status: corev1.ConditionTrue, LastProbeTime: metav1.Now(), LastTransitionTime: metav1.Now(), + Message: "Job finished successfully", }) g.Expect(worker1TestCluster.client.Status().Update(worker1TestCluster.ctx, &createdJob)).To(gomega.Succeed()) }, util.Timeout, util.Interval).Should(gomega.Succeed()) @@ -384,8 +385,8 @@ var _ = ginkgo.Describe("Multikueue", func() { g.Expect(apimeta.FindStatusCondition(createdWorkload.Status.Conditions, kueue.WorkloadFinished)).To(gomega.BeComparableTo(&metav1.Condition{ Type: kueue.WorkloadFinished, Status: metav1.ConditionTrue, - Reason: "JobFinished", - Message: `Job finished successfully`, + Reason: kueue.WorkloadFinishedReasonSucceeded, + Message: "Job finished successfully", }, util.IgnoreConditionTimestampsAndObservedGeneration)) }, util.LongTimeout, util.Interval).Should(gomega.Succeed()) @@ -495,7 
+496,7 @@ var _ = ginkgo.Describe("Multikueue", func() { Type: string(jobset.JobSetCompleted), Status: metav1.ConditionTrue, Reason: "ByTest", - Message: "by test", + Message: "JobSet finished successfully", }) g.Expect(worker2TestCluster.client.Status().Update(worker2TestCluster.ctx, &createdJobSet)).To(gomega.Succeed()) }, util.Timeout, util.Interval).Should(gomega.Succeed()) @@ -506,8 +507,8 @@ var _ = ginkgo.Describe("Multikueue", func() { g.Expect(apimeta.FindStatusCondition(createdWorkload.Status.Conditions, kueue.WorkloadFinished)).To(gomega.BeComparableTo(&metav1.Condition{ Type: kueue.WorkloadFinished, Status: metav1.ConditionTrue, - Reason: "JobSetFinished", - Message: `JobSet finished successfully`, + Reason: kueue.WorkloadFinishedReasonSucceeded, + Message: "JobSet finished successfully", }, util.IgnoreConditionTimestampsAndObservedGeneration)) }, util.LongTimeout, util.Interval).Should(gomega.Succeed()) From 0cdb8fda4bdf07d6a702cf8160acd71e2a07053b Mon Sep 17 00:00:00 2001 From: Traian Schiau <55734665+trasc@users.noreply.github.com> Date: Fri, 26 Apr 2024 18:03:38 +0300 Subject: [PATCH 41/49] [sclability] Relax the expectations. (#2067) * [sclability] Relax the expectations. - Extend the admission time for large workloads - Only check the average mCPU usage of minimalkueue (not sys and user times) - Extend the runner's timeout to 8 min * Second iteration: - Revert the timeout extension, was not needed. - Bring back the wallMs check. - Extend the medium WL admission time. --- .../scheduler/checker/checker_test.go | 11 ++++------- .../scheduler/default_rangespec.yaml | 19 ++++++++----------- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/test/performance/scheduler/checker/checker_test.go b/test/performance/scheduler/checker/checker_test.go index 86feffc9f3..d870170cc5 100644 --- a/test/performance/scheduler/checker/checker_test.go +++ b/test/performance/scheduler/checker/checker_test.go @@ -36,8 +36,7 @@ var ( type RangeSpec struct { Cmd struct { MaxWallMs int64 `json:"maxWallMs"` - MaxUserMs int64 `json:"maxUserMs"` - MaxSysMs int64 `json:"maxSysMs"` + MCPU int64 `json:"mCPU"` Maxrss uint64 `json:"maxrss"` } `json:"cmd"` ClusterQueueClassesMinUsage map[string]float64 `json:"clusterQueueClassesMinUsage"` @@ -83,11 +82,9 @@ func TestScalability(t *testing.T) { if cmdStats.WallMs > rangeSpec.Cmd.MaxWallMs { t.Errorf("Wall time %dms is greater than maximum expected %dms", cmdStats.WallMs, rangeSpec.Cmd.MaxWallMs) } - if cmdStats.UserMs > rangeSpec.Cmd.MaxUserMs { - t.Errorf("User time %dms is greater than maximum expected %dms", cmdStats.UserMs, rangeSpec.Cmd.MaxUserMs) - } - if cmdStats.SysMs > rangeSpec.Cmd.MaxSysMs { - t.Errorf("Sys time %dms is greater than maximum expected %dms", cmdStats.SysMs, rangeSpec.Cmd.MaxSysMs) + mCPUUsed := (cmdStats.SysMs + cmdStats.UserMs) * 1000 / cmdStats.WallMs + if mCPUUsed > rangeSpec.Cmd.MCPU { + t.Errorf("Average CPU usage %dmCpu is greater than maximum expected %dmCPU", mCPUUsed, rangeSpec.Cmd.MCPU) } if cmdStats.Maxrss > int64(rangeSpec.Cmd.Maxrss) { t.Errorf("Maxrss %dKib is greater than maximum expected %dKib", cmdStats.Maxrss, rangeSpec.Cmd.Maxrss) diff --git a/test/performance/scheduler/default_rangespec.yaml b/test/performance/scheduler/default_rangespec.yaml index 018c9472c7..ae8776f32e 100644 --- a/test/performance/scheduler/default_rangespec.yaml +++ b/test/performance/scheduler/default_rangespec.yaml @@ -5,14 +5,11 @@ # - #1782772615836864512 # - #1782775995984515072 cmd: - # Average value 351116.4 (+/- 0.9%), 
setting at +5% - maxWallMs: 368_000 + # Average value 351116.4 (+/- 0.9%), setting at +20% + maxWallMs: 425_000 - # Average value 111500 (+/- 14%), setting at +20% - maxUserMs: 134_000 - - # Average value 27875 (+/- 16%), setting at +20% - maxSysMs: 34_000 + # Average value 396 mCPU (+/- 8%), setting at +25% + mCPU: 500 # Average value 445012 (+/- 0.3%), setting at +5% maxrss: 468_000 @@ -22,11 +19,11 @@ clusterQueueClassesMinUsage: cq: 56 #% wlClassesMaxAvgTimeToAdmissionMs: - # Average value 6666 (+/- 14%), setting at +20% - large: 8_000 + # Average value 6666 (+/- 14%), setting at +35% + large: 9_000 - # Average value 76768 (+/- 2%), setting at +5% - medium: 81_000 + # Average value 76768 (+/- 2%), setting at +20% + medium: 90_000 # Average value 215468 (+/- 2%), setting at +5% small: 227_000 From 6516ada7611161b9d2193b6e910fb1909f90cbac Mon Sep 17 00:00:00 2001 From: Mykhailo Bobrovskyi Date: Fri, 26 Apr 2024 19:55:57 +0300 Subject: [PATCH 42/49] Using patch/apply on update pods. (#2074) --- pkg/controller/jobs/pod/pod_controller.go | 32 +++++++------- .../jobs/pod/pod_controller_test.go | 10 ++--- pkg/util/client/client.go | 43 +++++++++++++++++++ 3 files changed, 63 insertions(+), 22 deletions(-) create mode 100644 pkg/util/client/client.go diff --git a/pkg/controller/jobs/pod/pod_controller.go b/pkg/controller/jobs/pod/pod_controller.go index 764e6402c7..12ece98947 100644 --- a/pkg/controller/jobs/pod/pod_controller.go +++ b/pkg/controller/jobs/pod/pod_controller.go @@ -52,6 +52,7 @@ import ( "sigs.k8s.io/kueue/pkg/controller/jobframework" "sigs.k8s.io/kueue/pkg/podset" "sigs.k8s.io/kueue/pkg/util/admissioncheck" + clientutil "sigs.k8s.io/kueue/pkg/util/client" "sigs.k8s.io/kueue/pkg/util/kubeversion" "sigs.k8s.io/kueue/pkg/util/maps" "sigs.k8s.io/kueue/pkg/util/parallelize" @@ -240,6 +241,8 @@ func (p *Pod) Run(ctx context.Context, c client.Client, podSetsInfo []podset.Pod return fmt.Errorf("%w: expecting 1 pod set got %d", podset.ErrInvalidPodsetInfo, len(podSetsInfo)) } + podOriginal := p.pod.DeepCopy() + if ungated := ungatePod(&p.pod); !ungated { return nil } @@ -248,8 +251,7 @@ func (p *Pod) Run(ctx context.Context, c client.Client, podSetsInfo []podset.Pod return err } - err := c.Update(ctx, &p.pod) - if err != nil { + if err := clientutil.Patch(ctx, c, podOriginal, &p.pod); err != nil { return err } if recorder != nil { @@ -258,21 +260,14 @@ func (p *Pod) Run(ctx context.Context, c client.Client, podSetsInfo []podset.Pod return nil } - var podsToUngate []*corev1.Pod - - for i := range p.list.Items { + return parallelize.Until(ctx, len(p.list.Items), func(i int) error { pod := &p.list.Items[i] + podOriginal := pod.DeepCopy() + if ungated := ungatePod(pod); !ungated { - continue + return nil } - podsToUngate = append(podsToUngate, pod) - } - if len(podsToUngate) == 0 { - return nil - } - return parallelize.Until(ctx, len(podsToUngate), func(i int) error { - pod := podsToUngate[i] roleHash, err := getRoleHash(*pod) if err != nil { return err @@ -291,7 +286,7 @@ func (p *Pod) Run(ctx context.Context, c client.Client, podSetsInfo []podset.Pod } log.V(3).Info("Starting pod in group", "podInGroup", klog.KObj(pod)) - if err := c.Update(ctx, pod); err != nil { + if err := clientutil.Patch(ctx, c, podOriginal, pod); err != nil { return err } if recorder != nil { @@ -514,8 +509,9 @@ func (p *Pod) Finalize(ctx context.Context, c client.Client) error { return parallelize.Until(ctx, len(podsInGroup.Items), func(i int) error { pod := &podsInGroup.Items[i] + podOriginal := pod.DeepCopy() if 
controllerutil.RemoveFinalizer(pod, PodFinalizer) { - return c.Update(ctx, pod) + return clientutil.Patch(ctx, c, podOriginal, pod) } return nil }) @@ -823,9 +819,10 @@ func (p *Pod) removeExcessPods(ctx context.Context, c client.Client, r record.Ev // Finalize and delete the active pods created last err := parallelize.Until(ctx, len(extraPods), func(i int) error { pod := extraPods[i] + podOriginal := pod.DeepCopy() if controllerutil.RemoveFinalizer(&pod, PodFinalizer) { log.V(3).Info("Finalizing excess pod in group", "excessPod", klog.KObj(&pod)) - if err := c.Update(ctx, &pod); err != nil { + if err := clientutil.Patch(ctx, c, podOriginal, &pod); err != nil { // We won't observe this cleanup in the event handler. p.excessPodExpectations.ObservedUID(log, p.key, pod.UID) return err @@ -861,9 +858,10 @@ func (p *Pod) finalizePods(ctx context.Context, c client.Client, extraPods []cor err := parallelize.Until(ctx, len(extraPods), func(i int) error { pod := extraPods[i] + podOriginal := pod.DeepCopy() if controllerutil.RemoveFinalizer(&pod, PodFinalizer) { log.V(3).Info("Finalizing pod in group", "Pod", klog.KObj(&pod)) - if err := c.Update(ctx, &pod); err != nil { + if err := clientutil.Patch(ctx, c, podOriginal, &pod); err != nil { // We won't observe this cleanup in the event handler. p.excessPodExpectations.ObservedUID(log, p.key, pod.UID) return err diff --git a/pkg/controller/jobs/pod/pod_controller_test.go b/pkg/controller/jobs/pod/pod_controller_test.go index 5214f07b06..fa829d2de5 100644 --- a/pkg/controller/jobs/pod/pod_controller_test.go +++ b/pkg/controller/jobs/pod/pod_controller_test.go @@ -3634,7 +3634,7 @@ func TestReconciler_ErrorFinalizingPod(t *testing.T) { WithObjects(&pod). WithStatusSubresource(&wl). WithInterceptorFuncs(interceptor.Funcs{ - Update: func(ctx context.Context, client client.WithWatch, obj client.Object, opts ...client.UpdateOption) error { + Patch: func(ctx context.Context, client client.WithWatch, obj client.Object, patch client.Patch, opts ...client.PatchOption) error { _, isPod := obj.(*corev1.Pod) if isPod { defer func() { reqcount++ }() @@ -3644,10 +3644,10 @@ func TestReconciler_ErrorFinalizingPod(t *testing.T) { } if reqcount == 1 { // Exec a regular update operation for the second request - return client.Update(ctx, obj, opts...) + return client.Patch(ctx, obj, patch, opts...) } } - return client.Update(ctx, obj, opts...) + return client.Patch(ctx, obj, patch, opts...) }, SubResourcePatch: utiltesting.TreatSSAAsStrategicMerge, }) @@ -3859,11 +3859,11 @@ func TestReconciler_DeletePodAfterTransientErrorsOnUpdateOrDeleteOps(t *testing. kcBuilder := clientBuilder. WithStatusSubresource(&wl). WithInterceptorFuncs(interceptor.Funcs{ - Update: func(ctx context.Context, client client.WithWatch, obj client.Object, opts ...client.UpdateOption) error { + Patch: func(ctx context.Context, client client.WithWatch, obj client.Object, patch client.Patch, opts ...client.PatchOption) error { if triggerUpdateErr { return connRefusedErrMock } - return client.Update(ctx, obj, opts...) + return client.Patch(ctx, obj, patch, opts...) }, Delete: func(ctx context.Context, client client.WithWatch, obj client.Object, opts ...client.DeleteOption) error { if triggerDeleteErr { diff --git a/pkg/util/client/client.go b/pkg/util/client/client.go new file mode 100644 index 0000000000..d7192e7f84 --- /dev/null +++ b/pkg/util/client/client.go @@ -0,0 +1,43 @@ +/* +Copyright 2024 The Kubernetes Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package client + +import ( + "context" + + "sigs.k8s.io/controller-runtime/pkg/client" +) + +func CreatePatch(before, after client.Object) (client.Patch, error) { + patchBase := client.MergeFrom(before) + patchBytes, err := patchBase.Data(after) + if err != nil { + return nil, err + } + return client.RawPatch(patchBase.Type(), patchBytes), nil +} + +func Patch(ctx context.Context, c client.Client, before, after client.Object) error { + patch, err := CreatePatch(before, after) + if err != nil { + return err + } + if err = c.Patch(ctx, before, patch); err != nil { + return err + } + return nil +} From 44b3055ad9369e1814782888e46388023a4517c1 Mon Sep 17 00:00:00 2001 From: David Grove Date: Fri, 26 Apr 2024 13:30:41 -0400 Subject: [PATCH 43/49] Add support for registering externally managed frameworks (#2059) * support for externally managed frameworks * address review comments * make generate-apiref to pick up changes in config API * second round of review comments * allow RegisterExternalJobType to be called multiple times with same kind The operation of parsing a kindArg into a GVK is deterministic. Since unit tests run multiple times, flagging these as errors will result in spurious failures in main_test's TestValidateIntegrationsName. * remove unnecessary WithExternalFrameworks option --- apis/config/v1beta1/configuration_types.go | 3 + apis/config/v1beta1/zz_generated.deepcopy.go | 5 + cmd/kueue/main.go | 22 ++++- cmd/kueue/main_test.go | 27 +++++- .../manager/controller_manager_config.yaml | 2 + .../jobframework/integrationmanager.go | 60 ++++++++++-- .../jobframework/integrationmanager_test.go | 94 ++++++++++++++++--- .../en/docs/reference/kueue-config.v1beta1.md | 8 ++ 8 files changed, 195 insertions(+), 26 deletions(-) diff --git a/apis/config/v1beta1/configuration_types.go b/apis/config/v1beta1/configuration_types.go index 3c8d27939e..90c4059e37 100644 --- a/apis/config/v1beta1/configuration_types.go +++ b/apis/config/v1beta1/configuration_types.go @@ -307,6 +307,9 @@ type Integrations struct { // - "kubeflow.org/xgboostjob" // - "pod" Frameworks []string `json:"frameworks,omitempty"` + // List of GroupVersionKinds that are managed for Kueue by external controllers; + // the expected format is `Kind.version.group.com`. 
+ ExternalFrameworks []string `json:"externalFrameworks,omitempty"` // PodOptions defines kueue controller behaviour for pod objects PodOptions *PodIntegrationOptions `json:"podOptions,omitempty"` diff --git a/apis/config/v1beta1/zz_generated.deepcopy.go b/apis/config/v1beta1/zz_generated.deepcopy.go index 1a19f9cc08..8749fcd8af 100644 --- a/apis/config/v1beta1/zz_generated.deepcopy.go +++ b/apis/config/v1beta1/zz_generated.deepcopy.go @@ -240,6 +240,11 @@ func (in *Integrations) DeepCopyInto(out *Integrations) { *out = make([]string, len(*in)) copy(*out, *in) } + if in.ExternalFrameworks != nil { + in, out := &in.ExternalFrameworks, &out.ExternalFrameworks + *out = make([]string, len(*in)) + copy(*out, *in) + } if in.PodOptions != nil { in, out := &in.PodOptions, &out.PodOptions *out = new(PodIntegrationOptions) diff --git a/cmd/kueue/main.go b/cmd/kueue/main.go index 4b0bd26bdd..fb9ef9671b 100644 --- a/cmd/kueue/main.go +++ b/cmd/kueue/main.go @@ -32,7 +32,9 @@ import ( corev1 "k8s.io/api/core/v1" schedulingv1 "k8s.io/api/scheduling/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/validation/field" utilfeature "k8s.io/apiserver/pkg/util/feature" autoscaling "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1beta1" @@ -41,6 +43,7 @@ import ( "k8s.io/client-go/rest" "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client/apiutil" "sigs.k8s.io/controller-runtime/pkg/healthz" "sigs.k8s.io/controller-runtime/pkg/log/zap" @@ -363,13 +366,30 @@ func apply(configFile string) (ctrl.Options, configapi.Configuration, error) { if cfg.Integrations != nil { var errorlist field.ErrorList + managedKinds := make(sets.Set[string]) availableFrameworks := jobframework.GetIntegrationsList() path := field.NewPath("integrations", "frameworks") for _, framework := range cfg.Integrations.Frameworks { - if _, found := jobframework.GetIntegration(framework); !found { + if cb, found := jobframework.GetIntegration(framework); !found { errorlist = append(errorlist, field.NotSupported(path, framework, availableFrameworks)) + } else { + if gvk, err := apiutil.GVKForObject(cb.JobType, scheme); err == nil { + managedKinds = managedKinds.Insert(gvk.String()) + } } } + + path = field.NewPath("integrations", "externalFrameworks") + for idx, name := range cfg.Integrations.ExternalFrameworks { + if err := jobframework.RegisterExternalJobType(name); err == nil { + gvk, _ := schema.ParseKindArg(name) + if managedKinds.Has(gvk.String()) { + errorlist = append(errorlist, field.Duplicate(path.Index(idx), name)) + } + managedKinds = managedKinds.Insert(gvk.String()) + } + } + if len(errorlist) > 0 { err := errorlist.ToAggregate() return options, cfg, err diff --git a/cmd/kueue/main_test.go b/cmd/kueue/main_test.go index 25d78524a3..882047e4be 100644 --- a/cmd/kueue/main_test.go +++ b/cmd/kueue/main_test.go @@ -41,6 +41,8 @@ kind: Configuration integrations: frameworks: - batch/job + externalFrameworks: + - "Foo.v1.example.com" `), os.FileMode(0600)); err != nil { t.Fatal(err) } @@ -56,6 +58,19 @@ integrations: t.Fatal(err) } + badIntegrationsConfig2 := filepath.Join(tmpDir, "badIntegrations2.yaml") + if err := os.WriteFile(badIntegrationsConfig2, []byte(` +apiVersion: config.kueue.x-k8s.io/v1beta1 +kind: Configuration +integrations: + frameworks: + - batch/job + externalFrameworks: + - Job.v1.batch +`), 
os.FileMode(0600)); err != nil { + t.Fatal(err) + } + enableDefaultInternalCertManagement := &config.InternalCertManagement{ Enable: ptr.To(true), WebhookServiceName: ptr.To(config.DefaultWebhookServiceName), @@ -92,7 +107,8 @@ integrations: Integrations: &config.Integrations{ // referencing job.FrameworkName ensures the link of job package // therefore the batch/framework should be registered - Frameworks: []string{job.FrameworkName}, + Frameworks: []string{job.FrameworkName}, + ExternalFrameworks: []string{"Foo.v1.example.com"}, PodOptions: &config.PodIntegrationOptions{ NamespaceSelector: &metav1.LabelSelector{ MatchExpressions: []metav1.LabelSelectorRequirement{ @@ -124,6 +140,11 @@ integrations: configFile: badIntegrationsConfig, wantError: fmt.Errorf("integrations.frameworks: Unsupported value: \"unregistered/jobframework\": supported values: \"batch/job\", \"jobset.x-k8s.io/jobset\", \"kubeflow.org/mpijob\", \"kubeflow.org/mxjob\", \"kubeflow.org/paddlejob\", \"kubeflow.org/pytorchjob\", \"kubeflow.org/tfjob\", \"kubeflow.org/xgboostjob\", \"pod\", \"ray.io/raycluster\", \"ray.io/rayjob\""), }, + { + name: "bad integrations config 2", + configFile: badIntegrationsConfig2, + wantError: fmt.Errorf("integrations.externalFrameworks[0]: Duplicate value: \"Job.v1.batch\""), + }, } for _, tc := range testcases { @@ -137,7 +158,9 @@ integrations: t.Errorf("Unexpected config (-want +got):\n%s", diff) } } else { - if diff := cmp.Diff(tc.wantError.Error(), err.Error()); diff != "" { + if err == nil { + t.Errorf("Failed to get expected error") + } else if diff := cmp.Diff(tc.wantError.Error(), err.Error()); diff != "" { t.Errorf("Unexpected error (-want +got):\n%s", diff) } } diff --git a/config/components/manager/controller_manager_config.yaml b/config/components/manager/controller_manager_config.yaml index e698ecf762..13e58ac5df 100644 --- a/config/components/manager/controller_manager_config.yaml +++ b/config/components/manager/controller_manager_config.yaml @@ -42,6 +42,8 @@ integrations: - "kubeflow.org/tfjob" - "kubeflow.org/xgboostjob" # - "pod" +# externalFrameworks: +# - "Foo.v1.example.com" # podOptions: # namespaceSelector: # matchExpressions: diff --git a/pkg/controller/jobframework/integrationmanager.go b/pkg/controller/jobframework/integrationmanager.go index a535077fb5..ec1af39992 100644 --- a/pkg/controller/jobframework/integrationmanager.go +++ b/pkg/controller/jobframework/integrationmanager.go @@ -24,6 +24,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -33,6 +34,7 @@ import ( var ( errDuplicateFrameworkName = errors.New("duplicate framework name") errMissingMandatoryField = errors.New("mandatory field missing") + errFrameworkNameFormat = errors.New("misformatted external framework name") ) type JobReconcilerInterface interface { @@ -66,8 +68,9 @@ type IntegrationCallbacks struct { } type integrationManager struct { - names []string - integrations map[string]IntegrationCallbacks + names []string + integrations map[string]IntegrationCallbacks + externalIntegrations map[string]runtime.Object } var manager integrationManager @@ -98,6 +101,28 @@ func (m *integrationManager) register(name string, cb IntegrationCallbacks) erro return nil } +func (m *integrationManager) registerExternal(kindArg string) error { + if m.externalIntegrations == nil { + m.externalIntegrations = 
make(map[string]runtime.Object) + } + + gvk, _ := schema.ParseKindArg(kindArg) + if gvk == nil { + return fmt.Errorf("%w %q", errFrameworkNameFormat, kindArg) + } + apiVersion, kind := gvk.ToAPIVersionAndKind() + jobType := &metav1.PartialObjectMetadata{ + TypeMeta: metav1.TypeMeta{ + APIVersion: apiVersion, + Kind: kind, + }, + } + + m.externalIntegrations[kindArg] = jobType + + return nil +} + func (m *integrationManager) forEach(f func(name string, cb IntegrationCallbacks) error) error { for _, name := range m.names { if err := f(name, m.integrations[name]); err != nil { @@ -112,6 +137,11 @@ func (m *integrationManager) get(name string) (IntegrationCallbacks, bool) { return cb, f } +func (m *integrationManager) getExternal(kindArg string) (runtime.Object, bool) { + jt, f := m.externalIntegrations[kindArg] + return jt, f +} + func (m *integrationManager) getList() []string { ret := make([]string, len(m.names)) copy(ret, m.names) @@ -119,13 +149,19 @@ func (m *integrationManager) getList() []string { return ret } -func (m *integrationManager) getCallbacksForOwner(ownerRef *metav1.OwnerReference) *IntegrationCallbacks { - for _, name := range m.names { - cbs := m.integrations[name] +func (m *integrationManager) getJobTypeForOwner(ownerRef *metav1.OwnerReference) runtime.Object { + for _, cbs := range m.integrations { if cbs.IsManagingObjectsOwner != nil && cbs.IsManagingObjectsOwner(ownerRef) { - return &cbs + return cbs.JobType + } + } + for _, jt := range m.externalIntegrations { + apiVersion, kind := jt.GetObjectKind().GroupVersionKind().ToAPIVersionAndKind() + if ownerRef.Kind == kind && ownerRef.APIVersion == apiVersion { + return jt } } + return nil } @@ -136,6 +172,12 @@ func RegisterIntegration(name string, cb IntegrationCallbacks) error { return manager.register(name, cb) } +// RegisterExternalJobType registers a new externally-managed Kind, returns an error +// if kindArg cannot be parsed as a Kind.version.group. +func RegisterExternalJobType(kindArg string) error { + return manager.registerExternal(kindArg) +} + // ForEachIntegration loops through the registered list of frameworks calling f, // if at any point f returns an error the loop is stopped and that error is returned. func ForEachIntegration(f func(name string, cb IntegrationCallbacks) error) error { @@ -156,14 +198,14 @@ func GetIntegrationsList() []string { // IsOwnerManagedByKueue returns true if the provided owner can be managed by // kueue. func IsOwnerManagedByKueue(owner *metav1.OwnerReference) bool { - return manager.getCallbacksForOwner(owner) != nil + return manager.getJobTypeForOwner(owner) != nil } // GetEmptyOwnerObject returns an empty object of the owner's type, // returns nil if the owner is not manageable by kueue. 
func GetEmptyOwnerObject(owner *metav1.OwnerReference) client.Object { - if cbs := manager.getCallbacksForOwner(owner); cbs != nil { - return cbs.JobType.DeepCopyObject().(client.Object) + if jt := manager.getJobTypeForOwner(owner); jt != nil { + return jt.DeepCopyObject().(client.Object) } return nil } diff --git a/pkg/controller/jobframework/integrationmanager_test.go b/pkg/controller/jobframework/integrationmanager_test.go index 2a3a3ec397..c3f124599b 100644 --- a/pkg/controller/jobframework/integrationmanager_test.go +++ b/pkg/controller/jobframework/integrationmanager_test.go @@ -24,9 +24,11 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" + batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -236,6 +238,58 @@ func compareCallbacks(x, y interface{}) bool { return reflect.ValueOf(xcb.AddToScheme).Pointer() == reflect.ValueOf(ycb.AddToScheme).Pointer() } +func TestRegisterExternal(t *testing.T) { + cases := map[string]struct { + manager *integrationManager + kindArg string + wantError error + wantGVK *schema.GroupVersionKind + }{ + "successful 1": { + manager: &integrationManager{ + names: []string{"oldFramework"}, + integrations: map[string]IntegrationCallbacks{ + "oldFramework": testIntegrationCallbacks, + }, + }, + kindArg: "Job.v1.batch", + wantError: nil, + wantGVK: &schema.GroupVersionKind{Group: "batch", Version: "v1", Kind: "Job"}, + }, + "successful 2": { + manager: &integrationManager{ + externalIntegrations: map[string]runtime.Object{ + "Job.v1.batch": &batchv1.Job{TypeMeta: metav1.TypeMeta{Kind: "Job", APIVersion: "batch/v1"}}, + }, + }, + kindArg: "AppWrapper.v1beta2.workload.codeflare.dev", + wantError: nil, + wantGVK: &schema.GroupVersionKind{Group: "workload.codeflare.dev", Version: "v1beta2", Kind: "AppWrapper"}, + }, + "malformed kind arg": { + manager: &integrationManager{}, + kindArg: "batch/job", + wantError: errFrameworkNameFormat, + wantGVK: nil, + }, + } + + for tcName, tc := range cases { + t.Run(tcName, func(t *testing.T) { + gotError := tc.manager.registerExternal(tc.kindArg) + if diff := cmp.Diff(tc.wantError, gotError, cmpopts.EquateErrors()); diff != "" { + t.Errorf("Unexpected error (-want +got):\n%s", diff) + } + if gotJobType, found := tc.manager.getExternal(tc.kindArg); found { + gvk := gotJobType.GetObjectKind().GroupVersionKind() + if diff := cmp.Diff(tc.wantGVK, &gvk); diff != "" { + t.Errorf("Unexpected jobtypes (-want +got):\n%s", diff) + } + } + }) + } +} + func TestForEach(t *testing.T) { foeEachError := errors.New("test error") cases := map[string]struct { @@ -286,7 +340,7 @@ func TestForEach(t *testing.T) { } } -func TestGetCallbacksForOwner(t *testing.T) { +func TestGetJobTypeForOwner(t *testing.T) { dontManage := IntegrationCallbacks{ NewReconciler: func(client.Client, record.EventRecorder, ...Option) JobReconcilerInterface { panic("not implemented") @@ -297,13 +351,18 @@ func TestGetCallbacksForOwner(t *testing.T) { manageK1 := func() IntegrationCallbacks { ret := dontManage ret.IsManagingObjectsOwner = func(owner *metav1.OwnerReference) bool { return owner.Kind == "K1" } + ret.JobType = &metav1.PartialObjectMetadata{TypeMeta: metav1.TypeMeta{Kind: "K1"}} return ret }() manageK2 := func() IntegrationCallbacks { ret := dontManage ret.IsManagingObjectsOwner = func(owner 
*metav1.OwnerReference) bool { return owner.Kind == "K2" } + ret.JobType = &metav1.PartialObjectMetadata{TypeMeta: metav1.TypeMeta{Kind: "K2"}} return ret }() + externalK3 := func() runtime.Object { + return &metav1.PartialObjectMetadata{TypeMeta: metav1.TypeMeta{Kind: "K3"}} + }() mgr := integrationManager{ names: []string{"manageK1", "dontManage", "manageK2"}, @@ -312,38 +371,45 @@ func TestGetCallbacksForOwner(t *testing.T) { "manageK1": manageK1, "manageK2": manageK2, }, + externalIntegrations: map[string]runtime.Object{ + "externalK3": externalK3, + }, } cases := map[string]struct { - owner *metav1.OwnerReference - wantCallbacks *IntegrationCallbacks + owner *metav1.OwnerReference + wantJobType runtime.Object }{ "K1": { - owner: &metav1.OwnerReference{Kind: "K1"}, - wantCallbacks: &manageK1, + owner: &metav1.OwnerReference{Kind: "K1"}, + wantJobType: manageK1.JobType, }, "K2": { - owner: &metav1.OwnerReference{Kind: "K2"}, - wantCallbacks: &manageK2, + owner: &metav1.OwnerReference{Kind: "K2"}, + wantJobType: manageK2.JobType, }, "K3": { - owner: &metav1.OwnerReference{Kind: "K3"}, - wantCallbacks: nil, + owner: &metav1.OwnerReference{Kind: "K3"}, + wantJobType: externalK3, + }, + "K4": { + owner: &metav1.OwnerReference{Kind: "K4"}, + wantJobType: nil, }, } for tcName, tc := range cases { t.Run(tcName, func(t *testing.T) { - gotCallbacks := mgr.getCallbacksForOwner(tc.owner) - if tc.wantCallbacks == nil { - if gotCallbacks != nil { + wantJobType := mgr.getJobTypeForOwner(tc.owner) + if tc.wantJobType == nil { + if wantJobType != nil { t.Errorf("This owner should be unmanaged") } } else { - if gotCallbacks == nil { + if wantJobType == nil { t.Errorf("This owner should be managed") } else { - if diff := cmp.Diff(*tc.wantCallbacks, *gotCallbacks, cmp.FilterValues(func(_, _ interface{}) bool { return true }, cmp.Comparer(compareCallbacks))); diff != "" { + if diff := cmp.Diff(tc.wantJobType, wantJobType); diff != "" { t.Errorf("Unexpected callbacks (-want +got):\n%s", diff) } } diff --git a/site/content/en/docs/reference/kueue-config.v1beta1.md b/site/content/en/docs/reference/kueue-config.v1beta1.md index 040c2fecf9..da4d406598 100644 --- a/site/content/en/docs/reference/kueue-config.v1beta1.md +++ b/site/content/en/docs/reference/kueue-config.v1beta1.md @@ -429,6 +429,14 @@ Possible options:
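The table below documents the new `integrations.externalFrameworks` field in the generated configuration reference. As a minimal sketch of how the registration added in this patch behaves (illustrative only; `cmd/kueue/main.go` performs the registration for every configured entry), reusing the AppWrapper kind from the unit tests:

```go
package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"sigs.k8s.io/kueue/pkg/controller/jobframework"
)

func main() {
	// Register an externally managed kind; the argument uses the
	// Kind.version.group format accepted by schema.ParseKindArg.
	if err := jobframework.RegisterExternalJobType("AppWrapper.v1beta2.workload.codeflare.dev"); err != nil {
		panic(err)
	}

	// Owners of the registered kind are now recognized as managed by Kueue,
	// even though no in-tree IntegrationCallbacks exist for them.
	owner := &metav1.OwnerReference{
		APIVersion: "workload.codeflare.dev/v1beta2",
		Kind:       "AppWrapper",
	}
	fmt.Println(jobframework.IsOwnerManagedByKueue(owner)) // prints: true
}
```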

+externalFrameworks [Required]
+[]string
+
+List of GroupVersionKinds that are managed for Kueue by external controllers;
+the expected format is Kind.version.group.com.
+
PodIntegrationOptions From 5401a3b55c15098795d22d0c61ddae40bdc6bf25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irving=20Mondrag=C3=B3n?= Date: Fri, 26 Apr 2024 22:15:59 +0200 Subject: [PATCH 44/49] Add CEL rules to Workload (#2008) * Add CEL rules to Workload * Remove workload defaulter * Remove test cases validated by CEL rules * Update test descriptions * Refactoring integration tests * Restore validateAdmissionUpdate function * Restore defaulting for minCount * Simplify error checks * Update workload cel rules * Restore podSets immutability validation via webhook --- apis/kueue/v1beta1/workload_types.go | 28 +- .../crd/kueue.x-k8s.io_workloads.yaml | 64 +- .../crd/bases/kueue.x-k8s.io_workloads.yaml | 64 +- pkg/webhooks/common.go | 11 - pkg/webhooks/resourceflavor_webhook.go | 25 - pkg/webhooks/resourceflavor_webhook_test.go | 39 - pkg/webhooks/workload_webhook.go | 75 -- pkg/webhooks/workload_webhook_test.go | 287 ------- test/integration/webhook/workload_test.go | 785 +++++++++++++++++- 9 files changed, 893 insertions(+), 485 deletions(-) diff --git a/apis/kueue/v1beta1/workload_types.go b/apis/kueue/v1beta1/workload_types.go index 74470ec182..9ccbcbbeb9 100644 --- a/apis/kueue/v1beta1/workload_types.go +++ b/apis/kueue/v1beta1/workload_types.go @@ -22,6 +22,7 @@ import ( ) // WorkloadSpec defines the desired state of Workload +// +kubebuilder:validation:XValidation:rule="has(self.priorityClassName) ? has(self.priority) : true", message="priority should not be nil when priorityClassName is set" type WorkloadSpec struct { // podSets is a list of sets of homogeneous pods, each described by a Pod spec // and a count. @@ -36,6 +37,8 @@ type WorkloadSpec struct { // queueName is the name of the LocalQueue the Workload is associated with. // queueName cannot be changed while .status.admission is not null. + // +kubebuilder:validation:MaxLength=253 + // +kubebuilder:validation:Pattern="^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$" QueueName string `json:"queueName,omitempty"` // If specified, indicates the workload's priority. @@ -44,6 +47,8 @@ type WorkloadSpec struct { // the highest priority. Any other name must be defined by creating a // PriorityClass object with that name. If not specified, the workload // priority will be default or zero if there is no default. + // +kubebuilder:validation:MaxLength=253 + // +kubebuilder:validation:Pattern="^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$" PriorityClassName string `json:"priorityClassName,omitempty"` // Priority determines the order of access to the resources managed by the @@ -79,12 +84,15 @@ type Admission struct { // PodSetAssignments hold the admission results for each of the .spec.podSets entries. // +listType=map // +listMapKey=name + // +kubebuilder:validation:MaxItems=8 PodSetAssignments []PodSetAssignment `json:"podSetAssignments"` } type PodSetAssignment struct { // Name is the name of the podSet. It should match one of the names in .spec.podSets. // +kubebuilder:default=main + // +kubebuilder:validation:MaxLength=63 + // +kubebuilder:validation:Pattern="^(?i)[a-z0-9]([-a-z0-9]*[a-z0-9])?$" Name string `json:"name"` // Flavors are the flavors assigned to the workload for each resource. @@ -107,9 +115,13 @@ type PodSetAssignment struct { Count *int32 `json:"count,omitempty"` } +// +kubebuilder:validation:XValidation:rule="has(self.minCount) ? 
self.minCount <= self.count : true", message="minCount should be positive and less or equal to count" type PodSet struct { // name is the PodSet name. - Name string `json:"name"` + // +kubebuilder:default=main + // +kubebuilder:validation:MaxLength=63 + // +kubebuilder:validation:Pattern="^[a-z0-9]([-a-z0-9]*[a-z0-9])?$" + Name string `json:"name,omitempty"` // template is the Pod template. // @@ -141,6 +153,7 @@ type PodSet struct { // This is an alpha field and requires enabling PartialAdmission feature gate. // // +optional + // +kubebuilder:validation:Minimum=1 MinCount *int32 `json:"minCount,omitempty"` } @@ -179,6 +192,7 @@ type WorkloadStatus struct { // +optional // +listType=map // +listMapKey=name + // +kubebuilder:validation:MaxItems=8 ReclaimablePods []ReclaimablePod `json:"reclaimablePods,omitempty"` // admissionChecks list all the admission checks required by the workload and the current status @@ -187,6 +201,7 @@ type WorkloadStatus struct { // +listMapKey=name // +patchStrategy=merge // +patchMergeKey=name + // +kubebuilder:validation:MaxItems=8 AdmissionChecks []AdmissionCheckState `json:"admissionChecks,omitempty" patchStrategy:"merge" patchMergeKey:"name"` } @@ -234,6 +249,7 @@ type AdmissionCheckState struct { // +optional // +listType=atomic + // +kubebuilder:validation:MaxItems=8 PodSetUpdates []PodSetUpdate `json:"podSetUpdates,omitempty"` } @@ -257,6 +273,12 @@ type PodSetUpdate struct { NodeSelector map[string]string `json:"nodeSelector,omitempty"` // +optional + // +kubebuilder:validation:MaxItems=8 + // +kubebuilder:validation:XValidation:rule="self.all(x, !has(x.key) ? x.operator == 'Exists' : true)", message="operator must be Exists when 'key' is empty, which means 'match all values and all keys'" + // +kubebuilder:validation:XValidation:rule="self.all(x, has(x.tolerationSeconds) ? x.effect == 'NoExecute' : true)", message="effect must be 'NoExecute' when 'tolerationSeconds' is set" + // +kubebuilder:validation:XValidation:rule="self.all(x, !has(x.operator) || x.operator in ['Equal', 'Exists'])", message="supported toleration values: 'Equal'(default), 'Exists'" + // +kubebuilder:validation:XValidation:rule="self.all(x, has(x.operator) && x.operator == 'Exists' ? !has(x.value) : true)", message="a value must be empty when 'operator' is 'Exists'" + // +kubebuilder:validation:XValidation:rule="self.all(x, !has(x.effect) || x.effect in ['NoSchedule', 'PreferNoSchedule', 'NoExecute'])", message="supported taint effect values: 'NoSchedule', 'PreferNoSchedule', 'NoExecute'" Tolerations []corev1.Toleration `json:"tolerations,omitempty"` } @@ -354,6 +376,10 @@ const ( // +kubebuilder:resource:shortName={wl} // Workload is the Schema for the workloads API +// +kubebuilder:validation:XValidation:rule="has(self.status) && has(self.status.conditions) && self.status.conditions.exists(c, c.type == 'QuotaReserved' && c.status == 'True') && has(self.status.admission) ? size(self.spec.podSets) == size(self.status.admission.podSetAssignments) : true", message="podSetAssignments must have the same number of podSets as the spec" +// +kubebuilder:validation:XValidation:rule="(has(oldSelf.status) && has(oldSelf.status.conditions) && oldSelf.status.conditions.exists(c, c.type == 'QuotaReserved' && c.status == 'True')) ? 
(oldSelf.spec.priorityClassSource == self.spec.priorityClassSource) : true", message="field is immutable" +// +kubebuilder:validation:XValidation:rule="(has(oldSelf.status) && has(oldSelf.status.conditions) && oldSelf.status.conditions.exists(c, c.type == 'QuotaReserved' && c.status == 'True') && has(oldSelf.spec.priorityClassName) && has(self.spec.priorityClassName)) ? (oldSelf.spec.priorityClassName == self.spec.priorityClassName) : true", message="field is immutable" +// +kubebuilder:validation:XValidation:rule="(has(oldSelf.status) && has(oldSelf.status.conditions) && oldSelf.status.conditions.exists(c, c.type == 'QuotaReserved' && c.status == 'True')) && (has(self.status) && has(self.status.conditions) && self.status.conditions.exists(c, c.type == 'QuotaReserved' && c.status == 'True')) && has(oldSelf.spec.queueName) && has(self.spec.queueName) ? oldSelf.spec.queueName == self.spec.queueName : true", message="field is immutable" type Workload struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` diff --git a/charts/kueue/templates/crd/kueue.x-k8s.io_workloads.yaml b/charts/kueue/templates/crd/kueue.x-k8s.io_workloads.yaml index 3f2afaa75a..0b95583dfc 100644 --- a/charts/kueue/templates/crd/kueue.x-k8s.io_workloads.yaml +++ b/charts/kueue/templates/crd/kueue.x-k8s.io_workloads.yaml @@ -115,9 +115,13 @@ spec: This is an alpha field and requires enabling PartialAdmission feature gate. format: int32 + minimum: 1 type: integer name: + default: main description: name is the PodSet name. + maxLength: 63 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ type: string template: description: |- @@ -7768,9 +7772,11 @@ spec: type: object required: - count - - name - template type: object + x-kubernetes-validations: + - message: minCount should be positive and less or equal to count + rule: 'has(self.minCount) ? self.minCount <= self.count : true' maxItems: 8 minItems: 1 type: array @@ -7794,6 +7800,8 @@ spec: the highest priority. Any other name must be defined by creating a PriorityClass object with that name. If not specified, the workload priority will be default or zero if there is no default. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ type: string priorityClassSource: default: "" @@ -7810,10 +7818,15 @@ spec: description: |- queueName is the name of the LocalQueue the Workload is associated with. queueName cannot be changed while .status.admission is not null. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ type: string required: - podSets type: object + x-kubernetes-validations: + - message: priority should not be nil when priorityClassName is set + rule: 'has(self.priorityClassName) ? has(self.priority) : true' status: description: WorkloadStatus defines the observed state of Workload properties: @@ -7857,6 +7870,8 @@ spec: default: main description: Name is the name of the podSet. It should match one of the names in .spec.podSets. + maxLength: 63 + pattern: ^(?i)[a-z0-9]([-a-z0-9]*[a-z0-9])?$ type: string resourceUsage: additionalProperties: @@ -7876,6 +7891,7 @@ spec: required: - name type: object + maxItems: 8 type: array x-kubernetes-list-map-keys: - name @@ -7966,10 +7982,32 @@ spec: If the operator is Exists, the value should be empty, otherwise just a regular string. 
type: string type: object + maxItems: 8 type: array + x-kubernetes-validations: + - message: operator must be Exists when 'key' is empty, + which means 'match all values and all keys' + rule: 'self.all(x, !has(x.key) ? x.operator == ''Exists'' + : true)' + - message: effect must be 'NoExecute' when 'tolerationSeconds' + is set + rule: 'self.all(x, has(x.tolerationSeconds) ? x.effect + == ''NoExecute'' : true)' + - message: 'supported toleration values: ''Equal''(default), + ''Exists''' + rule: self.all(x, !has(x.operator) || x.operator in + ['Equal', 'Exists']) + - message: a value must be empty when 'operator' is 'Exists' + rule: 'self.all(x, has(x.operator) && x.operator == + ''Exists'' ? !has(x.value) : true)' + - message: 'supported taint effect values: ''NoSchedule'', + ''PreferNoSchedule'', ''NoExecute''' + rule: self.all(x, !has(x.effect) || x.effect in ['NoSchedule', + 'PreferNoSchedule', 'NoExecute']) required: - name type: object + maxItems: 8 type: array x-kubernetes-list-type: atomic state: @@ -7987,6 +8025,7 @@ spec: - name - state type: object + maxItems: 8 type: array x-kubernetes-list-map-keys: - name @@ -8094,6 +8133,7 @@ spec: - count - name type: object + maxItems: 8 type: array x-kubernetes-list-map-keys: - name @@ -8121,6 +8161,28 @@ spec: type: object type: object type: object + x-kubernetes-validations: + - message: podSetAssignments must have the same number of podSets as the spec + rule: 'has(self.status) && has(self.status.conditions) && self.status.conditions.exists(c, + c.type == ''QuotaReserved'' && c.status == ''True'') && has(self.status.admission) + ? size(self.spec.podSets) == size(self.status.admission.podSetAssignments) + : true' + - message: field is immutable + rule: '(has(oldSelf.status) && has(oldSelf.status.conditions) && oldSelf.status.conditions.exists(c, + c.type == ''QuotaReserved'' && c.status == ''True'')) ? (oldSelf.spec.priorityClassSource + == self.spec.priorityClassSource) : true' + - message: field is immutable + rule: '(has(oldSelf.status) && has(oldSelf.status.conditions) && oldSelf.status.conditions.exists(c, + c.type == ''QuotaReserved'' && c.status == ''True'') && has(oldSelf.spec.priorityClassName) + && has(self.spec.priorityClassName)) ? (oldSelf.spec.priorityClassName + == self.spec.priorityClassName) : true' + - message: field is immutable + rule: '(has(oldSelf.status) && has(oldSelf.status.conditions) && oldSelf.status.conditions.exists(c, + c.type == ''QuotaReserved'' && c.status == ''True'')) && (has(self.status) + && has(self.status.conditions) && self.status.conditions.exists(c, c.type + == ''QuotaReserved'' && c.status == ''True'')) && has(oldSelf.spec.queueName) + && has(self.spec.queueName) ? oldSelf.spec.queueName == self.spec.queueName + : true' served: true storage: true subresources: diff --git a/config/components/crd/bases/kueue.x-k8s.io_workloads.yaml b/config/components/crd/bases/kueue.x-k8s.io_workloads.yaml index 9bc7c62a89..3a7117cf9e 100644 --- a/config/components/crd/bases/kueue.x-k8s.io_workloads.yaml +++ b/config/components/crd/bases/kueue.x-k8s.io_workloads.yaml @@ -100,9 +100,13 @@ spec: This is an alpha field and requires enabling PartialAdmission feature gate. format: int32 + minimum: 1 type: integer name: + default: main description: name is the PodSet name. 
+ maxLength: 63 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ type: string template: description: |- @@ -7753,9 +7757,11 @@ spec: type: object required: - count - - name - template type: object + x-kubernetes-validations: + - message: minCount should be positive and less or equal to count + rule: 'has(self.minCount) ? self.minCount <= self.count : true' maxItems: 8 minItems: 1 type: array @@ -7779,6 +7785,8 @@ spec: the highest priority. Any other name must be defined by creating a PriorityClass object with that name. If not specified, the workload priority will be default or zero if there is no default. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ type: string priorityClassSource: default: "" @@ -7795,10 +7803,15 @@ spec: description: |- queueName is the name of the LocalQueue the Workload is associated with. queueName cannot be changed while .status.admission is not null. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ type: string required: - podSets type: object + x-kubernetes-validations: + - message: priority should not be nil when priorityClassName is set + rule: 'has(self.priorityClassName) ? has(self.priority) : true' status: description: WorkloadStatus defines the observed state of Workload properties: @@ -7842,6 +7855,8 @@ spec: default: main description: Name is the name of the podSet. It should match one of the names in .spec.podSets. + maxLength: 63 + pattern: ^(?i)[a-z0-9]([-a-z0-9]*[a-z0-9])?$ type: string resourceUsage: additionalProperties: @@ -7861,6 +7876,7 @@ spec: required: - name type: object + maxItems: 8 type: array x-kubernetes-list-map-keys: - name @@ -7951,10 +7967,32 @@ spec: If the operator is Exists, the value should be empty, otherwise just a regular string. type: string type: object + maxItems: 8 type: array + x-kubernetes-validations: + - message: operator must be Exists when 'key' is empty, + which means 'match all values and all keys' + rule: 'self.all(x, !has(x.key) ? x.operator == ''Exists'' + : true)' + - message: effect must be 'NoExecute' when 'tolerationSeconds' + is set + rule: 'self.all(x, has(x.tolerationSeconds) ? x.effect + == ''NoExecute'' : true)' + - message: 'supported toleration values: ''Equal''(default), + ''Exists''' + rule: self.all(x, !has(x.operator) || x.operator in + ['Equal', 'Exists']) + - message: a value must be empty when 'operator' is 'Exists' + rule: 'self.all(x, has(x.operator) && x.operator == + ''Exists'' ? !has(x.value) : true)' + - message: 'supported taint effect values: ''NoSchedule'', + ''PreferNoSchedule'', ''NoExecute''' + rule: self.all(x, !has(x.effect) || x.effect in ['NoSchedule', + 'PreferNoSchedule', 'NoExecute']) required: - name type: object + maxItems: 8 type: array x-kubernetes-list-type: atomic state: @@ -7972,6 +8010,7 @@ spec: - name - state type: object + maxItems: 8 type: array x-kubernetes-list-map-keys: - name @@ -8079,6 +8118,7 @@ spec: - count - name type: object + maxItems: 8 type: array x-kubernetes-list-map-keys: - name @@ -8106,6 +8146,28 @@ spec: type: object type: object type: object + x-kubernetes-validations: + - message: podSetAssignments must have the same number of podSets as the spec + rule: 'has(self.status) && has(self.status.conditions) && self.status.conditions.exists(c, + c.type == ''QuotaReserved'' && c.status == ''True'') && has(self.status.admission) + ? 
size(self.spec.podSets) == size(self.status.admission.podSetAssignments) + : true' + - message: field is immutable + rule: '(has(oldSelf.status) && has(oldSelf.status.conditions) && oldSelf.status.conditions.exists(c, + c.type == ''QuotaReserved'' && c.status == ''True'')) ? (oldSelf.spec.priorityClassSource + == self.spec.priorityClassSource) : true' + - message: field is immutable + rule: '(has(oldSelf.status) && has(oldSelf.status.conditions) && oldSelf.status.conditions.exists(c, + c.type == ''QuotaReserved'' && c.status == ''True'') && has(oldSelf.spec.priorityClassName) + && has(self.spec.priorityClassName)) ? (oldSelf.spec.priorityClassName + == self.spec.priorityClassName) : true' + - message: field is immutable + rule: '(has(oldSelf.status) && has(oldSelf.status.conditions) && oldSelf.status.conditions.exists(c, + c.type == ''QuotaReserved'' && c.status == ''True'')) && (has(self.status) + && has(self.status.conditions) && self.status.conditions.exists(c, c.type + == ''QuotaReserved'' && c.status == ''True'')) && has(oldSelf.spec.queueName) + && has(self.spec.queueName) ? oldSelf.spec.queueName == self.spec.queueName + : true' served: true storage: true subresources: diff --git a/pkg/webhooks/common.go b/pkg/webhooks/common.go index 02d324c629..9554c241bd 100644 --- a/pkg/webhooks/common.go +++ b/pkg/webhooks/common.go @@ -13,14 +13,3 @@ func validateResourceName(name corev1.ResourceName, fldPath *field.Path) field.E } return allErrs } - -// validateNameReference is the same validation applied to name of an ObjectMeta. -func validateNameReference(name string, path *field.Path) field.ErrorList { - var allErrs field.ErrorList - if msgs := validation.IsDNS1123Subdomain(name); len(msgs) > 0 { - for _, msg := range msgs { - allErrs = append(allErrs, field.Invalid(path, name, msg)) - } - } - return allErrs -} diff --git a/pkg/webhooks/resourceflavor_webhook.go b/pkg/webhooks/resourceflavor_webhook.go index ffe6d4330c..9eeb3060ac 100644 --- a/pkg/webhooks/resourceflavor_webhook.go +++ b/pkg/webhooks/resourceflavor_webhook.go @@ -128,28 +128,3 @@ func validateNodeTaints(taints []corev1.Taint, fldPath *field.Path) field.ErrorL } return allErrors } - -// TODO(#463): Remove this function when CEL validations are added to workload type -// validateTaintEffect is extracted from git.k8s.io/kubernetes/pkg/apis/core/validation/validation.go -func validateTaintEffect(effect *corev1.TaintEffect, allowEmpty bool, fldPath *field.Path) field.ErrorList { - if !allowEmpty && len(*effect) == 0 { - return field.ErrorList{field.Required(fldPath, "")} - } - - allErrors := field.ErrorList{} - switch *effect { - // TODO: Replace next line with subsequent commented-out line when implement TaintEffectNoScheduleNoAdmit. - case corev1.TaintEffectNoSchedule, corev1.TaintEffectPreferNoSchedule, corev1.TaintEffectNoExecute: - // case core.TaintEffectNoSchedule, core.TaintEffectPreferNoSchedule, core.TaintEffectNoScheduleNoAdmit, core.TaintEffectNoExecute: - default: - validValues := []string{ - string(corev1.TaintEffectNoSchedule), - string(corev1.TaintEffectPreferNoSchedule), - string(corev1.TaintEffectNoExecute), - // TODO: Uncomment this block when implement TaintEffectNoScheduleNoAdmit. 
- // string(core.TaintEffectNoScheduleNoAdmit), - } - allErrors = append(allErrors, field.NotSupported(fldPath, *effect, validValues)) - } - return allErrors -} diff --git a/pkg/webhooks/resourceflavor_webhook_test.go b/pkg/webhooks/resourceflavor_webhook_test.go index 8e2983fca6..518b2c807e 100644 --- a/pkg/webhooks/resourceflavor_webhook_test.go +++ b/pkg/webhooks/resourceflavor_webhook_test.go @@ -63,45 +63,6 @@ func TestValidateResourceFlavor(t *testing.T) { field.Invalid(field.NewPath("spec", "nodeLabels"), "@abc", ""), }, }, - { - name: "bad tolerations", - rf: utiltesting.MakeResourceFlavor("resource-flavor"). - Toleration(corev1.Toleration{ - Key: "@abc", - Operator: corev1.TolerationOpEqual, - Value: "v", - Effect: corev1.TaintEffectNoSchedule, - }). - Toleration(corev1.Toleration{ - Key: "abc", - Operator: corev1.TolerationOpExists, - Value: "v", - Effect: corev1.TaintEffectNoSchedule, - }). - Toleration(corev1.Toleration{ - Key: "abc", - Operator: corev1.TolerationOpEqual, - Value: "v", - Effect: corev1.TaintEffect("not-valid"), - }). - Toleration(corev1.Toleration{ - Key: "abc", - Operator: corev1.TolerationOpEqual, - Value: "v", - Effect: corev1.TaintEffectNoSchedule, - }). - Obj(), - wantErr: field.ErrorList{ - field.Invalid(field.NewPath("spec", "tolerations").Index(0).Child("key"), "@abc", ""), - field.Invalid(field.NewPath("spec", "tolerations").Index(1).Child("operator"), corev1.Toleration{ - Key: "abc", - Operator: corev1.TolerationOpExists, - Value: "v", - Effect: corev1.TaintEffectNoSchedule, - }, ""), - field.NotSupported(field.NewPath("spec", "tolerations").Index(2).Child("effect"), corev1.TaintEffect("not-valid"), []corev1.TaintEffect{}), - }, - }, } for _, tc := range testcases { diff --git a/pkg/webhooks/workload_webhook.go b/pkg/webhooks/workload_webhook.go index a4527f693c..74c41ed34c 100644 --- a/pkg/webhooks/workload_webhook.go +++ b/pkg/webhooks/workload_webhook.go @@ -19,14 +19,12 @@ package webhooks import ( "context" "fmt" - "strings" corev1 "k8s.io/api/core/v1" apivalidation "k8s.io/apimachinery/pkg/api/validation" metav1validation "k8s.io/apimachinery/pkg/apis/meta/v1/validation" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/sets" - "k8s.io/apimachinery/pkg/util/validation" "k8s.io/apimachinery/pkg/util/validation/field" "k8s.io/klog/v2" "k8s.io/utils/ptr" @@ -60,15 +58,6 @@ func (w *WorkloadWebhook) Default(ctx context.Context, obj runtime.Object) error log := ctrl.LoggerFrom(ctx).WithName("workload-webhook") log.V(5).Info("Applying defaults", "workload", klog.KObj(wl)) - // Only when we have one podSet and its name is empty, - // we'll set it to the default name `main`. 
- if len(wl.Spec.PodSets) == 1 { - podSet := &wl.Spec.PodSets[0] - if len(podSet.Name) == 0 { - podSet.Name = kueue.DefaultPodSetName - } - } - // drop minCounts if PartialAdmission is not enabled if !features.Enabled(features.PartialAdmission) { for i := range wl.Spec.PodSets { @@ -122,22 +111,6 @@ func ValidateWorkload(obj *kueue.Workload) field.ErrorList { allErrs = append(allErrs, field.Invalid(specPath.Child("podSets"), variableCountPosets, "at most one podSet can use minCount")) } - if len(obj.Spec.PriorityClassName) > 0 { - msgs := validation.IsDNS1123Subdomain(obj.Spec.PriorityClassName) - if len(msgs) > 0 { - for _, msg := range msgs { - allErrs = append(allErrs, field.Invalid(specPath.Child("priorityClassName"), obj.Spec.PriorityClassName, msg)) - } - } - if obj.Spec.Priority == nil { - allErrs = append(allErrs, field.Invalid(specPath.Child("priority"), obj.Spec.Priority, "priority should not be nil when priorityClassName is set")) - } - } - - if len(obj.Spec.QueueName) > 0 { - allErrs = append(allErrs, validateNameReference(obj.Spec.QueueName, specPath.Child("queueName"))...) - } - statusPath := field.NewPath("status") if workload.HasQuotaReservation(obj) { allErrs = append(allErrs, validateAdmission(obj, statusPath.Child("admission"))...) @@ -152,10 +125,6 @@ func ValidateWorkload(obj *kueue.Workload) field.ErrorList { func validatePodSet(ps *kueue.PodSet, path *field.Path) field.ErrorList { var allErrs field.ErrorList - // Apply the same validation as container names. - for _, msg := range validation.IsDNS1123Label(ps.Name) { - allErrs = append(allErrs, field.Invalid(path.Child("name"), ps.Name, msg)) - } // validate initContainers icPath := path.Child("template", "spec", "initContainers") @@ -168,10 +137,6 @@ func validatePodSet(ps *kueue.PodSet, path *field.Path) field.ErrorList { allErrs = append(allErrs, validateContainer(&ps.Template.Spec.Containers[ci], cPath.Index(ci))...) } - if min := ptr.Deref(ps.MinCount, ps.Count); min > ps.Count || min < 0 { - allErrs = append(allErrs, field.Forbidden(path.Child("minCount"), fmt.Sprintf("%d should be positive and less or equal to %d", min, ps.Count))) - } - return allErrs } @@ -247,38 +212,6 @@ func validateTolerations(tolerations []corev1.Toleration, fldPath *field.Path) f if len(toleration.Key) > 0 { allErrors = append(allErrors, metav1validation.ValidateLabelName(toleration.Key, idxPath.Child("key"))...) 
} - - // empty toleration key with Exists operator and empty value means match all taints - if len(toleration.Key) == 0 && toleration.Operator != corev1.TolerationOpExists { - allErrors = append(allErrors, field.Invalid(idxPath.Child("operator"), toleration.Operator, - "operator must be Exists when `key` is empty, which means \"match all values and all keys\"")) - } - - if toleration.TolerationSeconds != nil && toleration.Effect != corev1.TaintEffectNoExecute { - allErrors = append(allErrors, field.Invalid(idxPath.Child("effect"), toleration.Effect, - "effect must be 'NoExecute' when `tolerationSeconds` is set")) - } - - // validate toleration operator and value - switch toleration.Operator { - // empty operator means Equal - case corev1.TolerationOpEqual, "": - if errs := validation.IsValidLabelValue(toleration.Value); len(errs) != 0 { - allErrors = append(allErrors, field.Invalid(idxPath.Child("operator"), toleration.Value, strings.Join(errs, ";"))) - } - case corev1.TolerationOpExists: - if len(toleration.Value) > 0 { - allErrors = append(allErrors, field.Invalid(idxPath.Child("operator"), toleration, "value must be empty when `operator` is 'Exists'")) - } - default: - validValues := []string{string(corev1.TolerationOpEqual), string(corev1.TolerationOpExists)} - allErrors = append(allErrors, field.NotSupported(idxPath.Child("operator"), toleration.Operator, validValues)) - } - - // validate toleration effect, empty toleration effect means match all taint effects - if len(toleration.Effect) > 0 { - allErrors = append(allErrors, validateTaintEffect(&toleration.Effect, true, idxPath.Child("effect"))...) - } } return allErrors } @@ -286,17 +219,12 @@ func validateTolerations(tolerations []corev1.Toleration, fldPath *field.Path) f func validateAdmission(obj *kueue.Workload, path *field.Path) field.ErrorList { admission := obj.Status.Admission var allErrs field.ErrorList - allErrs = append(allErrs, validateNameReference(string(admission.ClusterQueue), path.Child("clusterQueue"))...) names := sets.New[string]() for _, ps := range obj.Spec.PodSets { names.Insert(ps.Name) } assignmentsPath := path.Child("podSetAssignments") - if names.Len() != len(admission.PodSetAssignments) { - allErrs = append(allErrs, field.Invalid(assignmentsPath, field.OmitValueType{}, "must have the same number of podSets as the spec")) - } - for i, ps := range admission.PodSetAssignments { psaPath := assignmentsPath.Index(i) if !names.Has(ps.Name) { @@ -348,11 +276,8 @@ func ValidateWorkloadUpdate(newObj, oldObj *kueue.Workload) field.ErrorList { if workload.HasQuotaReservation(oldObj) { allErrs = append(allErrs, apivalidation.ValidateImmutableField(newObj.Spec.PodSets, oldObj.Spec.PodSets, specPath.Child("podSets"))...) - allErrs = append(allErrs, apivalidation.ValidateImmutableField(newObj.Spec.PriorityClassSource, oldObj.Spec.PriorityClassSource, specPath.Child("priorityClassSource"))...) - allErrs = append(allErrs, apivalidation.ValidateImmutableField(newObj.Spec.PriorityClassName, oldObj.Spec.PriorityClassName, specPath.Child("priorityClassName"))...) } if workload.HasQuotaReservation(newObj) && workload.HasQuotaReservation(oldObj) { - allErrs = append(allErrs, apivalidation.ValidateImmutableField(newObj.Spec.QueueName, oldObj.Spec.QueueName, specPath.Child("queueName"))...) allErrs = append(allErrs, validateReclaimablePodsUpdate(newObj, oldObj, field.NewPath("status", "reclaimablePods"))...) 
} allErrs = append(allErrs, validateAdmissionUpdate(newObj.Status.Admission, oldObj.Status.Admission, field.NewPath("status", "admission"))...) diff --git a/pkg/webhooks/workload_webhook_test.go b/pkg/webhooks/workload_webhook_test.go index fad58647b9..df1b6b9c3d 100644 --- a/pkg/webhooks/workload_webhook_test.go +++ b/pkg/webhooks/workload_webhook_test.go @@ -17,7 +17,6 @@ limitations under the License. package webhooks import ( - "context" "testing" "time" @@ -30,7 +29,6 @@ import ( "k8s.io/utils/ptr" kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" - "sigs.k8s.io/kueue/pkg/constants" testingutil "sigs.k8s.io/kueue/pkg/util/testing" ) @@ -39,61 +37,6 @@ const ( testWorkloadNamespace = "test-ns" ) -func TestWorkloadWebhookDefault(t *testing.T) { - cases := map[string]struct { - wl kueue.Workload - wantWl kueue.Workload - }{ - "add default podSet name": { - wl: kueue.Workload{ - Spec: kueue.WorkloadSpec{ - PodSets: []kueue.PodSet{ - {}, - }, - }, - }, - wantWl: kueue.Workload{ - Spec: kueue.WorkloadSpec{ - PodSets: []kueue.PodSet{ - {Name: "main"}, - }, - }, - }, - }, - "don't set podSetName if multiple": { - wl: kueue.Workload{ - Spec: kueue.WorkloadSpec{ - PodSets: []kueue.PodSet{ - {}, - {}, - }, - }, - }, - wantWl: kueue.Workload{ - Spec: kueue.WorkloadSpec{ - PodSets: []kueue.PodSet{ - {}, - {}, - }, - }, - }, - }, - } - for name, tc := range cases { - t.Run(name, func(t *testing.T) { - wh := &WorkloadWebhook{} - wlCopy := tc.wl.DeepCopy() - if err := wh.Default(context.Background(), wlCopy); err != nil { - t.Fatalf("Could not apply defaults: %v", err) - } - if diff := cmp.Diff(tc.wantWl, *wlCopy, - cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime")); diff != "" { - t.Errorf("Obtained wrong defaults (-want,+got):\n%s", diff) - } - }) - } -} - func TestValidateWorkload(t *testing.T) { specPath := field.NewPath("spec") podSetsPath := specPath.Child("podSets") @@ -117,52 +60,6 @@ func TestValidateWorkload(t *testing.T) { }, ).Obj(), }, - "should have a valid podSet name": { - workload: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).PodSets( - kueue.PodSet{ - Name: "@driver", - Count: 1, - }, - ).Obj(), - wantErr: field.ErrorList{field.Invalid(podSetsPath.Index(0).Child("name"), nil, "")}, - }, - "should have valid priorityClassName": { - workload: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace). - PriorityClass("invalid_class"). - Priority(0). - Obj(), - wantErr: field.ErrorList{ - field.Invalid(specPath.Child("priorityClassName"), nil, ""), - }, - }, - "should pass validation when priorityClassName is empty": { - workload: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).Obj(), - wantErr: nil, - }, - "should have priority once priorityClassName is set": { - workload: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace). - PriorityClass("priority"). - Obj(), - wantErr: field.ErrorList{ - field.Invalid(specPath.Child("priority"), nil, ""), - }, - }, - "should have a valid queueName": { - workload: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace). - Queue("@invalid"). - Obj(), - wantErr: field.ErrorList{ - field.Invalid(specPath.Child("queueName"), nil, ""), - }, - }, - "should have a valid clusterQueue name": { - workload: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace). - ReserveQuota(testingutil.MakeAdmission("@invalid").Obj()). 
- Obj(), - wantErr: field.ErrorList{ - field.Invalid(statusPath.Child("admission", "clusterQueue"), nil, ""), - }, - }, "should have a valid podSet name in status assignment": { workload: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace). ReserveQuota(testingutil.MakeAdmission("cluster-queue", "@invalid").Obj()). @@ -171,25 +68,6 @@ func TestValidateWorkload(t *testing.T) { field.NotFound(statusPath.Child("admission", "podSetAssignments").Index(0).Child("name"), nil), }, }, - "should have same podSets in admission": { - workload: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace). - PodSets( - kueue.PodSet{ - Name: "main2", - Count: 1, - }, - kueue.PodSet{ - Name: "main1", - Count: 1, - }, - ). - ReserveQuota(testingutil.MakeAdmission("cluster-queue", "main1", "main2", "main3").Obj()). - Obj(), - wantErr: field.ErrorList{ - field.Invalid(statusPath.Child("admission", "podSetAssignments"), nil, ""), - field.NotFound(statusPath.Child("admission", "podSetAssignments").Index(2).Child("name"), nil), - }, - }, "assignment usage should be divisible by count": { workload: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace). PodSets(*testingutil.MakePodSet("main", 3). @@ -350,26 +228,6 @@ func TestValidateWorkload(t *testing.T) { field.NotSupported(statusPath.Child("reclaimablePods").Key("ps2").Child("name"), nil, []string{}), }, }, - "invalid podSet minCount (negative)": { - workload: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace). - PodSets( - *testingutil.MakePodSet("ps1", 3).SetMinimumCount(-1).Obj(), - ). - Obj(), - wantErr: field.ErrorList{ - field.Forbidden(podSetsPath.Index(0).Child("minCount"), ""), - }, - }, - "invalid podSet minCount (too big)": { - workload: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace). - PodSets( - *testingutil.MakePodSet("ps1", 3).SetMinimumCount(4).Obj(), - ). - Obj(), - wantErr: field.ErrorList{ - field.Forbidden(podSetsPath.Index(0).Child("minCount"), ""), - }, - }, "too many variable count podSets": { workload: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace). 
PodSets( @@ -397,89 +255,6 @@ func TestValidateWorkloadUpdate(t *testing.T) { before, after *kueue.Workload wantErr field.ErrorList }{ - "podSets should not be updated when has quota reservation: count": { - before: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).ReserveQuota(testingutil.MakeAdmission("cq").Obj()).Obj(), - after: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).PodSets( - *testingutil.MakePodSet("main", 2).Obj(), - ).Obj(), - wantErr: field.ErrorList{ - field.Invalid(field.NewPath("spec").Child("podSets"), nil, ""), - }, - }, - "podSets should not be updated: podSpec": { - before: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).ReserveQuota(testingutil.MakeAdmission("cq").Obj()).Obj(), - after: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).PodSets( - kueue.PodSet{ - Name: "main", - Count: 1, - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "c-after", - Resources: corev1.ResourceRequirements{ - Requests: make(corev1.ResourceList), - }, - }, - }, - }, - }, - }, - ).Obj(), - wantErr: field.ErrorList{ - field.Invalid(field.NewPath("spec").Child("podSets"), nil, ""), - }, - }, - "queueName can be updated when not admitted": { - before: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).Queue("q1").Obj(), - after: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).Queue("q2").Obj(), - wantErr: nil, - }, - "queueName can be updated when admitting": { - before: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).Obj(), - after: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).Queue("q"). - ReserveQuota(testingutil.MakeAdmission("cq").Obj()).Obj(), - }, - "queueName should not be updated once admitted": { - before: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).Queue("q1"). - ReserveQuota(testingutil.MakeAdmission("cq").Obj()).Obj(), - after: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).Queue("q2"). - ReserveQuota(testingutil.MakeAdmission("cq").Obj()).Obj(), - wantErr: field.ErrorList{ - field.Invalid(field.NewPath("spec").Child("queueName"), nil, ""), - }, - }, - "queueName can be updated when admission is reset": { - before: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).Queue("q1"). 
- ReserveQuota(testingutil.MakeAdmission("cq").Obj()).Obj(), - after: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).Queue("q2").Obj(), - }, - "admission can be set": { - before: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).Obj(), - after: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).ReserveQuota( - testingutil.MakeAdmission("cluster-queue").Assignment("on-demand", "5", "1").Obj(), - ).Obj(), - wantErr: nil, - }, - "admission can be unset": { - before: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).ReserveQuota( - testingutil.MakeAdmission("cluster-queue").Assignment("on-demand", "5", "1").Obj(), - ).Obj(), - after: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).Obj(), - wantErr: nil, - }, - "admission should not be updated once set": { - before: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).ReserveQuota( - testingutil.MakeAdmission("cluster-queue").Obj(), - ).Obj(), - after: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).ReserveQuota( - testingutil.MakeAdmission("cluster-queue").Assignment("on-demand", "5", "1").Obj(), - ).Obj(), - wantErr: field.ErrorList{ - field.Invalid(field.NewPath("status", "admission"), nil, ""), - }, - }, - "reclaimable pod count can change up": { before: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace). PodSets( @@ -579,28 +354,6 @@ func TestValidateWorkloadUpdate(t *testing.T) { Obj(), wantErr: nil, }, - "priorityClassSource should not be updated": { - before: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).Queue("q"). - PriorityClass("test-class").PriorityClassSource(constants.PodPriorityClassSource). - Priority(10).ReserveQuota(testingutil.MakeAdmission("cq").Obj()).Obj(), - after: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).Queue("q"). - PriorityClass("test-class").PriorityClassSource(constants.WorkloadPriorityClassSource). - Priority(10).Obj(), - wantErr: field.ErrorList{ - field.Invalid(field.NewPath("spec").Child("priorityClassSource"), nil, ""), - }, - }, - "priorityClassName should not be updated": { - before: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).Queue("q"). - PriorityClass("test-class-1").PriorityClassSource(constants.PodPriorityClassSource). - Priority(10).ReserveQuota(testingutil.MakeAdmission("cq").Obj()).Obj(), - after: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).Queue("q"). - PriorityClass("test-class-2").PriorityClassSource(constants.PodPriorityClassSource). - Priority(10).Obj(), - wantErr: field.ErrorList{ - field.Invalid(field.NewPath("spec").Child("priorityClassName"), nil, ""), - }, - }, "podSetUpdates should be immutable when state is ready": { before: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).PodSets( *testingutil.MakePodSet("first", 1).Obj(), @@ -641,46 +394,6 @@ func TestValidateWorkloadUpdate(t *testing.T) { State: kueue.CheckStateReady, }).Obj(), }, - "updating priorityClassName before setting reserve quota for workload": { - before: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).Queue("q"). - PriorityClass("test-class-1").PriorityClassSource(constants.PodPriorityClassSource). - Priority(10).Obj(), - after: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).Queue("q"). - PriorityClass("test-class-2").PriorityClassSource(constants.PodPriorityClassSource). 
- Priority(10).Obj(), - wantErr: nil, - }, - "updating priorityClassSource before setting reserve quota for workload": { - before: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).Queue("q"). - PriorityClass("test-class").PriorityClassSource(constants.PodPriorityClassSource). - Priority(10).Obj(), - after: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).Queue("q"). - PriorityClass("test-class").PriorityClassSource(constants.WorkloadPriorityClassSource). - Priority(10).Obj(), - wantErr: nil, - }, - "updating podSets before setting reserve quota for workload": { - before: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).Obj(), - after: testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace).PodSets( - kueue.PodSet{ - Name: "main", - Count: 1, - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "c-after", - Resources: corev1.ResourceRequirements{ - Requests: make(corev1.ResourceList), - }, - }, - }, - }, - }, - }, - ).Obj(), - wantErr: nil, - }, } for name, tc := range testCases { t.Run(name, func(t *testing.T) { diff --git a/test/integration/webhook/workload_test.go b/test/integration/webhook/workload_test.go index bf49b6a365..657ddccf9a 100644 --- a/test/integration/webhook/workload_test.go +++ b/test/integration/webhook/workload_test.go @@ -15,12 +15,13 @@ package webhook import ( "fmt" + "time" "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" schedulingv1 "k8s.io/api/scheduling/v1" - "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/utils/ptr" @@ -29,6 +30,7 @@ import ( kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" "sigs.k8s.io/kueue/pkg/constants" "sigs.k8s.io/kueue/pkg/util/testing" + "sigs.k8s.io/kueue/pkg/workload" "sigs.k8s.io/kueue/test/util" ) @@ -82,22 +84,42 @@ var _ = ginkgo.Describe("Workload defaulting webhook", func() { gomega.Expect(created.Spec.PodSets[0].Name).Should(gomega.Equal(kueue.DefaultPodSetName)) }) + + ginkgo.It("Shouldn't set podSet name if multiple", func() { + ginkgo.By("Creating a new Workload") + // Not using the wrappers to avoid hiding any defaulting. + workload := kueue.Workload{ + ObjectMeta: metav1.ObjectMeta{Name: workloadName, Namespace: ns.Name}, + Spec: kueue.WorkloadSpec{ + PodSets: []kueue.PodSet{ + { + Count: 1, + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{}, + }, + }, + }, + { + Count: 1, + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{}, + }, + }, + }, + }, + }, + } + gomega.Expect(k8sClient.Create(ctx, &workload)).Should(testing.BeAPIError(testing.InvalidError)) + }) + }) }) var _ = ginkgo.Describe("Workload validating webhook", func() { ginkgo.Context("When creating a Workload", func() { - ginkgo.It("Should have valid PriorityClassName when creating", func() { - ginkgo.By("Creating a new Workload") - workload := testing.MakeWorkload(workloadName, ns.Name). - PriorityClass("invalid_class"). 
- Obj() - err := k8sClient.Create(ctx, workload) - gomega.Expect(err).Should(gomega.HaveOccurred()) - gomega.Expect(errors.IsForbidden(err)).Should(gomega.BeTrue(), "error: %v", err) - }) - ginkgo.DescribeTable("Should have valid PodSet when creating", func(podSetsCapacity int, podSetCount int, isInvalid bool) { podSets := make([]kueue.PodSet, podSetsCapacity) for i := range podSets { @@ -106,10 +128,9 @@ var _ = ginkgo.Describe("Workload validating webhook", func() { workload := testing.MakeWorkload(workloadName, ns.Name).PodSets(podSets...).Obj() err := k8sClient.Create(ctx, workload) if isInvalid { - gomega.Expect(err).Should(gomega.HaveOccurred()) - gomega.Expect(errors.IsInvalid(err)).Should(gomega.BeTrue(), "error: %v", err) + gomega.Expect(err).Should(testing.BeAPIError(testing.InvalidError), "error: %v", err) } else { - gomega.Expect(err).Should(gomega.Succeed()) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) } }, ginkgo.Entry("podSets count less than 1", 0, 1, true), @@ -117,6 +138,313 @@ var _ = ginkgo.Describe("Workload validating webhook", func() { ginkgo.Entry("invalid podSet.Count", 3, 0, true), ginkgo.Entry("valid podSet", 3, 3, false), ) + + ginkgo.DescribeTable("Should have valid values when creating", func(w func() *kueue.Workload, errorType gomega.OmegaMatcher) { + err := k8sClient.Create(ctx, w()) + if errorType != nil { + gomega.Expect(err).Should(errorType, "error: %v", err) + } else { + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + } + }, + ginkgo.Entry("valid workload", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name).PodSets( + *testing.MakePodSet("driver", 1).Obj(), + *testing.MakePodSet("workers", 100).Obj(), + ).Obj() + }, + nil), + ginkgo.Entry("invalid podSet name", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name).PodSets( + *testing.MakePodSet("@driver", 1).Obj(), + ).Obj() + }, + testing.BeAPIError(testing.InvalidError)), + ginkgo.Entry("invalid priorityClassName", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). + PriorityClass("invalid_class"). + Priority(0). + Obj() + }, + testing.BeAPIError(testing.InvalidError)), + ginkgo.Entry("empty priorityClassName is valid", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). + Obj() + }, + nil), + ginkgo.Entry("priority should not be nil when priorityClassName is set", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). + PriorityClass("priority"). + Obj() + }, + testing.BeAPIError(testing.InvalidError)), + ginkgo.Entry("invalid queueName", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). + Queue("@invalid"). + Obj() + }, + testing.BeAPIError(testing.InvalidError)), + ginkgo.Entry("should not request num-pods resource", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). + PodSets(kueue.PodSet{ + Name: "bad", + Count: 1, + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + InitContainers: []corev1.Container{ + { + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourcePods: resource.MustParse("1"), + }, + }, + }, + }, + Containers: []corev1.Container{ + { + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourcePods: resource.MustParse("1"), + }, + }, + }, + }, + }, + }, + }). 
+ Obj() + }, + testing.BeAPIError(testing.ForbiddenError)), + ginkgo.Entry("empty podSetUpdates should be valid since it is optional", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). + AdmissionChecks(kueue.AdmissionCheckState{}). + Obj() + }, + nil), + ginkgo.Entry("matched names in podSetUpdates with names in podSets", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). + PodSets( + *testing.MakePodSet("first", 1).Obj(), + *testing.MakePodSet("second", 1).Obj(), + ). + AdmissionChecks( + kueue.AdmissionCheckState{ + PodSetUpdates: []kueue.PodSetUpdate{ + { + Name: "first", + Labels: map[string]string{"l1": "first"}, + Annotations: map[string]string{"foo": "bar"}, + Tolerations: []corev1.Toleration{ + { + Key: "t1", + Operator: corev1.TolerationOpEqual, + Value: "t1v", + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](5), + }, + }, + NodeSelector: map[string]string{"type": "first"}, + }, + { + Name: "second", + Labels: map[string]string{"l2": "second"}, + Annotations: map[string]string{"foo": "baz"}, + Tolerations: []corev1.Toleration{ + { + Key: "t2", + Operator: corev1.TolerationOpEqual, + Value: "t2v", + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](10), + }, + }, + NodeSelector: map[string]string{"type": "second"}, + }, + }, + }, + ). + Obj() + }, + nil), + ginkgo.Entry("invalid podSet minCount (negative)", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). + PodSets( + *testing.MakePodSet("ps1", 3).SetMinimumCount(-1).Obj(), + ). + Obj() + }, + testing.BeAPIError(testing.InvalidError)), + ginkgo.Entry("invalid podSet minCount (too big)", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). + PodSets( + *testing.MakePodSet("ps1", 3).SetMinimumCount(4).Obj(), + ). + Obj() + }, + testing.BeAPIError(testing.InvalidError)), + ginkgo.Entry("too many variable count podSets", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). + PodSets( + *testing.MakePodSet("ps1", 3).SetMinimumCount(2).Obj(), + *testing.MakePodSet("ps2", 3).SetMinimumCount(1).Obj(), + ). + Obj() + }, + testing.BeAPIError(testing.ForbiddenError)), + ) + + ginkgo.DescribeTable("Should have valid values when setting Admission", func(w func() *kueue.Workload, a *kueue.Admission, errorType gomega.OmegaMatcher) { + workload := w() + gomega.Expect(k8sClient.Create(ctx, workload)).Should(gomega.Succeed()) + + err := util.SetQuotaReservation(ctx, k8sClient, workload, a) + if errorType != nil { + gomega.Expect(err).Should(errorType, "error: %v", err) + } else { + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + } + }, + ginkgo.Entry("invalid clusterQueue name", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). + Obj() + }, + testing.MakeAdmission("@invalid").Obj(), + testing.BeAPIError(testing.InvalidError)), + ginkgo.Entry("invalid podSet name in status assignment", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). + Obj() + }, + testing.MakeAdmission("cluster-queue", "@invalid").Obj(), + testing.BeAPIError(testing.InvalidError)), + ginkgo.Entry("mismatched names in admission with names in podSets", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). + PodSets( + *testing.MakePodSet("main2", 1).Obj(), + *testing.MakePodSet("main1", 1).Obj(), + ). 
+ Obj() + }, + testing.MakeAdmission("cluster-queue", "main1", "main2", "main3").Obj(), + testing.BeAPIError(testing.InvalidError)), + ginkgo.Entry("assignment usage should be divisible by count", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). + PodSets( + *testing.MakePodSet("main", 3). + Request(corev1.ResourceCPU, "1"). + Obj(), + ). + Obj() + }, + testing.MakeAdmission("cluster-queue"). + Assignment(corev1.ResourceCPU, "flv", "1"). + AssignmentPodCount(3). + Obj(), + testing.BeAPIError(testing.ForbiddenError)), + ) + + ginkgo.DescribeTable("Should have valid values when setting AdmissionCheckState", func(w func() *kueue.Workload, acs kueue.AdmissionCheckState) { + wl := w() + gomega.Expect(k8sClient.Create(ctx, wl)).Should(gomega.Succeed()) + + gomega.Eventually(func() error { + gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(wl), wl)).To(gomega.Succeed()) + workload.SetAdmissionCheckState(&wl.Status.AdmissionChecks, acs) + return k8sClient.Status().Update(ctx, wl) + }, util.Timeout, util.Interval).Should(testing.BeAPIError(testing.ForbiddenError)) + + }, + ginkgo.Entry("podSetUpdates have the same number of podSets", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). + PodSets( + *testing.MakePodSet("first", 1).Obj(), + *testing.MakePodSet("second", 1).Obj(), + ). + Obj() + }, + kueue.AdmissionCheckState{ + Name: "check", + State: kueue.CheckStateReady, + PodSetUpdates: []kueue.PodSetUpdate{{Name: "first"}}}, + ), + ginkgo.Entry("mismatched names in podSetUpdates with names in podSets", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). + PodSets( + *testing.MakePodSet("first", 1).Obj(), + *testing.MakePodSet("second", 1).Obj(), + ). + Obj() + }, + kueue.AdmissionCheckState{ + Name: "check", + State: kueue.CheckStateReady, + PodSetUpdates: []kueue.PodSetUpdate{{Name: "first"}, {Name: "third"}}}, + ), + ginkgo.Entry("invalid label name of podSetUpdate", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). + Obj() + }, + kueue.AdmissionCheckState{ + Name: "check", + State: kueue.CheckStateReady, + PodSetUpdates: []kueue.PodSetUpdate{{Name: "main", Labels: map[string]string{"@abc": "foo"}}}}, + ), + ginkgo.Entry("invalid node selector name of podSetUpdate", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). + Obj() + }, + kueue.AdmissionCheckState{ + Name: "check", + State: kueue.CheckStateReady, + PodSetUpdates: []kueue.PodSetUpdate{{Name: "main", NodeSelector: map[string]string{"@abc": "foo"}}}}, + ), + ginkgo.Entry("invalid label value of podSetUpdate", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). + Obj() + }, + kueue.AdmissionCheckState{ + Name: "check", + State: kueue.CheckStateReady, + PodSetUpdates: []kueue.PodSetUpdate{{Name: "main", Labels: map[string]string{"foo": "@abc"}}}}, + ), + ) + + ginkgo.It("invalid reclaimablePods", func() { + ginkgo.By("Creating a new Workload") + wl := testing.MakeWorkload(workloadName, ns.Name). + PodSets( + *testing.MakePodSet("ps1", 3).Obj(), + ). 
+ Obj() + gomega.Expect(k8sClient.Create(ctx, wl)).Should(gomega.Succeed()) + + err := workload.UpdateReclaimablePods(ctx, k8sClient, wl, []kueue.ReclaimablePod{ + {Name: "ps1", Count: 4}, + {Name: "ps2", Count: 1}, + }) + gomega.Expect(err).Should(testing.BeAPIError(testing.ForbiddenError), "error: %v", err) + }) + }) ginkgo.Context("When updating a Workload", func() { @@ -139,34 +467,293 @@ var _ = ginkgo.Describe("Workload validating webhook", func() { gomega.Expect(k8sClient.Delete(ctx, priorityClass)).To(gomega.Succeed()) }) - ginkgo.It("Should allow the change of priority", func() { - ginkgo.By("Creating a new Workload") - workload := testing.MakeWorkload(workloadName, ns.Name).Obj() - gomega.Expect(k8sClient.Create(ctx, workload)).Should(gomega.Succeed()) + ginkgo.DescribeTable("Validate Workload on update", + func(w func() *kueue.Workload, setQuotaReservation bool, updateWl func(newWL *kueue.Workload), matcher gomega.OmegaMatcher) { + ginkgo.By("Creating a new Workload") + workload := w() + gomega.Expect(k8sClient.Create(ctx, workload)).Should(gomega.Succeed()) + if setQuotaReservation { + gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, workload, testing.MakeAdmission("cq").Obj())).Should(gomega.Succeed()) + } - ginkgo.By("Updating the priority") - gomega.Eventually(func() error { - var newWL kueue.Workload - gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(workload), &newWL)).To(gomega.Succeed()) - newWL.Spec.Priority = ptr.To[int32](10) - return k8sClient.Update(ctx, &newWL) - }, util.Timeout, util.Interval).Should(gomega.Succeed()) - }) - - ginkgo.It("Should forbid the change of spec.podSet", func() { - ginkgo.By("Creating a new Workload") - workload := testing.MakeWorkload(workloadName, ns.Name).Obj() - gomega.Expect(k8sClient.Create(ctx, workload)).Should(gomega.Succeed()) - gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, workload, testing.MakeAdmission("cq").Obj())).Should(gomega.Succeed()) - - ginkgo.By("Updating podSet") - gomega.Eventually(func() error { - var newWL kueue.Workload - gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(workload), &newWL)).To(gomega.Succeed()) - newWL.Spec.PodSets[0].Count = 10 - return k8sClient.Update(ctx, &newWL) - }, util.Timeout, util.Interval).Should(testing.BeForbiddenError()) - }) + gomega.Eventually(func() error { + var newWL kueue.Workload + gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(workload), &newWL)).To(gomega.Succeed()) + updateWl(&newWL) + return k8sClient.Update(ctx, &newWL) + }, util.Timeout, util.Interval).Should(matcher) + }, + ginkgo.Entry("podSets should not be updated when has quota reservation: count", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name).Obj() + }, + true, + func(newWL *kueue.Workload) { + newWL.Spec.PodSets = []kueue.PodSet{*testing.MakePodSet("main", 2).Obj()} + }, + testing.BeAPIError(testing.ForbiddenError), + ), + ginkgo.Entry("podSets should not be updated: podSpec", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name).Obj() + }, + true, + func(newWL *kueue.Workload) { + newWL.Spec.PodSets = []kueue.PodSet{{ + Name: "main", + Count: 1, + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "c-after", + Resources: corev1.ResourceRequirements{ + Requests: make(corev1.ResourceList), + }, + }, + }, + }, + }, + }} + }, + testing.BeAPIError(testing.ForbiddenError), + ), + ginkgo.Entry("queueName can be updated when not admitted", + func() 
*kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name).Queue("q1").Obj() + }, + false, + func(newWL *kueue.Workload) { + newWL.Spec.QueueName = "q2" + }, + gomega.Succeed(), + ), + ginkgo.Entry("queueName can be updated when admitting", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name).Obj() + }, + false, + func(newWL *kueue.Workload) { + newWL.Spec.QueueName = "q" + }, + gomega.Succeed(), + ), + ginkgo.Entry("queueName should not be updated once admitted", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name).Queue("q1").Obj() + }, + true, + func(newWL *kueue.Workload) { + newWL.Spec.QueueName = "q2" + }, + testing.BeAPIError(testing.InvalidError), + ), + ginkgo.Entry("queueName can be updated when admission is reset", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name).Queue("q1"). + ReserveQuota(testing.MakeAdmission("cq").Obj()).Obj() + }, + false, + func(newWL *kueue.Workload) { + newWL.Spec.QueueName = "q2" + newWL.Status = kueue.WorkloadStatus{} + }, + gomega.Succeed(), + ), + ginkgo.Entry("admission can be set", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name).Obj() + }, + false, + func(newWL *kueue.Workload) { + newWL.Status = kueue.WorkloadStatus{ + Admission: testing.MakeAdmission("cluster-queue").Assignment("on-demand", "5", "1").Obj(), + Conditions: []metav1.Condition{{ + Type: kueue.WorkloadQuotaReserved, + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.NewTime(time.Now()), + Reason: "AdmittedByTest", + Message: "Admitted by ClusterQueue cluster-queue", + }}, + } + }, + gomega.Succeed(), + ), + ginkgo.Entry("admission can be unset", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name).ReserveQuota( + testing.MakeAdmission("cluster-queue").Assignment("on-demand", "5", "1").Obj(), + ).Obj() + }, + false, + func(newWL *kueue.Workload) { + newWL.Status = kueue.WorkloadStatus{} + }, + gomega.Succeed(), + ), + ginkgo.Entry("priorityClassSource should not be updated", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). + Queue("q"). + PriorityClass("test-class").PriorityClassSource(constants.PodPriorityClassSource). + Priority(10). + Obj() + }, + true, + func(newWL *kueue.Workload) { + newWL.Spec.PriorityClassSource = constants.WorkloadPriorityClassSource + }, + testing.BeAPIError(testing.InvalidError), + ), + ginkgo.Entry("priorityClassName should not be updated", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). + Queue("q"). + PriorityClass("test-class-1").PriorityClassSource(constants.PodPriorityClassSource). + Priority(10). + Obj() + }, + true, + func(newWL *kueue.Workload) { + newWL.Spec.PriorityClassName = "test-class-2" + }, + testing.BeAPIError(testing.InvalidError), + ), + ginkgo.Entry("should change other fields of admissionchecks when podSetUpdates is immutable", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). 
+ PodSets( + *testing.MakePodSet("first", 1).Obj(), + *testing.MakePodSet("second", 1).Obj(), + ).AdmissionChecks( + kueue.AdmissionCheckState{ + Name: "ac1", + Message: "old", + PodSetUpdates: []kueue.PodSetUpdate{{Name: "first", Labels: map[string]string{"foo": "bar"}}, {Name: "second"}}, + State: kueue.CheckStateReady, + }).Obj() + }, + false, + func(newWL *kueue.Workload) { + newWL.Status.AdmissionChecks = []kueue.AdmissionCheckState{ + { + Name: "ac1", + Message: "new", + LastTransitionTime: metav1.NewTime(time.Now()), + PodSetUpdates: []kueue.PodSetUpdate{{Name: "first", Labels: map[string]string{"foo": "bar"}}, {Name: "second"}}, + State: kueue.CheckStateReady, + }, + } + }, + gomega.Succeed(), + ), + ginkgo.Entry("updating priorityClassName before setting reserve quota for workload", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). + Queue("q"). + PriorityClass("test-class-1").PriorityClassSource(constants.PodPriorityClassSource). + Priority(10).Obj() + }, + false, + func(newWL *kueue.Workload) { + newWL.Spec.PriorityClassName = "test-class-2" + }, + gomega.Succeed(), + ), + ginkgo.Entry("updating priorityClassSource before setting reserve quota for workload", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). + Queue("q"). + PriorityClass("test-class").PriorityClassSource(constants.PodPriorityClassSource). + Priority(10).Obj() + }, + false, + func(newWL *kueue.Workload) { + newWL.Spec.PriorityClassSource = constants.WorkloadPriorityClassSource + }, + gomega.Succeed(), + ), + ginkgo.Entry("updating podSets before setting reserve quota for workload", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name).Obj() + }, + false, + func(newWL *kueue.Workload) { + newWL.Spec.PodSets = []kueue.PodSet{ + { + Name: "main", + Count: 1, + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "c-after", + Resources: corev1.ResourceRequirements{ + Requests: make(corev1.ResourceList), + }, + }, + }, + }, + }, + }, + } + }, + gomega.Succeed(), + ), + ginkgo.Entry("Should allow the change of priority", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name).Obj() + }, + false, + func(newWL *kueue.Workload) { + newWL.Spec.Priority = ptr.To[int32](10) + }, + gomega.Succeed(), + ), + ginkgo.Entry("Should forbid the change of spec.podSet", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name).Obj() + }, + true, + func(newWL *kueue.Workload) { + newWL.Spec.PodSets[0].Count = 10 + }, + testing.BeAPIError(testing.ForbiddenError), + ), + ginkgo.Entry("reclaimable pod count can go to 0 if the job is suspended", + func() *kueue.Workload { + return testing.MakeWorkload(workloadName, ns.Name). + PodSets( + *testing.MakePodSet("ps1", 3).Obj(), + *testing.MakePodSet("ps2", 3).Obj(), + ). + ReserveQuota( + testing.MakeAdmission("cluster-queue"). + PodSets(kueue.PodSetAssignment{Name: "ps1"}, kueue.PodSetAssignment{Name: "ps2"}). + Obj(), + ). + ReclaimablePods( + kueue.ReclaimablePod{Name: "ps1", Count: 2}, + kueue.ReclaimablePod{Name: "ps2", Count: 1}, + ). 
+ Obj() + }, + false, + func(newWL *kueue.Workload) { + newWL.Status.AdmissionChecks = []kueue.AdmissionCheckState{ + { + PodSetUpdates: []kueue.PodSetUpdate{{Name: "ps1"}, {Name: "ps2"}}, + State: kueue.CheckStateReady, + }, + } + newWL.Status.ReclaimablePods = []kueue.ReclaimablePod{ + {Name: "ps1", Count: 0}, + {Name: "ps2", Count: 1}, + } + }, + gomega.Succeed(), + ), + ) ginkgo.It("Should forbid the change of spec.queueName of an admitted workload", func() { ginkgo.By("Creating and admitting a new Workload") @@ -183,7 +770,7 @@ var _ = ginkgo.Describe("Workload validating webhook", func() { gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(workload), &newWL)).To(gomega.Succeed()) newWL.Spec.QueueName = "queue2" return k8sClient.Update(ctx, &newWL) - }, util.Timeout, util.Interval).Should(testing.BeForbiddenError()) + }, util.Timeout, util.Interval).Should(testing.BeAPIError(testing.InvalidError)) }) ginkgo.It("Should forbid the change of spec.admission", func() { @@ -205,7 +792,7 @@ var _ = ginkgo.Describe("Workload validating webhook", func() { gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(workload), &newWL)).To(gomega.Succeed()) newWL.Status.Admission.ClusterQueue = "foo-cluster-queue" return k8sClient.Status().Update(ctx, &newWL) - }, util.Timeout, util.Interval).Should(testing.BeForbiddenError()) + }, util.Timeout, util.Interval).Should(testing.BeAPIError(testing.ForbiddenError)) }) @@ -213,8 +800,7 @@ var _ = ginkgo.Describe("Workload validating webhook", func() { ginkgo.By("Creating a new Workload") workload := testing.MakeWorkload(workloadName, ns.Name).PriorityClass("priority").Obj() err := k8sClient.Create(ctx, workload) - gomega.Expect(err).Should(gomega.HaveOccurred()) - gomega.Expect(errors.IsForbidden(err)).Should(gomega.BeTrue(), "error: %v", err) + gomega.Expect(err).Should(testing.BeAPIError(testing.InvalidError), "error: %v", err) }) ginkgo.It("workload's priority should be mutable when referencing WorkloadPriorityClass", func() { @@ -260,5 +846,114 @@ var _ = ginkgo.Describe("Workload validating webhook", func() { return finalQueueWorkload.Spec.Priority }, util.Timeout, util.Interval).Should(gomega.Equal(&updatedPriority)) }) + + ginkgo.It("admission should not be updated once set", func() { + ginkgo.By("Creating a new Workload") + workload := testing.MakeWorkload(workloadName, ns.Name).Obj() + gomega.Expect(k8sClient.Create(ctx, workload)).Should(gomega.Succeed()) + gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, workload, testing.MakeAdmission("cluster-queue").Obj())).Should(gomega.Succeed()) + + ginkgo.By("Updating the workload setting admission") + gomega.Eventually(func() error { + var newWL kueue.Workload + gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(workload), &newWL)).To(gomega.Succeed()) + newWL.Status.Admission = testing.MakeAdmission("cluster-queue").Assignment("on-demand", "5", "1").Obj() + return k8sClient.Status().Update(ctx, &newWL) + }, util.Timeout, util.Interval).Should(testing.BeAPIError(testing.ForbiddenError)) + }) + + ginkgo.It("reclaimable pod count can change up", func() { + ginkgo.By("Creating a new Workload") + wl := testing.MakeWorkload(workloadName, ns.Name). + PodSets( + *testing.MakePodSet("ps1", 3).Obj(), + *testing.MakePodSet("ps2", 3).Obj(), + ). 
+ Obj() + gomega.Expect(k8sClient.Create(ctx, wl)).Should(gomega.Succeed()) + gomega.Expect(workload.UpdateReclaimablePods(ctx, k8sClient, wl, []kueue.ReclaimablePod{ + {Name: "ps1", Count: 1}, + })).Should(gomega.Succeed()) + + gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, wl, + testing.MakeAdmission("cluster-queue"). + PodSets( + kueue.PodSetAssignment{Name: "ps1"}, + kueue.PodSetAssignment{Name: "ps2"}). + Obj())).Should(gomega.Succeed()) + + ginkgo.By("Updating reclaimable pods") + err := workload.UpdateReclaimablePods(ctx, k8sClient, wl, []kueue.ReclaimablePod{ + {Name: "ps1", Count: 2}, + {Name: "ps2", Count: 1}, + }) + gomega.Expect(err).Should(gomega.Succeed()) + }) + + ginkgo.It("reclaimable pod count cannot change down", func() { + ginkgo.By("Creating a new Workload") + wl := testing.MakeWorkload(workloadName, ns.Name). + PodSets( + *testing.MakePodSet("ps1", 3).Obj(), + *testing.MakePodSet("ps2", 3).Obj(), + ). + Obj() + gomega.Expect(k8sClient.Create(ctx, wl)).Should(gomega.Succeed()) + gomega.Expect(workload.UpdateReclaimablePods(ctx, k8sClient, wl, []kueue.ReclaimablePod{ + {Name: "ps1", Count: 2}, + {Name: "ps2", Count: 1}, + })).Should(gomega.Succeed()) + + gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, wl, + testing.MakeAdmission("cluster-queue"). + PodSets( + kueue.PodSetAssignment{Name: "ps1"}, + kueue.PodSetAssignment{Name: "ps2"}). + Obj())).Should(gomega.Succeed()) + + ginkgo.By("Updating reclaimable pods") + err := workload.UpdateReclaimablePods(ctx, k8sClient, wl, []kueue.ReclaimablePod{ + {Name: "ps1", Count: 1}, + }) + gomega.Expect(err).Should(testing.BeAPIError(testing.ForbiddenError)) + }) + + ginkgo.It("podSetUpdates should be immutable when state is ready", func() { + ginkgo.By("Creating a new Workload") + wl := testing.MakeWorkload(workloadName, ns.Name). + PodSets( + *testing.MakePodSet("first", 1).Obj(), + *testing.MakePodSet("second", 1).Obj(), + ). 
+ Obj() + gomega.Expect(k8sClient.Create(ctx, wl)).Should(gomega.Succeed()) + + ginkgo.By("Setting admission check state") + gomega.Eventually(func() error { + gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(wl), wl)).To(gomega.Succeed()) + workload.SetAdmissionCheckState(&wl.Status.AdmissionChecks, kueue.AdmissionCheckState{ + Name: "ac1", + Message: "old", + LastTransitionTime: metav1.NewTime(time.Now()), + PodSetUpdates: []kueue.PodSetUpdate{{Name: "first", Labels: map[string]string{"foo": "bar"}}, {Name: "second"}}, + State: kueue.CheckStateReady, + }) + return k8sClient.Status().Update(ctx, wl) + }, util.Timeout, util.Interval).Should(gomega.Succeed()) + + ginkgo.By("Updating admission check state") + gomega.Eventually(func() error { + gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(wl), wl)).To(gomega.Succeed()) + workload.SetAdmissionCheckState(&wl.Status.AdmissionChecks, kueue.AdmissionCheckState{ + Name: "ac1", + Message: "new", + LastTransitionTime: metav1.NewTime(time.Now()), + PodSetUpdates: []kueue.PodSetUpdate{{Name: "first", Labels: map[string]string{"foo": "baz"}}, {Name: "second"}}, + State: kueue.CheckStateReady, + }) + return k8sClient.Status().Update(ctx, wl) + }, util.Timeout, util.Interval).Should(testing.BeAPIError(testing.ForbiddenError)) + }) + }) }) From 37c0943ab03c141d2d559decd44f29a98e76ce6d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Apr 2024 03:11:19 -0700 Subject: [PATCH 45/49] Bump github.com/onsi/ginkgo/v2 from 2.17.1 to 2.17.2 (#2091) Bumps [github.com/onsi/ginkgo/v2](https://github.com/onsi/ginkgo) from 2.17.1 to 2.17.2. - [Release notes](https://github.com/onsi/ginkgo/releases) - [Changelog](https://github.com/onsi/ginkgo/blob/master/CHANGELOG.md) - [Commits](https://github.com/onsi/ginkgo/compare/v2.17.1...v2.17.2) --- updated-dependencies: - dependency-name: github.com/onsi/ginkgo/v2 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 20 ++++++++++---------- go.sum | 41 ++++++++++++++++++++--------------------- 2 files changed, 30 insertions(+), 31 deletions(-) diff --git a/go.mod b/go.mod index c5622297bb..dadb4b4c6f 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ require ( github.com/google/go-cmp v0.6.0 github.com/kubeflow/mpi-operator v0.5.0 github.com/kubeflow/training-operator v1.7.0 - github.com/onsi/ginkgo/v2 v2.17.1 + github.com/onsi/ginkgo/v2 v2.17.2 github.com/onsi/gomega v1.33.0 github.com/open-policy-agent/cert-controller v0.10.1 github.com/prometheus/client_golang v1.18.0 @@ -56,7 +56,7 @@ require ( github.com/go-openapi/jsonpointer v0.20.0 // indirect github.com/go-openapi/jsonreference v0.20.2 // indirect github.com/go-openapi/swag v0.22.4 // indirect - github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect + github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/gobuffalo/flect v1.0.2 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect @@ -64,7 +64,7 @@ require ( github.com/google/cel-go v0.17.7 // indirect github.com/google/gnostic-models v0.6.8 // indirect github.com/google/gofuzz v1.2.0 // indirect - github.com/google/pprof v0.0.0-20230323073829-e72429f035bd // indirect + github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 // indirect github.com/google/uuid v1.6.0 // indirect github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 // indirect @@ -100,17 +100,17 @@ require ( go.opentelemetry.io/proto/otlp v1.0.0 // indirect go.uber.org/atomic v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/crypto v0.21.0 // indirect + golang.org/x/crypto v0.22.0 // indirect golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect - golang.org/x/mod v0.14.0 // indirect - golang.org/x/net v0.23.0 // indirect + golang.org/x/mod v0.17.0 // indirect + golang.org/x/net v0.24.0 // indirect golang.org/x/oauth2 v0.12.0 // indirect - golang.org/x/sync v0.6.0 // indirect - golang.org/x/sys v0.18.0 // indirect - golang.org/x/term v0.18.0 // indirect + golang.org/x/sync v0.7.0 // indirect + golang.org/x/sys v0.19.0 // indirect + golang.org/x/term v0.19.0 // indirect golang.org/x/text v0.14.0 // indirect golang.org/x/time v0.3.0 // indirect - golang.org/x/tools v0.17.0 // indirect + golang.org/x/tools v0.20.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/appengine v1.6.8 // indirect google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d // indirect diff --git a/go.sum b/go.sum index 954c1391b5..26f89a815e 100644 --- a/go.sum +++ b/go.sum @@ -61,8 +61,8 @@ github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= github.com/go-openapi/swag v0.22.4 h1:QLMzNJnMGPRNDCbySlcj1x01tzU8/9LTTL9hZZZogBU= github.com/go-openapi/swag v0.22.4/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod 
h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/gobuffalo/flect v1.0.2 h1:eqjPGSo2WmjgY2XlpGwo2NXgL3RucAKo4k4qQMNA5sA= github.com/gobuffalo/flect v1.0.2/go.mod h1:A5msMlrHtLqh9umBSnvabjsMrCcCpAyzglnDvkbYKHs= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= @@ -93,8 +93,8 @@ github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/ github.com/google/gofuzz v1.1.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20230323073829-e72429f035bd h1:r8yyd+DJDmsUhGrRBxH5Pj7KeFK5l+Y3FsgT8keqKtk= -github.com/google/pprof v0.0.0-20230323073829-e72429f035bd/go.mod h1:79YE0hCXdHag9sBkw2o+N/YnZtTkXi0UT9Nnixa5eYk= +github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 h1:k7nVchz72niMH6YLQNvHSdIE7iqsQxK1P41mySCvssg= +github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc= @@ -151,8 +151,8 @@ github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= -github.com/onsi/ginkgo/v2 v2.17.1 h1:V++EzdbhI4ZV4ev0UTIj0PzhzOcReJFyJaLjtSF55M8= -github.com/onsi/ginkgo/v2 v2.17.1/go.mod h1:llBI3WDLL9Z6taip6f33H76YcWtJv+7R3HigUjbIBOs= +github.com/onsi/ginkgo/v2 v2.17.2 h1:7eMhcy3GimbsA3hEnVKdw/PQM9XN9krpKVXsZdph0/g= +github.com/onsi/ginkgo/v2 v2.17.2/go.mod h1:nP2DPOQoNsQmsVyv5rDA8JkXQoCs6goXIvr/PRJ1eCc= github.com/onsi/gomega v1.33.0 h1:snPCflnZrpMsy94p4lXVEkHo12lmPnc3vY5XBbreexE= github.com/onsi/gomega v1.33.0/go.mod h1:+925n5YtiFsLzzafLUHzVMBpvvRAzrydIBiSIxjX3wY= github.com/open-policy-agent/cert-controller v0.10.1 h1:RXSYoyn8FdCenWecRP//UV5nbVfmstNpj4kHQFkvPK4= @@ -190,7 +190,6 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= @@ -250,31 +249,31 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= 
-golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= +golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30= +golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M= golang.org/x/exp v0.0.0-20230905200255-921286631fa9 h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g= golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.14.0 h1:dGoOF9QVLYng8IHTm7BAyWqCqSheQ5pYWGhzW00YJr0= -golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= +golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= -golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= +golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= golang.org/x/oauth2 v0.12.0 h1:smVPGxink+n1ZI5pkQa8y6fZT0RW0MgCO5bFpepy4B4= golang.org/x/oauth2 v0.12.0/go.mod h1:A74bZ3aGXgCY0qaIC9Ahg6Lglin4AMAco8cIv9baba4= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= -golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -285,12 +284,12 @@ golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= -golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= +golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= -golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= +golang.org/x/term v0.19.0 h1:+ThwsDv+tYfnJFhF4L8jITxu1tdTWRTZpdsWgEgjL6Q= +golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= @@ -305,8 +304,8 @@ golang.org/x/tools v0.0.0-20200505023115-26f46d2f7ef8/go.mod h1:EkVYQZoAsY45+roY golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.17.0 h1:FvmRgNOcs3kOa+T20R1uhfP9F6HgG2mfxDv1vrx1Htc= -golang.org/x/tools v0.17.0/go.mod h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps= +golang.org/x/tools v0.20.0 h1:hz/CVckiOxybQvFw6h7b/q80NTr9IUQb4s1IIzW7KNY= +golang.org/x/tools v0.20.0/go.mod h1:WvitBU7JJf6A4jOdg4S1tviW9bhUxkgeCui/0JHctQg= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= From 5d4a7afcf69cefd93eade9761d917233b101ab25 Mon Sep 17 00:00:00 2001 From: Elad Dolev Date: Mon, 29 Apr 2024 15:35:24 +0300 Subject: [PATCH 46/49] fix(helm): wrong webhooks indentation (#2086) Signed-off-by: Elad Dolev --- charts/kueue/templates/webhook/webhook.yaml | 4 ++-- hack/update-helm.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/charts/kueue/templates/webhook/webhook.yaml b/charts/kueue/templates/webhook/webhook.yaml index 9255cc8db4..54cdbef013 100644 --- a/charts/kueue/templates/webhook/webhook.yaml +++ b/charts/kueue/templates/webhook/webhook.yaml @@ -178,7 +178,7 @@ webhooks: name: mpod.kb.io namespaceSelector: {{- if and (hasKey $integrationsConfig "podOptions") (hasKey ($integrationsConfig.podOptions) "namespaceSelector") }} - {{- toYaml $integrationsConfig.podOptions.namespaceSelector | nindent 4 -}} + {{- toYaml $integrationsConfig.podOptions.namespaceSelector | nindent 6 -}} {{- else }} matchExpressions: - key: kubernetes.io/metadata.name @@ -480,7 +480,7 @@ webhooks: name: vpod.kb.io namespaceSelector: {{- if and (hasKey $integrationsConfig "podOptions") (hasKey ($integrationsConfig.podOptions) "namespaceSelector") }} - {{- toYaml $integrationsConfig.podOptions.namespaceSelector | nindent 4 -}} + {{- toYaml $integrationsConfig.podOptions.namespaceSelector | nindent 6 -}} {{- else }} matchExpressions: - 
key: kubernetes.io/metadata.name diff --git a/hack/update-helm.sh b/hack/update-helm.sh index 4ae0aeee3e..9df51beffc 100755 --- a/hack/update-helm.sh +++ b/hack/update-helm.sh @@ -114,7 +114,7 @@ add_webhook_pod_mutate=$( name: mpod.kb.io namespaceSelector: {{- if and (hasKey $integrationsConfig "podOptions") (hasKey ($integrationsConfig.podOptions) "namespaceSelector") }} - {{- toYaml $integrationsConfig.podOptions.namespaceSelector | nindent 4 -}} + {{- toYaml $integrationsConfig.podOptions.namespaceSelector | nindent 6 -}} {{- else }} matchExpressions: - key: kubernetes.io/metadata.name @@ -135,7 +135,7 @@ add_webhook_pod_validate=$( name: vpod.kb.io namespaceSelector: {{- if and (hasKey $integrationsConfig "podOptions") (hasKey ($integrationsConfig.podOptions) "namespaceSelector") }} - {{- toYaml $integrationsConfig.podOptions.namespaceSelector | nindent 4 -}} + {{- toYaml $integrationsConfig.podOptions.namespaceSelector | nindent 6 -}} {{- else }} matchExpressions: - key: kubernetes.io/metadata.name From b5d3b4d002f4d6a507e18a42d2e9abe892f1f36a Mon Sep 17 00:00:00 2001 From: Elad Dolev Date: Mon, 29 Apr 2024 16:00:14 +0300 Subject: [PATCH 47/49] fix(helm): wrong secret name in certificate (#2087) Signed-off-by: Elad Dolev --- charts/kueue/templates/certmanager/certificate.yaml | 6 +++--- charts/kueue/templates/internalcert/secret.yaml | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/charts/kueue/templates/certmanager/certificate.yaml b/charts/kueue/templates/certmanager/certificate.yaml index e9c3d119e6..b874edc7ee 100644 --- a/charts/kueue/templates/certmanager/certificate.yaml +++ b/charts/kueue/templates/certmanager/certificate.yaml @@ -19,9 +19,9 @@ metadata: spec: dnsNames: - '{{ include "kueue.fullname" . }}-webhook-service.{{ .Release.Namespace }}.svc' - - '{{ include "kueue.fullname" . }}-webhook-service.{{ .Release.Namespace }}.svc.{{ .Values.kubernetesClusterDomain}}' + - '{{ include "kueue.fullname" . }}-webhook-service.{{ .Release.Namespace }}.svc.{{ .Values.kubernetesClusterDomain }}' issuerRef: kind: Issuer - name: '{{ include "kueue.fullname" . }}-selfsigned-issuer' - secretName: webhook-server-cert + name: {{ include "kueue.fullname" . }}-selfsigned-issuer + secretName: {{ include "kueue.fullname" . }}-webhook-server-cert {{- end }} diff --git a/charts/kueue/templates/internalcert/secret.yaml b/charts/kueue/templates/internalcert/secret.yaml index 77c7f0a97a..9c8da2c481 100644 --- a/charts/kueue/templates/internalcert/secret.yaml +++ b/charts/kueue/templates/internalcert/secret.yaml @@ -1,3 +1,4 @@ +{{- if not .Values.enableCertManager }} apiVersion: v1 kind: Secret metadata: @@ -5,3 +6,4 @@ metadata: namespace: '{{ .Release.Namespace }}' labels: {{- include "kueue.labels" . | nindent 4 }} +{{- end }} From d9a5fd35574246f0bcc7403d9c8b8bb207125d02 Mon Sep 17 00:00:00 2001 From: Mykhailo Bobrovskyi Date: Mon, 29 Apr 2024 18:05:51 +0300 Subject: [PATCH 48/49] [scalability] Changed binary directory of scalability runner and minikueue. 
(#2079) --- Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 9f22c73847..b9a73f936b 100644 --- a/Makefile +++ b/Makefile @@ -213,14 +213,15 @@ run-test-multikueue-e2e-%: FORCE @echo Running multikueue e2e for k8s ${K8S_VERSION} E2E_KIND_VERSION="kindest/node:v$(K8S_VERSION)" KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) CREATE_KIND_CLUSTER=$(CREATE_KIND_CLUSTER) ARTIFACTS="$(ARTIFACTS)/$@" IMAGE_TAG=$(IMAGE_TAG) GINKGO_ARGS="$(GINKGO_ARGS)" JOBSET_VERSION=$(JOBSET_VERSION) ./hack/multikueue-e2e-test.sh -SCALABILITY_RUNNER := $(ARTIFACTS)/performance-scheduler-runner +SCALABILITY_RUNNER := $(PROJECT_DIR)/bin/performance-scheduler-runner .PHONY: performance-scheduler-runner performance-scheduler-runner: $(GO_BUILD_ENV) $(GO_CMD) build -ldflags="$(LD_FLAGS)" -o $(SCALABILITY_RUNNER) test/performance/scheduler/runner/main.go +MINIMALKUEUE_RUNNER := $(PROJECT_DIR)/bin/minimalkueue .PHONY: minimalkueue minimalkueue: - $(GO_BUILD_ENV) $(GO_CMD) build -ldflags="$(LD_FLAGS)" -o $(ARTIFACTS)/minimalkueue test/performance/scheduler/minimalkueue/main.go + $(GO_BUILD_ENV) $(GO_CMD) build -ldflags="$(LD_FLAGS)" -o $(MINIMALKUEUE_RUNNER) test/performance/scheduler/minimalkueue/main.go ifdef SCALABILITY_CPU_PROFILE SCALABILITY_EXTRA_ARGS += --withCPUProfile=true @@ -249,7 +250,7 @@ run-performance-scheduler: envtest performance-scheduler-runner minimalkueue --o $(SCALABILITY_RUN_DIR) \ --crds=$(PROJECT_DIR)/config/components/crd/bases \ --generatorConfig=$(SCALABILITY_GENERATOR_CONFIG) \ - --minimalKueue=$(ARTIFACTS)/minimalkueue $(SCALABILITY_EXTRA_ARGS) $(SCALABILITY_SCRAPE_ARGS) + --minimalKueue=$(MINIMALKUEUE_RUNNER) $(SCALABILITY_EXTRA_ARGS) $(SCALABILITY_SCRAPE_ARGS) .PHONY: test-performance-scheduler test-performance-scheduler: gotestsum run-performance-scheduler From 28d9bd0fc777de37ef32f3354cdb63e61a71e220 Mon Sep 17 00:00:00 2001 From: vladikkuzn <51460778+vladikkuzn@users.noreply.github.com> Date: Mon, 29 Apr 2024 23:17:33 +0300 Subject: [PATCH 49/49] * Guarantee a pod failure (#2088) --- test/e2e/singlecluster/e2e_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/test/e2e/singlecluster/e2e_test.go b/test/e2e/singlecluster/e2e_test.go index 029a53551e..98b36b1e37 100644 --- a/test/e2e/singlecluster/e2e_test.go +++ b/test/e2e/singlecluster/e2e_test.go @@ -156,6 +156,7 @@ var _ = ginkgo.Describe("Kueue", func() { sampleJob = (&testingjob.JobWrapper{Job: *sampleJob}). Label(constants.PrebuiltWorkloadLabel, "prebuilt-wl"). BackoffLimit(0). + Image("gcr.io/k8s-staging-perf-tests/sleep:v0.1.0", []string{"-termination-code=1", "10m"}). TerminationGracePeriod(1). Obj() testingjob.SetContainerDefaults(&sampleJob.Spec.Template.Spec.Containers[0])
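
The final patch above pins the sample job to the sleep image with "-termination-code=1" and BackoffLimit(0), so the pod exits non-zero and the failure path is exercised deterministically. Below is a minimal sketch, not part of the patch, of how such a guaranteed failure could be asserted in the e2e test. It reuses names from the surrounding suite (k8sClient, ctx, util.Timeout, util.Interval) and only standard batch/v1, controller-runtime, and gomega APIs; the helper name expectJobFailure and the assumed imports (context, batchv1 "k8s.io/api/batch/v1", "github.com/onsi/gomega", "sigs.k8s.io/controller-runtime/pkg/client", util "sigs.k8s.io/kueue/test/util") are illustrative, and the real test may assert something different.

    // expectJobFailure is a hypothetical helper, not part of the patch. It waits until
    // the Job controller reports at least one failed pod; with BackoffLimit(0) and the
    // sleep image's -termination-code=1 this is expected to happen on the first run.
    func expectJobFailure(ctx context.Context, k8sClient client.Client, job *batchv1.Job) {
        createdJob := &batchv1.Job{}
        gomega.Eventually(func() int32 {
            // Re-read the Job and return the number of pods it reports as failed.
            gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(job), createdJob)).To(gomega.Succeed())
            return createdJob.Status.Failed
        }, util.Timeout, util.Interval).Should(gomega.BeNumerically(">=", 1),
            "expected the sample job to report a failed pod")
    }

Checking Job.Status.Failed rather than individual pod phases keeps the assertion independent of pod garbage-collection timing, which tends to make e2e runs less flaky.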