From e55bb8c9cecf0e23ce248879ece764a1b4a6eccd Mon Sep 17 00:00:00 2001
From: vladikkuzn <51460778+vladikkuzn@users.noreply.github.com>
Date: Thu, 18 Apr 2024 16:51:50 +0300
Subject: [PATCH 01/49] Non-admitted workloads with QuotaReserved condition are
shown as Admitted by kubectl (#1991)
* Rename "Admitted by" to "Reserving in" printed column
* Add "Admitted" printed column
* Output Admitted column without -wide
* Remove priority=0 as it's the default
* Documentation update
* Rename "Reserving in" to "Reserved in"
---
apis/kueue/v1beta1/workload_types.go | 7 ++++---
charts/kueue/templates/crd/kueue.x-k8s.io_workloads.yaml | 8 ++++++--
config/components/crd/bases/kueue.x-k8s.io_workloads.yaml | 8 ++++++--
site/content/en/docs/tasks/run/jobs.md | 8 ++++----
.../en/docs/tasks/troubleshooting/troubleshooting_jobs.md | 8 ++++----
5 files changed, 24 insertions(+), 15 deletions(-)
diff --git a/apis/kueue/v1beta1/workload_types.go b/apis/kueue/v1beta1/workload_types.go
index fc8c48c95d..698787d8c1 100644
--- a/apis/kueue/v1beta1/workload_types.go
+++ b/apis/kueue/v1beta1/workload_types.go
@@ -330,9 +330,10 @@ const (
// +kubebuilder:object:root=true
// +kubebuilder:storageversion
// +kubebuilder:subresource:status
-// +kubebuilder:printcolumn:name="Queue",JSONPath=".spec.queueName",type=string,description="Name of the queue this workload was submitted to"
-// +kubebuilder:printcolumn:name="Admitted by",JSONPath=".status.admission.clusterQueue",type=string,description="Name of the ClusterQueue that admitted this workload"
-// +kubebuilder:printcolumn:name="Age",JSONPath=".metadata.creationTimestamp",type=date,description="Time this workload was created"
+// +kubebuilder:printcolumn:name="Queue",JSONPath=".spec.queueName",type="string",description="Name of the queue this workload was submitted to"
+// +kubebuilder:printcolumn:name="Reserved in",JSONPath=".status.admission.clusterQueue",type="string",description="Name of the ClusterQueue where the workload is reserving quota"
+// +kubebuilder:printcolumn:name="Admitted",JSONPath=".status.conditions[?(@.type=='Admitted')].status",type="string",description="Admission status"
+// +kubebuilder:printcolumn:name="Age",JSONPath=".metadata.creationTimestamp",type="date",description="Time this workload was created"
// +kubebuilder:resource:shortName={wl}
// Workload is the Schema for the workloads API
diff --git a/charts/kueue/templates/crd/kueue.x-k8s.io_workloads.yaml b/charts/kueue/templates/crd/kueue.x-k8s.io_workloads.yaml
index 8697489777..3f2afaa75a 100644
--- a/charts/kueue/templates/crd/kueue.x-k8s.io_workloads.yaml
+++ b/charts/kueue/templates/crd/kueue.x-k8s.io_workloads.yaml
@@ -36,9 +36,13 @@ spec:
jsonPath: .spec.queueName
name: Queue
type: string
- - description: Name of the ClusterQueue that admitted this workload
+ - description: Name of the ClusterQueue where the workload is reserving quota
jsonPath: .status.admission.clusterQueue
- name: Admitted by
+ name: Reserved in
+ type: string
+ - description: Admission status
+ jsonPath: .status.conditions[?(@.type=='Admitted')].status
+ name: Admitted
type: string
- description: Time this workload was created
jsonPath: .metadata.creationTimestamp
diff --git a/config/components/crd/bases/kueue.x-k8s.io_workloads.yaml b/config/components/crd/bases/kueue.x-k8s.io_workloads.yaml
index 095dcb1072..9bc7c62a89 100644
--- a/config/components/crd/bases/kueue.x-k8s.io_workloads.yaml
+++ b/config/components/crd/bases/kueue.x-k8s.io_workloads.yaml
@@ -21,9 +21,13 @@ spec:
jsonPath: .spec.queueName
name: Queue
type: string
- - description: Name of the ClusterQueue that admitted this workload
+ - description: Name of the ClusterQueue where the workload is reserving quota
jsonPath: .status.admission.clusterQueue
- name: Admitted by
+ name: Reserved in
+ type: string
+ - description: Admission status
+ jsonPath: .status.conditions[?(@.type=='Admitted')].status
+ name: Admitted
type: string
- description: Time this workload was created
jsonPath: .metadata.creationTimestamp
diff --git a/site/content/en/docs/tasks/run/jobs.md b/site/content/en/docs/tasks/run/jobs.md
index 86121ed6bb..9d5d2e8112 100644
--- a/site/content/en/docs/tasks/run/jobs.md
+++ b/site/content/en/docs/tasks/run/jobs.md
@@ -77,8 +77,8 @@ kubectl -n default get workloads
The output will be similar to the following:
```shell
-NAME               QUEUE        ADMITTED BY     AGE
-sample-job-sl4bm   user-queue                   1s
+NAME               QUEUE        RESERVED IN     ADMITTED   AGE
+sample-job-sl4bm   user-queue                              1s
```
## 3. (Optional) Monitor the status of the workload
@@ -124,8 +124,8 @@ kubectl -n default get workloads
The output is similar to the following:
```shell
-NAME               QUEUE        ADMITTED BY     AGE
-sample-job-sl4bm   user-queue   cluster-queue   45s
+NAME               QUEUE        RESERVED IN     ADMITTED   AGE
+sample-job-sl4bm   user-queue   cluster-queue   True       1s
```
To view the event for the Workload admission, run the following command:
diff --git a/site/content/en/docs/tasks/troubleshooting/troubleshooting_jobs.md b/site/content/en/docs/tasks/troubleshooting/troubleshooting_jobs.md
index c3411049d0..db8dc8b7dc 100644
--- a/site/content/en/docs/tasks/troubleshooting/troubleshooting_jobs.md
+++ b/site/content/en/docs/tasks/troubleshooting/troubleshooting_jobs.md
@@ -41,8 +41,8 @@ Job is called `my-job` in the `my-namespace` namespace.
The output looks like the following:
```
- NAME               QUEUE        ADMITTED BY     AGE
- job-my-job-19797   user-queue   cluster-queue   9m45s
+ NAME               QUEUE        RESERVED IN     ADMITTED   AGE
+ job-my-job-19797   user-queue   cluster-queue   True       9m45s
```
3. You can list all of the workloads in the same namespace of your job and identify the one
@@ -56,8 +56,8 @@ Job is called `my-job` in the `my-namespace` namespace.
The output looks like the following:
```
- NAME               QUEUE        ADMITTED BY     AGE
- job-my-job-19797   user-queue   cluster-queue   9m45s
+ NAME               QUEUE        RESERVED IN     ADMITTED   AGE
+ job-my-job-19797   user-queue   cluster-queue   True       9m45s
```
## Is my Job running?
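For context on the distinction this patch surfaces: quota reservation (QuotaReserved) and admission (Admitted) are separate Workload conditions, and only the latter means every admission check has passed. A minimal Go sketch of how a client could read both, using the same apimeta helper the controllers in this series use; the helper name and sample values are illustrative, not part of the patch:
```go
package main

import (
	"fmt"

	apimeta "k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
)

// describeWorkload mirrors what the new "Reserved in" and "Admitted" printed
// columns expose: a Workload can reserve quota without being admitted yet.
func describeWorkload(wl *kueue.Workload) string {
	reserved := apimeta.IsStatusConditionTrue(wl.Status.Conditions, kueue.WorkloadQuotaReserved)
	admitted := apimeta.IsStatusConditionTrue(wl.Status.Conditions, kueue.WorkloadAdmitted)
	switch {
	case admitted:
		return fmt.Sprintf("admitted by %s", wl.Status.Admission.ClusterQueue)
	case reserved:
		return fmt.Sprintf("reserving quota in %s, not yet admitted", wl.Status.Admission.ClusterQueue)
	default:
		return "pending"
	}
}

func main() {
	// A workload that has quota reserved but is still waiting on admission checks.
	wl := &kueue.Workload{
		Status: kueue.WorkloadStatus{
			Admission: &kueue.Admission{ClusterQueue: "cluster-queue"},
			Conditions: []metav1.Condition{
				{Type: kueue.WorkloadQuotaReserved, Status: metav1.ConditionTrue},
			},
		},
	}
	fmt.Println(describeWorkload(wl)) // reserving quota in cluster-queue, not yet admitted
}
```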
From 472ce6d2c6bbfab2903db065dbb8f117415bc8d5 Mon Sep 17 00:00:00 2001
From: Vanessasaurus <814322+vsoch@users.noreply.github.com>
Date: Thu, 18 Apr 2024 08:45:59 -0600
Subject: [PATCH 02/49] docs: add troubleshooting resource requests (#2001)
* docs: add note about resources matching cluster-queue
Problem: the troubleshooting guide should demonstrate how to
debug the case where jobs are not admitted.
Solution: add a small section to show that resource types
need to match resource requests, and other small debug tips.
Signed-off-by: vsoch
* fix: typos in provisioning and troubleshooting
Signed-off-by: vsoch
* fix: code indent
Signed-off-by: vsoch
* review: aldo
Signed-off-by: vsoch
---------
Signed-off-by: vsoch
Co-authored-by: vsoch
---
.../provisioning.md | 2 +-
.../troubleshooting/troubleshooting_jobs.md | 38 +++++++++++++++++++
.../troubleshooting/troubleshooting_pods.md | 2 +-
3 files changed, 40 insertions(+), 2 deletions(-)
diff --git a/site/content/en/docs/admission-check-controllers/provisioning.md b/site/content/en/docs/admission-check-controllers/provisioning.md
index aec9c66250..620261400c 100644
--- a/site/content/en/docs/admission-check-controllers/provisioning.md
+++ b/site/content/en/docs/admission-check-controllers/provisioning.md
@@ -15,7 +15,7 @@ The Provisioning Admission Check Controller is supported on [Kubernetes cluster-
## Usage
To use the Provisioning AdmissionCheck, create an [AdmissionCheck](docs/concepts/admission_check)
-with `kueue.x-k8s.io/provisioning-request` as a `.spec.controllerName` and create a ProvisioningRequest configuration usign a `ProvisioningRequestConfig` object. See an example below.
+with `kueue.x-k8s.io/provisioning-request` as a `.spec.controllerName` and create a ProvisioningRequest configuration using a `ProvisioningRequestConfig` object. See an example below.
## ProvisioningRequest configuration
diff --git a/site/content/en/docs/tasks/troubleshooting/troubleshooting_jobs.md b/site/content/en/docs/tasks/troubleshooting/troubleshooting_jobs.md
index db8dc8b7dc..c517b15186 100644
--- a/site/content/en/docs/tasks/troubleshooting/troubleshooting_jobs.md
+++ b/site/content/en/docs/tasks/troubleshooting/troubleshooting_jobs.md
@@ -133,6 +133,44 @@ status:
type: QuotaReserved
```
+### Does my ClusterQueue have the resource requests that the job requires?
+
+When you submit a job that has a resource request, for example:
+
+```bash
+$ kubectl get jobs job-0-9-size-6 -o json | jq -r .spec.template.spec.containers[0].resources
+```
+```console
+{
+ "limits": {
+ "cpu": "2"
+ },
+ "requests": {
+ "cpu": "2"
+ }
+}
+```
+
+If your ClusterQueue does not have a definition for the `requests`, Kueue cannot admit the job. For the job above, you should define `cpu` quotas under `resourceGroups`. A ClusterQueue defining `cpu` quota looks like the following:
+
+```yaml
+apiVersion: kueue.x-k8s.io/v1beta1
+kind: ClusterQueue
+metadata:
+ name: "cluster-queue"
+spec:
+ namespaceSelector: {}
+ resourceGroups:
+ - coveredResources: ["cpu"]
+ flavors:
+ - name: "default-flavor"
+ resources:
+ - name: "cpu"
+ nominalQuota: 40
+```
+
+See [resource groups](https://kueue.sigs.k8s.io/docs/concepts/cluster_queue/#resource-groups) for more information.
+
### Unattempted Workload
When using a [ClusterQueue](/docs/concepts/cluster_queue) with the `StrictFIFO`
diff --git a/site/content/en/docs/tasks/troubleshooting/troubleshooting_pods.md b/site/content/en/docs/tasks/troubleshooting/troubleshooting_pods.md
index 3dc4f7f8d0..0dbfc6ff37 100644
--- a/site/content/en/docs/tasks/troubleshooting/troubleshooting_pods.md
+++ b/site/content/en/docs/tasks/troubleshooting/troubleshooting_pods.md
@@ -78,7 +78,7 @@ Events:
## Why did my Pod disappear?
When you enable [preemption](/docs/concepts/cluster_queue/#preemption), Kueue might preempt Pods
-to accomodate higher priority jobs or reclaim quota. Preemption is implemented via `DELETE` calls,
+to accommodate higher priority jobs or reclaim quota. Preemption is implemented via `DELETE` calls,
the standard way of terminating a Pod in Kubernetes.
When using single Pods, Kubernetes will delete the Workload object along with the Pod, as there is
From df18528c880b211190949fa485cc6ce4effaef64 Mon Sep 17 00:00:00 2001
From: Marty Mcfly
Date: Fri, 19 Apr 2024 14:02:21 +0800
Subject: [PATCH 03/49] site: fix kubectl command of tasks example (#2013)
---
site/content/en/docs/tasks/run/plain_pods.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/site/content/en/docs/tasks/run/plain_pods.md b/site/content/en/docs/tasks/run/plain_pods.md
index 66ff545526..314daf70bf 100644
--- a/site/content/en/docs/tasks/run/plain_pods.md
+++ b/site/content/en/docs/tasks/run/plain_pods.md
@@ -107,7 +107,7 @@ Here is a sample Pod that just sleeps for a few seconds:
You can create the Pod using the following command:
```sh
# Create the pod
-kubectl apply -f kueue-pod.yaml
+kubectl create -f kueue-pod.yaml
```
## Running a group of Pods to be admitted together
@@ -164,7 +164,7 @@ Here is a sample Pod group that just sleeps for a few seconds:
You can create the Pod group using the following command:
```sh
-kubectl apply -f kueue-pod-group.yaml
+kubectl create -f kueue-pod-group.yaml
```
The name of the associated Workload created by Kueue equals the name of the Pod
From 69fb8d317c064591539b066a26fa4b3d959c65ab Mon Sep 17 00:00:00 2001
From: Oleksandr Redko
Date: Fri, 19 Apr 2024 09:30:56 +0300
Subject: [PATCH 04/49] Fix deprecated comment for constants.QueueAnnotation
(#1976)
---
.golangci.yaml | 7 ++++++-
pkg/controller/constants/constants.go | 2 +-
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/.golangci.yaml b/.golangci.yaml
index 8daf73f55a..b4ba30a602 100644
--- a/.golangci.yaml
+++ b/.golangci.yaml
@@ -11,7 +11,6 @@ linters-settings:
- assignOp
- captLocal
- commentFormatting
- - deprecatedComment
- elseif
- exitAfterDefer
- ifElseChain
@@ -38,6 +37,12 @@ issues:
# Which dirs to exclude: issues from them won't be reported
exclude-dirs:
- bin
+ # Excluding configuration per-path, per-linter, per-text and per-source
+ exclude-rules:
+ - linters:
+ - staticcheck
+ # TODO(#768): Drop when incrementing the API version.
+ text: "SA1019: constants.QueueAnnotation is deprecated"
# Show all issues from a linter
max-issues-per-linter: 0
# Show all issues with the same text
diff --git a/pkg/controller/constants/constants.go b/pkg/controller/constants/constants.go
index 0287cc4154..718f9ad251 100644
--- a/pkg/controller/constants/constants.go
+++ b/pkg/controller/constants/constants.go
@@ -22,7 +22,7 @@ const (
// QueueAnnotation is the annotation key in the workload that holds the queue name.
//
- // DEPRECATED: Use QueueLabel as a label key.
+ // Deprecated: Use QueueLabel as a label key.
QueueAnnotation = QueueLabel
// PrebuiltWorkloadLabel is the label key of the job holding the name of the pre-built workload to use.
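The capitalization change is not cosmetic: staticcheck's SA1019 check (and other Go tooling) only treats a doc-comment paragraph as a deprecation notice when it starts with exactly "Deprecated:", which is also why the new golangci exclude rule targets that SA1019 message. A short sketch of the convention; the label value is illustrative rather than copied from the patch:
```go
package constants

// QueueLabel is the label key in the workload that holds the queue name.
// (Illustrative value; the real constant lives in pkg/controller/constants.)
const QueueLabel = "kueue.x-k8s.io/queue-name"

// QueueAnnotation is the annotation key in the workload that holds the queue name.
//
// Deprecated: Use QueueLabel as a label key.
// Tooling recognizes this paragraph because it begins with "Deprecated:";
// the previous "DEPRECATED:" spelling was ignored.
const QueueAnnotation = QueueLabel
```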
From a4354c359df7c39a2bc6fe9ba25c99fa78697399 Mon Sep 17 00:00:00 2001
From: jiangjiang <86391540+googs1025@users.noreply.github.com>
Date: Fri, 19 Apr 2024 15:07:41 +0800
Subject: [PATCH 05/49] fix: website jobset url (#2011)
---
site/content/en/docs/tasks/run/jobsets.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/site/content/en/docs/tasks/run/jobsets.md b/site/content/en/docs/tasks/run/jobsets.md
index e91e48e22c..5d57953578 100644
--- a/site/content/en/docs/tasks/run/jobsets.md
+++ b/site/content/en/docs/tasks/run/jobsets.md
@@ -15,11 +15,11 @@ This guide is for [batch users](/docs/tasks#batch-user) that have a basic unders
1. Check [Administer cluster quotas](/docs/tasks/manage/administer_cluster_quotas) for details on the initial Kueue setup.
-2. See [JobSet Installation](https://github.com/kubernetes-sigs/jobset/blob/main/docs/setup/install.md) for installation and configuration details of JobSet Operator.
+2. See [JobSet Installation](https://jobset.sigs.k8s.io/docs/installation/) for installation and configuration details of JobSet Operator.
## JobSet definition
-When running [JobSets](https://github.com/kubernetes-sigs/jobset/blob/main/docs/concepts/README.md) on
+When running [JobSets](https://jobset.sigs.k8s.io/docs/concepts/) on
Kueue, take into consideration the following aspects:
### a. Queue selection
From 4e5eb4366249b1a483ef5d4244b966fa6d0aca0b Mon Sep 17 00:00:00 2001
From: Michał Woźniak
Date: Fri, 19 Apr 2024 12:13:05 +0200
Subject: [PATCH 06/49] Fix config for e2e tests (#2017)
---
test/e2e/config/controller_manager_config.yaml | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/test/e2e/config/controller_manager_config.yaml b/test/e2e/config/controller_manager_config.yaml
index a3b5950bdb..b21739e9a2 100644
--- a/test/e2e/config/controller_manager_config.yaml
+++ b/test/e2e/config/controller_manager_config.yaml
@@ -5,10 +5,11 @@ leaderElection:
controller:
groupKindConcurrency:
Job.batch: 5
+ Pod: 5
+ Workload.kueue.x-k8s.io: 5
LocalQueue.kueue.x-k8s.io: 1
ClusterQueue.kueue.x-k8s.io: 1
ResourceFlavor.kueue.x-k8s.io: 1
- Workload.kueue.x-k8s.io: 1
clientConnection:
qps: 50
burst: 100
@@ -24,8 +25,3 @@ integrations:
- "kubeflow.org/tfjob"
- "kubeflow.org/xgboostjob"
- "pod"
-controller:
- groupKindConcurrency:
- Job.batch: 5
- Pod.: 5
- Workload.kueue.x-k8s.io: 5
From bfb148b97c43a306ded8e85a2655d240f289f178 Mon Sep 17 00:00:00 2001
From: Christian Zaccaria <73656840+ChristianZaccaria@users.noreply.github.com>
Date: Fri, 19 Apr 2024 11:33:11 +0100
Subject: [PATCH 07/49] CVE fixes - Upgrade mpi-operator (#1989)
* CVE fixes - Upgrading mpi-operator
* Adjust type for mpi-operator v0.5.0
---
go.mod | 35 +++++-----
go.sum | 70 +++++++++----------
.../jobs/mpijob/mpijob_controller_test.go | 13 ++--
.../testingjobs/mpijob/wrappers_mpijob.go | 3 +-
4 files changed, 58 insertions(+), 63 deletions(-)
diff --git a/go.mod b/go.mod
index 307e28448c..ee0d082ba9 100644
--- a/go.mod
+++ b/go.mod
@@ -5,8 +5,7 @@ go 1.22
require (
github.com/go-logr/logr v1.4.1
github.com/google/go-cmp v0.6.0
- github.com/kubeflow/common v0.4.7
- github.com/kubeflow/mpi-operator v0.4.0
+ github.com/kubeflow/mpi-operator v0.5.0
github.com/kubeflow/training-operator v1.7.0
github.com/onsi/ginkgo/v2 v2.17.1
github.com/onsi/gomega v1.32.0
@@ -16,18 +15,18 @@ require (
github.com/ray-project/kuberay/ray-operator v1.1.0
github.com/spf13/cobra v1.8.0
go.uber.org/zap v1.27.0
- k8s.io/api v0.29.3
- k8s.io/apimachinery v0.29.3
- k8s.io/apiserver v0.29.3
+ k8s.io/api v0.29.4
+ k8s.io/apimachinery v0.29.4
+ k8s.io/apiserver v0.29.4
k8s.io/autoscaler/cluster-autoscaler/apis v0.0.0-20240325113845-0130d33747bb
- k8s.io/client-go v0.29.3
- k8s.io/code-generator v0.29.3
- k8s.io/component-base v0.29.3
- k8s.io/component-helpers v0.29.3
+ k8s.io/client-go v0.29.4
+ k8s.io/code-generator v0.29.4
+ k8s.io/component-base v0.29.4
+ k8s.io/component-helpers v0.29.4
k8s.io/klog/v2 v2.110.1
k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00
- k8s.io/metrics v0.29.3
- k8s.io/utils v0.0.0-20230726121419-3b25d923346b
+ k8s.io/metrics v0.29.4
+ k8s.io/utils v0.0.0-20240102154912-e7106e64919e
sigs.k8s.io/controller-runtime v0.17.3
sigs.k8s.io/controller-tools v0.14.0
sigs.k8s.io/jobset v0.5.0
@@ -50,7 +49,7 @@ require (
github.com/evanphx/json-patch v5.6.0+incompatible // indirect
github.com/evanphx/json-patch/v5 v5.8.0 // indirect
github.com/fatih/color v1.16.0 // indirect
- github.com/felixge/httpsnoop v1.0.3 // indirect
+ github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-logr/zapr v1.3.0 // indirect
@@ -91,7 +90,7 @@ require (
go.etcd.io/etcd/client/v3 v3.5.11 // indirect
go.etcd.io/etcd/server/v3 v3.5.11 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.46.0 // indirect
- go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.44.0 // indirect
+ go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.0 // indirect
go.opentelemetry.io/otel v1.20.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.20.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.20.0 // indirect
@@ -101,14 +100,14 @@ require (
go.opentelemetry.io/proto/otlp v1.0.0 // indirect
go.uber.org/atomic v1.11.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
- golang.org/x/crypto v0.18.0 // indirect
+ golang.org/x/crypto v0.21.0 // indirect
golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect
golang.org/x/mod v0.14.0 // indirect
- golang.org/x/net v0.20.0 // indirect
+ golang.org/x/net v0.23.0 // indirect
golang.org/x/oauth2 v0.12.0 // indirect
golang.org/x/sync v0.6.0 // indirect
- golang.org/x/sys v0.16.0 // indirect
- golang.org/x/term v0.16.0 // indirect
+ golang.org/x/sys v0.18.0 // indirect
+ golang.org/x/term v0.18.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/time v0.3.0 // indirect
golang.org/x/tools v0.17.0 // indirect
@@ -125,7 +124,7 @@ require (
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/apiextensions-apiserver v0.29.2 // indirect
k8s.io/gengo v0.0.0-20230829151522-9cce18d56c01 // indirect
- k8s.io/kms v0.29.3 // indirect
+ k8s.io/kms v0.29.4 // indirect
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.28.0 // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
)
diff --git a/go.sum b/go.sum
index b9ca19d18f..8dd1eabf2a 100644
--- a/go.sum
+++ b/go.sum
@@ -40,8 +40,8 @@ github.com/evanphx/json-patch/v5 v5.8.0 h1:lRj6N9Nci7MvzrXuX6HFzU8XjmhPiXPlsKEy1
github.com/evanphx/json-patch/v5 v5.8.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ=
github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM=
github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE=
-github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk=
-github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
+github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
+github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU=
@@ -127,10 +127,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
-github.com/kubeflow/common v0.4.7 h1:zz6QS4k2u2FY838M/FjOtwjJq39MRZVZcvPahRYL97M=
-github.com/kubeflow/common v0.4.7/go.mod h1:43MAof/uhpJA2C0urynqatE3oKFQc7m2HLmJty7waqY=
-github.com/kubeflow/mpi-operator v0.4.0 h1:PS4jLoMuRyrk/DHuYkI0D46sQQYpQt375HjOV4KVMFs=
-github.com/kubeflow/mpi-operator v0.4.0/go.mod h1:/A4mTy/RYh2UIgaGUiXUaW70eThjsogu80WbbcZpuMg=
+github.com/kubeflow/mpi-operator v0.5.0 h1:XvBwyXXQ9103DNMa22sxsaQlaktvaT2LY/g0UniGn5U=
+github.com/kubeflow/mpi-operator v0.5.0/go.mod h1:SeZQJW8KJxSTWD++eQYKRFpoDg1v8yrdC6fjx2/3mG0=
github.com/kubeflow/training-operator v1.7.0 h1:Zh61GlOWrlRi4UFOtJeV+/5REo/OndhwQ25KYd0llzc=
github.com/kubeflow/training-operator v1.7.0/go.mod h1:BZCLX1h06wY3YSeSZZcGYAqI9/nVi7isVCRkfgZe9nE=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
@@ -224,8 +222,8 @@ go.etcd.io/etcd/server/v3 v3.5.11 h1:FEa0ImvoXdIPa81/vZUKpnJ74fpQ5ZivseoIKMPzfpg
go.etcd.io/etcd/server/v3 v3.5.11/go.mod h1:CS0+TwcuRlhg1I5CpA3YlisOcoqJB1h1GMRgje75uDs=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.46.0 h1:PzIubN4/sjByhDRHLviCjJuweBXWFZWhghjg7cS28+M=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.46.0/go.mod h1:Ct6zzQEuGK3WpJs2n4dn+wfJYzd/+hNnxMRTWjGn30M=
-go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.44.0 h1:KfYpVmrjI7JuToy5k8XV3nkapjWx48k4E4JOtVstzQI=
-go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.44.0/go.mod h1:SeQhzAEccGVZVEy7aH87Nh0km+utSpo1pTv6eMMop48=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.0 h1:1eHu3/pUSWaOgltNK3WJFaywKsTIr/PwvHyDmi0lQA0=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.0/go.mod h1:HyABWq60Uy1kjJSa2BVOxUVao8Cdick5AWSKPutqy6U=
go.opentelemetry.io/otel v1.20.0 h1:vsb/ggIY+hUjD/zCAQHpzTmndPqv/ml2ArbsbfBYTAc=
go.opentelemetry.io/otel v1.20.0/go.mod h1:oUIGj3D77RwJdM6PPZImDpSZGDvkD9fhesHny69JFrs=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.20.0 h1:DeFD0VgTZ+Cj6hxravYYZE2W4GlneVH81iAOPjZkzk8=
@@ -252,8 +250,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
-golang.org/x/crypto v0.18.0 h1:PGVlW0xEltQnzFZ55hkuX5+KLyrMYhHld1YHO4AKcdc=
-golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg=
+golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA=
+golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs=
golang.org/x/exp v0.0.0-20230905200255-921286631fa9 h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g=
golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
@@ -267,8 +265,8 @@ golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
-golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo=
-golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY=
+golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs=
+golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
golang.org/x/oauth2 v0.12.0 h1:smVPGxink+n1ZI5pkQa8y6fZT0RW0MgCO5bFpepy4B4=
golang.org/x/oauth2 v0.12.0/go.mod h1:A74bZ3aGXgCY0qaIC9Ahg6Lglin4AMAco8cIv9baba4=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -287,12 +285,12 @@ golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU=
-golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
+golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
-golang.org/x/term v0.16.0 h1:m+B6fahuftsE9qjo0VWp2FW0mB3MTJvR0BaMQrq0pmE=
-golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY=
+golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8=
+golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
@@ -345,39 +343,39 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
-k8s.io/api v0.29.3 h1:2ORfZ7+bGC3YJqGpV0KSDDEVf8hdGQ6A03/50vj8pmw=
-k8s.io/api v0.29.3/go.mod h1:y2yg2NTyHUUkIoTC+phinTnEa3KFM6RZ3szxt014a80=
+k8s.io/api v0.29.4 h1:WEnF/XdxuCxdG3ayHNRR8yH3cI1B/llkWBma6bq4R3w=
+k8s.io/api v0.29.4/go.mod h1:DetSv0t4FBTcEpfA84NJV3g9a7+rSzlUHk5ADAYHUv0=
k8s.io/apiextensions-apiserver v0.29.2 h1:UK3xB5lOWSnhaCk0RFZ0LUacPZz9RY4wi/yt2Iu+btg=
k8s.io/apiextensions-apiserver v0.29.2/go.mod h1:aLfYjpA5p3OwtqNXQFkhJ56TB+spV8Gc4wfMhUA3/b8=
-k8s.io/apimachinery v0.29.3 h1:2tbx+5L7RNvqJjn7RIuIKu9XTsIZ9Z5wX2G22XAa5EU=
-k8s.io/apimachinery v0.29.3/go.mod h1:hx/S4V2PNW4OMg3WizRrHutyB5la0iCUbZym+W0EQIU=
-k8s.io/apiserver v0.29.3 h1:xR7ELlJ/BZSr2n4CnD3lfA4gzFivh0wwfNfz9L0WZcE=
-k8s.io/apiserver v0.29.3/go.mod h1:hrvXlwfRulbMbBgmWRQlFru2b/JySDpmzvQwwk4GUOs=
+k8s.io/apimachinery v0.29.4 h1:RaFdJiDmuKs/8cm1M6Dh1Kvyh59YQFDcFuFTSmXes6Q=
+k8s.io/apimachinery v0.29.4/go.mod h1:i3FJVwhvSp/6n8Fl4K97PJEP8C+MM+aoDq4+ZJBf70Y=
+k8s.io/apiserver v0.29.4 h1:wPwGOO58GQOpRiZu59P5eRoDcB7QtV+QBglkRiXwCiM=
+k8s.io/apiserver v0.29.4/go.mod h1:VqTF9t98HVfhKZVRohCPezsdUt9u2g3bHKftxGcXoRo=
k8s.io/autoscaler/cluster-autoscaler/apis v0.0.0-20240325113845-0130d33747bb h1:ycQ/tSpcJEUHHx0pv6MXdq4NcRflCvFX6SMwmKROiis=
k8s.io/autoscaler/cluster-autoscaler/apis v0.0.0-20240325113845-0130d33747bb/go.mod h1:LPhCVj3E5Lp9W6HGVlW664m/X+KN2firfF3wtBBji54=
-k8s.io/client-go v0.29.3 h1:R/zaZbEAxqComZ9FHeQwOh3Y1ZUs7FaHKZdQtIc2WZg=
-k8s.io/client-go v0.29.3/go.mod h1:tkDisCvgPfiRpxGnOORfkljmS+UrW+WtXAy2fTvXJB0=
-k8s.io/code-generator v0.29.3 h1:m7E25/t9R9NvejspO2zBdyu+/Gl0Z5m7dCRc680KS14=
-k8s.io/code-generator v0.29.3/go.mod h1:x47ofBhN4gxYFcxeKA1PYXeaPreAGaDN85Y/lNUsPoM=
-k8s.io/component-base v0.29.3 h1:Oq9/nddUxlnrCuuR2K/jp6aflVvc0uDvxMzAWxnGzAo=
-k8s.io/component-base v0.29.3/go.mod h1:Yuj33XXjuOk2BAaHsIGHhCKZQAgYKhqIxIjIr2UXYio=
-k8s.io/component-helpers v0.29.3 h1:1dqZswuZgT2ZMixYeORyCUOAApXxgsvjVSgfoUT+P4o=
-k8s.io/component-helpers v0.29.3/go.mod h1:yiDqbRQrnQY+sPju/bL7EkwDJb6LVOots53uZNMZBos=
+k8s.io/client-go v0.29.4 h1:79ytIedxVfyXV8rpH3jCBW0u+un0fxHDwX5F9K8dPR8=
+k8s.io/client-go v0.29.4/go.mod h1:kC1thZQ4zQWYwldsfI088BbK6RkxK+aF5ebV8y9Q4tk=
+k8s.io/code-generator v0.29.4 h1:8ESudFNbY5/9BzB8KOEFG2uV9Q0AQxkc4mrQESr30Ks=
+k8s.io/code-generator v0.29.4/go.mod h1:7TYnI0dYItL2cKuhhgPSuF3WED9uMdELgbVXFfn/joE=
+k8s.io/component-base v0.29.4 h1:xeKzuuHI/1tjleu5jycDAcYbhAxeGHCQBZUY2eRIkOo=
+k8s.io/component-base v0.29.4/go.mod h1:pYjt+oEZP9gtmwSikwAJgfSBikqKX2gOqRat0QjmQt0=
+k8s.io/component-helpers v0.29.4 h1:lbVFhywtv64KlaIYTKszkHaFAqwCjNn7xyRTeWorzfI=
+k8s.io/component-helpers v0.29.4/go.mod h1:rMOVMGYEju7/GKMV0USfYAYJBIQdxlMMN1VFl/Mf2so=
k8s.io/gengo v0.0.0-20230829151522-9cce18d56c01 h1:pWEwq4Asjm4vjW7vcsmijwBhOr1/shsbSYiWXmNGlks=
k8s.io/gengo v0.0.0-20230829151522-9cce18d56c01/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E=
k8s.io/klog/v2 v2.2.0/go.mod h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y=
k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0=
k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo=
-k8s.io/kms v0.29.3 h1:ReljsAUhYlm2spdT4yXmY+9a8x8dc/OT4mXvwQPPteQ=
-k8s.io/kms v0.29.3/go.mod h1:TBGbJKpRUMk59neTMDMddjIDL+D4HuFUbpuiuzmOPg0=
+k8s.io/kms v0.29.4 h1:cFGEoCLwoXk/eqYZppLZxybCdmEWeRKMCbm9f13IdRQ=
+k8s.io/kms v0.29.4/go.mod h1:vWVImKkJd+1BQY4tBwdfSwjQBiLrnbNtHADcDEDQFtk=
k8s.io/kube-aggregator v0.28.1 h1:rvG4llYnQKHjj6YjjoBPEJxfD1uH0DJwkrJTNKGAaCs=
k8s.io/kube-aggregator v0.28.1/go.mod h1:JaLizMe+AECSpO2OmrWVsvnG0V3dX1RpW+Wq/QHbu18=
k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 h1:aVUu9fTY98ivBPKR9Y5w/AuzbMm96cd3YHRTU83I780=
k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00/go.mod h1:AsvuZPBlUDVuCdzJ87iajxtXuR9oktsTctW/R9wwouA=
-k8s.io/metrics v0.29.3 h1:nN+eavbMQ7Kuif2tIdTr2/F2ec2E/SIAWSruTZ+Ye6U=
-k8s.io/metrics v0.29.3/go.mod h1:kb3tGGC4ZcIDIuvXyUE291RwJ5WmDu0tB4wAVZM6h2I=
-k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI=
-k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
+k8s.io/metrics v0.29.4 h1:06sZ63/Kt9HEb5GP/1y6xbHDz6XkxnHpu949UdXfoXQ=
+k8s.io/metrics v0.29.4/go.mod h1:ZN9peB0nLTqPZuwQna8ZUrPFJQ0i8QNH4pqRJopS+9c=
+k8s.io/utils v0.0.0-20240102154912-e7106e64919e h1:eQ/4ljkx21sObifjzXwlPKpdGLrCfRziVtos3ofG/sQ=
+k8s.io/utils v0.0.0-20240102154912-e7106e64919e/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.28.0 h1:TgtAeesdhpm2SGwkQasmbeqDo8th5wOBA5h/AjTKA4I=
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.28.0/go.mod h1:VHVDI/KrK4fjnV61bE2g3sA7tiETLn8sooImelsCx3Y=
sigs.k8s.io/controller-runtime v0.17.3 h1:65QmN7r3FWgTxDMz9fvGnO1kbf2nu+acg9p2R9oYYYk=
diff --git a/pkg/controller/jobs/mpijob/mpijob_controller_test.go b/pkg/controller/jobs/mpijob/mpijob_controller_test.go
index 0de713b22e..71f3e68e46 100644
--- a/pkg/controller/jobs/mpijob/mpijob_controller_test.go
+++ b/pkg/controller/jobs/mpijob/mpijob_controller_test.go
@@ -21,7 +21,6 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
- common "github.com/kubeflow/common/pkg/apis/common/v1"
kubeflow "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v2beta1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -53,7 +52,7 @@ func TestCalcPriorityClassName(t *testing.T) {
PriorityClass: "scheduling-priority",
},
},
- MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{
+ MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*kubeflow.ReplicaSpec{
kubeflow.MPIReplicaTypeLauncher: {
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
@@ -79,7 +78,7 @@ func TestCalcPriorityClassName(t *testing.T) {
RunPolicy: kubeflow.RunPolicy{
SchedulingPolicy: &kubeflow.SchedulingPolicy{},
},
- MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{
+ MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*kubeflow.ReplicaSpec{
kubeflow.MPIReplicaTypeLauncher: {
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
@@ -95,7 +94,7 @@ func TestCalcPriorityClassName(t *testing.T) {
"specified on launcher takes precedence over worker": {
job: kubeflow.MPIJob{
Spec: kubeflow.MPIJobSpec{
- MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{
+ MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*kubeflow.ReplicaSpec{
kubeflow.MPIReplicaTypeLauncher: {
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
@@ -118,7 +117,7 @@ func TestCalcPriorityClassName(t *testing.T) {
"launcher present, but without priority; fallback to worker": {
job: kubeflow.MPIJob{
Spec: kubeflow.MPIJobSpec{
- MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{
+ MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*kubeflow.ReplicaSpec{
kubeflow.MPIReplicaTypeLauncher: {
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{},
@@ -139,7 +138,7 @@ func TestCalcPriorityClassName(t *testing.T) {
"specified on worker only": {
job: kubeflow.MPIJob{
Spec: kubeflow.MPIJobSpec{
- MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{
+ MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*kubeflow.ReplicaSpec{
kubeflow.MPIReplicaTypeLauncher: {},
kubeflow.MPIReplicaTypeWorker: {
Template: corev1.PodTemplateSpec{
@@ -156,7 +155,7 @@ func TestCalcPriorityClassName(t *testing.T) {
"worker present, but without priority; fallback to empty": {
job: kubeflow.MPIJob{
Spec: kubeflow.MPIJobSpec{
- MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{
+ MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*kubeflow.ReplicaSpec{
kubeflow.MPIReplicaTypeLauncher: {},
kubeflow.MPIReplicaTypeWorker: {
Template: corev1.PodTemplateSpec{
diff --git a/pkg/util/testingjobs/mpijob/wrappers_mpijob.go b/pkg/util/testingjobs/mpijob/wrappers_mpijob.go
index 57228bb463..42be69ce49 100644
--- a/pkg/util/testingjobs/mpijob/wrappers_mpijob.go
+++ b/pkg/util/testingjobs/mpijob/wrappers_mpijob.go
@@ -17,7 +17,6 @@ limitations under the License.
package testing
import (
- common "github.com/kubeflow/common/pkg/apis/common/v1"
kubeflow "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v2beta1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
@@ -43,7 +42,7 @@ func MakeMPIJob(name, ns string) *MPIJobWrapper {
RunPolicy: kubeflow.RunPolicy{
Suspend: ptr.To(true),
},
- MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{
+ MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*kubeflow.ReplicaSpec{
kubeflow.MPIReplicaTypeLauncher: {
Replicas: ptr.To[int32](1),
Template: corev1.PodTemplateSpec{
From 59a77a8fb3e98b294308e2df13592a61862f8713 Mon Sep 17 00:00:00 2001
From: peng
Date: Fri, 19 Apr 2024 20:49:28 +0800
Subject: [PATCH 08/49] Update kubeflow sample pytorchjob to pytorch 2.x and
cuda 12.x (#1910) (#1992)
Signed-off-by: wangdepeng
---
site/static/examples/jobs/sample-pytorchjob.yaml | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/site/static/examples/jobs/sample-pytorchjob.yaml b/site/static/examples/jobs/sample-pytorchjob.yaml
index 37acc3bc7f..bbb5c9e28c 100644
--- a/site/static/examples/jobs/sample-pytorchjob.yaml
+++ b/site/static/examples/jobs/sample-pytorchjob.yaml
@@ -14,7 +14,9 @@ spec:
spec:
containers:
- name: pytorch
- image: docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727
+ image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v1beta1-21320b6
+# If you have a GPU, the pytorch-mnist-gpu image may be helpful; note that it is approximately 22GB.
+# image: docker.io/kubeflowkatib/pytorch-mnist-gpu:latest
imagePullPolicy: Always
command:
- "python3"
@@ -31,7 +33,9 @@ spec:
spec:
containers:
- name: pytorch
- image: docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727
+ image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v1beta1-21320b6
+# If you have a GPU, the pytorch-mnist-gpu image may be helpful; note that it is approximately 22GB.
+# image: docker.io/kubeflowkatib/pytorch-mnist-gpu:latest
imagePullPolicy: Always
command:
- "python3"
From 497050a385444bb2f60845675486f8713738ea13 Mon Sep 17 00:00:00 2001
From: Traian Schiau <55734665+trasc@users.noreply.github.com>
Date: Fri, 19 Apr 2024 17:42:29 +0300
Subject: [PATCH 09/49] [test] Scalability runner metrics (#1987)
* Add runner metrics
This reverts commit c64bb7d36acc04487539f6ec28abc5b4e74a0bc6.
* Review Remarks
* Review remarks
* Use different channels for wl and cq events
---
Makefile | 1 +
test/scalability/checker/checker_test.go | 45 ++
test/scalability/default_rangespec.yaml | 8 +
.../runner/controller/controller.go | 11 +-
test/scalability/runner/main.go | 16 +
test/scalability/runner/recorder/recorder.go | 387 +++++++++++++++++-
6 files changed, 451 insertions(+), 17 deletions(-)
diff --git a/Makefile b/Makefile
index 86b6a1db1b..240beee7f8 100644
--- a/Makefile
+++ b/Makefile
@@ -246,6 +246,7 @@ run-scalability: envtest scalability-runner minimalkueue
.PHONY: test-scalability
test-scalability: gotestsum run-scalability
$(GOTESTSUM) --junitfile $(ARTIFACTS)/junit.xml -- $(GO_TEST_FLAGS) ./test/scalability/checker \
+ --summary=$(SCALABILITY_RUN_DIR)/summary.yaml \
--cmdStats=$(SCALABILITY_RUN_DIR)/minimalkueue.stats.yaml \
--range=$(PROJECT_DIR)/test/scalability/default_rangespec.yaml
diff --git a/test/scalability/checker/checker_test.go b/test/scalability/checker/checker_test.go
index 57aa5eff70..09a0d40d91 100644
--- a/test/scalability/checker/checker_test.go
+++ b/test/scalability/checker/checker_test.go
@@ -23,10 +23,12 @@ import (
"sigs.k8s.io/yaml"
+ "sigs.k8s.io/kueue/test/scalability/runner/recorder"
"sigs.k8s.io/kueue/test/scalability/runner/stats"
)
var (
+ summaryFile = flag.String("summary", "", "the runner summary report")
cmdStatsFile = flag.String("cmdStats", "", "command stats yaml file")
rangeFile = flag.String("range", "", "expectations range file")
)
@@ -38,9 +40,23 @@ type RangeSpec struct {
MaxSysMs int64 `json:"maxSysMs"`
Maxrss uint64 `json:"maxrss"`
} `json:"cmd"`
+ ClusterQueueClassesMinUsage map[string]float64 `json:"clusterQueueClassesMinUsage"`
+ WlClassesMaxAvgTimeToAdmissionMs map[string]int64 `json:"wlClassesMaxAvgTimeToAdmissionMs"`
}
func TestScalability(t *testing.T) {
+ summaryBytes, err := os.ReadFile(*summaryFile)
+ if err != nil {
+ t.Fatalf("Unable to read summary: %s", err)
+ }
+
+ summary := recorder.Summary{}
+
+ err = yaml.UnmarshalStrict(summaryBytes, &summary)
+ if err != nil {
+ t.Fatalf("Unable to unmarshal summary: %s", err)
+ }
+
cmdStatsBytes, err := os.ReadFile(*cmdStatsFile)
if err != nil {
t.Fatalf("Unable to read command stats: %s", err)
@@ -77,4 +93,33 @@ func TestScalability(t *testing.T) {
t.Errorf("Maxrss %dKib is greater than maximum expected %dKib", cmdStats.Maxrss, rangeSpec.Cmd.Maxrss)
}
})
+
+ t.Run("ClusterQueueClasses", func(t *testing.T) {
+ for c, cqcSummarry := range summary.ClusterQueueClasses {
+ t.Run(c, func(t *testing.T) {
+ expected, found := rangeSpec.ClusterQueueClassesMinUsage[c]
+ if !found {
+ t.Fatalf("Unexpected class")
+ }
+ actual := float64(cqcSummarry.CPUUsed) * 100 / (float64(cqcSummarry.NominalQuota) * float64(cqcSummarry.LastEventTime.Sub(cqcSummarry.FirstEventTime).Milliseconds()))
+ if actual < expected {
+ t.Errorf("Usage %.2f%% is less then expected %.2f%%", actual, expected)
+ }
+ })
+ }
+ })
+
+ t.Run("WorkloadClasses", func(t *testing.T) {
+ for c, wlcSummary := range summary.WorkloadClasses {
+ t.Run(c, func(t *testing.T) {
+ expected, found := rangeSpec.WlClassesMaxAvgTimeToAdmissionMs[c]
+ if !found {
+ t.Fatalf("Unexpected class")
+ }
+ if wlcSummary.AverageTimeToAdmissionMs > expected {
+ t.Errorf("Average wait for admission %dms is more then expected %dms", wlcSummary.AverageTimeToAdmissionMs, expected)
+ }
+ })
+ }
+ })
}
diff --git a/test/scalability/default_rangespec.yaml b/test/scalability/default_rangespec.yaml
index e57e111793..cd13b6714b 100644
--- a/test/scalability/default_rangespec.yaml
+++ b/test/scalability/default_rangespec.yaml
@@ -5,3 +5,11 @@ cmd:
maxUserMs: 3600_000
maxSysMs: 3600_000
maxrss: 1024_000 #1000MiB
+
+clusterQueueClassesMinUsage:
+ cq: 10 #10%
+
+wlClassesMaxAvgTimeToAdmissionMs:
+ large: 3600_000 #1h
+ medium: 3600_000
+ small: 3600_000
diff --git a/test/scalability/runner/controller/controller.go b/test/scalability/runner/controller/controller.go
index 9c37e121ef..3e6cab6ca2 100644
--- a/test/scalability/runner/controller/controller.go
+++ b/test/scalability/runner/controller/controller.go
@@ -73,7 +73,10 @@ var _ reconcile.Reconciler = (*reconciler)(nil)
var _ predicate.Predicate = (*reconciler)(nil)
func (r *reconciler) Create(ev event.CreateEvent) bool {
- _, isWl := (ev.Object).(*kueue.Workload)
+ wl, isWl := (ev.Object).(*kueue.Workload)
+ if isWl {
+ r.recorder.RecordWorkloadState(wl)
+ }
return !isWl
}
@@ -90,6 +93,8 @@ func (r *reconciler) Update(ev event.UpdateEvent) bool {
admitted := apimeta.IsStatusConditionTrue(wl.Status.Conditions, kueue.WorkloadAdmitted)
r.setAdmittedTime(wl.UID, admitted)
+ r.recorder.RecordWorkloadState(wl)
+
return admitted && !apimeta.IsStatusConditionTrue(wl.Status.Conditions, kueue.WorkloadFinished)
}
@@ -155,12 +160,12 @@ func (r *reconciler) SetupWithManager(mgr ctrl.Manager) error {
cqHandler := handler.Funcs{
CreateFunc: func(_ context.Context, ev event.CreateEvent, _ workqueue.RateLimitingInterface) {
if cq, isCq := ev.Object.(*kueue.ClusterQueue); isCq {
- r.recorder.RecordCQStatus(cq)
+ r.recorder.RecordCQState(cq)
}
},
UpdateFunc: func(_ context.Context, ev event.UpdateEvent, _ workqueue.RateLimitingInterface) {
if cq, isCq := ev.ObjectNew.(*kueue.ClusterQueue); isCq {
- r.recorder.RecordCQStatus(cq)
+ r.recorder.RecordCQState(cq)
}
},
}
diff --git a/test/scalability/runner/main.go b/test/scalability/runner/main.go
index 2ce9f1d30f..b134845506 100644
--- a/test/scalability/runner/main.go
+++ b/test/scalability/runner/main.go
@@ -196,6 +196,22 @@ func main() {
os.Exit(1)
}
+ err = recorder.WriteSummary(path.Join(*outputDir, "summary.yaml"))
+ if err != nil {
+ log.Error(err, "Writing summary")
+ os.Exit(1)
+ }
+ err = recorder.WriteCQCsv(path.Join(*outputDir, "cqStates.csv"))
+ if err != nil {
+ log.Error(err, "Writing cq csv")
+ os.Exit(1)
+ }
+ err = recorder.WriteWLCsv(path.Join(*outputDir, "wlStates.csv"))
+ if err != nil {
+ log.Error(err, "Writing wl csv")
+ os.Exit(1)
+ }
+
if *minimalKueuePath == "" {
c, err := client.New(cfg, client.Options{Scheme: scheme})
if err != nil {
diff --git a/test/scalability/runner/recorder/recorder.go b/test/scalability/runner/recorder/recorder.go
index 442f9454ca..065addc4ac 100644
--- a/test/scalability/runner/recorder/recorder.go
+++ b/test/scalability/runner/recorder/recorder.go
@@ -18,27 +18,124 @@ package recorder
import (
"context"
+ "encoding/csv"
+ "os"
+ "strconv"
"sync/atomic"
"time"
apimeta "k8s.io/apimachinery/pkg/api/meta"
+ "k8s.io/apimachinery/pkg/types"
+ "sigs.k8s.io/yaml"
kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
+ "sigs.k8s.io/kueue/test/scalability/runner/generator"
)
-type CQStatus struct {
- Name string
+type CQEvent struct {
+ Time time.Time
+ Name string
+ ClassName string
+ Cohort string
+ UID types.UID
+
+ CPUReservation int64
+ CPUUsage int64
+ CPUQuota int64
PendingWorkloads int32
ReservingWorkloads int32
+ AdmittedWorkloads int32
Active bool
}
-type Store map[string]CQStatus
+type CQState struct {
+ FirstEventTime time.Time
+ FirstActiveTime time.Time
+ CPUUsed int64
+ CPUMaxUsage int64
+ LastEvent *CQEvent
+}
+
+var CQStateCsvHeader = []string{
+ "name",
+ "cohort",
+ "class name",
+ "CPU quota (mCPU)",
+ "CPU used (mCPU * ms)",
+ "CPU max usage (mCPU)",
+ "monitor time (ms)",
+}
+
+func (cqs *CQState) CsvRecord() []string {
+ monitoringTimeMs := cqs.LastEvent.Time.Sub(cqs.FirstEventTime).Milliseconds()
+ return []string{
+ cqs.LastEvent.Name,
+ cqs.LastEvent.Cohort,
+ cqs.LastEvent.ClassName,
+ strconv.FormatInt(cqs.LastEvent.CPUQuota, 10),
+ strconv.FormatInt(cqs.CPUUsed, 10),
+ strconv.FormatInt(cqs.CPUMaxUsage, 10),
+ strconv.FormatInt(monitoringTimeMs, 10),
+ }
+}
+
+type CQStore map[string]*CQState
+
+type WLEvent struct {
+ Time time.Time
+ types.NamespacedName
+ UID types.UID
+ ClassName string
+ Admitted bool
+ Evicted bool
+ Finished bool
+}
+
+type WLState struct {
+ Id int
+ types.NamespacedName
+ ClassName string
+ FirstEventTime time.Time
+ TimeToAdmitMs int64
+ TimeToFinishedMs int64
+ EvictionCount int32
+ LastEvent *WLEvent
+}
+
+var WLStateCsvHeader = []string{
+ "id",
+ "class name",
+ "namespace",
+ "name",
+ "ms to admitted",
+ "ms to finish",
+ "num evictions",
+}
+
+func (wls *WLState) CsvRecord() []string {
+ return []string{
+ strconv.Itoa(wls.Id),
+ wls.ClassName,
+ wls.Namespace,
+ wls.Name,
+ strconv.FormatInt(wls.TimeToAdmitMs, 10),
+ strconv.FormatInt(wls.TimeToFinishedMs, 10),
+ strconv.FormatInt(int64(wls.EvictionCount), 10),
+ }
+}
+
+type WLStore map[types.UID]*WLState
+
+type Store struct {
+ CQ CQStore
+ WL WLStore
+}
type Recorder struct {
maxRecording time.Duration
running atomic.Bool
- evChan chan CQStatus
+ cqEvChan chan *CQEvent
+ wlEvChan chan *WLEvent
Store Store
}
@@ -47,17 +144,67 @@ func New(maxRecording time.Duration) *Recorder {
return &Recorder{
maxRecording: maxRecording,
running: atomic.Bool{},
- evChan: make(chan CQStatus, 10),
- Store: map[string]CQStatus{},
+ cqEvChan: make(chan *CQEvent, 10),
+ wlEvChan: make(chan *WLEvent, 10),
+ Store: Store{
+ CQ: make(CQStore),
+ WL: make(WLStore),
+ },
+ }
+}
+
+func (r *Recorder) recordCQEvent(ev *CQEvent) {
+ state, found := r.Store.CQ[ev.Name]
+ if !found {
+ state = &CQState{
+ FirstEventTime: ev.Time,
+ LastEvent: ev,
+ }
+ r.Store.CQ[ev.Name] = state
+ } else {
+ if state.LastEvent.CPUUsage > 0 {
+ state.CPUUsed += state.LastEvent.CPUUsage * ev.Time.Sub(state.LastEvent.Time).Milliseconds()
+ }
+ state.LastEvent = ev
}
+
+ if ev.Active && state.FirstActiveTime.IsZero() {
+ state.FirstActiveTime = ev.Time
+ }
+ state.CPUMaxUsage = max(state.CPUMaxUsage, ev.CPUUsage)
}
-func (r *Recorder) record(ev CQStatus) {
- r.Store[ev.Name] = ev
+func (r *Recorder) recordWLEvent(ev *WLEvent) {
+ state, found := r.Store.WL[ev.UID]
+ if !found {
+ state = &WLState{
+ Id: len(r.Store.WL),
+ NamespacedName: ev.NamespacedName,
+ ClassName: ev.ClassName,
+ FirstEventTime: ev.Time,
+ LastEvent: &WLEvent{},
+ }
+ r.Store.WL[ev.UID] = state
+ }
+
+ if ev.Admitted && !state.LastEvent.Admitted {
+ state.TimeToAdmitMs = ev.Time.Sub(state.FirstEventTime).Milliseconds()
+ }
+
+ if ev.Evicted && !state.LastEvent.Evicted {
+ state.EvictionCount++
+ }
+
+ if ev.Finished && !state.LastEvent.Finished {
+ state.TimeToFinishedMs = ev.Time.Sub(state.FirstEventTime).Milliseconds()
+ }
+
+ state.LastEvent = ev
}
func (r *Recorder) expectMoreEvents() bool {
- for _, s := range r.Store {
+ for _, cqStatus := range r.Store.CQ {
+ s := cqStatus.LastEvent
if (s.PendingWorkloads > 0 || s.ReservingWorkloads > 0) && s.Active {
return true
}
@@ -65,6 +212,176 @@ func (r *Recorder) expectMoreEvents() bool {
return false
}
+type CQGroupSummary struct {
+ CPUUsed int64 `json:"cpuUsed"`
+ CPUAverageUsage int64 `json:"cpuAverageUsage"`
+ NominalQuota int64 `json:"nominalQuota"`
+ FirstEventTime time.Time `json:"firstEventTime"`
+ LastEventTime time.Time `json:"lastEventTime"`
+}
+
+func (qgs *CQGroupSummary) AddQueueSummary(qs *CQState) {
+ qgs.CPUUsed += qs.CPUUsed
+ qgs.NominalQuota += qs.LastEvent.CPUQuota
+ if qs.FirstEventTime.Before(qgs.FirstEventTime) {
+ qgs.FirstEventTime = qs.FirstEventTime
+ }
+ if qs.LastEvent.Time.After(qgs.LastEventTime) {
+ qgs.LastEventTime = qs.LastEvent.Time
+ }
+}
+
+func (qgs *CQGroupSummary) refreshAverage() {
+ monitoringTime := qgs.LastEventTime.Sub(qgs.FirstEventTime).Milliseconds()
+ if monitoringTime > 0 {
+ qgs.CPUAverageUsage = qgs.CPUUsed / monitoringTime
+ }
+}
+
+func newCQGroupSummary(qs *CQState) *CQGroupSummary {
+ ret := &CQGroupSummary{
+ CPUUsed: qs.CPUUsed,
+ CPUAverageUsage: 0,
+ NominalQuota: qs.LastEvent.CPUQuota,
+ FirstEventTime: qs.FirstEventTime,
+ LastEventTime: qs.LastEvent.Time,
+ }
+ return ret
+}
+
+type WorkloadsClassSummary struct {
+ Count int32 `json:"count"`
+ totalTimeToAdmissionMs int64 `json:"-"`
+ totalTimeToFinishMs int64 `json:"-"`
+ TotalEvictions int32 `json:"totalEvictions"`
+ AverageTimeToAdmissionMs int64 `json:"averageTimeToAdmissionMs"`
+ AverageTimeToFinishMs int64 `json:"averageTimeToFinishMs"`
+}
+
+func (wcs *WorkloadsClassSummary) refreshAverage() {
+ if wcs == nil || wcs.Count == 0 {
+ return
+ }
+ wcs.AverageTimeToAdmissionMs = wcs.totalTimeToAdmissionMs / int64(wcs.Count)
+ wcs.AverageTimeToFinishMs = wcs.totalTimeToFinishMs / int64(wcs.Count)
+}
+
+type Summary struct {
+ ClusterQueueClasses map[string]*CQGroupSummary `json:"clusterQueueClasses"`
+ WorkloadClasses map[string]*WorkloadsClassSummary `json:"workloadClasses"`
+}
+
+func (r *Recorder) WriteSummary(path string) error {
+ summary := Summary{
+ ClusterQueueClasses: map[string]*CQGroupSummary{},
+ WorkloadClasses: map[string]*WorkloadsClassSummary{},
+ }
+
+ for _, cqState := range r.Store.CQ {
+ if cqState.LastEvent == nil {
+ continue
+ }
+ if groupSummary, found := summary.ClusterQueueClasses[cqState.LastEvent.ClassName]; found {
+ groupSummary.AddQueueSummary(cqState)
+ } else {
+ summary.ClusterQueueClasses[cqState.LastEvent.ClassName] = newCQGroupSummary(cqState)
+ }
+ }
+
+ for _, group := range summary.ClusterQueueClasses {
+ group.refreshAverage()
+ }
+
+ for _, wlState := range r.Store.WL {
+ if class, found := summary.WorkloadClasses[wlState.ClassName]; !found {
+ summary.WorkloadClasses[wlState.ClassName] = &WorkloadsClassSummary{
+ Count: 1,
+ totalTimeToAdmissionMs: wlState.TimeToAdmitMs,
+ totalTimeToFinishMs: wlState.TimeToFinishedMs,
+ TotalEvictions: wlState.EvictionCount,
+ }
+ } else {
+ class.Count++
+ class.totalTimeToAdmissionMs += wlState.TimeToAdmitMs
+ class.totalTimeToFinishMs += wlState.TimeToFinishedMs
+ class.TotalEvictions += wlState.EvictionCount
+ }
+ }
+
+ for _, class := range summary.WorkloadClasses {
+ class.refreshAverage()
+ }
+
+ bytes, err := yaml.Marshal(summary)
+ if err != nil {
+ return err
+ }
+
+ return os.WriteFile(path, bytes, 0666)
+}
+
+func (r *Recorder) WriteCQCsv(path string) (err error) {
+ var f *os.File
+ f, err = os.Create(path)
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+ cWriter := csv.NewWriter(f)
+
+ defer func() {
+ cWriter.Flush()
+ if err == nil {
+ err = cWriter.Error()
+ }
+ }()
+
+ err = cWriter.Write(CQStateCsvHeader)
+ if err != nil {
+ return err
+ }
+
+ for _, cqs := range r.Store.CQ {
+ err = cWriter.Write(cqs.CsvRecord())
+ if err != nil {
+ return err
+ }
+ }
+
+ return err
+}
+
+func (r *Recorder) WriteWLCsv(path string) (err error) {
+ var f *os.File
+ f, err = os.Create(path)
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+ cWriter := csv.NewWriter(f)
+
+ defer func() {
+ cWriter.Flush()
+ if err == nil {
+ err = cWriter.Error()
+ }
+ }()
+
+ err = cWriter.Write(WLStateCsvHeader)
+ if err != nil {
+ return err
+ }
+
+ for _, ev := range r.Store.WL {
+ err = cWriter.Write(ev.CsvRecord())
+ if err != nil {
+ return err
+ }
+ }
+
+ return err
+}
+
func (r *Recorder) Run(ctx context.Context, genDone <-chan struct{}) error {
r.running.Store(true)
defer r.running.Store(false)
@@ -83,24 +400,66 @@ func (r *Recorder) Run(ctx context.Context, genDone <-chan struct{}) error {
select {
case <-ctx.Done():
return ctx.Err()
- case ev := <-r.evChan:
- r.record(ev)
+ case ev := <-r.cqEvChan:
+ r.recordCQEvent(ev)
if generateDone.Load() && !r.expectMoreEvents() {
return nil
}
+ case ev := <-r.wlEvChan:
+ r.recordWLEvent(ev)
}
}
}
-func (r *Recorder) RecordCQStatus(cq *kueue.ClusterQueue) {
+func (r *Recorder) RecordWorkloadState(wl *kueue.Workload) {
if !r.running.Load() {
return
}
+ r.wlEvChan <- &WLEvent{
+ Time: time.Now(),
+ NamespacedName: types.NamespacedName{
+ Namespace: wl.Namespace,
+ Name: wl.Name,
+ },
+ UID: wl.UID,
+ ClassName: wl.Labels[generator.ClassLabel],
+ Admitted: apimeta.IsStatusConditionTrue(wl.Status.Conditions, kueue.WorkloadAdmitted),
+ Evicted: apimeta.IsStatusConditionTrue(wl.Status.Conditions, kueue.WorkloadEvicted),
+ Finished: apimeta.IsStatusConditionTrue(wl.Status.Conditions, kueue.WorkloadFinished),
+ }
+}
+
+func (r *Recorder) RecordCQState(cq *kueue.ClusterQueue) {
+ if !r.running.Load() {
+ return
+ }
+
+ var cpuReserved, cpuUsed, cpuQuota int64
+ if len(cq.Status.FlavorsReservation) > 0 && len(cq.Status.FlavorsReservation[0].Resources) > 0 {
+ cpuReserved = cq.Status.FlavorsReservation[0].Resources[0].Total.MilliValue()
+ }
+
+ if len(cq.Status.FlavorsUsage) > 0 && len(cq.Status.FlavorsUsage[0].Resources) > 0 {
+ cpuUsed = cq.Status.FlavorsUsage[0].Resources[0].Total.MilliValue()
+ }
+
+ if len(cq.Spec.ResourceGroups) > 0 && len(cq.Spec.ResourceGroups[0].Flavors) > 0 && len(cq.Spec.ResourceGroups[0].Flavors[0].Resources) > 0 {
+ cpuQuota = cq.Spec.ResourceGroups[0].Flavors[0].Resources[0].NominalQuota.MilliValue()
+ }
+
+ r.cqEvChan <- &CQEvent{
+ Time: time.Now(),
+ Name: cq.Name,
+ ClassName: cq.Labels[generator.ClassLabel],
+ Cohort: cq.Spec.Cohort,
+ UID: cq.UID,
- r.evChan <- CQStatus{
- Name: cq.Name,
+ CPUReservation: cpuReserved,
+ CPUUsage: cpuUsed,
+ CPUQuota: cpuQuota,
PendingWorkloads: cq.Status.PendingWorkloads,
ReservingWorkloads: cq.Status.ReservingWorkloads,
+ AdmittedWorkloads: cq.Status.AdmittedWorkloads,
Active: apimeta.IsStatusConditionTrue(cq.Status.Conditions, kueue.AdmissionCheckActive),
}
}
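To make the new summary numbers easier to read: CPUUsed accumulates the last reported usage over each interval between ClusterQueue events, in mCPU * ms, so the value the checker compares against clusterQueueClassesMinUsage is an average utilisation of nominal quota over the monitored window. A small self-contained sketch with assumed numbers (the intervals and quota below are made up):
```go
package main

import "fmt"

func main() {
	const nominalQuotaMilli = int64(4000) // 4 CPUs of nominal quota (assumed)

	// Usage held at 2000 mCPU for 10s, then 4000 mCPU for 5s, as reported by
	// successive ClusterQueue status events.
	type interval struct{ usageMilli, durationMs int64 }
	intervals := []interval{{2000, 10_000}, {4000, 5_000}}

	var cpuUsed, monitoringMs int64 // mCPU*ms and total observed time
	for _, iv := range intervals {
		cpuUsed += iv.usageMilli * iv.durationMs
		monitoringMs += iv.durationMs
	}

	// Same expression the checker uses: 100 * CPUUsed / (quota * monitoring time).
	usagePct := float64(cpuUsed) * 100 / (float64(nominalQuotaMilli) * float64(monitoringMs))
	fmt.Printf("average usage: %.1f%% of nominal quota\n", usagePct) // 66.7%
}
```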
From 34dc915d93ca1b206672dfe96943765a1ac15067 Mon Sep 17 00:00:00 2001
From: Dennis Zhou
Date: Mon, 22 Apr 2024 16:00:28 +0800
Subject: [PATCH 10/49] Clickable headers in documentation (#2006)
---
site/layouts/_default/_markup/render-heading.html | 6 ++++++
site/layouts/partials/anchor.html | 4 ++++
2 files changed, 10 insertions(+)
create mode 100644 site/layouts/_default/_markup/render-heading.html
create mode 100644 site/layouts/partials/anchor.html
diff --git a/site/layouts/_default/_markup/render-heading.html b/site/layouts/_default/_markup/render-heading.html
new file mode 100644
index 0000000000..e322af4159
--- /dev/null
+++ b/site/layouts/_default/_markup/render-heading.html
@@ -0,0 +1,6 @@
+
+{{ .Text | safeHTML }}
+
+ {{ partial "anchor.html" . }}
+
+
diff --git a/site/layouts/partials/anchor.html b/site/layouts/partials/anchor.html
new file mode 100644
index 0000000000..83e19c7441
--- /dev/null
+++ b/site/layouts/partials/anchor.html
@@ -0,0 +1,4 @@
+
\ No newline at end of file
From 5762eee822b610d518b539423859f6fcff436476 Mon Sep 17 00:00:00 2001
From: Oleksandr Redko
Date: Mon, 22 Apr 2024 11:34:36 +0300
Subject: [PATCH 11/49] [jobframework] Fix logging of error message (#1944)
---
.golangci.yaml | 1 +
pkg/controller/jobframework/reconciler.go | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/.golangci.yaml b/.golangci.yaml
index b4ba30a602..6c0fdd3289 100644
--- a/.golangci.yaml
+++ b/.golangci.yaml
@@ -29,6 +29,7 @@ linters:
- gocritic
- goimports
- govet
+ - loggercheck
- misspell
- unconvert
diff --git a/pkg/controller/jobframework/reconciler.go b/pkg/controller/jobframework/reconciler.go
index 26b67f5e88..e61257e4ac 100644
--- a/pkg/controller/jobframework/reconciler.go
+++ b/pkg/controller/jobframework/reconciler.go
@@ -402,7 +402,7 @@ func (r *JobReconciler) ReconcileGenericJob(ctx context.Context, req ctrl.Reques
// Mark the workload as finished with failure since there is no point to retry.
errUpdateStatus := workload.UpdateStatus(ctx, r.client, wl, kueue.WorkloadFinished, metav1.ConditionTrue, FailedToStartFinishedReason, err.Error(), constants.JobControllerName)
if errUpdateStatus != nil {
- log.Error(errUpdateStatus, "Updating workload status, on start failure %s", err.Error())
+ log.Error(errUpdateStatus, "Updating workload status, on start failure", "err", err)
}
return ctrl.Result{}, errUpdateStatus
}
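The one-line fix follows logr's calling convention, which the newly enabled loggercheck linter enforces: Error takes a constant message plus alternating key/value pairs and never expands printf verbs. A minimal sketch contrasting the two call shapes (the error values are made up):
```go
package main

import (
	"errors"

	"k8s.io/klog/v2"
)

func main() {
	log := klog.Background()
	startErr := errors.New("job failed to start")             // hypothetical start failure
	errUpdateStatus := errors.New("workload status conflict") // hypothetical status-update failure

	// Before: logr does not interpret "%s", so the verb ends up verbatim in the
	// log line and startErr is attached as a dangling key.
	log.Error(errUpdateStatus, "Updating workload status, on start failure %s", startErr.Error())

	// After: the original error travels as a structured "err" key/value pair.
	log.Error(errUpdateStatus, "Updating workload status, on start failure", "err", startErr)
}
```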
From 9723760039ed23338d4115b0843e0fc8b892426b Mon Sep 17 00:00:00 2001
From: Aldo Culquicondor <1299064+alculquicondor@users.noreply.github.com>
Date: Mon, 22 Apr 2024 05:05:52 -0400
Subject: [PATCH 12/49] Rule s2-b for flat fair preemption (#2002)
* Rule s2-b for flat fair preemption
Change-Id: Iac87a154d8fe2e65b7e6a2097037f509a7d46b44
* Simplified algorithm and extra test cases
Change-Id: I84df52813e3f2990f4a1699ee66f7ac35d7caefc
* review
Change-Id: Ifd8d09dd2768d68c67e20ce2f3280128d6f63057
---
pkg/queue/cluster_queue.go | 2 +-
pkg/scheduler/preemption/preemption.go | 80 ++++++++++++++-------
pkg/scheduler/preemption/preemption_test.go | 77 ++++++++++++++++++++
pkg/util/heap/heap.go | 4 +-
4 files changed, 134 insertions(+), 29 deletions(-)
diff --git a/pkg/queue/cluster_queue.go b/pkg/queue/cluster_queue.go
index c25018f156..d252e07591 100644
--- a/pkg/queue/cluster_queue.go
+++ b/pkg/queue/cluster_queue.go
@@ -95,7 +95,7 @@ func newClusterQueue(cq *kueue.ClusterQueue, wo workload.Ordering) (*ClusterQueu
func newClusterQueueImpl(wo workload.Ordering, clock clock.Clock) *ClusterQueue {
lessFunc := queueOrderingFunc(wo)
return &ClusterQueue{
- heap: heap.New(workloadKey, lessFunc),
+ heap: *heap.New(workloadKey, lessFunc),
inadmissibleWorkloads: make(map[string]*workload.Info),
queueInadmissibleCycle: -1,
lessFunc: lessFunc,
diff --git a/pkg/scheduler/preemption/preemption.go b/pkg/scheduler/preemption/preemption.go
index 5605859dc2..52681a4fad 100644
--- a/pkg/scheduler/preemption/preemption.go
+++ b/pkg/scheduler/preemption/preemption.go
@@ -256,35 +256,13 @@ func restoreSnapshot(snapshot *cache.Snapshot, targets []*workload.Info) {
}
func fairPreemptions(wl *workload.Info, assignment flavorassigner.Assignment, snapshot *cache.Snapshot, resPerFlv resourcesPerFlavor, candidates []*workload.Info, allowBorrowingBelowPriority *int32) []*workload.Info {
- cqHeap := heap.New(
- func(c *candidateCQ) string {
- return c.cq.Name
- },
- func(c1, c2 *candidateCQ) bool {
- return c1.share > c2.share
- },
- )
- for _, cand := range candidates {
- candCQ := cqHeap.GetByKey(cand.ClusterQueue)
- if candCQ == nil {
- cq := snapshot.ClusterQueues[cand.ClusterQueue]
- share, _ := cq.DominantResourceShare()
- candCQ = &candidateCQ{
- cq: cq,
- share: share,
- workloads: []*workload.Info{cand},
- }
- _ = cqHeap.PushIfNotPresent(candCQ)
- } else {
- candCQ.workloads = append(candCQ.workloads, cand)
- }
- }
-
+ cqHeap := cqHeapFromCandidates(candidates, false, snapshot)
nominatedCQ := snapshot.ClusterQueues[wl.ClusterQueue]
newNominatedShareValue, _ := nominatedCQ.DominantResourceShareWith(wl)
wlReq := totalRequestsForAssignment(wl, assignment)
var targets []*workload.Info
fits := false
+ var retryCandidates []*workload.Info
for cqHeap.Len() > 0 && !fits {
candCQ := cqHeap.Pop()
@@ -323,12 +301,35 @@ func fairPreemptions(wl *workload.Info, assignment flavorassigner.Assignment, sn
}
// Might need to pick a different CQ due to changing values.
break
+ } else {
+ retryCandidates = append(retryCandidates, candCQ.workloads[i])
}
}
}
if !fits {
- restoreSnapshot(snapshot, targets)
- return nil
+ // Try rule S2-b in https://sigs.k8s.io/kueue/keps/1714-fair-sharing#choosing-workloads-from-clusterqueues-for-preemption
+ // if rule S2-a was not enough.
+ cqHeap = cqHeapFromCandidates(retryCandidates, true, snapshot)
+
+ for cqHeap.Len() > 0 && !fits {
+ candCQ := cqHeap.Pop()
+ if newNominatedShareValue < candCQ.share {
+				// The criterion doesn't depend on the preempted workload, so just preempt the first candidate.
+ candWl := candCQ.workloads[0]
+ snapshot.RemoveWorkload(candWl)
+ targets = append(targets, candWl)
+ if workloadFits(wlReq, nominatedCQ, true) {
+ fits = true
+ }
+				// No requeueing because there doesn't seem to be a scenario where
+ // it's possible to apply rule S2-b more than once in a CQ.
+ }
+ }
+
+ if !fits {
+ restoreSnapshot(snapshot, targets)
+ return nil
+ }
}
targets = fillBackWorkloads(targets, wlReq, nominatedCQ, snapshot, true)
restoreSnapshot(snapshot, targets)
@@ -341,6 +342,33 @@ type candidateCQ struct {
share int
}
+func cqHeapFromCandidates(candidates []*workload.Info, firstOnly bool, snapshot *cache.Snapshot) *heap.Heap[candidateCQ] {
+ cqHeap := heap.New(
+ func(c *candidateCQ) string {
+ return c.cq.Name
+ },
+ func(c1, c2 *candidateCQ) bool {
+ return c1.share > c2.share
+ },
+ )
+ for _, cand := range candidates {
+ candCQ := cqHeap.GetByKey(cand.ClusterQueue)
+ if candCQ == nil {
+ cq := snapshot.ClusterQueues[cand.ClusterQueue]
+ share, _ := cq.DominantResourceShare()
+ candCQ = &candidateCQ{
+ cq: cq,
+ share: share,
+ workloads: []*workload.Info{cand},
+ }
+ cqHeap.PushOrUpdate(candCQ)
+ } else if !firstOnly {
+ candCQ.workloads = append(candCQ.workloads, cand)
+ }
+ }
+ return cqHeap
+}
+
type resourcesPerFlavor map[kueue.ResourceFlavorReference]sets.Set[corev1.ResourceName]
func resourcesRequiringPreemption(assignment flavorassigner.Assignment) resourcesPerFlavor {
diff --git a/pkg/scheduler/preemption/preemption_test.go b/pkg/scheduler/preemption/preemption_test.go
index 9351d2b4c7..421b2c1461 100644
--- a/pkg/scheduler/preemption/preemption_test.go
+++ b/pkg/scheduler/preemption/preemption_test.go
@@ -1522,6 +1522,83 @@ func TestFairPreemptions(t *testing.T) {
targetCQ: "a",
wantPreempted: sets.New("/a_low", "/b1"),
},
+ "preempt huge workload if there is no other option, as long as the target CQ gets a lower share": {
+ admitted: []kueue.Workload{
+ *utiltesting.MakeWorkload("b1", "").Request(corev1.ResourceCPU, "9").SimpleReserveQuota("b", "default", now).Obj(),
+ },
+ incoming: utiltesting.MakeWorkload("a_incoming", "").Request(corev1.ResourceCPU, "2").Obj(),
+ targetCQ: "a",
+ wantPreempted: sets.New("/b1"),
+ },
+ "can't preempt huge workload if the incoming is also huge": {
+ admitted: []kueue.Workload{
+ *utiltesting.MakeWorkload("a1", "").Request(corev1.ResourceCPU, "2").SimpleReserveQuota("a", "default", now).Obj(),
+ *utiltesting.MakeWorkload("b1", "").Request(corev1.ResourceCPU, "7").SimpleReserveQuota("b", "default", now).Obj(),
+ },
+ incoming: utiltesting.MakeWorkload("a_incoming", "").Request(corev1.ResourceCPU, "5").Obj(),
+ targetCQ: "a",
+ },
+ "can't preempt 2 smaller workloads if the incoming is huge": {
+ admitted: []kueue.Workload{
+ *utiltesting.MakeWorkload("b1", "").Request(corev1.ResourceCPU, "2").SimpleReserveQuota("b", "default", now).Obj(),
+ *utiltesting.MakeWorkload("b2", "").Request(corev1.ResourceCPU, "2").SimpleReserveQuota("b", "default", now).Obj(),
+ *utiltesting.MakeWorkload("b3", "").Request(corev1.ResourceCPU, "3").SimpleReserveQuota("b", "default", now).Obj(),
+ },
+ incoming: utiltesting.MakeWorkload("a_incoming", "").Request(corev1.ResourceCPU, "6").Obj(),
+ targetCQ: "a",
+ },
+ "preempt from target and others even if over nominal": {
+ admitted: []kueue.Workload{
+ *utiltesting.MakeWorkload("a1_low", "").Priority(-1).Request(corev1.ResourceCPU, "2").SimpleReserveQuota("b", "default", now).Obj(),
+ *utiltesting.MakeWorkload("a2_low", "").Priority(-1).Request(corev1.ResourceCPU, "1").SimpleReserveQuota("b", "default", now).Obj(),
+ *utiltesting.MakeWorkload("b1", "").Request(corev1.ResourceCPU, "3").SimpleReserveQuota("b", "default", now).Obj(),
+ *utiltesting.MakeWorkload("b2", "").Request(corev1.ResourceCPU, "3").SimpleReserveQuota("b", "default", now).Obj(),
+ },
+ incoming: utiltesting.MakeWorkload("a_incoming", "").Request(corev1.ResourceCPU, "4").Obj(),
+ targetCQ: "a",
+ wantPreempted: sets.New("/a1_low", "/b1"),
+ },
+ "prefer to preempt workloads that don't make the target CQ have the biggest share": {
+ admitted: []kueue.Workload{
+ *utiltesting.MakeWorkload("b1", "").Request(corev1.ResourceCPU, "2").SimpleReserveQuota("b", "default", now).Obj(),
+ *utiltesting.MakeWorkload("b2", "").Request(corev1.ResourceCPU, "1").SimpleReserveQuota("b", "default", now).Obj(),
+ *utiltesting.MakeWorkload("b3", "").Request(corev1.ResourceCPU, "2").SimpleReserveQuota("b", "default", now).Obj(),
+ *utiltesting.MakeWorkload("c1", "").Request(corev1.ResourceCPU, "1").SimpleReserveQuota("c", "default", now).Obj(),
+ },
+ incoming: utiltesting.MakeWorkload("a_incoming", "").Request(corev1.ResourceCPU, "3.5").Obj(),
+ targetCQ: "a",
+ // It would have been possible to preempt "/b1" under rule S2-b, but S2-a was possible first.
+ wantPreempted: sets.New("/b2"),
+ },
+ "preempt from different cluster queues if the end result has a smaller max share": {
+ admitted: []kueue.Workload{
+ *utiltesting.MakeWorkload("b1", "").Request(corev1.ResourceCPU, "2").SimpleReserveQuota("b", "default", now).Obj(),
+ *utiltesting.MakeWorkload("b2", "").Request(corev1.ResourceCPU, "2.5").SimpleReserveQuota("b", "default", now).Obj(),
+ *utiltesting.MakeWorkload("c1", "").Request(corev1.ResourceCPU, "2").SimpleReserveQuota("c", "default", now).Obj(),
+ *utiltesting.MakeWorkload("c2", "").Request(corev1.ResourceCPU, "2.5").SimpleReserveQuota("c", "default", now).Obj(),
+ },
+ incoming: utiltesting.MakeWorkload("a_incoming", "").Request(corev1.ResourceCPU, "3.5").Obj(),
+ targetCQ: "a",
+ wantPreempted: sets.New("/b1", "/c1"),
+ },
+ "scenario above does not flap": {
+ admitted: []kueue.Workload{
+ *utiltesting.MakeWorkload("a1", "").Request(corev1.ResourceCPU, "3.5").SimpleReserveQuota("a", "default", now).Obj(),
+ *utiltesting.MakeWorkload("b2", "").Request(corev1.ResourceCPU, "2.5").SimpleReserveQuota("b", "default", now).Obj(),
+ *utiltesting.MakeWorkload("c2", "").Request(corev1.ResourceCPU, "2.5").SimpleReserveQuota("c", "default", now).Obj(),
+ },
+ incoming: utiltesting.MakeWorkload("b_incoming", "").Request(corev1.ResourceCPU, "2").Obj(),
+ targetCQ: "b",
+ },
+ "cannot preempt if it would make the candidate CQ go under nominal after preempting one element": {
+ admitted: []kueue.Workload{
+ *utiltesting.MakeWorkload("b1", "").Request(corev1.ResourceCPU, "3").SimpleReserveQuota("b", "default", now).Obj(),
+ *utiltesting.MakeWorkload("b2", "").Request(corev1.ResourceCPU, "3").SimpleReserveQuota("b", "default", now).Obj(),
+ *utiltesting.MakeWorkload("c1", "").Request(corev1.ResourceCPU, "3").SimpleReserveQuota("c", "default", now).Obj(),
+ },
+ incoming: utiltesting.MakeWorkload("a_incoming", "").Request(corev1.ResourceCPU, "4").Obj(),
+ targetCQ: "a",
+ },
"workloads under priority threshold can always be preempted": {
admitted: []kueue.Workload{
*unitWl.Clone().Name("a1").SimpleReserveQuota("a", "default", now).Obj(),
diff --git a/pkg/util/heap/heap.go b/pkg/util/heap/heap.go
index ebad36dc6b..d993434325 100644
--- a/pkg/util/heap/heap.go
+++ b/pkg/util/heap/heap.go
@@ -171,8 +171,8 @@ func (h *Heap[T]) List() []*T {
}
// New returns a Heap which can be used to queue up items to process.
-func New[T any](keyFn keyFunc[T], lessFn lessFunc[T]) Heap[T] {
- return Heap[T]{
+func New[T any](keyFn keyFunc[T], lessFn lessFunc[T]) *Heap[T] {
+ return &Heap[T]{
data: data[T]{
items: make(map[string]*heapItem[T]),
keyFunc: keyFn,
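
A toy illustration of the ordering both the S2-a pass and the new S2-b pass rely on, using only the standard library (not Kueue's pkg/util/heap) and made-up integer shares; the real code additionally checks whether the incoming workload fits and restores the snapshot, which is omitted here:

```go
// Toy illustration of how candidate ClusterQueues are ordered for fair
// preemption: a max-heap keyed by dominant resource share, popped highest
// share first. Under rule S2-b a single workload may be preempted from a CQ
// whose share is still above the share the nominated CQ would reach after
// admitting the incoming workload.
package main

import (
	"container/heap"
	"fmt"
)

type candidateCQ struct {
	name  string
	share int // dominant resource share; larger means further over its fair share
}

type byShare []candidateCQ

func (h byShare) Len() int           { return len(h) }
func (h byShare) Less(i, j int) bool { return h[i].share > h[j].share } // max-heap
func (h byShare) Swap(i, j int)      { h[i], h[j] = h[j], h[i] }
func (h *byShare) Push(x any)        { *h = append(*h, x.(candidateCQ)) }
func (h *byShare) Pop() any {
	old := *h
	x := old[len(old)-1]
	*h = old[:len(old)-1]
	return x
}

func main() {
	// Share the nominated CQ would have if the incoming workload were admitted.
	newNominatedShare := 350

	cands := &byShare{{"b", 450}, {"c", 300}, {"d", 500}}
	heap.Init(cands)

	for cands.Len() > 0 {
		cq := heap.Pop(cands).(candidateCQ)
		if newNominatedShare < cq.share {
			// Rule S2-b: preempting one workload here keeps overall shares flatter.
			fmt.Printf("preempt one workload from %s (share %d > %d)\n",
				cq.name, cq.share, newNominatedShare)
		}
		// The real algorithm stops as soon as the incoming workload fits.
	}
}
```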
From 7880819ae316be7bd9d906c75b7eeb15cd26f133 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 22 Apr 2024 03:28:45 -0700
Subject: [PATCH 13/49] Bump github.com/onsi/gomega from 1.32.0 to 1.33.0
(#2028)
Bumps [github.com/onsi/gomega](https://github.com/onsi/gomega) from 1.32.0 to 1.33.0.
- [Release notes](https://github.com/onsi/gomega/releases)
- [Changelog](https://github.com/onsi/gomega/blob/master/CHANGELOG.md)
- [Commits](https://github.com/onsi/gomega/compare/v1.32.0...v1.33.0)
---
updated-dependencies:
- dependency-name: github.com/onsi/gomega
dependency-type: direct:production
update-type: version-update:semver-minor
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
go.mod | 2 +-
go.sum | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/go.mod b/go.mod
index ee0d082ba9..c5622297bb 100644
--- a/go.mod
+++ b/go.mod
@@ -8,7 +8,7 @@ require (
github.com/kubeflow/mpi-operator v0.5.0
github.com/kubeflow/training-operator v1.7.0
github.com/onsi/ginkgo/v2 v2.17.1
- github.com/onsi/gomega v1.32.0
+ github.com/onsi/gomega v1.33.0
github.com/open-policy-agent/cert-controller v0.10.1
github.com/prometheus/client_golang v1.18.0
github.com/prometheus/client_model v0.6.1
diff --git a/go.sum b/go.sum
index 8dd1eabf2a..954c1391b5 100644
--- a/go.sum
+++ b/go.sum
@@ -153,8 +153,8 @@ github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU=
github.com/onsi/ginkgo/v2 v2.17.1 h1:V++EzdbhI4ZV4ev0UTIj0PzhzOcReJFyJaLjtSF55M8=
github.com/onsi/ginkgo/v2 v2.17.1/go.mod h1:llBI3WDLL9Z6taip6f33H76YcWtJv+7R3HigUjbIBOs=
-github.com/onsi/gomega v1.32.0 h1:JRYU78fJ1LPxlckP6Txi/EYqJvjtMrDC04/MM5XRHPk=
-github.com/onsi/gomega v1.32.0/go.mod h1:a4x4gW6Pz2yK1MAmvluYme5lvYTn61afQ2ETw/8n4Lg=
+github.com/onsi/gomega v1.33.0 h1:snPCflnZrpMsy94p4lXVEkHo12lmPnc3vY5XBbreexE=
+github.com/onsi/gomega v1.33.0/go.mod h1:+925n5YtiFsLzzafLUHzVMBpvvRAzrydIBiSIxjX3wY=
github.com/open-policy-agent/cert-controller v0.10.1 h1:RXSYoyn8FdCenWecRP//UV5nbVfmstNpj4kHQFkvPK4=
github.com/open-policy-agent/cert-controller v0.10.1/go.mod h1:4uRbBLY5DsPOog+a9pqk3JLxuuhrWsbUedQW65HcLTI=
github.com/open-policy-agent/frameworks/constraint v0.0.0-20230822235116-f0b62fe1e4c4 h1:5dum5SLEz+95JDLkMls7Z7IDPjvSq3UhJSFe4f5einQ=
From 63f46ac9e56a9a988c978945629b135dcdfdc8e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Wo=C5=BAniak?=
Date: Mon, 22 Apr 2024 18:52:31 +0200
Subject: [PATCH 14/49] Make the defaults for PodsReady backoff more practical
(#2025)
---
apis/config/v1beta1/configuration_types.go | 10 +++----
.../README.md | 26 ++++++++--------
pkg/controller/core/core.go | 30 +++++++++++++++++--
pkg/controller/core/workload_controller.go | 28 ++++++++++-------
.../core/workload_controller_test.go | 7 +++--
.../scheduler/podsready/suite_test.go | 3 +-
6 files changed, 69 insertions(+), 35 deletions(-)
diff --git a/apis/config/v1beta1/configuration_types.go b/apis/config/v1beta1/configuration_types.go
index f21f2dd79d..88f36ddaf5 100644
--- a/apis/config/v1beta1/configuration_types.go
+++ b/apis/config/v1beta1/configuration_types.go
@@ -238,12 +238,12 @@ type RequeuingStrategy struct {
// Once the number is reached, the workload is deactivated (`.spec.activate`=`false`).
// When it is null, the workloads will repeatedly and endless re-queueing.
//
- // Every backoff duration is about "1.41284738^(n-1)+Rand" where the "n" represents the "workloadStatus.requeueState.count",
- // and the "Rand" represents the random jitter. During this time, the workload is taken as an inadmissible and
+ // Every backoff duration is about "10s*2^(n-1)+Rand" where:
+ // - "n" represents the "workloadStatus.requeueState.count",
+ // - "Rand" represents the random jitter.
+	// During this time, the workload is considered inadmissible and
// other workloads will have a chance to be admitted.
- // For example, when the "waitForPodsReady.timeout" is the default, the workload deactivation time is as follows:
- // {backoffLimitCount, workloadDeactivationSeconds}
- // ~= {1, 601}, {2, 902}, ...,{5, 1811}, ...,{10, 3374}, ...,{20, 8730}, ...,{30, 86400(=24 hours)}, ...
+ // By default, the consecutive requeue delays are around: (10s, 20s, 40s, ...).
//
// Defaults to null.
// +optional
diff --git a/keps/1282-pods-ready-requeue-strategy/README.md b/keps/1282-pods-ready-requeue-strategy/README.md
index df4af8a1b2..b025b3bb9f 100644
--- a/keps/1282-pods-ready-requeue-strategy/README.md
+++ b/keps/1282-pods-ready-requeue-strategy/README.md
@@ -143,12 +143,12 @@ type RequeuingStrategy struct {
// Once the number is reached, the workload is deactivated (`.spec.activate`=`false`).
// When it is null, the workloads will repeatedly and endless re-queueing.
//
- // Every backoff duration is about "1.41284738^(n-1)+Rand" where the "n" represents the "workloadStatus.requeueState.count",
- // and the "Rand" represents the random jitter. During this time, the workload is taken as an inadmissible and
+ // Every backoff duration is about "10s*2^(n-1)+Rand" where:
+ // - "n" represents the "workloadStatus.requeueState.count",
+ // - "Rand" represents the random jitter.
+	// During this time, the workload is considered inadmissible and
// other workloads will have a chance to be admitted.
- // For example, when the "waitForPodsReady.timeout" is the default, the workload deactivation time is as follows:
- // {backoffLimitCount, workloadDeactivationSeconds}
- // ~= {1, 601}, {2, 902}, ...,{5, 1811}, ...,{10, 3374}, ...,{20, 8730}, ...,{30, 86400(=24 hours)}, ...
+ // By default, the consecutive requeue delays are around: (10s, 20s, 40s, ...).
//
// Defaults to null.
// +optional
@@ -222,16 +222,16 @@ the queueManager holds the evicted workloads as inadmissible workloads while exp
Duration this time, other workloads will have a chance to be admitted.
The queueManager calculates an exponential backoff duration by [the Step function](https://pkg.go.dev/k8s.io/apimachinery/pkg/util/wait@v0.29.1#Backoff.Step)
-according to the $1.41284738^{(n-1)}+Rand$ where the $n$ represents the `workloadStatus.requeueState.count`, and the $Rand$ represents the random jitter.
+according to the $10s*2^{(n-1)}+Rand$ where the $n$ represents the `workloadStatus.requeueState.count`, and the $Rand$ represents the random jitter.
-Considering the `.waitForPodsReady.timeout` (default: 300 seconds),
-this duration indicates that an evicted workload with `PodsReadyTimeout` reason is continued re-queuing
-for the following period where the $t$ represents `.waitForPodsReady.timeout`:
+In total, the workload will spend the following time awaiting requeue after eviction:
+$$\sum_{k=1}^{n}(10s*2^{(k-1)} + Rand)$$
-$$t(n+1) + \sum_{k=1}^{n}(1.41284738^{(k-1)} + Rand)$$
-
-Given that the `backoffLimitCount` equals `30` and the `waitForPodsReady.timeout` equals `300` (default),
-the result equals 24 hours (+ $Rand$ seconds).
+Assuming `backoffLimitCount` equals 10, and the workload is requeued 10 times
+after failing to have all pods ready, the total time spent awaiting requeue
+is (neglecting the jitter): `10s+20s+40s+...+5120s=10230s`, about `2h 50min`.
+Also, considering `.waitForPodsReady.timeout=300s` (default),
+the workload will spend a total of `50min` waiting for pods to become ready.
#### Evaluation
diff --git a/pkg/controller/core/core.go b/pkg/controller/core/core.go
index b29eb39aa7..94beae3232 100644
--- a/pkg/controller/core/core.go
+++ b/pkg/controller/core/core.go
@@ -28,11 +28,26 @@ import (
"sigs.k8s.io/kueue/pkg/queue"
)
-const updateChBuffer = 10
+const (
+ updateChBuffer = 10
+ defaultRequeuingBaseDelaySeconds = 10
+)
+
+type ControllerOptions struct {
+ requeuingBaseDelaySeconds int32
+}
+
+type ControllerOption func(*ControllerOptions)
+
+func WithControllerRequeuingBaseDelaySeconds(value int32) ControllerOption {
+ return func(o *ControllerOptions) {
+ o.requeuingBaseDelaySeconds = value
+ }
+}
// SetupControllers sets up the core controllers. It returns the name of the
// controller that failed to create and an error, if any.
-func SetupControllers(mgr ctrl.Manager, qManager *queue.Manager, cc *cache.Cache, cfg *configapi.Configuration) (string, error) {
+func SetupControllers(mgr ctrl.Manager, qManager *queue.Manager, cc *cache.Cache, cfg *configapi.Configuration, controllerOpts ...ControllerOption) (string, error) {
rfRec := NewResourceFlavorReconciler(mgr.GetClient(), qManager, cc)
if err := rfRec.SetupWithManager(mgr, cfg); err != nil {
return "ResourceFlavor", err
@@ -63,11 +78,20 @@ func SetupControllers(mgr ctrl.Manager, qManager *queue.Manager, cc *cache.Cache
if err := cqRec.SetupWithManager(mgr, cfg); err != nil {
return "ClusterQueue", err
}
+ ctrlOpts := ControllerOptions{
+ requeuingBaseDelaySeconds: defaultRequeuingBaseDelaySeconds,
+ }
+ for _, opt := range controllerOpts {
+ opt(&ctrlOpts)
+ }
+
if err := NewWorkloadReconciler(mgr.GetClient(), qManager, cc,
mgr.GetEventRecorderFor(constants.WorkloadControllerName),
WithWorkloadUpdateWatchers(qRec, cqRec),
WithPodsReadyTimeout(podsReadyTimeout(cfg)),
- WithRequeuingBackoffLimitCount(requeuingBackoffLimitCount(cfg))).SetupWithManager(mgr, cfg); err != nil {
+ WithRequeuingBackoffLimitCount(requeuingBackoffLimitCount(cfg)),
+ WithRequeuingBaseDelaySeconds(ctrlOpts.requeuingBaseDelaySeconds),
+ ).SetupWithManager(mgr, cfg); err != nil {
return "Workload", err
}
return "", nil
diff --git a/pkg/controller/core/workload_controller.go b/pkg/controller/core/workload_controller.go
index 0a964ce7b1..47dbfa62e2 100644
--- a/pkg/controller/core/workload_controller.go
+++ b/pkg/controller/core/workload_controller.go
@@ -72,6 +72,7 @@ type options struct {
watchers []WorkloadUpdateWatcher
podsReadyTimeout *time.Duration
requeuingBackoffLimitCount *int32
+ requeuingBaseDelaySeconds int32
}
// Option configures the reconciler.
@@ -93,6 +94,14 @@ func WithRequeuingBackoffLimitCount(value *int32) Option {
}
}
+// WithRequeuingBaseDelaySeconds indicates the base delay for the computation
+// of the requeue delay.
+func WithRequeuingBaseDelaySeconds(value int32) Option {
+ return func(o *options) {
+ o.requeuingBaseDelaySeconds = value
+ }
+}
+
// WithWorkloadUpdateWatchers allows to specify the workload update watchers
func WithWorkloadUpdateWatchers(value ...WorkloadUpdateWatcher) Option {
return func(o *options) {
@@ -115,6 +124,7 @@ type WorkloadReconciler struct {
watchers []WorkloadUpdateWatcher
podsReadyTimeout *time.Duration
requeuingBackoffLimitCount *int32
+ requeuingBaseDelaySeconds int32
recorder record.EventRecorder
}
@@ -132,6 +142,7 @@ func NewWorkloadReconciler(client client.Client, queues *queue.Manager, cache *c
watchers: options.watchers,
podsReadyTimeout: options.podsReadyTimeout,
requeuingBackoffLimitCount: options.requeuingBackoffLimitCount,
+ requeuingBaseDelaySeconds: options.requeuingBaseDelaySeconds,
recorder: recorder,
}
}
@@ -389,17 +400,14 @@ func (r *WorkloadReconciler) triggerDeactivationOrBackoffRequeue(ctx context.Con
"Deactivated Workload %q by reached re-queue backoffLimitCount", klog.KObj(wl))
return true, nil
}
- // Every backoff duration is about "1.41284738^(n-1)+Rand" where the "n" represents the "requeuingCount",
- // and the "Rand" represents the random jitter. During this time, the workload is taken as an inadmissible and
- // other workloads will have a chance to be admitted.
- // Considering the ".waitForPodsReady.timeout",
- // this indicates that an evicted workload with PodsReadyTimeout reason is continued re-queuing for
- // the "t(n+1) + SUM[k=1,n](1.41284738^(k-1) + Rand)" seconds where the "t" represents "waitForPodsReady.timeout".
- // Given that the "backoffLimitCount" equals "30" and the "waitForPodsReady.timeout" equals "300" (default),
- // the result equals 24 hours (+Rand seconds).
+ // Every backoff duration is about "10s*2^(n-1)+Rand" where:
+ // - "n" represents the "requeuingCount",
+ // - "Rand" represents the random jitter.
+	// During this time, the workload is considered inadmissible and other
+ // workloads will have a chance to be admitted.
backoff := &wait.Backoff{
- Duration: 1 * time.Second,
- Factor: 1.41284738,
+ Duration: time.Duration(r.requeuingBaseDelaySeconds) * time.Second,
+ Factor: 2,
Jitter: 0.0001,
Steps: int(requeuingCount),
}
diff --git a/pkg/controller/core/workload_controller_test.go b/pkg/controller/core/workload_controller_test.go
index 1bd9f421dc..8e16f77cd4 100644
--- a/pkg/controller/core/workload_controller_test.go
+++ b/pkg/controller/core/workload_controller_test.go
@@ -508,6 +508,7 @@ func TestReconcile(t *testing.T) {
reconcilerOpts: []Option{
WithPodsReadyTimeout(ptr.To(3 * time.Second)),
WithRequeuingBackoffLimitCount(ptr.To[int32](100)),
+ WithRequeuingBaseDelaySeconds(10),
},
workload: utiltesting.MakeWorkload("wl", "ns").
ReserveQuota(utiltesting.MakeAdmission("q1").Obj()).
@@ -523,7 +524,7 @@ func TestReconcile(t *testing.T) {
Message: "Admitted by ClusterQueue q1",
}).
Admitted(true).
- RequeueState(ptr.To[int32](29), nil).
+ RequeueState(ptr.To[int32](3), nil).
Generation(1).
Obj(),
wantWorkload: utiltesting.MakeWorkload("wl", "ns").
@@ -541,8 +542,8 @@ func TestReconcile(t *testing.T) {
Message: "Exceeded the PodsReady timeout ns/wl",
ObservedGeneration: 1,
}).
- // 1.41284738^(30-1) = 22530.0558
- RequeueState(ptr.To[int32](30), ptr.To(metav1.NewTime(testStartTime.Add(22530*time.Second).Truncate(time.Second)))).
+ // 10s * 2^(4-1) = 80s
+ RequeueState(ptr.To[int32](4), ptr.To(metav1.NewTime(testStartTime.Add(80*time.Second).Truncate(time.Second)))).
Obj(),
},
"deactivated workload": {
diff --git a/test/integration/scheduler/podsready/suite_test.go b/test/integration/scheduler/podsready/suite_test.go
index 13c25ce7fe..bc2daabd4b 100644
--- a/test/integration/scheduler/podsready/suite_test.go
+++ b/test/integration/scheduler/podsready/suite_test.go
@@ -87,7 +87,8 @@ func managerAndSchedulerSetupWithTimeoutAdmission(
queue.WithPodsReadyRequeuingTimestamp(requeuingTimestamp),
)
- failedCtrl, err := core.SetupControllers(mgr, queues, cCache, cfg)
+ failedCtrl, err := core.SetupControllers(mgr, queues, cCache, cfg,
+ core.WithControllerRequeuingBaseDelaySeconds(1))
gomega.Expect(err).ToNot(gomega.HaveOccurred(), "controller", failedCtrl)
failedWebhook, err := webhooks.Setup(mgr)
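
A minimal standalone sketch of how the wait.Backoff configured above yields the delays quoted in the KEP, assuming `backoffLimitCount=10` and ignoring the tiny jitter; it only prints the sequence, it is not the controller logic:

```go
// Standalone sketch of the new requeue backoff defaults (Duration=10s, Factor=2):
// the n-th delay is roughly 10s*2^(n-1), i.e. 10s, 20s, 40s, ... With
// backoffLimitCount=10 the delays sum to about 10230s (~2h50m), ignoring jitter.
package main

import (
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

func main() {
	backoff := wait.Backoff{
		Duration: 10 * time.Second, // defaultRequeuingBaseDelaySeconds
		Factor:   2,
		Jitter:   0.0001,
		Steps:    10, // e.g. backoffLimitCount = 10
	}

	var total time.Duration
	for n := 1; n <= 10; n++ {
		d := backoff.Step() // returns the current delay and doubles it for the next step
		total += d
		fmt.Printf("requeue %2d: wait ~%v\n", n, d.Truncate(time.Second))
	}
	fmt.Println("total backoff:", total.Truncate(time.Minute)) // ~2h50m
}
```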
From b2bb2bfbfb90c318fe50c5311e8334b0b00c093a Mon Sep 17 00:00:00 2001
From: Aldo Culquicondor <1299064+alculquicondor@users.noreply.github.com>
Date: Mon, 22 Apr 2024 13:45:19 -0400
Subject: [PATCH 15/49] Fix generation of API reference (#2034)
Change-Id: I58e0036c9dba6215e156a927b45554d22b28201a
---
Makefile | 2 +-
{site => hack}/genref/config.yaml | 0
{site => hack}/genref/markdown/members.tpl | 0
{site => hack}/genref/markdown/pkg.tpl | 0
{site => hack}/genref/markdown/type.tpl | 0
.../en/docs/reference/kueue-config.v1beta1.md | 27 +++++--
.../en/docs/reference/kueue.v1beta1.md | 75 ++++++++++++++++++-
7 files changed, 96 insertions(+), 8 deletions(-)
rename {site => hack}/genref/config.yaml (100%)
rename {site => hack}/genref/markdown/members.tpl (100%)
rename {site => hack}/genref/markdown/pkg.tpl (100%)
rename {site => hack}/genref/markdown/type.tpl (100%)
diff --git a/Makefile b/Makefile
index 240beee7f8..83ce334469 100644
--- a/Makefile
+++ b/Makefile
@@ -498,4 +498,4 @@ cluster-autoscaler-crd:
.PHONY: generate-apiref
generate-apiref: genref
- cd $(PROJECT_DIR)/site/genref/ && $(GENREF) -o $(PROJECT_DIR)/site/content/en/docs/reference
+ cd $(PROJECT_DIR)/hack/genref/ && $(GENREF) -o $(PROJECT_DIR)/site/content/en/docs/reference
diff --git a/site/genref/config.yaml b/hack/genref/config.yaml
similarity index 100%
rename from site/genref/config.yaml
rename to hack/genref/config.yaml
diff --git a/site/genref/markdown/members.tpl b/hack/genref/markdown/members.tpl
similarity index 100%
rename from site/genref/markdown/members.tpl
rename to hack/genref/markdown/members.tpl
diff --git a/site/genref/markdown/pkg.tpl b/hack/genref/markdown/pkg.tpl
similarity index 100%
rename from site/genref/markdown/pkg.tpl
rename to hack/genref/markdown/pkg.tpl
diff --git a/site/genref/markdown/type.tpl b/hack/genref/markdown/type.tpl
similarity index 100%
rename from site/genref/markdown/type.tpl
rename to hack/genref/markdown/type.tpl
diff --git a/site/content/en/docs/reference/kueue-config.v1beta1.md b/site/content/en/docs/reference/kueue-config.v1beta1.md
index a3016ae344..55fb12778a 100644
--- a/site/content/en/docs/reference/kueue-config.v1beta1.md
+++ b/site/content/en/docs/reference/kueue-config.v1beta1.md
@@ -436,6 +436,21 @@ Possible options:
PodOptions defines kueue controller behaviour for pod objects
+
labelKeysToCopy[Required]
+[]string
+
+
+
labelKeysToCopy is a list of label keys that should be copied from the job into the
+workload object. It is not required for the job to have all the labels from this
+list. If a job does not have some label with the given key from this list, the
+constructed workload object will be created without this label. In the case
+of creating a workload from a composable job (pod group), if multiple objects
+have labels with some key from the list, the values of these labels must
+match or otherwise the workload creation would fail. The labels are copied only
+during the workload creation and are not updated even if the labels of the
+underlying job are changed.
+
+
@@ -622,12 +637,14 @@ that was evicted due to Pod readiness. The possible values are:
BackoffLimitCount defines the maximum number of re-queuing retries.
Once the number is reached, the workload is deactivated (.spec.activate=false).
When it is null, the workloads will repeatedly and endless re-queueing.
-
Every backoff duration is about "1.41284738^(n-1)+Rand" where the "n" represents the "workloadStatus.requeueState.count",
-and the "Rand" represents the random jitter. During this time, the workload is taken as an inadmissible and
+
Every backoff duration is about "10s*2^(n-1)+Rand" where:
+
+
"n" represents the "workloadStatus.requeueState.count",
+
"Rand" represents the random jitter.
+During this time, the workload is considered inadmissible and
other workloads will have a chance to be admitted.
-For example, when the "waitForPodsReady.timeout" is the default, the workload deactivation time is as follows:
-{backoffLimitCount, workloadDeactivationSeconds}
-~= {1, 601}, {2, 902}, ...,{5, 1811}, ...,{10, 3374}, ...,{20, 8730}, ...,{30, 86400(=24 hours)}, ...
+By default, the consecutive requeue delays are around: (10s, 20s, 40s, ...).
onFlavors is a list of ResourceFlavors' names that this AdmissionCheck should run for.
+If empty, the AdmissionCheck will run for all workloads submitted to the ClusterQueue.
admissionChecks is a list of strategies for AdmissionChecks
+
+
+
+
+
## `BorrowWithinCohort` {#kueue-x-k8s-io-v1beta1-BorrowWithinCohort}
@@ -748,7 +808,7 @@ reclaim its nominal quota.
and there are admitted Workloads in the ClusterQueue with lower priority.
The preemption algorithm tries to find a minimal set of Workloads to
-preempt to accommodate the pending Workload, preempting Workloads with
+preempt to accommodate the pending Workload, preempting Workloads with
lower priority first.
admissionCheckStrategy defines a list of strategies to determine which ResourceFlavors require AdmissionChecks.
+This property cannot be used in conjunction with the 'admissionChecks' property.
stopPolicy
@@ -1448,6 +1517,8 @@ this time would be reset to null.
**Appears in:**
+- [AdmissionCheckStrategyRule](#kueue-x-k8s-io-v1beta1-AdmissionCheckStrategyRule)
+
- [FlavorQuotas](#kueue-x-k8s-io-v1beta1-FlavorQuotas)
- [FlavorUsage](#kueue-x-k8s-io-v1beta1-FlavorUsage)
From a19e8b7163360c749508374dcfb4404703638fa1 Mon Sep 17 00:00:00 2001
From: Mykhailo Bobrovskyi
Date: Tue, 23 Apr 2024 09:19:00 +0300
Subject: [PATCH 16/49] Allow run test-multikueue-e2e for mac os (#1971)
* [multikueue] Fixed multikueue e2e tests for Mac OS
* [multikueue] Fixed imports
* [multikueue] Recreate the client to force disconnect
* [multikueue] Recreate the client after connect
* [metrics] Fixed timeout error.
* [metrics] Moved cluster server replacements to multikueue-e2e-test.sh.
* [metrics] Fixed multikueue-e2e-test.sh.
* [metrics] Using WaitForKueueAvailability instead time.Sleep.
* [metrics] Fixed timeout error.
* [metrics] Fixed multikueue-e2e-test.
* [metrics] Fixed imports.
* [metrics] Fixed timeouts.
* [multikueue] Fixed timeout error.
* [multikueue] Added code explanation.
* [metrics] Optimization.
* [multikueue] Put DeleteCluster to Eventually.
---
hack/multikueue-e2e-test.sh | 26 +++++---------
hack/multikueue/worker-cluster.kind.yaml | 2 --
test/e2e/multikueue/e2e_test.go | 43 ++++++++++++++++++++----
test/e2e/multikueue/suite_test.go | 10 ++++--
4 files changed, 52 insertions(+), 29 deletions(-)
diff --git a/hack/multikueue-e2e-test.sh b/hack/multikueue-e2e-test.sh
index 7607bc8493..14470fb173 100755
--- a/hack/multikueue-e2e-test.sh
+++ b/hack/multikueue-e2e-test.sh
@@ -50,21 +50,9 @@ function startup {
mkdir -p "$ARTIFACTS"
fi
- cluster_create "$MANAGER_KIND_CLUSTER_NAME" "$SOURCE_DIR"/multikueue/manager-cluster.kind.yaml
-
- # NOTE: for local setup, make sure that your firewall allows tcp from manager to the GW ip
- # eg. ufw `sudo ufw allow from 172.18.0.0/16 proto tcp to 172.18.0.1`
- #
- # eg. iptables `sudo iptables --append INPUT --protocol tcp --src 172.18.0.0/16 --dst 172.18.0.1 --jump ACCEPT
- # sudo iptables --append OUTPUT --protocol tcp --src 172.18.0.1 --dst 172.18.0./0/16 --jump ACCEPT`
-
- # have the worker forward the api to the docker gateway address instead of lo
- export GW="$(docker inspect "${MANAGER_KIND_CLUSTER_NAME}"-control-plane -f '{{.NetworkSettings.Networks.kind.Gateway}}')"
- $YQ e '.networking.apiServerAddress=env(GW)' "$SOURCE_DIR/multikueue/worker-cluster.kind.yaml" > "$ARTIFACTS"/worker-cluster.yaml
-
- cluster_create $WORKER1_KIND_CLUSTER_NAME $ARTIFACTS/worker-cluster.yaml
- cluster_create $WORKER2_KIND_CLUSTER_NAME $ARTIFACTS/worker-cluster.yaml
-
+ cluster_create "$MANAGER_KIND_CLUSTER_NAME" "$SOURCE_DIR/multikueue/manager-cluster.kind.yaml"
+ cluster_create $WORKER1_KIND_CLUSTER_NAME "$SOURCE_DIR/multikueue/worker-cluster.kind.yaml"
+ cluster_create $WORKER2_KIND_CLUSTER_NAME "$SOURCE_DIR/multikueue/worker-cluster.kind.yaml"
fi
}
@@ -96,19 +84,21 @@ function kueue_deploy {
function prepare_secrets {
kubectl config use-context kind-${WORKER1_KIND_CLUSTER_NAME}
source ${SOURCE_DIR}/create-multikueue-kubeconfig.sh ${ARTIFACTS}/worker1.kubeconfig
+ $YQ e ".clusters[0].cluster.server = \"https://${WORKER1_KIND_CLUSTER_NAME}-control-plane:6443\"" ${ARTIFACTS}/worker1.kubeconfig > ${ARTIFACTS}/worker1.kubeconfig.internal
kubectl config use-context kind-${WORKER2_KIND_CLUSTER_NAME}
source ${SOURCE_DIR}/create-multikueue-kubeconfig.sh ${ARTIFACTS}/worker2.kubeconfig
+ $YQ e ".clusters[0].cluster.server = \"https://${WORKER2_KIND_CLUSTER_NAME}-control-plane:6443\"" ${ARTIFACTS}/worker2.kubeconfig > ${ARTIFACTS}/worker2.kubeconfig.internal
kubectl config use-context kind-${MANAGER_KIND_CLUSTER_NAME}
- kubectl create secret generic multikueue1 -n kueue-system --from-file=kubeconfig=${ARTIFACTS}/worker1.kubeconfig
- kubectl create secret generic multikueue2 -n kueue-system --from-file=kubeconfig=${ARTIFACTS}/worker2.kubeconfig
+ kubectl create secret generic multikueue1 -n kueue-system --from-file=kubeconfig=${ARTIFACTS}/worker1.kubeconfig.internal
+ kubectl create secret generic multikueue2 -n kueue-system --from-file=kubeconfig=${ARTIFACTS}/worker2.kubeconfig.internal
}
trap cleanup EXIT
startup
kind_load
-kueue_deploy
+kueue_deploy
prepare_secrets
$GINKGO $GINKGO_ARGS --junit-report=junit.xml --output-dir=$ARTIFACTS -v ./test/e2e/multikueue/...
diff --git a/hack/multikueue/worker-cluster.kind.yaml b/hack/multikueue/worker-cluster.kind.yaml
index 7d613955fa..c7892087f9 100644
--- a/hack/multikueue/worker-cluster.kind.yaml
+++ b/hack/multikueue/worker-cluster.kind.yaml
@@ -1,7 +1,5 @@
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
-networking:
- apiServerAddress: "FILLED_AT_RUNTIME"
nodes:
- role: control-plane
kubeadmConfigPatches:
diff --git a/test/e2e/multikueue/e2e_test.go b/test/e2e/multikueue/e2e_test.go
index 397192e3f2..c2a9c1c675 100644
--- a/test/e2e/multikueue/e2e_test.go
+++ b/test/e2e/multikueue/e2e_test.go
@@ -17,6 +17,7 @@ limitations under the License.
package mke2e
import (
+ "fmt"
"os/exec"
"github.com/google/go-cmp/cmp/cmpopts"
@@ -345,15 +346,32 @@ var _ = ginkgo.Describe("MultiKueue", func() {
})
ginkgo.When("The connection to a worker cluster is unreliable", func() {
ginkgo.It("Should update the cluster status to reflect the connection state", func() {
+ worker1Cq2 := utiltesting.MakeClusterQueue("q2").
+ ResourceGroup(
+ *utiltesting.MakeFlavorQuotas(worker1Flavor.Name).
+ Resource(corev1.ResourceCPU, "2").
+ Resource(corev1.ResourceMemory, "1G").
+ Obj(),
+ ).
+ Obj()
+ gomega.Expect(k8sWorker1Client.Create(ctx, worker1Cq2)).Should(gomega.Succeed())
+
+ worker1Container := fmt.Sprintf("%s-control-plane", worker1ClusterName)
+ worker1ClusterKey := client.ObjectKeyFromObject(workerCluster1)
+
ginkgo.By("Disconnecting worker1 container from the kind network", func() {
- cmd := exec.Command("docker", "network", "disconnect", "kind", "kind-worker1-control-plane")
+ cmd := exec.Command("docker", "network", "disconnect", "kind", worker1Container)
output, err := cmd.CombinedOutput()
gomega.Expect(err).NotTo(gomega.HaveOccurred(), "%s: %s", err, output)
- })
- worker1ClusterKey := client.ObjectKeyFromObject(workerCluster1)
+ podList := &corev1.PodList{}
+ podListOptions := client.InNamespace("kueue-system")
+ gomega.Eventually(func(g gomega.Gomega) error {
+ return k8sWorker1Client.List(ctx, podList, podListOptions)
+ }, util.LongTimeout, util.Interval).ShouldNot(gomega.Succeed())
+ })
- ginkgo.By("Waiting for the cluster do become inactive", func() {
+ ginkgo.By("Waiting for the cluster to become inactive", func() {
readClient := &kueuealpha.MultiKueueCluster{}
gomega.Eventually(func(g gomega.Gomega) {
g.Expect(k8sManagerClient.Get(ctx, worker1ClusterKey, readClient)).To(gomega.Succeed())
@@ -364,13 +382,26 @@ var _ = ginkgo.Describe("MultiKueue", func() {
Reason: "ClientConnectionFailed",
},
util.IgnoreConditionTimestampsAndObservedGeneration, util.IgnoreConditionMessage)))
- }, util.Timeout, util.Interval).Should(gomega.Succeed())
+ }, util.LongTimeout, util.Interval).Should(gomega.Succeed())
})
ginkgo.By("Reconnecting worker1 container to the kind network", func() {
- cmd := exec.Command("docker", "network", "connect", "kind", "kind-worker1-control-plane")
+ cmd := exec.Command("docker", "network", "connect", "kind", worker1Container)
output, err := cmd.CombinedOutput()
gomega.Expect(err).NotTo(gomega.HaveOccurred(), "%s: %s", err, output)
+ gomega.Eventually(func() error {
+ return util.DeleteClusterQueue(ctx, k8sWorker1Client, worker1Cq2)
+ }, util.LongTimeout, util.Interval).ShouldNot(gomega.HaveOccurred())
+
+			// After reconnecting the container to the network, listing pods may still
+			// return the previous (pre-disconnect) values; it takes some time for the
+			// cluster to recover and serve up-to-date data.
+			// To be sure that the kueue-controller-manager leader successfully recovered,
+			// we check that the previously created ClusterQueue can be removed.
+ var cq kueue.ClusterQueue
+ gomega.Eventually(func() error {
+ return k8sWorker1Client.Get(ctx, client.ObjectKeyFromObject(worker1Cq2), &cq)
+ }, util.LongTimeout, util.Interval).Should(utiltesting.BeNotFoundError())
})
ginkgo.By("Waiting for the cluster do become active", func() {
diff --git a/test/e2e/multikueue/suite_test.go b/test/e2e/multikueue/suite_test.go
index 4356089313..14b28841df 100644
--- a/test/e2e/multikueue/suite_test.go
+++ b/test/e2e/multikueue/suite_test.go
@@ -31,6 +31,10 @@ import (
)
var (
+ managerClusterName string
+ worker1ClusterName string
+ worker2ClusterName string
+
k8sManagerClient client.Client
k8sWorker1Client client.Client
k8sWorker2Client client.Client
@@ -49,13 +53,13 @@ func TestAPIs(t *testing.T) {
}
var _ = ginkgo.BeforeSuite(func() {
- managerClusterName := os.Getenv("MANAGER_KIND_CLUSTER_NAME")
+ managerClusterName = os.Getenv("MANAGER_KIND_CLUSTER_NAME")
gomega.Expect(managerClusterName).NotTo(gomega.BeEmpty(), "MANAGER_KIND_CLUSTER_NAME should not be empty")
- worker1ClusterName := os.Getenv("WORKER1_KIND_CLUSTER_NAME")
+ worker1ClusterName = os.Getenv("WORKER1_KIND_CLUSTER_NAME")
gomega.Expect(worker1ClusterName).NotTo(gomega.BeEmpty(), "WORKER1_KIND_CLUSTER_NAME should not be empty")
- worker2ClusterName := os.Getenv("WORKER2_KIND_CLUSTER_NAME")
+ worker2ClusterName = os.Getenv("WORKER2_KIND_CLUSTER_NAME")
gomega.Expect(worker2ClusterName).NotTo(gomega.BeEmpty(), "WORKER2_KIND_CLUSTER_NAME should not be empty")
k8sManagerClient = util.CreateClientUsingCluster("kind-" + managerClusterName)
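
The key trick in the shell change above is rewriting the worker kubeconfig's server URL to the kind control-plane container name, which is reachable from the manager inside the shared docker network. Purely for illustration (not part of the patch; the file names and cluster name are placeholders), the same rewrite using client-go's clientcmd helpers:

```go
// Illustration only: the patch does this with yq in multikueue-e2e-test.sh.
// Rewrite a kubeconfig so its server points at the kind control-plane
// container name, reachable from inside the shared docker network.
package main

import (
	"fmt"
	"log"

	"k8s.io/client-go/tools/clientcmd"
)

func main() {
	const workerClusterName = "kind-worker1" // e.g. $WORKER1_KIND_CLUSTER_NAME

	cfg, err := clientcmd.LoadFromFile("worker1.kubeconfig")
	if err != nil {
		log.Fatal(err)
	}
	for _, cluster := range cfg.Clusters {
		cluster.Server = fmt.Sprintf("https://%s-control-plane:6443", workerClusterName)
	}
	if err := clientcmd.WriteToFile(*cfg, "worker1.kubeconfig.internal"); err != nil {
		log.Fatal(err)
	}
}
```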
From 92baacd06e54f57de85a15590a1780eb84455941 Mon Sep 17 00:00:00 2001
From: Mykhailo Bobrovskyi
Date: Tue, 23 Apr 2024 09:48:55 +0300
Subject: [PATCH 17/49] [metrics] Add quota_reserved_wait_time_seconds (#1977)
* [metrics] Created QuotaReservedWorkloadsTotal and quotaReservedWaitTime metrics.
* [metrics] Added integration tests.
* [metrics] Fixed imports.
* [metrics] Added new metric doc.
* [metrics] Added new metric doc.
* [metrics] Added new metric doc.
* [metrics] Revert new empty lines.
* [metrics] Revert formatting changes.
* [metrics] Added "Should admit workloads with admission checks" test.
* [metrics] Added ExpectAdmittedWorkloadsTotalMetric on rejected workload test.
* [metrics] Improving debuggability
* [metrics] Renamed quota_reserved_wait_time_seconds to quota_reserved_to_admission_wait_time_seconds.
* [metrics] Added buckets for quotaReservedWaitTime and admissionWaitTime.
* [metrics] Added generateExponentialBuckets test.
* [metrics] Added WorkloadRequeued condition.
* [metrics] Change explanation for WorkloadRequeued.
* [metrics] Remove extra argument (LastTransitionTime) on SetRequeuedCondition.
* [metrics] Added QueuedWaitTime helper.
* [metrics] Rename test doc.
---
apis/kueue/v1beta1/workload_types.go | 3 +
pkg/controller/core/workload_controller.go | 9 +-
.../jobs/job/job_controller_test.go | 6 +
.../jobs/pod/pod_controller_test.go | 7 ++
.../raycluster/raycluster_controller_test.go | 6 +
pkg/metrics/metrics.go | 48 +++++++-
pkg/metrics/metrics_test.go | 9 ++
pkg/scheduler/scheduler.go | 15 ++-
pkg/workload/workload.go | 37 +++++-
site/content/en/docs/reference/metrics.md | 5 +-
.../core/workload_controller_test.go | 49 +++++++-
.../scheduler/podsready/scheduler_test.go | 5 +
test/integration/scheduler/scheduler_test.go | 112 ++++++++++++++++--
test/util/util.go | 19 ++-
14 files changed, 296 insertions(+), 34 deletions(-)
diff --git a/apis/kueue/v1beta1/workload_types.go b/apis/kueue/v1beta1/workload_types.go
index 698787d8c1..fd96a53704 100644
--- a/apis/kueue/v1beta1/workload_types.go
+++ b/apis/kueue/v1beta1/workload_types.go
@@ -302,6 +302,9 @@ const (
// more detailed information. The more detailed reasons should be prefixed
// by one of the "base" reasons.
WorkloadPreempted = "Preempted"
+
+ // WorkloadRequeued means that the Workload was requeued due to eviction.
+ WorkloadRequeued = "Requeued"
)
const (
diff --git a/pkg/controller/core/workload_controller.go b/pkg/controller/core/workload_controller.go
index 47dbfa62e2..19f97cb456 100644
--- a/pkg/controller/core/workload_controller.go
+++ b/pkg/controller/core/workload_controller.go
@@ -51,6 +51,7 @@ import (
"sigs.k8s.io/kueue/pkg/cache"
"sigs.k8s.io/kueue/pkg/constants"
"sigs.k8s.io/kueue/pkg/controller/core/indexer"
+ "sigs.k8s.io/kueue/pkg/metrics"
"sigs.k8s.io/kueue/pkg/queue"
utilac "sigs.k8s.io/kueue/pkg/util/admissioncheck"
utilslices "sigs.k8s.io/kueue/pkg/util/slices"
@@ -192,8 +193,12 @@ func (r *WorkloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
return ctrl.Result{}, err
}
if workload.IsAdmitted(&wl) {
- c := apimeta.FindStatusCondition(wl.Status.Conditions, kueue.WorkloadQuotaReserved)
- r.recorder.Eventf(&wl, corev1.EventTypeNormal, "Admitted", "Admitted by ClusterQueue %v, wait time since reservation was %.0fs", wl.Status.Admission.ClusterQueue, time.Since(c.LastTransitionTime.Time).Seconds())
+ queuedWaitTime := workload.QueuedWaitTime(&wl)
+ quotaReservedCondition := apimeta.FindStatusCondition(wl.Status.Conditions, kueue.WorkloadQuotaReserved)
+ quotaReservedWaitTime := time.Since(quotaReservedCondition.LastTransitionTime.Time)
+ r.recorder.Eventf(&wl, corev1.EventTypeNormal, "Admitted", "Admitted by ClusterQueue %v, wait time since reservation was %.0fs", wl.Status.Admission.ClusterQueue, quotaReservedWaitTime.Seconds())
+ metrics.AdmittedWorkload(kueue.ClusterQueueReference(cqName), queuedWaitTime)
+ metrics.AdmissionChecksWaitTime(kueue.ClusterQueueReference(cqName), quotaReservedWaitTime)
}
return ctrl.Result{}, nil
}
diff --git a/pkg/controller/jobs/job/job_controller_test.go b/pkg/controller/jobs/job/job_controller_test.go
index 52414d2b85..a6eee4360e 100644
--- a/pkg/controller/jobs/job/job_controller_test.go
+++ b/pkg/controller/jobs/job/job_controller_test.go
@@ -633,6 +633,12 @@ func TestReconciler(t *testing.T) {
Reason: "Pending",
Message: "The workload is deactivated",
}).
+ Condition(metav1.Condition{
+ Type: kueue.WorkloadRequeued,
+ Status: metav1.ConditionTrue,
+ Reason: "Pending",
+ Message: "The workload is deactivated",
+ }).
Condition(metav1.Condition{
Type: kueue.WorkloadEvicted,
Status: metav1.ConditionTrue,
diff --git a/pkg/controller/jobs/pod/pod_controller_test.go b/pkg/controller/jobs/pod/pod_controller_test.go
index 89d3891d03..ea08fd4174 100644
--- a/pkg/controller/jobs/pod/pod_controller_test.go
+++ b/pkg/controller/jobs/pod/pod_controller_test.go
@@ -1651,6 +1651,13 @@ func TestReconciler(t *testing.T) {
Reason: "Pending",
Message: "Preempted to accommodate a higher priority Workload",
}).
+ SetOrReplaceCondition(metav1.Condition{
+ Type: kueue.WorkloadRequeued,
+ Status: metav1.ConditionTrue,
+ LastTransitionTime: metav1.Now(),
+ Reason: "Pending",
+ Message: "Preempted to accommodate a higher priority Workload",
+ }).
Obj(),
},
workloadCmpOpts: defaultWorkloadCmpOpts,
diff --git a/pkg/controller/jobs/raycluster/raycluster_controller_test.go b/pkg/controller/jobs/raycluster/raycluster_controller_test.go
index 0fec379850..cfb180779f 100644
--- a/pkg/controller/jobs/raycluster/raycluster_controller_test.go
+++ b/pkg/controller/jobs/raycluster/raycluster_controller_test.go
@@ -325,6 +325,12 @@ func TestReconciler(t *testing.T) {
Message: "The workload has no reservation",
ObservedGeneration: 1,
}).
+ Condition(metav1.Condition{
+ Type: kueue.WorkloadRequeued,
+ Status: metav1.ConditionTrue,
+ Reason: "Pending",
+ ObservedGeneration: 1,
+ }).
Obj(),
},
},
diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go
index 74ded34f31..015e476eb0 100644
--- a/pkg/metrics/metrics.go
+++ b/pkg/metrics/metrics.go
@@ -91,6 +91,23 @@ The label 'result' can have the following values:
}, []string{"cluster_queue", "status"},
)
+ QuotaReservedWorkloadsTotal = prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Subsystem: constants.KueueName,
+ Name: "quota_reserved_workloads_total",
+ Help: "The total number of quota reserved workloads per 'cluster_queue'",
+ }, []string{"cluster_queue"},
+ )
+
+ quotaReservedWaitTime = prometheus.NewHistogramVec(
+ prometheus.HistogramOpts{
+ Subsystem: constants.KueueName,
+ Name: "quota_reserved_wait_time_seconds",
+			Help:      "The time from when a workload was created or requeued until it got quota reservation, per 'cluster_queue'",
+ Buckets: generateExponentialBuckets(14),
+ }, []string{"cluster_queue"},
+ )
+
AdmittedWorkloadsTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: constants.KueueName,
@@ -103,7 +120,17 @@ The label 'result' can have the following values:
prometheus.HistogramOpts{
Subsystem: constants.KueueName,
Name: "admission_wait_time_seconds",
- Help: "The time between a Workload was created until it was admitted, per 'cluster_queue'",
+			Help:      "The time from when a workload was created or requeued until admission, per 'cluster_queue'",
+ Buckets: generateExponentialBuckets(14),
+ }, []string{"cluster_queue"},
+ )
+
+ admissionChecksWaitTime = prometheus.NewHistogramVec(
+ prometheus.HistogramOpts{
+ Subsystem: constants.KueueName,
+ Name: "admission_checks_wait_time_seconds",
+ Help: "The time from when a workload got the quota reservation until admission, per 'cluster_queue'",
+ Buckets: generateExponentialBuckets(14),
}, []string{"cluster_queue"},
)
@@ -176,16 +203,29 @@ For a ClusterQueue, the metric only reports a value of 1 for one of the statuses
)
)
+func generateExponentialBuckets(count int) []float64 {
+ return append([]float64{1}, prometheus.ExponentialBuckets(2.5, 2, count-1)...)
+}
+
func AdmissionAttempt(result AdmissionResult, duration time.Duration) {
AdmissionAttemptsTotal.WithLabelValues(string(result)).Inc()
admissionAttemptDuration.WithLabelValues(string(result)).Observe(duration.Seconds())
}
+func QuotaReservedWorkload(cqName kueue.ClusterQueueReference, waitTime time.Duration) {
+ QuotaReservedWorkloadsTotal.WithLabelValues(string(cqName)).Inc()
+ quotaReservedWaitTime.WithLabelValues(string(cqName)).Observe(waitTime.Seconds())
+}
+
func AdmittedWorkload(cqName kueue.ClusterQueueReference, waitTime time.Duration) {
AdmittedWorkloadsTotal.WithLabelValues(string(cqName)).Inc()
admissionWaitTime.WithLabelValues(string(cqName)).Observe(waitTime.Seconds())
}
+func AdmissionChecksWaitTime(cqName kueue.ClusterQueueReference, waitTime time.Duration) {
+ admissionChecksWaitTime.WithLabelValues(string(cqName)).Observe(waitTime.Seconds())
+}
+
func ReportPendingWorkloads(cqName string, active, inadmissible int) {
PendingWorkloads.WithLabelValues(cqName, PendingStatusActive).Set(float64(active))
PendingWorkloads.WithLabelValues(cqName, PendingStatusInadmissible).Set(float64(inadmissible))
@@ -194,8 +234,11 @@ func ReportPendingWorkloads(cqName string, active, inadmissible int) {
func ClearQueueSystemMetrics(cqName string) {
PendingWorkloads.DeleteLabelValues(cqName, PendingStatusActive)
PendingWorkloads.DeleteLabelValues(cqName, PendingStatusInadmissible)
+ QuotaReservedWorkloadsTotal.DeleteLabelValues(cqName)
+ quotaReservedWaitTime.DeleteLabelValues(cqName)
AdmittedWorkloadsTotal.DeleteLabelValues(cqName)
admissionWaitTime.DeleteLabelValues(cqName)
+ admissionChecksWaitTime.DeleteLabelValues(cqName)
}
func ReportClusterQueueStatus(cqName string, cqStatus ClusterQueueStatus) {
@@ -295,8 +338,11 @@ func Register() {
PendingWorkloads,
ReservingActiveWorkloads,
AdmittedActiveWorkloads,
+ QuotaReservedWorkloadsTotal,
+ quotaReservedWaitTime,
AdmittedWorkloadsTotal,
admissionWaitTime,
+ admissionChecksWaitTime,
ClusterQueueResourceUsage,
ClusterQueueResourceReservations,
ClusterQueueResourceNominalQuota,
diff --git a/pkg/metrics/metrics_test.go b/pkg/metrics/metrics_test.go
index 4cdc4dd001..d133f1c79b 100644
--- a/pkg/metrics/metrics_test.go
+++ b/pkg/metrics/metrics_test.go
@@ -19,6 +19,7 @@ package metrics
import (
"testing"
+ "github.com/google/go-cmp/cmp"
"github.com/prometheus/client_golang/prometheus"
"sigs.k8s.io/kueue/pkg/features"
@@ -37,6 +38,14 @@ func expectFilteredMetricsCount(t *testing.T, vec *prometheus.GaugeVec, count in
}
}
+func TestGenerateExponentialBuckets(t *testing.T) {
+ expect := []float64{1, 2.5, 5, 10, 20, 40, 80, 160, 320, 640, 1280, 2560, 5120, 10240}
+ result := generateExponentialBuckets(14)
+ if diff := cmp.Diff(result, expect); len(diff) != 0 {
+ t.Errorf("Unexpected buckets (-want,+got):\n%s", diff)
+ }
+}
+
func TestReportAndCleanupClusterQueueMetrics(t *testing.T) {
defer features.SetFeatureGateDuringTest(t, features.LendingLimit, true)()
ReportClusterQueueQuotas("cohort", "queue", "flavor", "res", 5, 10, 3)
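
The bucket boundaries asserted in the new test come straight out of client_golang's helper; a standalone sketch (mirroring, not importing, the unexported function added in this patch):

```go
// Standalone sketch of how the new histogram buckets are built: a leading 1s
// bucket followed by prometheus.ExponentialBuckets starting at 2.5s with
// factor 2, giving 1, 2.5, 5, 10, ..., 10240 seconds for count=14.
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func generateExponentialBuckets(count int) []float64 {
	return append([]float64{1}, prometheus.ExponentialBuckets(2.5, 2, count-1)...)
}

func main() {
	fmt.Println(generateExponentialBuckets(14))
	// Output: [1 2.5 5 10 20 40 80 160 320 640 1280 2560 5120 10240]
}
```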
diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go
index 65c87b6408..4665047079 100644
--- a/pkg/scheduler/scheduler.go
+++ b/pkg/scheduler/scheduler.go
@@ -27,7 +27,6 @@ import (
"github.com/go-logr/logr"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
- apimeta "k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
@@ -528,16 +527,16 @@ func (s *Scheduler) admit(ctx context.Context, e *entry, cq *cache.ClusterQueue)
s.admissionRoutineWrapper.Run(func() {
err := s.applyAdmission(ctx, newWorkload)
if err == nil {
- waitStarted := e.Obj.CreationTimestamp.Time
- if c := apimeta.FindStatusCondition(e.Obj.Status.Conditions, kueue.WorkloadEvicted); c != nil {
- waitStarted = c.LastTransitionTime.Time
- }
- waitTime := time.Since(waitStarted)
+ waitTime := workload.QueuedWaitTime(newWorkload)
s.recorder.Eventf(newWorkload, corev1.EventTypeNormal, "QuotaReserved", "Quota reserved in ClusterQueue %v, wait time since queued was %.0fs", admission.ClusterQueue, waitTime.Seconds())
+ metrics.QuotaReservedWorkload(admission.ClusterQueue, waitTime)
if workload.IsAdmitted(newWorkload) {
- s.recorder.Eventf(newWorkload, corev1.EventTypeNormal, "Admitted", "Admitted by ClusterQueue %v, wait time since reservation was 0s ", admission.ClusterQueue)
+ s.recorder.Eventf(newWorkload, corev1.EventTypeNormal, "Admitted", "Admitted by ClusterQueue %v, wait time since reservation was 0s", admission.ClusterQueue)
+ metrics.AdmittedWorkload(admission.ClusterQueue, waitTime)
+ if len(newWorkload.Status.AdmissionChecks) > 0 {
+ metrics.AdmissionChecksWaitTime(admission.ClusterQueue, 0)
+ }
}
- metrics.AdmittedWorkload(admission.ClusterQueue, waitTime)
log.V(2).Info("Workload successfully admitted and assigned flavors", "assignments", admission.PodSetAssignments)
return
}
diff --git a/pkg/workload/workload.go b/pkg/workload/workload.go
index de377d9d28..d72f8a0fb9 100644
--- a/pkg/workload/workload.go
+++ b/pkg/workload/workload.go
@@ -21,6 +21,7 @@ import (
"fmt"
"maps"
"strings"
+ "time"
"github.com/go-logr/logr"
corev1 "k8s.io/api/core/v1"
@@ -43,7 +44,13 @@ import (
)
var (
- admissionManagedConditions = []string{kueue.WorkloadQuotaReserved, kueue.WorkloadEvicted, kueue.WorkloadAdmitted, kueue.WorkloadPreempted}
+ admissionManagedConditions = []string{
+ kueue.WorkloadQuotaReserved,
+ kueue.WorkloadEvicted,
+ kueue.WorkloadAdmitted,
+ kueue.WorkloadPreempted,
+ kueue.WorkloadRequeued,
+ }
)
type AssignmentClusterQueueState struct {
@@ -340,10 +347,14 @@ func UpdateStatus(ctx context.Context,
return c.Status().Patch(ctx, newWl, client.Apply, client.FieldOwner(managerPrefix+"-"+condition.Type))
}
-// UnsetQuotaReservationWithCondition sets the QuotaReserved condition to false and clears
-// the admission.
+// UnsetQuotaReservationWithCondition sets the QuotaReserved condition to false, clears
+// the admission, and sets the WorkloadRequeued condition.
// Returns whether any change was done.
func UnsetQuotaReservationWithCondition(wl *kueue.Workload, reason, message string) bool {
+ if HasQuotaReservation(wl) {
+ SetRequeuedCondition(wl, reason, message)
+ }
+
condition := metav1.Condition{
Type: kueue.WorkloadQuotaReserved,
Status: metav1.ConditionFalse,
@@ -365,6 +376,26 @@ func UnsetQuotaReservationWithCondition(wl *kueue.Workload, reason, message stri
return changed
}
+// SetRequeuedCondition sets the WorkloadRequeued condition to true
+func SetRequeuedCondition(wl *kueue.Workload, reason string, message string) {
+ condition := metav1.Condition{
+ Type: kueue.WorkloadRequeued,
+ Status: metav1.ConditionTrue,
+ Reason: reason,
+ Message: api.TruncateConditionMessage(message),
+ ObservedGeneration: wl.Generation,
+ }
+ apimeta.SetStatusCondition(&wl.Status.Conditions, condition)
+}
+
+func QueuedWaitTime(wl *kueue.Workload) time.Duration {
+ queuedTime := wl.CreationTimestamp.Time
+ if c := apimeta.FindStatusCondition(wl.Status.Conditions, kueue.WorkloadRequeued); c != nil {
+ queuedTime = c.LastTransitionTime.Time
+ }
+ return time.Since(queuedTime)
+}
+
// BaseSSAWorkload creates a new object based on the input workload that
// only contains the fields necessary to identify the original object.
// The object can be used in as a base for Server-Side-Apply.
diff --git a/site/content/en/docs/reference/metrics.md b/site/content/en/docs/reference/metrics.md
index 4412eb1125..c35b76e251 100644
--- a/site/content/en/docs/reference/metrics.md
+++ b/site/content/en/docs/reference/metrics.md
@@ -25,8 +25,11 @@ Use the following metrics to monitor the status of your ClusterQueues:
| Metric name | Type | Description | Labels |
| ----------- | ---- | ----------- | ------ |
| `kueue_pending_workloads` | Gauge | The number of pending workloads. | `cluster_queue`: the name of the ClusterQueue `status`: possible values are `active` or `inadmissible` |
+| `kueue_quota_reserved_workloads_total` | Counter | The total number of quota reserved workloads. | `cluster_queue`: the name of the ClusterQueue |
+| `kueue_quota_reserved_wait_time_seconds` | Histogram | The time from when a workload was created or requeued until it got quota reservation. | `cluster_queue`: the name of the ClusterQueue |
| `kueue_admitted_workloads_total` | Counter | The total number of admitted workloads. | `cluster_queue`: the name of the ClusterQueue |
-| `kueue_admission_wait_time_seconds` | Histogram | The time between a Workload was created until it was admitted. | `cluster_queue`: the name of the ClusterQueue |
+| `kueue_admission_wait_time_seconds` | Histogram | The time from when a workload was created or requeued until admission. | `cluster_queue`: the name of the ClusterQueue |
+| `kueue_admission_checks_wait_time_seconds` | Histogram | The time from when a workload got the quota reservation until admission. | `cluster_queue`: the name of the ClusterQueue |
| `kueue_admitted_active_workloads` | Gauge | The number of admitted Workloads that are active (unsuspended and not finished) | `cluster_queue`: the name of the ClusterQueue |
| `kueue_cluster_queue_status` | Gauge | Reports the status of the ClusterQueue | `cluster_queue`: The name of the ClusterQueue `status`: Possible values are `pending`, `active` or `terminated`. For a ClusterQueue, the metric only reports a value of 1 for one of the statuses. |
diff --git a/test/integration/controller/core/workload_controller_test.go b/test/integration/controller/core/workload_controller_test.go
index fdd34cf610..e8cdf361b1 100644
--- a/test/integration/controller/core/workload_controller_test.go
+++ b/test/integration/controller/core/workload_controller_test.go
@@ -86,7 +86,14 @@ var _ = ginkgo.Describe("Workload controller", ginkgo.Ordered, ginkgo.ContinueOn
gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(wl), &updatedQueueWorkload)).To(gomega.Succeed())
return len(updatedQueueWorkload.Status.Conditions)
}, util.Timeout, util.Interval).Should(gomega.BeComparableTo(1))
- gomega.Expect(updatedQueueWorkload.Status.Conditions[0].Message).To(gomega.BeComparableTo(message))
+ gomega.Expect(updatedQueueWorkload.Status.Conditions[0]).To(
+ gomega.BeComparableTo(metav1.Condition{
+ Type: kueue.WorkloadQuotaReserved,
+ Status: metav1.ConditionFalse,
+ Reason: "Inadmissible",
+ Message: message,
+ }, util.IgnoreConditionTimestampsAndObservedGeneration),
+ )
})
})
@@ -102,7 +109,14 @@ var _ = ginkgo.Describe("Workload controller", ginkgo.Ordered, ginkgo.ContinueOn
gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(wl), &updatedQueueWorkload)).To(gomega.Succeed())
return len(updatedQueueWorkload.Status.Conditions)
}, util.Timeout, util.Interval).Should(gomega.BeComparableTo(1))
- gomega.Expect(updatedQueueWorkload.Status.Conditions[0].Message).To(gomega.BeComparableTo(message))
+ gomega.Expect(updatedQueueWorkload.Status.Conditions[0]).To(
+ gomega.BeComparableTo(metav1.Condition{
+ Type: kueue.WorkloadQuotaReserved,
+ Status: metav1.ConditionFalse,
+ Reason: "Inadmissible",
+ Message: message,
+ }, util.IgnoreConditionTimestampsAndObservedGeneration),
+ )
})
})
@@ -122,7 +136,14 @@ var _ = ginkgo.Describe("Workload controller", ginkgo.Ordered, ginkgo.ContinueOn
gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(wl), &updatedQueueWorkload)).To(gomega.Succeed())
return updatedQueueWorkload.Status.Conditions
}, util.Timeout, util.Interval).ShouldNot(gomega.BeNil())
- gomega.Expect(updatedQueueWorkload.Status.Conditions[0].Message).To(gomega.BeComparableTo(message))
+ gomega.Expect(updatedQueueWorkload.Status.Conditions[0]).To(
+ gomega.BeComparableTo(metav1.Condition{
+ Type: kueue.WorkloadQuotaReserved,
+ Status: metav1.ConditionFalse,
+ Reason: "Inadmissible",
+ Message: message,
+ }, util.IgnoreConditionTimestampsAndObservedGeneration),
+ )
})
})
@@ -273,6 +294,8 @@ var _ = ginkgo.Describe("Workload controller", ginkgo.Ordered, ginkgo.ContinueOn
Reason: "AdmissionChecksRejected",
Message: "Admission checks [check1] are rejected",
}, util.IgnoreConditionTimestampsAndObservedGeneration))
+
+ util.ExpectAdmittedWorkloadsTotalMetric(clusterQueue, 0)
})
})
@@ -319,6 +342,8 @@ var _ = ginkgo.Describe("Workload controller", ginkgo.Ordered, ginkgo.ContinueOn
Message: "The workload is admitted",
}, util.IgnoreConditionTimestampsAndObservedGeneration)))
}, util.Timeout, util.Interval).Should(gomega.Succeed())
+
+ util.ExpectAdmittedWorkloadsTotalMetric(clusterQueue, 1)
})
ginkgo.By("setting a rejected check conditions the workload should be evicted and admitted condition kept", func() {
@@ -347,6 +372,12 @@ var _ = ginkgo.Describe("Workload controller", ginkgo.Ordered, ginkgo.ContinueOn
Reason: "Admitted",
Message: "The workload is admitted",
}, util.IgnoreConditionTimestampsAndObservedGeneration),
+ gomega.BeComparableTo(metav1.Condition{
+ Type: kueue.WorkloadQuotaReserved,
+ Status: metav1.ConditionTrue,
+ Reason: "QuotaReserved",
+ Message: "Quota reserved in ClusterQueue cluster-queue",
+ }, util.IgnoreConditionTimestampsAndObservedGeneration),
))
}, util.Timeout, util.Interval).Should(gomega.Succeed())
})
@@ -368,6 +399,18 @@ var _ = ginkgo.Describe("Workload controller", ginkgo.Ordered, ginkgo.ContinueOn
Reason: "NoReservationNoChecks",
Message: "The workload has no reservation and not all checks ready",
}, util.IgnoreConditionTimestampsAndObservedGeneration),
+ gomega.BeComparableTo(metav1.Condition{
+ Type: kueue.WorkloadQuotaReserved,
+ Status: metav1.ConditionFalse,
+ Reason: "Pending",
+ Message: "By test",
+ }, util.IgnoreConditionTimestampsAndObservedGeneration),
+ gomega.BeComparableTo(metav1.Condition{
+ Type: kueue.WorkloadRequeued,
+ Status: metav1.ConditionTrue,
+ Reason: "Pending",
+ Message: "By test",
+ }, util.IgnoreConditionTimestampsAndObservedGeneration),
))
}, util.Timeout, util.Interval).Should(gomega.Succeed())
})
diff --git a/test/integration/scheduler/podsready/scheduler_test.go b/test/integration/scheduler/podsready/scheduler_test.go
index 4216b2f3de..d3d3010c76 100644
--- a/test/integration/scheduler/podsready/scheduler_test.go
+++ b/test/integration/scheduler/podsready/scheduler_test.go
@@ -254,6 +254,7 @@ var _ = ginkgo.Describe("SchedulerWithWaitForPodsReady", func() {
gomega.Expect(k8sClient.Create(ctx, prodWl)).Should(gomega.Succeed())
ginkgo.By("checking the 'prod' workload is admitted")
util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, prodWl)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(prodClusterQ, 1)
util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 1)
ginkgo.By("exceed the timeout for the 'prod' workload")
time.Sleep(podsReadyTimeout)
@@ -263,10 +264,12 @@ var _ = ginkgo.Describe("SchedulerWithWaitForPodsReady", func() {
ginkgo.By("verify the 'prod' workload gets re-admitted twice")
util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, prodWl)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(prodClusterQ, 2)
util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 2)
time.Sleep(podsReadyTimeout)
util.FinishEvictionForWorkloads(ctx, k8sClient, prodWl)
util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, prodWl)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(prodClusterQ, 3)
util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 3)
time.Sleep(podsReadyTimeout)
ginkgo.By("evicted re-admitted workload should have 2 in the re-queue count")
@@ -626,6 +629,7 @@ var _ = ginkgo.Describe("SchedulerWithWaitForPodsReadyNonblockingMode", func() {
gomega.Expect(k8sClient.Create(ctx, prodWl)).Should(gomega.Succeed())
ginkgo.By("checking the 'prod' workload is admitted")
util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, prodWl)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(prodClusterQ, 1)
util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 1)
ginkgo.By("exceed the timeout for the 'prod' workload")
time.Sleep(podsReadyTimeout)
@@ -634,6 +638,7 @@ var _ = ginkgo.Describe("SchedulerWithWaitForPodsReadyNonblockingMode", func() {
ginkgo.By("verify the 'prod' workload gets re-admitted once")
util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, prodWl)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(prodClusterQ, 2)
util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 2)
time.Sleep(podsReadyTimeout)
util.ExpectWorkloadToHaveRequeueCount(ctx, k8sClient, client.ObjectKeyFromObject(prodWl), ptr.To[int32](2))
diff --git a/test/integration/scheduler/scheduler_test.go b/test/integration/scheduler/scheduler_test.go
index e901643db8..631c3e7399 100644
--- a/test/integration/scheduler/scheduler_test.go
+++ b/test/integration/scheduler/scheduler_test.go
@@ -81,17 +81,21 @@ var _ = ginkgo.Describe("Scheduler", func() {
ginkgo.When("Scheduling workloads on clusterQueues", func() {
var (
- prodClusterQ *kueue.ClusterQueue
- devClusterQ *kueue.ClusterQueue
- podsCountClusterQ *kueue.ClusterQueue
- podsCountOnlyClusterQ *kueue.ClusterQueue
- preemptionClusterQ *kueue.ClusterQueue
- prodQueue *kueue.LocalQueue
- devQueue *kueue.LocalQueue
- podsCountQueue *kueue.LocalQueue
- podsCountOnlyQueue *kueue.LocalQueue
- preemptionQueue *kueue.LocalQueue
- cqsStopPolicy *kueue.StopPolicy
+ admissionCheck1 *kueue.AdmissionCheck
+ admissionCheck2 *kueue.AdmissionCheck
+ prodClusterQ *kueue.ClusterQueue
+ devClusterQ *kueue.ClusterQueue
+ podsCountClusterQ *kueue.ClusterQueue
+ podsCountOnlyClusterQ *kueue.ClusterQueue
+ preemptionClusterQ *kueue.ClusterQueue
+ admissionCheckClusterQ *kueue.ClusterQueue
+ prodQueue *kueue.LocalQueue
+ devQueue *kueue.LocalQueue
+ podsCountQueue *kueue.LocalQueue
+ podsCountOnlyQueue *kueue.LocalQueue
+ preemptionQueue *kueue.LocalQueue
+ admissionCheckQueue *kueue.LocalQueue
+ cqsStopPolicy *kueue.StopPolicy
)
ginkgo.JustBeforeEach(func() {
@@ -100,6 +104,14 @@ var _ = ginkgo.Describe("Scheduler", func() {
gomega.Expect(k8sClient.Create(ctx, spotUntaintedFlavor)).To(gomega.Succeed())
cqsStopPolicy := ptr.Deref(cqsStopPolicy, kueue.None)
+ admissionCheck1 = testing.MakeAdmissionCheck("check1").ControllerName("ctrl").Obj()
+ gomega.Expect(k8sClient.Create(ctx, admissionCheck1)).Should(gomega.Succeed())
+ util.SetAdmissionCheckActive(ctx, k8sClient, admissionCheck1, metav1.ConditionTrue)
+
+ admissionCheck2 = testing.MakeAdmissionCheck("check2").ControllerName("ctrl").Obj()
+ gomega.Expect(k8sClient.Create(ctx, admissionCheck2)).Should(gomega.Succeed())
+ util.SetAdmissionCheckActive(ctx, k8sClient, admissionCheck2, metav1.ConditionTrue)
+
prodClusterQ = testing.MakeClusterQueue("prod-cq").
ResourceGroup(
*testing.MakeFlavorQuotas("spot-tainted").Resource(corev1.ResourceCPU, "5", "5").Obj(),
@@ -151,6 +163,15 @@ var _ = ginkgo.Describe("Scheduler", func() {
Obj()
gomega.Expect(k8sClient.Create(ctx, preemptionClusterQ)).Should(gomega.Succeed())
+ admissionCheckClusterQ = testing.MakeClusterQueue("admission-check-cq").
+ ResourceGroup(
+ *testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5").Obj(),
+ ).
+ AdmissionChecks("check1", "check2").
+ StopPolicy(cqsStopPolicy).
+ Obj()
+ gomega.Expect(k8sClient.Create(ctx, admissionCheckClusterQ)).Should(gomega.Succeed())
+
prodQueue = testing.MakeLocalQueue("prod-queue", ns.Name).ClusterQueue(prodClusterQ.Name).Obj()
gomega.Expect(k8sClient.Create(ctx, prodQueue)).Should(gomega.Succeed())
@@ -165,6 +186,9 @@ var _ = ginkgo.Describe("Scheduler", func() {
preemptionQueue = testing.MakeLocalQueue("preemption-queue", ns.Name).ClusterQueue(preemptionClusterQ.Name).Obj()
gomega.Expect(k8sClient.Create(ctx, preemptionQueue)).Should(gomega.Succeed())
+
+ admissionCheckQueue = testing.MakeLocalQueue("admission-check-queue", ns.Name).ClusterQueue(admissionCheckClusterQ.Name).Obj()
+ gomega.Expect(k8sClient.Create(ctx, admissionCheckQueue)).Should(gomega.Succeed())
})
ginkgo.JustAfterEach(func() {
@@ -174,6 +198,9 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, podsCountClusterQ, true)
util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, podsCountOnlyClusterQ, true)
util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, preemptionClusterQ, true)
+ util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, admissionCheckClusterQ, true)
+ util.ExpectAdmissionCheckToBeDeleted(ctx, k8sClient, admissionCheck2, true)
+ util.ExpectAdmissionCheckToBeDeleted(ctx, k8sClient, admissionCheck1, true)
util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, onDemandFlavor, true)
util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, spotTaintedFlavor, true)
util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, spotUntaintedFlavor, true)
@@ -187,6 +214,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, prodWl1, prodWl1Admission)
util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 1)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(prodClusterQ, 1)
util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 1)
ginkgo.By("checking a second no-fit workload does not get admitted")
@@ -202,6 +230,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, devWl, spotUntaintedFlavorAdmission)
util.ExpectPendingWorkloadsMetric(devClusterQ, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(devClusterQ, 1)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(devClusterQ, 1)
util.ExpectAdmittedWorkloadsTotalMetric(devClusterQ, 1)
ginkgo.By("checking the second workload gets admitted when the first workload finishes")
@@ -210,6 +239,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, prodWl2, prodWl2Admission)
util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 1)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(prodClusterQ, 2)
util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 2)
})
@@ -231,6 +261,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl1, wl1Admission)
util.ExpectPendingWorkloadsMetric(podsCountClusterQ, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(podsCountClusterQ, 1)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(podsCountClusterQ, 1)
util.ExpectAdmittedWorkloadsTotalMetric(podsCountClusterQ, 1)
})
@@ -258,6 +289,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadsToBePending(ctx, k8sClient, wl2)
util.ExpectPendingWorkloadsMetric(podsCountClusterQ, 0, 1)
util.ExpectReservingActiveWorkloadsMetric(podsCountClusterQ, 2)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(podsCountClusterQ, 2)
util.ExpectAdmittedWorkloadsTotalMetric(podsCountClusterQ, 2)
})
@@ -269,6 +301,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, podsCountClusterQ.Name, wl2, wl3)
util.ExpectPendingWorkloadsMetric(podsCountClusterQ, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(podsCountClusterQ, 2)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(podsCountClusterQ, 3)
util.ExpectAdmittedWorkloadsTotalMetric(podsCountClusterQ, 3)
})
})
@@ -289,6 +322,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl1, wl1Admission)
util.ExpectPendingWorkloadsMetric(podsCountOnlyClusterQ, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(podsCountOnlyClusterQ, 1)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(podsCountOnlyClusterQ, 1)
util.ExpectAdmittedWorkloadsTotalMetric(podsCountOnlyClusterQ, 1)
})
@@ -314,6 +348,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadsToBePending(ctx, k8sClient, wl2)
util.ExpectPendingWorkloadsMetric(podsCountOnlyClusterQ, 0, 1)
util.ExpectReservingActiveWorkloadsMetric(podsCountOnlyClusterQ, 2)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(podsCountOnlyClusterQ, 2)
util.ExpectAdmittedWorkloadsTotalMetric(podsCountOnlyClusterQ, 2)
})
@@ -325,6 +360,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, podsCountOnlyClusterQ.Name, wl2, wl3)
util.ExpectPendingWorkloadsMetric(podsCountOnlyClusterQ, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(podsCountOnlyClusterQ, 2)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(podsCountOnlyClusterQ, 3)
util.ExpectAdmittedWorkloadsTotalMetric(podsCountOnlyClusterQ, 3)
})
})
@@ -378,6 +414,22 @@ var _ = ginkgo.Describe("Scheduler", func() {
})
})
+ ginkgo.It("Should admit workloads with admission checks", func() {
+ wl1 := testing.MakeWorkload("admission-check-wl1", ns.Name).
+ Queue(admissionCheckQueue.Name).
+ Request(corev1.ResourceCPU, "2").
+ Obj()
+
+ ginkgo.By("checking the first workload gets created and gets quota reserved", func() {
+ gomega.Expect(k8sClient.Create(ctx, wl1)).Should(gomega.Succeed())
+ util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl1, nil)
+ util.ExpectPendingWorkloadsMetric(admissionCheckClusterQ, 0, 0)
+ util.ExpectReservingActiveWorkloadsMetric(admissionCheckClusterQ, 1)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(admissionCheckClusterQ, 1)
+ util.ExpectAdmittedWorkloadsTotalMetric(admissionCheckClusterQ, 0)
+ })
+ })
+
ginkgo.When("Hold at startup", func() {
ginkgo.BeforeEach(func() {
cqsStopPolicy = ptr.To(kueue.Hold)
@@ -412,6 +464,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 3)
util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 2)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(prodClusterQ, 2)
util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 2)
ginkgo.By("after the high priority workloads finish, only the mid priority workloads should be admitted")
@@ -420,6 +473,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, wlMid1, wlMid2)
util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 1)
util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 2)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(prodClusterQ, 4)
util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 4)
})
})
@@ -432,6 +486,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, bigWl)
util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 1)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(prodClusterQ, 1)
util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 1)
smallWl1 := testing.MakeWorkload("small-wl-1", ns.Name).Queue(prodQueue.Name).Request(corev1.ResourceCPU, "2.5").Obj()
@@ -450,6 +505,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, smallWl1, smallWl2)
util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 2)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(prodClusterQ, 3)
util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 3)
})
@@ -515,6 +571,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl1, expectWl1Admission)
util.ExpectPendingWorkloadsMetric(cq, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(cq, 1)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 1)
util.ExpectAdmittedWorkloadsTotalMetric(cq, 1)
ginkgo.By("Second big workload is pending")
@@ -523,6 +580,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadsToBePending(ctx, k8sClient, wl2)
util.ExpectPendingWorkloadsMetric(cq, 0, 1)
util.ExpectReservingActiveWorkloadsMetric(cq, 1)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 1)
util.ExpectAdmittedWorkloadsTotalMetric(cq, 1)
ginkgo.By("Third small workload starts")
@@ -532,6 +590,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl3, expectWl3Admission)
util.ExpectPendingWorkloadsMetric(cq, 0, 1)
util.ExpectReservingActiveWorkloadsMetric(cq, 2)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 2)
util.ExpectAdmittedWorkloadsTotalMetric(cq, 2)
ginkgo.By("Second big workload starts after the first one is deleted")
@@ -540,6 +599,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl2, expectWl2Admission)
util.ExpectPendingWorkloadsMetric(cq, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(cq, 2)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 3)
util.ExpectAdmittedWorkloadsTotalMetric(cq, 3)
})
@@ -563,6 +623,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl1, expectAdmission)
util.ExpectPendingWorkloadsMetric(fooCQ, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(fooCQ, 1)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(fooCQ, 1)
util.ExpectAdmittedWorkloadsTotalMetric(fooCQ, 1)
ginkgo.By("Second big workload is pending")
@@ -571,6 +632,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadsToBePending(ctx, k8sClient, wl2)
util.ExpectPendingWorkloadsMetric(cq, 0, 1)
util.ExpectReservingActiveWorkloadsMetric(cq, 0)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 0)
util.ExpectAdmittedWorkloadsTotalMetric(cq, 0)
ginkgo.By("Third small workload starts")
@@ -580,6 +642,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl3, expectAdmission)
util.ExpectPendingWorkloadsMetric(fooCQ, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(fooCQ, 2)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(fooCQ, 2)
util.ExpectAdmittedWorkloadsTotalMetric(fooCQ, 2)
ginkgo.By("Second big workload starts after the first one is deleted")
@@ -588,6 +651,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl2, expectAdmission)
util.ExpectPendingWorkloadsMetric(cq, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(cq, 1)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 1)
util.ExpectAdmittedWorkloadsTotalMetric(cq, 1)
})
})
@@ -621,6 +685,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadsToBePending(ctx, k8sClient, wl)
util.ExpectPendingWorkloadsMetric(cq, 0, 1)
util.ExpectReservingActiveWorkloadsMetric(cq, 0)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 0)
util.ExpectAdmittedWorkloadsTotalMetric(cq, 0)
util.ExpectAdmissionAttemptsMetric(1, 0)
@@ -645,6 +710,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl, expectAdmission)
util.ExpectPendingWorkloadsMetric(cq, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(cq, 1)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 1)
util.ExpectAdmittedWorkloadsTotalMetric(cq, 1)
util.ExpectAdmissionAttemptsMetric(1, 1)
})
@@ -703,6 +769,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadsToBePending(ctx, k8sClient, wl1, wl2)
util.ExpectPendingWorkloadsMetric(cq, 0, 2)
util.ExpectReservingActiveWorkloadsMetric(cq, 0)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 0)
util.ExpectAdmittedWorkloadsTotalMetric(cq, 0)
ginkgo.By("checking the first workload gets admitted after updating the namespace labels to match CQ selector")
@@ -710,6 +777,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
gomega.Expect(k8sClient.Update(ctx, ns)).Should(gomega.Succeed())
util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, cq.Name, wl1)
util.ExpectReservingActiveWorkloadsMetric(cq, 1)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 1)
util.ExpectAdmittedWorkloadsTotalMetric(cq, 1)
util.ExpectPendingWorkloadsMetric(cq, 0, 1)
})
@@ -744,6 +812,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadsToBeFrozen(ctx, k8sClient, fooCQ.Name, wl)
util.ExpectPendingWorkloadsMetric(fooCQ, 0, 1)
util.ExpectReservingActiveWorkloadsMetric(fooCQ, 0)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(fooCQ, 0)
util.ExpectAdmittedWorkloadsTotalMetric(fooCQ, 0)
ginkgo.By("Creating foo flavor")
@@ -756,6 +825,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, fooCQ.Name, wl)
util.ExpectPendingWorkloadsMetric(fooCQ, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(fooCQ, 1)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(fooCQ, 1)
util.ExpectAdmittedWorkloadsTotalMetric(fooCQ, 1)
})
})
@@ -800,6 +870,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl1, expectAdmission)
util.ExpectPendingWorkloadsMetric(cq, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(cq, 1)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 1)
util.ExpectAdmittedWorkloadsTotalMetric(cq, 1)
ginkgo.By("checking a second workload without toleration doesn't start")
@@ -808,6 +879,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadsToBePending(ctx, k8sClient, wl2)
util.ExpectPendingWorkloadsMetric(cq, 0, 1)
util.ExpectReservingActiveWorkloadsMetric(cq, 1)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 1)
util.ExpectAdmittedWorkloadsTotalMetric(cq, 1)
ginkgo.By("checking a third workload with toleration starts")
@@ -818,6 +890,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl3, expectAdmission)
util.ExpectPendingWorkloadsMetric(cq, 0, 1)
util.ExpectReservingActiveWorkloadsMetric(cq, 2)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 2)
util.ExpectAdmittedWorkloadsTotalMetric(cq, 2)
})
})
@@ -857,6 +930,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
expectAdmission := testing.MakeAdmission(cq.Name).Assignment(corev1.ResourceCPU, "spot-untainted", "1").Obj()
util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl1, expectAdmission)
util.ExpectReservingActiveWorkloadsMetric(cq, 1)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 1)
util.ExpectAdmittedWorkloadsTotalMetric(cq, 1)
util.ExpectPendingWorkloadsMetric(cq, 0, 0)
@@ -870,6 +944,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl2, expectAdmission)
util.ExpectPendingWorkloadsMetric(cq, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(cq, 2)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(cq, 2)
util.ExpectAdmittedWorkloadsTotalMetric(cq, 2)
})
})
@@ -971,6 +1046,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectPendingWorkloadsMetric(prodCQ, 0, 1)
util.ExpectReservingActiveWorkloadsMetric(prodCQ, 0)
util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 0)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(prodCQ, 0)
ginkgo.By("checking the workload gets admitted when a fallback ClusterQueue gets added")
fallbackClusterQueue := testing.MakeClusterQueue("fallback-cq").
@@ -988,6 +1064,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl, expectAdmission)
util.ExpectPendingWorkloadsMetric(prodCQ, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(prodCQ, 1)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(prodCQ, 1)
util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 1)
})
@@ -1020,7 +1097,9 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectPendingWorkloadsMetric(devCQ, 0, 1)
util.ExpectReservingActiveWorkloadsMetric(prodCQ, 0)
util.ExpectReservingActiveWorkloadsMetric(devCQ, 0)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(prodCQ, 0)
util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 0)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(devCQ, 0)
util.ExpectAdmittedWorkloadsTotalMetric(devCQ, 0)
// Delay cluster queue creation to make sure workloads are in the same
@@ -1040,7 +1119,9 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectPendingWorkloadsMetric(devCQ, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(prodCQ, 1)
util.ExpectReservingActiveWorkloadsMetric(devCQ, 1)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(prodCQ, 1)
util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 1)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(devCQ, 1)
util.ExpectAdmittedWorkloadsTotalMetric(devCQ, 1)
})
@@ -1130,6 +1211,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
testing.MakeAdmission(prodCQ.Name).Assignment(corev1.ResourceCPU, "spot-untainted", "1").Obj())
util.ExpectPendingWorkloadsMetric(prodCQ, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(prodCQ, 3)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(prodCQ, 3)
util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 3)
})
@@ -1162,6 +1244,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl1, prodWl1Admission)
util.ExpectPendingWorkloadsMetric(prodCQ, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(prodCQ, 1)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(prodCQ, 1)
util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 1)
ginkgo.By("Creating another workload")
@@ -1171,6 +1254,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl2, prodWl2Admission)
util.ExpectPendingWorkloadsMetric(devCQ, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(devCQ, 1)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(devCQ, 1)
util.ExpectAdmittedWorkloadsTotalMetric(devCQ, 1)
})
@@ -1258,6 +1342,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadsToBePending(ctx, k8sClient, wl)
util.ExpectPendingWorkloadsMetric(prodCQ, 0, 1)
util.ExpectReservingActiveWorkloadsMetric(prodCQ, 0)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(prodCQ, 0)
util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 0)
ginkgo.By("checking the workload gets admitted when another ClusterQueue gets added")
@@ -1275,6 +1360,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl, expectAdmission)
util.ExpectPendingWorkloadsMetric(prodCQ, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(prodCQ, 1)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(prodCQ, 1)
util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 1)
})
@@ -1309,7 +1395,9 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectPendingWorkloadsMetric(devCQ, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(prodCQ, 1)
util.ExpectReservingActiveWorkloadsMetric(devCQ, 0)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(prodCQ, 1)
util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 1)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(devCQ, 0)
util.ExpectAdmittedWorkloadsTotalMetric(devCQ, 0)
// Update lending limit of cluster queue
@@ -1331,7 +1419,9 @@ var _ = ginkgo.Describe("Scheduler", func() {
util.ExpectPendingWorkloadsMetric(devCQ, 0, 0)
util.ExpectReservingActiveWorkloadsMetric(prodCQ, 2)
util.ExpectReservingActiveWorkloadsMetric(devCQ, 0)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(prodCQ, 2)
util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 2)
+ util.ExpectQuotaReservedWorkloadsTotalMetric(devCQ, 0)
util.ExpectAdmittedWorkloadsTotalMetric(devCQ, 0)
})
})
diff --git a/test/util/util.go b/test/util/util.go
index bf919c0714..a19d899f35 100644
--- a/test/util/util.go
+++ b/test/util/util.go
@@ -441,11 +441,20 @@ func ExpectReservingActiveWorkloadsMetric(cq *kueue.ClusterQueue, v int) {
func ExpectAdmittedWorkloadsTotalMetric(cq *kueue.ClusterQueue, v int) {
metric := metrics.AdmittedWorkloadsTotal.WithLabelValues(cq.Name)
- gomega.EventuallyWithOffset(1, func() int {
- v, err := testutil.GetCounterMetricValue(metric)
- gomega.Expect(err).ToNot(gomega.HaveOccurred())
- return int(v)
- }, Timeout, Interval).Should(gomega.Equal(v))
+ gomega.EventuallyWithOffset(1, func(g gomega.Gomega) {
+ count, err := testutil.GetCounterMetricValue(metric)
+ g.Expect(err).ToNot(gomega.HaveOccurred())
+ g.Expect(int(count)).Should(gomega.Equal(v))
+ }, Timeout, Interval).Should(gomega.Succeed())
+}
+
+func ExpectQuotaReservedWorkloadsTotalMetric(cq *kueue.ClusterQueue, v int) {
+ metric := metrics.QuotaReservedWorkloadsTotal.WithLabelValues(cq.Name)
+ gomega.EventuallyWithOffset(1, func(g gomega.Gomega) {
+ count, err := testutil.GetCounterMetricValue(metric)
+ g.Expect(err).ToNot(gomega.HaveOccurred())
+ g.Expect(int(count)).Should(gomega.Equal(v))
+ }, Timeout, Interval).Should(gomega.Succeed())
}
func ExpectClusterQueueStatusMetric(cq *kueue.ClusterQueue, status metrics.ClusterQueueStatus) {
From e8fc9b7cf6bf9d964c6e5363aba711a5d841f5cc Mon Sep 17 00:00:00 2001
From: Dominik Pająk
Date: Tue, 23 Apr 2024 19:40:40 +0200
Subject: [PATCH 18/49] Propagate provisioning status of a ProvReq into the
Workload status (#2007)
* Copying the prov req status message into workload
* Integration test for ETA message propagation
* Refactor
* Update the message after successful provisioning.
* Try to update even if a previous update happened.
* Comment explaining when the update happens
* PR comments
---
.../provisioning/controller.go | 54 ++++++++++++------
.../provisioning/controller_test.go | 57 +++++++++++++++++++
.../provisioning/provisioning_test.go | 32 +++++++++++
3 files changed, 126 insertions(+), 17 deletions(-)
diff --git a/pkg/controller/admissionchecks/provisioning/controller.go b/pkg/controller/admissionchecks/provisioning/controller.go
index 19b22b95e2..d2a828c142 100644
--- a/pkg/controller/admissionchecks/provisioning/controller.go
+++ b/pkg/controller/admissionchecks/provisioning/controller.go
@@ -462,6 +462,22 @@ func passProvReqParams(wl *kueue.Workload, req *autoscaling.ProvisioningRequest)
}
}
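+// updateCheckMessage sets the admission check message and reports whether it
+// changed; empty messages are ignored.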
+func updateCheckMessage(checkState *kueue.AdmissionCheckState, message string) bool {
+ if message == "" || checkState.Message == message {
+ return false
+ }
+ checkState.Message = message
+ return true
+}
+
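+// updateCheckState sets the admission check state and reports whether it changed.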
+func updateCheckState(checkState *kueue.AdmissionCheckState, state kueue.CheckState) bool {
+ if checkState.State == state {
+ return false
+ }
+ checkState.State = state
+ return true
+}
+
func (c *Controller) syncCheckStates(ctx context.Context, wl *kueue.Workload, checks []string, activeOrLastPRForChecks map[string]*autoscaling.ProvisioningRequest) error {
log := ctrl.LoggerFrom(ctx)
checksMap := slices.ToRefMap(wl.Status.AdmissionChecks, func(c *kueue.AdmissionCheckState) string { return c.Name })
@@ -472,15 +488,11 @@ func (c *Controller) syncCheckStates(ctx context.Context, wl *kueue.Workload, ch
checkState := *checksMap[check]
if prc, err := c.helper.ConfigForAdmissionCheck(ctx, check); err != nil {
// the check is not active
- if checkState.State != kueue.CheckStatePending || checkState.Message != CheckInactiveMessage {
- updated = true
- checkState.State = kueue.CheckStatePending
- checkState.Message = CheckInactiveMessage
- }
+ updated = updateCheckState(&checkState, kueue.CheckStatePending) || updated
+ updated = updateCheckMessage(&checkState, CheckInactiveMessage) || updated
} else if !c.reqIsNeeded(ctx, wl, prc) {
- if checkState.State != kueue.CheckStateReady {
+ if updateCheckState(&checkState, kueue.CheckStateReady) {
updated = true
- checkState.State = kueue.CheckStateReady
checkState.Message = NoRequestNeeded
checkState.PodSetUpdates = nil
}
@@ -492,7 +504,13 @@ func (c *Controller) syncCheckStates(ctx context.Context, wl *kueue.Workload, ch
prFailed := apimeta.IsStatusConditionTrue(pr.Status.Conditions, autoscaling.Failed)
prProvisioned := apimeta.IsStatusConditionTrue(pr.Status.Conditions, autoscaling.Provisioned)
- log.V(3).Info("Synchronizing admission check state based on provisioning request", "wl", klog.KObj(wl), "check", check, "prName", pr.Name, "failed", prFailed, "accepted", prProvisioned)
+ prAccepted := apimeta.IsStatusConditionTrue(pr.Status.Conditions, autoscaling.Accepted)
+ log.V(3).Info("Synchronizing admission check state based on provisioning request", "wl", klog.KObj(wl),
+ "check", check,
+ "prName", pr.Name,
+ "failed", prFailed,
+ "provisioned", prProvisioned,
+ "accepted", prAccepted)
switch {
case prFailed:
@@ -500,9 +518,8 @@ func (c *Controller) syncCheckStates(ctx context.Context, wl *kueue.Workload, ch
if attempt := getAttempt(ctx, pr, wl.Name, check); attempt <= MaxRetries {
// it is going to be retried
message := fmt.Sprintf("Retrying after failure: %s", apimeta.FindStatusCondition(pr.Status.Conditions, autoscaling.Failed).Message)
- updated = updated || checkState.State != kueue.CheckStatePending || checkState.Message != message
- checkState.State = kueue.CheckStatePending
- checkState.Message = message
+ updated = updateCheckState(&checkState, kueue.CheckStatePending) || updated
+ updated = updateCheckMessage(&checkState, message) || updated
} else {
updated = true
checkState.State = kueue.CheckStateRejected
@@ -510,17 +527,20 @@ func (c *Controller) syncCheckStates(ctx context.Context, wl *kueue.Workload, ch
}
}
case prProvisioned:
- if checkState.State != kueue.CheckStateReady {
+ if updateCheckState(&checkState, kueue.CheckStateReady) {
updated = true
- checkState.State = kueue.CheckStateReady
// add the pod podSetUpdates
checkState.PodSetUpdates = podSetUpdates(wl, pr)
+ updateCheckMessage(&checkState, apimeta.FindStatusCondition(pr.Status.Conditions, autoscaling.Provisioned).Message)
}
+ case prAccepted:
+ // We propagate the message from the ProvisioningRequest status into the workload.
+ // This happens when Provisioned=False (ETA updates) and also when Provisioned=True,
+ // so that the message changes to "successfully provisioned" after provisioning.
+ updated = updateCheckMessage(&checkState, apimeta.FindStatusCondition(pr.Status.Conditions, autoscaling.Provisioned).Message) || updated
+ updated = updateCheckState(&checkState, kueue.CheckStatePending) || updated
default:
- if checkState.State != kueue.CheckStatePending {
- updated = true
- checkState.State = kueue.CheckStatePending
- }
+ updated = updateCheckState(&checkState, kueue.CheckStatePending) || updated
}
}
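A note on the ordering used above: writing updated = updateCheckState(...) || updated guarantees the helper runs even when updated is already true, whereas OR-ing in the opposite direction would short-circuit and skip the mutation. A standalone toy sketch (setA is invented for illustration, not from the patch):

package main

import "fmt"

// setA sets *s to "A" and reports whether that changed anything, mirroring the
// shape of the updateCheckState/updateCheckMessage helpers.
func setA(s *string) bool {
	changed := *s != "A"
	*s = "A"
	return changed
}

func main() {
	s, updated := "B", true
	// Reversed order: once updated is true, setA is never called and s stays "B".
	updated = updated || setA(&s)
	fmt.Println(s, updated) // B true

	s, updated = "B", true
	// Order used in the controller: setA always runs, then ORs into updated.
	updated = setA(&s) || updated
	fmt.Println(s, updated) // A true
}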
diff --git a/pkg/controller/admissionchecks/provisioning/controller_test.go b/pkg/controller/admissionchecks/provisioning/controller_test.go
index f386fa2c2d..1ceccd38b4 100644
--- a/pkg/controller/admissionchecks/provisioning/controller_test.go
+++ b/pkg/controller/admissionchecks/provisioning/controller_test.go
@@ -64,6 +64,14 @@ var (
}
)
+func requestWithConditions(r *autoscaling.ProvisioningRequest, conditions []metav1.Condition) *autoscaling.ProvisioningRequest {
+ r = r.DeepCopy()
+ for _, condition := range conditions {
+ apimeta.SetStatusCondition(&r.Status.Conditions, condition)
+ }
+ return r
+}
+
func requestWithCondition(r *autoscaling.ProvisioningRequest, conditionType string, status metav1.ConditionStatus) *autoscaling.ProvisioningRequest {
r = r.DeepCopy()
apimeta.SetStatusCondition(&r.Status.Conditions, metav1.Condition{
@@ -647,6 +655,55 @@ func TestReconcile(t *testing.T) {
GetProvisioningRequestName("wl", "check2", 1),
},
},
+ "workloads status gets updated based on the provisioning request": {
+ workload: baseWorkload.DeepCopy(),
+ checks: []kueue.AdmissionCheck{*baseCheck.DeepCopy()},
+ flavors: []kueue.ResourceFlavor{*baseFlavor1.DeepCopy(), *baseFlavor2.DeepCopy()},
+ configs: []kueue.ProvisioningRequestConfig{
+ {
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "config1",
+ },
+ Spec: kueue.ProvisioningRequestConfigSpec{
+ ProvisioningClassName: "class1",
+ Parameters: map[string]kueue.Parameter{
+ "p1": "v1",
+ },
+ },
+ },
+ },
+ templates: []corev1.PodTemplate{*baseTemplate1.DeepCopy(), *baseTemplate2.DeepCopy()},
+ requests: []autoscaling.ProvisioningRequest{
+ *requestWithConditions(baseRequest,
+ []metav1.Condition{
+ {
+ Type: autoscaling.Failed,
+ Status: metav1.ConditionFalse,
+ },
+ {
+ Type: autoscaling.Provisioned,
+ Status: metav1.ConditionFalse,
+ Message: "Provisioning Request wasn't provisioned. ETA: 2024-02-22T10:36:40Z",
+ },
+ {
+ Type: autoscaling.Accepted,
+ Status: metav1.ConditionTrue,
+ },
+ }),
+ },
+ wantWorkloads: map[string]*kueue.Workload{
+ baseWorkload.Name: (&utiltesting.WorkloadWrapper{Workload: *baseWorkload.DeepCopy()}).
+ AdmissionChecks(kueue.AdmissionCheckState{
+ Name: "check1",
+ State: kueue.CheckStatePending,
+ Message: "Provisioning Request wasn't provisioned. ETA: 2024-02-22T10:36:40Z",
+ }, kueue.AdmissionCheckState{
+ Name: "not-provisioning",
+ State: kueue.CheckStatePending,
+ }).
+ Obj(),
+ },
+ },
}
for name, tc := range cases {
diff --git a/test/integration/controller/admissionchecks/provisioning/provisioning_test.go b/test/integration/controller/admissionchecks/provisioning/provisioning_test.go
index cfae97d28b..554ed4b055 100644
--- a/test/integration/controller/admissionchecks/provisioning/provisioning_test.go
+++ b/test/integration/controller/admissionchecks/provisioning/provisioning_test.go
@@ -317,6 +317,38 @@ var _ = ginkgo.Describe("Provisioning", ginkgo.Ordered, ginkgo.ContinueOnFailure
Namespace: wlKey.Namespace,
Name: provisioning.GetProvisioningRequestName(wlKey.Name, ac.Name, 1),
}
+ ginkgo.By("Setting the provision request as Not Provisioned and providing ETA", func() {
+ createdRequest := &autoscaling.ProvisioningRequest{}
+ gomega.Eventually(func() error {
+ err := k8sClient.Get(ctx, provReqKey, createdRequest)
+ if err != nil {
+ return err
+ }
+ apimeta.SetStatusCondition(&createdRequest.Status.Conditions, metav1.Condition{
+ Type: autoscaling.Accepted,
+ Status: metav1.ConditionTrue,
+ Reason: "Reason",
+ })
+ apimeta.SetStatusCondition(&createdRequest.Status.Conditions, metav1.Condition{
+ Type: autoscaling.Provisioned,
+ Status: metav1.ConditionFalse,
+ Reason: "Reason",
+ Message: "Not provisioned, ETA: 2024-02-22T10:36:40Z.",
+ })
+ return k8sClient.Status().Update(ctx, createdRequest)
+ }, util.Timeout, util.Interval).Should(gomega.Succeed())
+ })
+ ginkgo.By("Checking that the ETA is propagated to workload", func() {
+ updatedWl := &kueue.Workload{}
+ gomega.Eventually(func(g gomega.Gomega) {
+ g.Expect(k8sClient.Get(ctx, wlKey, updatedWl)).To(gomega.Succeed())
+ state := workload.FindAdmissionCheck(updatedWl.Status.AdmissionChecks, ac.Name)
+ g.Expect(state).NotTo(gomega.BeNil())
+ g.Expect(state.State).To(gomega.Equal(kueue.CheckStatePending))
+ g.Expect(state.Message).To(gomega.Equal("Not provisioned, ETA: 2024-02-22T10:36:40Z."))
+ }, util.Timeout, util.Interval).Should(gomega.Succeed())
+ })
+
ginkgo.By("Setting the provision request as Provisioned", func() {
createdRequest := &autoscaling.ProvisioningRequest{}
gomega.Eventually(func() error {
From 2991eccbadf7cdefa8f2d5f5146fd53bfbb9c18a Mon Sep 17 00:00:00 2001
From: Aldo Culquicondor <1299064+alculquicondor@users.noreply.github.com>
Date: Wed, 24 Apr 2024 03:39:15 -0400
Subject: [PATCH 19/49] Make dominant resource share flavor aware (#2037)
Change-Id: I21da836d55a63d788931e6212e0be30d6f78497b
---
keps/1714-fair-sharing/README.md | 10 +-
pkg/cache/clusterqueue.go | 50 ++--
pkg/cache/clusterqueue_test.go | 261 +++++++++++++-----
.../flavorassigner/flavorassigner.go | 16 ++
pkg/scheduler/preemption/preemption.go | 24 +-
pkg/scheduler/scheduler.go | 4 +-
pkg/workload/workload.go | 21 +-
pkg/workload/workload_test.go | 66 ++++-
8 files changed, 314 insertions(+), 138 deletions(-)
diff --git a/keps/1714-fair-sharing/README.md b/keps/1714-fair-sharing/README.md
index db47815e86..908b119a0b 100644
--- a/keps/1714-fair-sharing/README.md
+++ b/keps/1714-fair-sharing/README.md
@@ -206,10 +206,12 @@ The value function is a variation of DRF (see
[1](https://amplab.cs.berkeley.edu/wp-content/uploads/2011/06/Dominant-Resource-Fairness-Fair-Allocation-of-Multiple-Resource-Types.pdf),
[2](https://dash.harvard.edu/bitstream/handle/1/11956916/Parkes_BeyondDominant.pdf;jsessionid=AC0D06C2CC07C693BD42008D7AE25D99?sequence=1)):
-For a given resource r provided by a ClusterQueue or cohort c, we calculate T_r as the total
-requests consumed by the Workloads for that resource in that CQ or cohort, independent of the
-flavor, that are above the nominal quota. The value for a resource is the ratio of T_r and the
-total nominal quotas (or lendingLimits, if defined) in the hierarchy of the parent of C.
+For a given resource _r_ provided by a ClusterQueue or cohort _c_, we calculate $T_r$ as the
+total requests consumed by the Workloads for resource _r_ in that CQ or cohort,
+that are above the nominal quota, added up for all flavors.
+The value for a resource is the ratio of $T_r$ and the total nominal quotas
+(or lendingLimits, if defined) for the resource _r_, added up for all flavors,
+in the hierarchy of the parent of _c_.
Note that the share value for a suborganization (a node in the tree) is independent of the
share value for its children. In other words, the calculation of the share value only
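Written out with the same symbols (a restatement of the paragraph above, not additional KEP text):

$$T_r(c) = \sum_{f} \max\bigl(\text{usage}_{r,f}(c) - \text{nominal}_{r,f}(c),\ 0\bigr), \qquad \text{DRS}(c) = \max_{r} \frac{T_r(c)}{\text{lendable}_r}$$

where lendable_r is the total nominal quota (or lendingLimit, if set) for resource _r_ across the flavors in the hierarchy of the parent of _c_, and the dominant resource share DRS takes the maximum of the per-resource ratios.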
diff --git a/pkg/cache/clusterqueue.go b/pkg/cache/clusterqueue.go
index 42661dcbe3..142ee5ad73 100644
--- a/pkg/cache/clusterqueue.go
+++ b/pkg/cache/clusterqueue.go
@@ -128,8 +128,7 @@ type ResourceQuota struct {
LendingLimit *int64
}
-type ResourceQuantities map[corev1.ResourceName]int64
-type FlavorResourceQuantities map[kueue.ResourceFlavorReference]ResourceQuantities
+type FlavorResourceQuantities map[kueue.ResourceFlavorReference]workload.Requests
type queue struct {
key string
@@ -684,40 +683,53 @@ func (c *ClusterQueue) UsedCohortQuota(fName kueue.ResourceFlavorReference, rNam
return cohortUsage
}
-// DominantResourceShare returns a value from 0 to 100 representing the maximum of the ratios
+// DominantResourceShare returns a value from 0 to 1000 representing the maximum of the ratios
// of usage above nominal quota to the lendable resources in the cohort, among all the resources
// provided by the ClusterQueue.
// If zero, it means that the usage of the ClusterQueue is below the nominal quota.
// The function also returns the resource name that yielded this value.
func (c *ClusterQueue) DominantResourceShare() (int, corev1.ResourceName) {
- return c.dominantResourceShare(nil, 1)
+ return c.dominantResourceShare(nil, 0)
}
-func (c *ClusterQueue) DominantResourceShareWith(w *workload.Info) (int, corev1.ResourceName) {
- return c.dominantResourceShare(w, 1)
+func (c *ClusterQueue) DominantResourceShareWith(wlReq FlavorResourceQuantities) (int, corev1.ResourceName) {
+ return c.dominantResourceShare(wlReq, 1)
}
func (c *ClusterQueue) DominantResourceShareWithout(w *workload.Info) (int, corev1.ResourceName) {
- return c.dominantResourceShare(w, -1)
+ return c.dominantResourceShare(w.FlavorResourceUsage(), -1)
}
-func (c *ClusterQueue) dominantResourceShare(w *workload.Info, m int64) (int, corev1.ResourceName) {
+func (c *ClusterQueue) dominantResourceShare(wlReq FlavorResourceQuantities, m int64) (int, corev1.ResourceName) {
if c.Cohort == nil {
return 0, ""
}
+
+ borrowing := make(map[corev1.ResourceName]int64)
+ for _, rg := range c.ResourceGroups {
+ for _, flv := range rg.Flavors {
+ for rName, quotas := range flv.Resources {
+ b := c.Usage[flv.Name][rName] + m*wlReq[flv.Name][rName] - quotas.Nominal
+ if b > 0 {
+ borrowing[rName] += b
+ }
+ }
+ }
+ }
+ if len(borrowing) == 0 {
+ return 0, ""
+ }
+
var drs int64 = -1
var dRes corev1.ResourceName
- wUsage := w.ResourceUsage()
- for rName, rStats := range c.ResourceStats {
- var ratio int64
- if c.Cohort.ResourceStats[rName].Lendable > 0 {
- ratio = max(rStats.Usage+wUsage[rName]*m-rStats.Nominal, 0) * 100 /
- c.Cohort.ResourceStats[rName].Lendable
- }
- // Use alphabetical order to get a deterministic resource name.
- if ratio > drs || (ratio == drs && rName < dRes) {
- drs = ratio
- dRes = rName
+ for rName, b := range borrowing {
+ if lendable := c.Cohort.ResourceStats[rName].Lendable; lendable > 0 {
+ ratio := b * 1000 / lendable
+ // Use alphabetical order to get a deterministic resource name.
+ if ratio > drs || (ratio == drs && rName < dRes) {
+ drs = ratio
+ dRes = rName
+ }
}
}
return int(drs), dRes
diff --git a/pkg/cache/clusterqueue_test.go b/pkg/cache/clusterqueue_test.go
index bc153e431f..9ad9d5bd8b 100644
--- a/pkg/cache/clusterqueue_test.go
+++ b/pkg/cache/clusterqueue_test.go
@@ -22,12 +22,12 @@ import (
"github.com/google/go-cmp/cmp"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/utils/ptr"
kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
"sigs.k8s.io/kueue/pkg/features"
"sigs.k8s.io/kueue/pkg/metrics"
utiltesting "sigs.k8s.io/kueue/pkg/util/testing"
- "sigs.k8s.io/kueue/pkg/workload"
)
func TestClusterQueueUpdateWithFlavors(t *testing.T) {
@@ -759,38 +759,60 @@ func TestClusterQueueUpdateWithAdmissionCheck(t *testing.T) {
func TestDominantResourceShare(t *testing.T) {
cases := map[string]struct {
cq ClusterQueue
- workload *workload.Info
+ flvResQ FlavorResourceQuantities
wantDRValue int
wantDRName corev1.ResourceName
}{
"no cohort": {
cq: ClusterQueue{
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 2_000,
- Lendable: 2_000,
- Usage: 1_000,
+ Usage: FlavorResourceQuantities{
+ "default": {
+ corev1.ResourceCPU: 1_000,
+ "example.com/gpu": 2,
},
- "example.com/gpu": {
- Nominal: 5,
- Lendable: 5,
- Usage: 2_000,
+ },
+ ResourceGroups: []ResourceGroup{
+ {
+ Flavors: []FlavorQuotas{
+ {
+ Name: "default",
+ Resources: map[corev1.ResourceName]*ResourceQuota{
+ corev1.ResourceCPU: {
+ Nominal: 2_000,
+ },
+ "example.com/gpu": {
+ Nominal: 5,
+ },
+ },
+ },
+ },
},
},
},
},
"usage below nominal": {
cq: ClusterQueue{
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 2_000,
- Lendable: 2_000,
- Usage: 1_000,
+ Usage: FlavorResourceQuantities{
+ "default": {
+ corev1.ResourceCPU: 1_000,
+ "example.com/gpu": 2,
},
- "example.com/gpu": {
- Nominal: 5,
- Lendable: 5,
- Usage: 2,
+ },
+ ResourceGroups: []ResourceGroup{
+ {
+ Flavors: []FlavorQuotas{
+ {
+ Name: "default",
+ Resources: map[corev1.ResourceName]*ResourceQuota{
+ corev1.ResourceCPU: {
+ Nominal: 2_000,
+ },
+ "example.com/gpu": {
+ Nominal: 5,
+ },
+ },
+ },
+ },
},
},
Cohort: &Cohort{
@@ -808,20 +830,30 @@ func TestDominantResourceShare(t *testing.T) {
},
},
},
- wantDRName: corev1.ResourceCPU, // due to alphabetical order.
},
"usage above nominal": {
cq: ClusterQueue{
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 2_000,
- Lendable: 2_000,
- Usage: 3_000,
+ Usage: FlavorResourceQuantities{
+ "default": {
+ corev1.ResourceCPU: 3_000,
+ "example.com/gpu": 7,
},
- "example.com/gpu": {
- Nominal: 5,
- Lendable: 5,
- Usage: 7,
+ },
+ ResourceGroups: []ResourceGroup{
+ {
+ Flavors: []FlavorQuotas{
+ {
+ Name: "default",
+ Resources: map[corev1.ResourceName]*ResourceQuota{
+ corev1.ResourceCPU: {
+ Nominal: 2_000,
+ },
+ "example.com/gpu": {
+ Nominal: 5,
+ },
+ },
+ },
+ },
},
},
Cohort: &Cohort{
@@ -840,20 +872,31 @@ func TestDominantResourceShare(t *testing.T) {
},
},
wantDRName: "example.com/gpu",
- wantDRValue: 20, // (7-5)/10
+ wantDRValue: 200, // (7-5)*1000/10
},
"one resource above nominal": {
cq: ClusterQueue{
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 2_000,
- Lendable: 2_000,
- Usage: 3_000,
+ Usage: FlavorResourceQuantities{
+ "default": {
+ corev1.ResourceCPU: 3_000,
+ "example.com/gpu": 3,
},
- "example.com/gpu": {
- Nominal: 5,
- Lendable: 5,
- Usage: 3,
+ },
+ ResourceGroups: []ResourceGroup{
+ {
+ Flavors: []FlavorQuotas{
+ {
+ Name: "default",
+ Resources: map[corev1.ResourceName]*ResourceQuota{
+ corev1.ResourceCPU: {
+ Nominal: 2_000,
+ },
+ "example.com/gpu": {
+ Nominal: 5,
+ },
+ },
+ },
+ },
},
},
Cohort: &Cohort{
@@ -872,20 +915,31 @@ func TestDominantResourceShare(t *testing.T) {
},
},
wantDRName: corev1.ResourceCPU,
- wantDRValue: 10, // (3-2)/10
+ wantDRValue: 100, // (3-2)*1000/10
},
"usage with workload above nominal": {
cq: ClusterQueue{
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 2_000,
- Lendable: 2_000,
- Usage: 1_000,
+ Usage: FlavorResourceQuantities{
+ "default": {
+ corev1.ResourceCPU: 1_000,
+ "example.com/gpu": 2,
},
- "example.com/gpu": {
- Nominal: 5,
- Lendable: 5,
- Usage: 2,
+ },
+ ResourceGroups: []ResourceGroup{
+ {
+ Flavors: []FlavorQuotas{
+ {
+ Name: "default",
+ Resources: map[corev1.ResourceName]*ResourceQuota{
+ corev1.ResourceCPU: {
+ Nominal: 2_000,
+ },
+ "example.com/gpu": {
+ Nominal: 5,
+ },
+ },
+ },
+ },
},
},
Cohort: &Cohort{
@@ -903,28 +957,39 @@ func TestDominantResourceShare(t *testing.T) {
},
},
},
- workload: &workload.Info{
- TotalRequests: []workload.PodSetResources{{
- Requests: workload.Requests{
- corev1.ResourceCPU: 4_000,
- "example.com/gpu": 4,
- },
- }},
+ flvResQ: FlavorResourceQuantities{
+ "default": {
+ corev1.ResourceCPU: 4_000,
+ "example.com/gpu": 4,
+ },
},
wantDRName: corev1.ResourceCPU,
- wantDRValue: 30, // (1+4-2)/10
+ wantDRValue: 300, // (1+4-2)*1000/10
},
"A resource with zero lendable": {
cq: ClusterQueue{
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 2_000,
- Lendable: 2_000,
- Usage: 1_000,
+ Usage: FlavorResourceQuantities{
+ "default": {
+ corev1.ResourceCPU: 1_000,
+ "example.com/gpu": 1,
},
- "example.com/gpu": {
- Nominal: 2_000,
- Usage: 1_000,
+ },
+ ResourceGroups: []ResourceGroup{
+ {
+ Flavors: []FlavorQuotas{
+ {
+ Name: "default",
+ Resources: map[corev1.ResourceName]*ResourceQuota{
+ corev1.ResourceCPU: {
+ Nominal: 2_000,
+ },
+ "example.com/gpu": {
+ Nominal: 2,
+ LendingLimit: ptr.To[int64](0),
+ },
+ },
+ },
+ },
},
},
Cohort: &Cohort{
@@ -941,21 +1006,69 @@ func TestDominantResourceShare(t *testing.T) {
},
},
},
- workload: &workload.Info{
- TotalRequests: []workload.PodSetResources{{
- Requests: workload.Requests{
- corev1.ResourceCPU: 4_000,
- "example.com/gpu": 4,
+ flvResQ: FlavorResourceQuantities{
+ "default": {
+ corev1.ResourceCPU: 4_000,
+ "example.com/gpu": 4,
+ },
+ },
+ wantDRName: corev1.ResourceCPU,
+ wantDRValue: 300, // (1+4-2)*1000/10
+ },
+ "multiple flavors": {
+ cq: ClusterQueue{
+ Usage: FlavorResourceQuantities{
+ "on-demand": {
+ corev1.ResourceCPU: 15_000,
+ },
+ "spot": {
+ corev1.ResourceCPU: 5_000,
+ },
+ },
+ ResourceGroups: []ResourceGroup{
+ {
+ Flavors: []FlavorQuotas{
+ {
+ Name: "on-demand",
+ Resources: map[corev1.ResourceName]*ResourceQuota{
+ corev1.ResourceCPU: {
+ Nominal: 20_000,
+ },
+ },
+ },
+ {
+ Name: "spot",
+ Resources: map[corev1.ResourceName]*ResourceQuota{
+ corev1.ResourceCPU: {
+ Nominal: 80_000,
+ },
+ },
+ },
+ },
+ },
+ },
+ Cohort: &Cohort{
+ ResourceStats: ResourceStats{
+ corev1.ResourceCPU: {
+ Nominal: 200_000,
+ Lendable: 200_000,
+ Usage: 20_000,
+ },
},
- }},
+ },
+ },
+ flvResQ: FlavorResourceQuantities{
+ "on-demand": {
+ corev1.ResourceCPU: 10_000,
+ },
},
wantDRName: corev1.ResourceCPU,
- wantDRValue: 30, // (1+4-2)/10
+ wantDRValue: 25, // ((15+10-20)+0)*1000/200 (spot under nominal)
},
}
for name, tc := range cases {
t.Run(name, func(t *testing.T) {
- drValue, drName := tc.cq.DominantResourceShareWith(tc.workload)
+ drValue, drName := tc.cq.DominantResourceShareWith(tc.flvResQ)
if drValue != tc.wantDRValue {
t.Errorf("DominantResourceShare(_) returned value %d, want %d", drValue, tc.wantDRValue)
}
diff --git a/pkg/scheduler/flavorassigner/flavorassigner.go b/pkg/scheduler/flavorassigner/flavorassigner.go
index c36ffc2e2a..7ffe9e0bd5 100644
--- a/pkg/scheduler/flavorassigner/flavorassigner.go
+++ b/pkg/scheduler/flavorassigner/flavorassigner.go
@@ -105,6 +105,22 @@ func (a *Assignment) ToAPI() []kueue.PodSetAssignment {
return psFlavors
}
+func (a *Assignment) TotalRequestsFor(wl *workload.Info) cache.FlavorResourceQuantities {
+ usage := make(cache.FlavorResourceQuantities)
+ for i, ps := range wl.TotalRequests {
+ for res, q := range ps.Requests {
+ flv := a.PodSets[i].Flavors[res].Name
+ resUsage := usage[flv]
+ if resUsage == nil {
+ resUsage = make(map[corev1.ResourceName]int64)
+ usage[flv] = resUsage
+ }
+ resUsage[res] += q
+ }
+ }
+ return usage
+}
+
type Status struct {
reasons []string
err error
diff --git a/pkg/scheduler/preemption/preemption.go b/pkg/scheduler/preemption/preemption.go
index 52681a4fad..e5670de8b3 100644
--- a/pkg/scheduler/preemption/preemption.go
+++ b/pkg/scheduler/preemption/preemption.go
@@ -92,7 +92,7 @@ func (p *Preemptor) GetTargets(wl workload.Info, assignment flavorassigner.Assig
sort.Slice(candidates, candidatesOrdering(candidates, cq.Name, time.Now()))
sameQueueCandidates := candidatesOnlyFromQueue(candidates, wl.ClusterQueue)
- wlReq := totalRequestsForAssignment(&wl, assignment)
+ wlReq := assignment.TotalRequestsFor(&wl)
// To avoid flapping, Kueue only allows preemption of workloads from the same
// queue if borrowing. Preemption of workloads from queues can happen only
@@ -258,8 +258,8 @@ func restoreSnapshot(snapshot *cache.Snapshot, targets []*workload.Info) {
func fairPreemptions(wl *workload.Info, assignment flavorassigner.Assignment, snapshot *cache.Snapshot, resPerFlv resourcesPerFlavor, candidates []*workload.Info, allowBorrowingBelowPriority *int32) []*workload.Info {
cqHeap := cqHeapFromCandidates(candidates, false, snapshot)
nominatedCQ := snapshot.ClusterQueues[wl.ClusterQueue]
- newNominatedShareValue, _ := nominatedCQ.DominantResourceShareWith(wl)
- wlReq := totalRequestsForAssignment(wl, assignment)
+ wlReq := assignment.TotalRequestsFor(wl)
+ newNominatedShareValue, _ := nominatedCQ.DominantResourceShareWith(wlReq)
var targets []*workload.Info
fits := false
var retryCandidates []*workload.Info
@@ -274,7 +274,7 @@ func fairPreemptions(wl *workload.Info, assignment flavorassigner.Assignment, sn
fits = true
break
}
- newNominatedShareValue, _ = nominatedCQ.DominantResourceShareWith(wl)
+ newNominatedShareValue, _ = nominatedCQ.DominantResourceShareWith(wlReq)
candCQ.workloads = candCQ.workloads[1:]
if len(candCQ.workloads) > 0 {
candCQ.share, _ = candCQ.cq.DominantResourceShare()
@@ -469,22 +469,6 @@ func workloadUsesResources(wl *workload.Info, resPerFlv resourcesPerFlavor) bool
return false
}
-func totalRequestsForAssignment(wl *workload.Info, assignment flavorassigner.Assignment) cache.FlavorResourceQuantities {
- usage := make(cache.FlavorResourceQuantities)
- for i, ps := range wl.TotalRequests {
- for res, q := range ps.Requests {
- flv := assignment.PodSets[i].Flavors[res].Name
- resUsage := usage[flv]
- if resUsage == nil {
- resUsage = make(map[corev1.ResourceName]int64)
- usage[flv] = resUsage
- }
- resUsage[res] += q
- }
- }
- return usage
-}
-
// workloadFits determines if the workload requests would fit given the
// requestable resources and simulated usage of the ClusterQueue and its cohort,
// if it belongs to one.
diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go
index 4665047079..d14dbc38e0 100644
--- a/pkg/scheduler/scheduler.go
+++ b/pkg/scheduler/scheduler.go
@@ -158,7 +158,7 @@ func (cu *cohortsUsage) add(cohort string, assignment cache.FlavorResourceQuanti
}
func (cu *cohortsUsage) totalUsageForCommonFlavorResources(cohort string, assignment cache.FlavorResourceQuantities) cache.FlavorResourceQuantities {
- return utilmaps.Intersect((*cu)[cohort], assignment, func(a, b cache.ResourceQuantities) cache.ResourceQuantities {
+ return utilmaps.Intersect((*cu)[cohort], assignment, func(a, b workload.Requests) workload.Requests {
return utilmaps.Intersect(a, b, func(a, b int64) int64 { return a + b })
})
}
@@ -357,7 +357,7 @@ func (s *Scheduler) nominate(ctx context.Context, workloads []workload.Info, sna
e.inadmissibleMsg = e.assignment.Message()
e.Info.LastAssignment = &e.assignment.LastState
if s.enableFairSharing {
- e.dominantResourceShare, e.dominantResourceName = cq.DominantResourceShareWith(&w)
+ e.dominantResourceShare, e.dominantResourceName = cq.DominantResourceShareWith(e.assignment.TotalRequestsFor(&w))
}
}
entries = append(entries, e)
diff --git a/pkg/workload/workload.go b/pkg/workload/workload.go
index d72f8a0fb9..cfce1ea7c8 100644
--- a/pkg/workload/workload.go
+++ b/pkg/workload/workload.go
@@ -159,18 +159,25 @@ func (i *Info) CanBePartiallyAdmitted() bool {
}
// ResourceUsage returns the total resource usage for the workload,
-// per resource.
-func (i *Info) ResourceUsage() Requests {
+// per flavor (if assigned, otherwise flavor shows as empty string), per resource.
+func (i *Info) FlavorResourceUsage() map[kueue.ResourceFlavorReference]Requests {
if i == nil || len(i.TotalRequests) == 0 {
return nil
}
- req := maps.Clone(i.TotalRequests[0].Requests)
- for j := 1; j < len(i.TotalRequests); j++ {
- for rName, rVal := range i.TotalRequests[j].Requests {
- req[rName] += rVal
+ total := make(map[kueue.ResourceFlavorReference]Requests)
+ for _, psReqs := range i.TotalRequests {
+ for res, q := range psReqs.Requests {
+ flv := psReqs.Flavors[res]
+ if requests, found := total[flv]; found {
+ requests[res] += q
+ } else {
+ total[flv] = Requests{
+ res: q,
+ }
+ }
}
}
- return req
+ return total
}
func CanBePartiallyAdmitted(wl *kueue.Workload) bool {
diff --git a/pkg/workload/workload_test.go b/pkg/workload/workload_test.go
index 78a6bc0f54..5dbebf62d5 100644
--- a/pkg/workload/workload_test.go
+++ b/pkg/workload/workload_test.go
@@ -591,13 +591,13 @@ func TestIsEvictedByPodsReadyTimeout(t *testing.T) {
}
}
-func TestResourceUsage(t *testing.T) {
+func TestFlavorResourceUsage(t *testing.T) {
cases := map[string]struct {
info *Info
- want Requests
+ want map[kueue.ResourceFlavorReference]Requests
}{
"nil": {},
- "one podset": {
+ "one podset, no flavors": {
info: &Info{
TotalRequests: []PodSetResources{{
Requests: Requests{
@@ -606,12 +606,36 @@ func TestResourceUsage(t *testing.T) {
},
}},
},
- want: Requests{
- corev1.ResourceCPU: 1_000,
- "example.com/gpu": 3,
+ want: map[kueue.ResourceFlavorReference]Requests{
+ "": {
+ corev1.ResourceCPU: 1_000,
+ "example.com/gpu": 3,
+ },
},
},
- "multiple podsets": {
+ "one podset, multiple flavors": {
+ info: &Info{
+ TotalRequests: []PodSetResources{{
+ Requests: Requests{
+ corev1.ResourceCPU: 1_000,
+ "example.com/gpu": 3,
+ },
+ Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{
+ corev1.ResourceCPU: "default",
+ "example.com/gpu": "gpu",
+ },
+ }},
+ },
+ want: map[kueue.ResourceFlavorReference]Requests{
+ "default": {
+ corev1.ResourceCPU: 1_000,
+ },
+ "gpu": {
+ "example.com/gpu": 3,
+ },
+ },
+ },
+ "multiple podsets, multiple flavors": {
info: &Info{
TotalRequests: []PodSetResources{
{
@@ -619,30 +643,48 @@ func TestResourceUsage(t *testing.T) {
corev1.ResourceCPU: 1_000,
"example.com/gpu": 3,
},
+ Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{
+ corev1.ResourceCPU: "default",
+ "example.com/gpu": "model_a",
+ },
},
{
Requests: Requests{
corev1.ResourceCPU: 2_000,
corev1.ResourceMemory: 2 * utiltesting.Gi,
},
+ Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{
+ corev1.ResourceCPU: "default",
+ corev1.ResourceMemory: "default",
+ },
},
{
Requests: Requests{
"example.com/gpu": 1,
},
+ Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{
+ "example.com/gpu": "model_b",
+ },
},
},
},
- want: Requests{
- corev1.ResourceCPU: 3_000,
- corev1.ResourceMemory: 2 * utiltesting.Gi,
- "example.com/gpu": 4,
+ want: map[kueue.ResourceFlavorReference]Requests{
+ "default": {
+ corev1.ResourceCPU: 3_000,
+ corev1.ResourceMemory: 2 * utiltesting.Gi,
+ },
+ "model_a": {
+ "example.com/gpu": 3,
+ },
+ "model_b": {
+ "example.com/gpu": 1,
+ },
},
},
}
for name, tc := range cases {
t.Run(name, func(t *testing.T) {
- got := tc.info.ResourceUsage()
+ got := tc.info.FlavorResourceUsage()
if diff := cmp.Diff(tc.want, got); diff != "" {
t.Errorf("info.ResourceUsage() returned (-want,+got):\n%s", diff)
}
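To see the per-flavor aggregation introduced by this patch in isolation, here is a minimal self-contained Go sketch. It deliberately uses plain map and string types instead of the kueue API types, so every name in it is illustrative; the logic mirrors the new `FlavorResourceUsage` above: requests are grouped by the flavor assigned to each resource, with the empty string as the key for unassigned resources.

```go
package main

import "fmt"

// requests maps a resource name to a quantity (milli-units for CPU).
type requests map[string]int64

// podSet pairs per-resource requests with the flavor assigned to each resource.
type podSet struct {
	requests requests
	flavors  map[string]string
}

// flavorUsage groups the total requests by assigned flavor; resources without
// an assignment end up under the empty flavor key "".
func flavorUsage(podSets []podSet) map[string]requests {
	total := make(map[string]requests)
	for _, ps := range podSets {
		for res, q := range ps.requests {
			flv := ps.flavors[res] // "" when no flavor was assigned
			if total[flv] == nil {
				total[flv] = requests{}
			}
			total[flv][res] += q
		}
	}
	return total
}

func main() {
	usage := flavorUsage([]podSet{
		{requests: requests{"cpu": 1000, "example.com/gpu": 3},
			flavors: map[string]string{"cpu": "default", "example.com/gpu": "gpu"}},
		{requests: requests{"cpu": 2000}, flavors: map[string]string{"cpu": "default"}},
	})
	fmt.Println(usage) // map[default:map[cpu:3000] gpu:map[example.com/gpu:3]]
}
```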
From 36b340e396341c905444576bf797a221cb5f92ac Mon Sep 17 00:00:00 2001
From: Aldo Culquicondor <1299064+alculquicondor@users.noreply.github.com>
Date: Wed, 24 Apr 2024 03:39:25 -0400
Subject: [PATCH 20/49] Fix number of pod reconcilers in default chart values
(#2046)
Change-Id: Ibeaf93a0f897524860e7046586249528161fd65a
---
charts/kueue/values.yaml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/charts/kueue/values.yaml b/charts/kueue/values.yaml
index 02ade6c8c9..e592129ba0 100644
--- a/charts/kueue/values.yaml
+++ b/charts/kueue/values.yaml
@@ -68,7 +68,7 @@ managerConfig:
controller:
groupKindConcurrency:
Job.batch: 5
- Pod.: 5
+ Pod: 5
Workload.kueue.x-k8s.io: 5
LocalQueue.kueue.x-k8s.io: 1
ClusterQueue.kueue.x-k8s.io: 1
@@ -92,7 +92,7 @@ managerConfig:
- "ray.io/raycluster"
- "jobset.x-k8s.io/jobset"
- "kubeflow.org/mxjob"
- - "kubeflow.org/paddlejob"
+ - "kubeflow.org/paddlejob"
- "kubeflow.org/pytorchjob"
- "kubeflow.org/tfjob"
- "kubeflow.org/xgboostjob"
From 9b25d998b4cc317ab9539e55497be49fa19588d3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Wo=C5=BAniak?=
Date: Wed, 24 Apr 2024 11:01:23 +0200
Subject: [PATCH 21/49] [WaitForPodsReady] Make requeue base delay configurable
(#2040)
* Make requeuing backoff base configurable
* Add a validation test
* Pass config for waitForPodsReady
---
apis/config/v1beta1/configuration_types.go | 10 ++-
apis/config/v1beta1/defaults.go | 13 +++-
apis/config/v1beta1/defaults_test.go | 12 ++-
apis/config/v1beta1/zz_generated.deepcopy.go | 5 ++
.../README.md | 16 +++-
pkg/config/config_test.go | 9 ++-
pkg/config/validation.go | 4 +
pkg/config/validation_test.go | 17 +++++
pkg/controller/core/core.go | 48 ++++--------
pkg/controller/core/workload_controller.go | 75 +++++++------------
.../core/workload_controller_test.go | 30 ++++----
.../en/docs/reference/kueue-config.v1beta1.md | 12 ++-
.../scheduler/podsready/suite_test.go | 8 +-
13 files changed, 144 insertions(+), 115 deletions(-)
diff --git a/apis/config/v1beta1/configuration_types.go b/apis/config/v1beta1/configuration_types.go
index 88f36ddaf5..3c8d27939e 100644
--- a/apis/config/v1beta1/configuration_types.go
+++ b/apis/config/v1beta1/configuration_types.go
@@ -238,7 +238,8 @@ type RequeuingStrategy struct {
// Once the number is reached, the workload is deactivated (`.spec.activate`=`false`).
// When it is null, the workloads will be repeatedly and endlessly re-queued.
//
- // Every backoff duration is about "10s*2^(n-1)+Rand" where:
+ // Every backoff duration is about "b*2^(n-1)+Rand" where:
+ // - "b" represents the base set by "BackoffBaseSeconds" parameter,
// - "n" represents the "workloadStatus.requeueState.count",
// - "Rand" represents the random jitter.
// During this time, the workload is taken as an inadmissible and
@@ -248,6 +249,13 @@ type RequeuingStrategy struct {
// Defaults to null.
// +optional
BackoffLimitCount *int32 `json:"backoffLimitCount,omitempty"`
+
+ // BackoffBaseSeconds defines the base for the exponential backoff for
+ // re-queuing an evicted workload.
+ //
+ // Defaults to 10.
+ // +optional
+ BackoffBaseSeconds *int32 `json:"backoffBaseSeconds,omitempty"`
}
type RequeuingTimestamp string
diff --git a/apis/config/v1beta1/defaults.go b/apis/config/v1beta1/defaults.go
index 8f11d2cd87..059530eb0f 100644
--- a/apis/config/v1beta1/defaults.go
+++ b/apis/config/v1beta1/defaults.go
@@ -47,6 +47,7 @@ const (
DefaultMultiKueueGCInterval = time.Minute
DefaultMultiKueueOrigin = "multikueue"
DefaultMultiKueueWorkerLostTimeout = 15 * time.Minute
+ DefaultRequeuingBackoffBaseSeconds = 10
)
func getOperatorNamespace() string {
@@ -121,10 +122,14 @@ func SetDefaults_Configuration(cfg *Configuration) {
}
cfg.WaitForPodsReady.BlockAdmission = &defaultBlockAdmission
}
- if cfg.WaitForPodsReady.RequeuingStrategy == nil || cfg.WaitForPodsReady.RequeuingStrategy.Timestamp == nil {
- cfg.WaitForPodsReady.RequeuingStrategy = &RequeuingStrategy{
- Timestamp: ptr.To(EvictionTimestamp),
- }
+ if cfg.WaitForPodsReady.RequeuingStrategy == nil {
+ cfg.WaitForPodsReady.RequeuingStrategy = &RequeuingStrategy{}
+ }
+ if cfg.WaitForPodsReady.RequeuingStrategy.Timestamp == nil {
+ cfg.WaitForPodsReady.RequeuingStrategy.Timestamp = ptr.To(EvictionTimestamp)
+ }
+ if cfg.WaitForPodsReady.RequeuingStrategy.BackoffBaseSeconds == nil {
+ cfg.WaitForPodsReady.RequeuingStrategy.BackoffBaseSeconds = ptr.To[int32](DefaultRequeuingBackoffBaseSeconds)
}
}
if cfg.Integrations == nil {
diff --git a/apis/config/v1beta1/defaults_test.go b/apis/config/v1beta1/defaults_test.go
index 29cd949c9e..3385303c99 100644
--- a/apis/config/v1beta1/defaults_test.go
+++ b/apis/config/v1beta1/defaults_test.go
@@ -364,7 +364,8 @@ func TestSetDefaults_Configuration(t *testing.T) {
BlockAdmission: ptr.To(true),
Timeout: &podsReadyTimeoutTimeout,
RequeuingStrategy: &RequeuingStrategy{
- Timestamp: ptr.To(EvictionTimestamp),
+ Timestamp: ptr.To(EvictionTimestamp),
+ BackoffBaseSeconds: ptr.To[int32](DefaultRequeuingBackoffBaseSeconds),
},
},
Namespace: ptr.To(DefaultNamespace),
@@ -393,7 +394,8 @@ func TestSetDefaults_Configuration(t *testing.T) {
BlockAdmission: ptr.To(false),
Timeout: &podsReadyTimeoutTimeout,
RequeuingStrategy: &RequeuingStrategy{
- Timestamp: ptr.To(EvictionTimestamp),
+ Timestamp: ptr.To(EvictionTimestamp),
+ BackoffBaseSeconds: ptr.To[int32](DefaultRequeuingBackoffBaseSeconds),
},
},
Namespace: ptr.To(DefaultNamespace),
@@ -413,7 +415,8 @@ func TestSetDefaults_Configuration(t *testing.T) {
Enable: true,
Timeout: &podsReadyTimeoutOverwrite,
RequeuingStrategy: &RequeuingStrategy{
- Timestamp: ptr.To(CreationTimestamp),
+ Timestamp: ptr.To(CreationTimestamp),
+ BackoffBaseSeconds: ptr.To[int32](63),
},
},
InternalCertManagement: &InternalCertManagement{
@@ -426,7 +429,8 @@ func TestSetDefaults_Configuration(t *testing.T) {
BlockAdmission: ptr.To(true),
Timeout: &podsReadyTimeoutOverwrite,
RequeuingStrategy: &RequeuingStrategy{
- Timestamp: ptr.To(CreationTimestamp),
+ Timestamp: ptr.To(CreationTimestamp),
+ BackoffBaseSeconds: ptr.To[int32](63),
},
},
Namespace: ptr.To(DefaultNamespace),
diff --git a/apis/config/v1beta1/zz_generated.deepcopy.go b/apis/config/v1beta1/zz_generated.deepcopy.go
index f53a7f8fa7..1a19f9cc08 100644
--- a/apis/config/v1beta1/zz_generated.deepcopy.go
+++ b/apis/config/v1beta1/zz_generated.deepcopy.go
@@ -380,6 +380,11 @@ func (in *RequeuingStrategy) DeepCopyInto(out *RequeuingStrategy) {
*out = new(int32)
**out = **in
}
+ if in.BackoffBaseSeconds != nil {
+ in, out := &in.BackoffBaseSeconds, &out.BackoffBaseSeconds
+ *out = new(int32)
+ **out = **in
+ }
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RequeuingStrategy.
diff --git a/keps/1282-pods-ready-requeue-strategy/README.md b/keps/1282-pods-ready-requeue-strategy/README.md
index b025b3bb9f..60c6944aaa 100644
--- a/keps/1282-pods-ready-requeue-strategy/README.md
+++ b/keps/1282-pods-ready-requeue-strategy/README.md
@@ -153,6 +153,13 @@ type RequeuingStrategy struct {
// Defaults to null.
// +optional
BackoffLimitCount *int32 `json:"backoffLimitCount,omitempty"`
+
+ // BackoffBaseSeconds defines the base for the exponential backoff for
+ // re-queuing an evicted workload.
+ //
+ // Defaults to 10.
+ // +optional
+ BackoffBaseSeconds *int32 `json:"backoffBaseSeconds,omitempty"`
}
type RequeuingTimestamp string
@@ -222,12 +229,15 @@ the queueManager holds the evicted workloads as inadmissible workloads while exp
During this time, other workloads will have a chance to be admitted.
The queueManager calculates an exponential backoff duration by [the Step function](https://pkg.go.dev/k8s.io/apimachinery/pkg/util/wait@v0.29.1#Backoff.Step)
-according to the $10s*2^{(n-1)}+Rand$ where the $n$ represents the `workloadStatus.requeueState.count`, and the $Rand$ represents the random jitter.
+according to the $b*2^{(n-1)}+Rand$ where:
+- $b$ represents the base delay, configured by `backoffBaseSeconds`
+- $n$ represents the `workloadStatus.requeueState.count`,
+- $Rand$ represents the random jitter.
It will spend awaiting to be requeued after eviction:
-$$\sum_{k=1}^{n}(10s*2^{(k-1)} + Rand)$$
+$$\sum_{k=1}^{n}(b*2^{(k-1)} + Rand)$$
-Assuming `backoffLimitCount` equals 10, and the workload is requeued 10 times
+Assuming `backoffLimitCount` equals 10 and `backoffBaseSeconds` equals 10 (default), the workload is requeued 10 times
after failing to have all pods ready, then the total time awaiting for requeue
will take (neglecting the jitter): `10s+20s+40s +...+7680s=2h 8min`.
Also, considering `.waitForPodsReady.timeout=300s` (default),
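As a quick illustration of the formula above, the following standalone Go sketch (not part of the patch; plain arithmetic with the jitter ignored) prints the first few requeue delays and their cumulative sum for the default `backoffBaseSeconds` of 10:

```go
package main

import (
	"fmt"
	"time"
)

// delay returns the approximate n-th requeue delay b*2^(n-1), ignoring jitter.
func delay(baseSeconds int64, n int) time.Duration {
	return time.Duration(baseSeconds) * time.Second << (n - 1)
}

func main() {
	const base = 10 // backoffBaseSeconds default
	var total time.Duration
	for n := 1; n <= 5; n++ {
		d := delay(base, n)
		total += d
		fmt.Printf("requeue #%d: wait %v (cumulative %v)\n", n, d, total)
	}
	// Prints 10s, 20s, 40s, 1m20s, 2m40s with cumulative 10s, 30s, 1m10s, 2m30s, 5m10s.
}
```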
diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go
index 4caa3d90ea..7a12c73d14 100644
--- a/pkg/config/config_test.go
+++ b/pkg/config/config_test.go
@@ -218,7 +218,7 @@ clientConnection:
apiVersion: config.kueue.x-k8s.io/v1beta1
kind: Configuration
integrations:
- frameworks:
+ frameworks:
- batch/job
`), os.FileMode(0600)); err != nil {
t.Fatal(err)
@@ -239,7 +239,7 @@ queueVisibility:
apiVersion: config.kueue.x-k8s.io/v1beta1
kind: Configuration
integrations:
- frameworks:
+ frameworks:
- pod
podOptions:
namespaceSelector:
@@ -543,8 +543,9 @@ multiKueue:
BlockAdmission: ptr.To(true),
Timeout: &metav1.Duration{Duration: 5 * time.Minute},
RequeuingStrategy: &configapi.RequeuingStrategy{
- Timestamp: ptr.To(configapi.CreationTimestamp),
- BackoffLimitCount: ptr.To[int32](10),
+ Timestamp: ptr.To(configapi.CreationTimestamp),
+ BackoffLimitCount: ptr.To[int32](10),
+ BackoffBaseSeconds: ptr.To[int32](10),
},
},
ClientConnection: defaultClientConnection,
diff --git a/pkg/config/validation.go b/pkg/config/validation.go
index 6be5d650ac..275170690e 100644
--- a/pkg/config/validation.go
+++ b/pkg/config/validation.go
@@ -72,6 +72,10 @@ func validateWaitForPodsReady(c *configapi.Configuration) field.ErrorList {
allErrs = append(allErrs, field.Invalid(requeuingStrategyPath.Child("backoffLimitCount"),
*strategy.BackoffLimitCount, constants.IsNegativeErrorMsg))
}
+ if strategy.BackoffBaseSeconds != nil && *strategy.BackoffBaseSeconds < 0 {
+ allErrs = append(allErrs, field.Invalid(requeuingStrategyPath.Child("backoffBaseSeconds"),
+ *strategy.BackoffBaseSeconds, constants.IsNegativeErrorMsg))
+ }
}
return allErrs
}
diff --git a/pkg/config/validation_test.go b/pkg/config/validation_test.go
index f23b647bfc..c85322f8b8 100644
--- a/pkg/config/validation_test.go
+++ b/pkg/config/validation_test.go
@@ -271,6 +271,23 @@ func TestValidate(t *testing.T) {
},
},
},
+ "negative waitForPodsReady.requeuingStrategy.backoffBaseSeconds": {
+ cfg: &configapi.Configuration{
+ Integrations: defaultIntegrations,
+ WaitForPodsReady: &configapi.WaitForPodsReady{
+ Enable: true,
+ RequeuingStrategy: &configapi.RequeuingStrategy{
+ BackoffBaseSeconds: ptr.To[int32](-1),
+ },
+ },
+ },
+ wantErr: field.ErrorList{
+ &field.Error{
+ Type: field.ErrorTypeInvalid,
+ Field: "waitForPodsReady.requeuingStrategy.backoffBaseSeconds",
+ },
+ },
+ },
}
for name, tc := range testCases {
diff --git a/pkg/controller/core/core.go b/pkg/controller/core/core.go
index 94beae3232..4f1ac3f5b4 100644
--- a/pkg/controller/core/core.go
+++ b/pkg/controller/core/core.go
@@ -23,31 +23,17 @@ import (
configapi "sigs.k8s.io/kueue/apis/config/v1beta1"
"sigs.k8s.io/kueue/pkg/cache"
- "sigs.k8s.io/kueue/pkg/config"
"sigs.k8s.io/kueue/pkg/constants"
"sigs.k8s.io/kueue/pkg/queue"
)
const (
- updateChBuffer = 10
- defaultRequeuingBaseDelaySeconds = 10
+ updateChBuffer = 10
)
-type ControllerOptions struct {
- requeuingBaseDelaySeconds int32
-}
-
-type ControllerOption func(*ControllerOptions)
-
-func WithControllerRequeuingBaseDelaySeconds(value int32) ControllerOption {
- return func(o *ControllerOptions) {
- o.requeuingBaseDelaySeconds = value
- }
-}
-
// SetupControllers sets up the core controllers. It returns the name of the
// controller that failed to create and an error, if any.
-func SetupControllers(mgr ctrl.Manager, qManager *queue.Manager, cc *cache.Cache, cfg *configapi.Configuration, controllerOpts ...ControllerOption) (string, error) {
+func SetupControllers(mgr ctrl.Manager, qManager *queue.Manager, cc *cache.Cache, cfg *configapi.Configuration) (string, error) {
rfRec := NewResourceFlavorReconciler(mgr.GetClient(), qManager, cc)
if err := rfRec.SetupWithManager(mgr, cfg); err != nil {
return "ResourceFlavor", err
@@ -78,37 +64,29 @@ func SetupControllers(mgr ctrl.Manager, qManager *queue.Manager, cc *cache.Cache
if err := cqRec.SetupWithManager(mgr, cfg); err != nil {
return "ClusterQueue", err
}
- ctrlOpts := ControllerOptions{
- requeuingBaseDelaySeconds: defaultRequeuingBaseDelaySeconds,
- }
- for _, opt := range controllerOpts {
- opt(&ctrlOpts)
- }
if err := NewWorkloadReconciler(mgr.GetClient(), qManager, cc,
mgr.GetEventRecorderFor(constants.WorkloadControllerName),
WithWorkloadUpdateWatchers(qRec, cqRec),
- WithPodsReadyTimeout(podsReadyTimeout(cfg)),
- WithRequeuingBackoffLimitCount(requeuingBackoffLimitCount(cfg)),
- WithRequeuingBaseDelaySeconds(ctrlOpts.requeuingBaseDelaySeconds),
+ WithWaitForPodsReady(waitForPodsReady(cfg.WaitForPodsReady)),
).SetupWithManager(mgr, cfg); err != nil {
return "Workload", err
}
return "", nil
}
-func podsReadyTimeout(cfg *configapi.Configuration) *time.Duration {
- if config.WaitForPodsReadyIsEnabled(cfg) && cfg.WaitForPodsReady.Timeout != nil {
- return &cfg.WaitForPodsReady.Timeout.Duration
+func waitForPodsReady(cfg *configapi.WaitForPodsReady) *waitForPodsReadyConfig {
+ if cfg == nil || !cfg.Enable {
+ return nil
}
- return nil
-}
-
-func requeuingBackoffLimitCount(cfg *configapi.Configuration) *int32 {
- if config.WaitForPodsReadyIsEnabled(cfg) && cfg.WaitForPodsReady.RequeuingStrategy != nil {
- return cfg.WaitForPodsReady.RequeuingStrategy.BackoffLimitCount
+ result := waitForPodsReadyConfig{
+ timeout: cfg.Timeout.Duration,
+ }
+ if cfg.RequeuingStrategy != nil {
+ result.requeuingBackoffBaseSeconds = *cfg.RequeuingStrategy.BackoffBaseSeconds
+ result.requeuingBackoffLimitCount = cfg.RequeuingStrategy.BackoffLimitCount
}
- return nil
+ return &result
}
func queueVisibilityUpdateInterval(cfg *configapi.Configuration) time.Duration {
diff --git a/pkg/controller/core/workload_controller.go b/pkg/controller/core/workload_controller.go
index 19f97cb456..f67b0b8dae 100644
--- a/pkg/controller/core/workload_controller.go
+++ b/pkg/controller/core/workload_controller.go
@@ -69,37 +69,24 @@ var (
realClock = clock.RealClock{}
)
+type waitForPodsReadyConfig struct {
+ timeout time.Duration
+ requeuingBackoffLimitCount *int32
+ requeuingBackoffBaseSeconds int32
+}
+
type options struct {
- watchers []WorkloadUpdateWatcher
- podsReadyTimeout *time.Duration
- requeuingBackoffLimitCount *int32
- requeuingBaseDelaySeconds int32
+ watchers []WorkloadUpdateWatcher
+ waitForPodsReadyConfig *waitForPodsReadyConfig
}
// Option configures the reconciler.
type Option func(*options)
-// WithPodsReadyTimeout indicates if the controller should interrupt startup
-// of a workload if it exceeds the timeout to reach the PodsReady=True condition.
-func WithPodsReadyTimeout(value *time.Duration) Option {
- return func(o *options) {
- o.podsReadyTimeout = value
- }
-}
-
-// WithRequeuingBackoffLimitCount indicates if the controller should deactivate a workload
-// if it reaches the limitation.
-func WithRequeuingBackoffLimitCount(value *int32) Option {
- return func(o *options) {
- o.requeuingBackoffLimitCount = value
- }
-}
-
-// WithRequeuingBaseDelaySeconds indicates the base delay for the computation
-// of the requeue delay.
-func WithRequeuingBaseDelaySeconds(value int32) Option {
+// WithWaitForPodsReady indicates the configuration for the WaitForPodsReady feature.
+func WithWaitForPodsReady(value *waitForPodsReadyConfig) Option {
return func(o *options) {
- o.requeuingBaseDelaySeconds = value
+ o.waitForPodsReadyConfig = value
}
}
@@ -118,15 +105,13 @@ type WorkloadUpdateWatcher interface {
// WorkloadReconciler reconciles a Workload object
type WorkloadReconciler struct {
- log logr.Logger
- queues *queue.Manager
- cache *cache.Cache
- client client.Client
- watchers []WorkloadUpdateWatcher
- podsReadyTimeout *time.Duration
- requeuingBackoffLimitCount *int32
- requeuingBaseDelaySeconds int32
- recorder record.EventRecorder
+ log logr.Logger
+ queues *queue.Manager
+ cache *cache.Cache
+ client client.Client
+ watchers []WorkloadUpdateWatcher
+ waitForPodsReady *waitForPodsReadyConfig
+ recorder record.EventRecorder
}
func NewWorkloadReconciler(client client.Client, queues *queue.Manager, cache *cache.Cache, recorder record.EventRecorder, opts ...Option) *WorkloadReconciler {
@@ -136,15 +121,13 @@ func NewWorkloadReconciler(client client.Client, queues *queue.Manager, cache *c
}
return &WorkloadReconciler{
- log: ctrl.Log.WithName("workload-reconciler"),
- client: client,
- queues: queues,
- cache: cache,
- watchers: options.watchers,
- podsReadyTimeout: options.podsReadyTimeout,
- requeuingBackoffLimitCount: options.requeuingBackoffLimitCount,
- requeuingBaseDelaySeconds: options.requeuingBaseDelaySeconds,
- recorder: recorder,
+ log: ctrl.Log.WithName("workload-reconciler"),
+ client: client,
+ queues: queues,
+ cache: cache,
+ watchers: options.watchers,
+ waitForPodsReady: options.waitForPodsReadyConfig,
+ recorder: recorder,
}
}
@@ -396,7 +379,7 @@ func (r *WorkloadReconciler) triggerDeactivationOrBackoffRequeue(ctx context.Con
}
// If requeuingBackoffLimitCount is null, the workload is repeatedly and endlessly re-queued.
requeuingCount := ptr.Deref(wl.Status.RequeueState.Count, 0) + 1
- if r.requeuingBackoffLimitCount != nil && requeuingCount > *r.requeuingBackoffLimitCount {
+ if r.waitForPodsReady.requeuingBackoffLimitCount != nil && requeuingCount > *r.waitForPodsReady.requeuingBackoffLimitCount {
wl.Spec.Active = ptr.To(false)
if err := r.client.Update(ctx, wl); err != nil {
return false, err
@@ -411,7 +394,7 @@ func (r *WorkloadReconciler) triggerDeactivationOrBackoffRequeue(ctx context.Con
// During this time, the workload is taken as an inadmissible and other
// workloads will have a chance to be admitted.
backoff := &wait.Backoff{
- Duration: time.Duration(r.requeuingBaseDelaySeconds) * time.Second,
+ Duration: time.Duration(r.waitForPodsReady.requeuingBackoffBaseSeconds) * time.Second,
Factor: 2,
Jitter: 0.0001,
Steps: int(requeuingCount),
@@ -645,7 +628,7 @@ func (r *WorkloadReconciler) SetupWithManager(mgr ctrl.Manager, cfg *config.Conf
// specified timeout counted since max of the LastTransitionTime's for the
// Admitted and PodsReady conditions.
func (r *WorkloadReconciler) admittedNotReadyWorkload(wl *kueue.Workload, clock clock.Clock) (bool, time.Duration) {
- if r.podsReadyTimeout == nil {
+ if r.waitForPodsReady == nil {
// the timeout is not configured for the workload controller
return false, 0
}
@@ -663,7 +646,7 @@ func (r *WorkloadReconciler) admittedNotReadyWorkload(wl *kueue.Workload, clock
if podsReadyCond != nil && podsReadyCond.Status == metav1.ConditionFalse && podsReadyCond.LastTransitionTime.After(admittedCond.LastTransitionTime.Time) {
elapsedTime = clock.Since(podsReadyCond.LastTransitionTime.Time)
}
- waitFor := *r.podsReadyTimeout - elapsedTime
+ waitFor := r.waitForPodsReady.timeout - elapsedTime
if waitFor < 0 {
waitFor = 0
}
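The `RequeueAt` computation above works by stepping a `wait.Backoff` once per requeue and keeping the last returned duration, which is roughly `base*2^(count-1)`. Below is a standalone sketch of just that computation; the helper name `nthBackoff` is ours and not part of the patch:

```go
package main

import (
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

// nthBackoff mirrors the controller's loop: step a wait.Backoff `count` times
// and keep the last value, which is approximately base*2^(count-1).
func nthBackoff(baseSeconds int32, jitter float64, count int32) time.Duration {
	backoff := wait.Backoff{
		Duration: time.Duration(baseSeconds) * time.Second,
		Factor:   2,
		Jitter:   jitter,
		Steps:    int(count),
	}
	var waitDuration time.Duration
	for backoff.Steps > 0 {
		waitDuration = backoff.Step()
	}
	return waitDuration
}

func main() {
	// With jitter 0 the delays are exactly 10s, 20s, 40s, ... for the defaults.
	for count := int32(1); count <= 4; count++ {
		fmt.Printf("requeueState.count=%d -> wait %v\n", count, nthBackoff(10, 0, count))
	}
}
```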
diff --git a/pkg/controller/core/workload_controller_test.go b/pkg/controller/core/workload_controller_test.go
index 8e16f77cd4..6df7f8edde 100644
--- a/pkg/controller/core/workload_controller_test.go
+++ b/pkg/controller/core/workload_controller_test.go
@@ -48,7 +48,7 @@ func TestAdmittedNotReadyWorkload(t *testing.T) {
testCases := map[string]struct {
workload kueue.Workload
- podsReadyTimeout *time.Duration
+ waitForPodsReady *waitForPodsReadyConfig
wantCountingTowardsTimeout bool
wantRecheckAfter time.Duration
}{
@@ -68,7 +68,7 @@ func TestAdmittedNotReadyWorkload(t *testing.T) {
},
},
},
- podsReadyTimeout: ptr.To(5 * time.Minute),
+ waitForPodsReady: &waitForPodsReadyConfig{timeout: 5 * time.Minute},
wantCountingTowardsTimeout: true,
wantRecheckAfter: 4 * time.Minute,
},
@@ -99,7 +99,7 @@ func TestAdmittedNotReadyWorkload(t *testing.T) {
},
},
},
- podsReadyTimeout: ptr.To(5 * time.Minute),
+ waitForPodsReady: &waitForPodsReadyConfig{timeout: 5 * time.Minute},
wantCountingTowardsTimeout: true,
},
"workload with Admitted=True, PodsReady=False; counting since PodsReady.LastTransitionTime": {
@@ -120,7 +120,7 @@ func TestAdmittedNotReadyWorkload(t *testing.T) {
},
},
},
- podsReadyTimeout: ptr.To(5 * time.Minute),
+ waitForPodsReady: &waitForPodsReadyConfig{timeout: 5 * time.Minute},
wantCountingTowardsTimeout: true,
wantRecheckAfter: 5 * time.Minute,
},
@@ -137,7 +137,7 @@ func TestAdmittedNotReadyWorkload(t *testing.T) {
},
},
},
- podsReadyTimeout: ptr.To(5 * time.Minute),
+ waitForPodsReady: &waitForPodsReadyConfig{timeout: 5 * time.Minute},
},
"workload with Admitted=False, not counting": {
workload: kueue.Workload{
@@ -152,7 +152,7 @@ func TestAdmittedNotReadyWorkload(t *testing.T) {
},
},
},
- podsReadyTimeout: ptr.To(5 * time.Minute),
+ waitForPodsReady: &waitForPodsReadyConfig{timeout: 5 * time.Minute},
},
"workload with Admitted=True, PodsReady=True; not counting": {
workload: kueue.Workload{
@@ -172,13 +172,13 @@ func TestAdmittedNotReadyWorkload(t *testing.T) {
},
},
},
- podsReadyTimeout: ptr.To(5 * time.Minute),
+ waitForPodsReady: &waitForPodsReadyConfig{timeout: 5 * time.Minute},
},
}
for name, tc := range testCases {
t.Run(name, func(t *testing.T) {
- wRec := WorkloadReconciler{podsReadyTimeout: tc.podsReadyTimeout}
+ wRec := WorkloadReconciler{waitForPodsReady: tc.waitForPodsReady}
countingTowardsTimeout, recheckAfter := wRec.admittedNotReadyWorkload(&tc.workload, fakeClock)
if tc.wantCountingTowardsTimeout != countingTowardsTimeout {
@@ -506,9 +506,11 @@ func TestReconcile(t *testing.T) {
},
"increment re-queue count": {
reconcilerOpts: []Option{
- WithPodsReadyTimeout(ptr.To(3 * time.Second)),
- WithRequeuingBackoffLimitCount(ptr.To[int32](100)),
- WithRequeuingBaseDelaySeconds(10),
+ WithWaitForPodsReady(&waitForPodsReadyConfig{
+ timeout: 3 * time.Second,
+ requeuingBackoffLimitCount: ptr.To[int32](100),
+ requeuingBackoffBaseSeconds: 10,
+ }),
},
workload: utiltesting.MakeWorkload("wl", "ns").
ReserveQuota(utiltesting.MakeAdmission("q1").Obj()).
@@ -548,8 +550,10 @@ func TestReconcile(t *testing.T) {
},
"deactivated workload": {
reconcilerOpts: []Option{
- WithPodsReadyTimeout(ptr.To(3 * time.Second)),
- WithRequeuingBackoffLimitCount(ptr.To[int32](1)),
+ WithWaitForPodsReady(&waitForPodsReadyConfig{
+ timeout: 3 * time.Second,
+ requeuingBackoffLimitCount: ptr.To[int32](1),
+ }),
},
workload: utiltesting.MakeWorkload("wl", "ns").
ReserveQuota(utiltesting.MakeAdmission("q1").Obj()).
diff --git a/site/content/en/docs/reference/kueue-config.v1beta1.md b/site/content/en/docs/reference/kueue-config.v1beta1.md
index 55fb12778a..040c2fecf9 100644
--- a/site/content/en/docs/reference/kueue-config.v1beta1.md
+++ b/site/content/en/docs/reference/kueue-config.v1beta1.md
@@ -637,8 +637,9 @@ that was evicted due to Pod readiness. The possible values are:
BackoffLimitCount defines the maximum number of re-queuing retries.
Once the number is reached, the workload is deactivated (.spec.activate=false).
When it is null, the workloads will be repeatedly and endlessly re-queued.
-Every backoff duration is about "10s*2^(n-1)+Rand" where:
+Every backoff duration is about "b*2^(n-1)+Rand" where:
+"b" represents the base set by "BackoffBaseSeconds" parameter,
"n" represents the "workloadStatus.requeueState.count",
"Rand" represents the random jitter.
During this time, the workload is taken as an inadmissible and
@@ -648,6 +649,15 @@ By default, the consecutive requeue delays are around: (10s, 20s, 40s, ...).
Defaults to null.
+backoffBaseSeconds
+int32
+
+BackoffBaseSeconds defines the base for the exponential backoff for
+re-queuing an evicted workload.
+
+Defaults to 10.
+
diff --git a/test/integration/scheduler/podsready/suite_test.go b/test/integration/scheduler/podsready/suite_test.go
index bc2daabd4b..45dd9a5c07 100644
--- a/test/integration/scheduler/podsready/suite_test.go
+++ b/test/integration/scheduler/podsready/suite_test.go
@@ -71,8 +71,9 @@ func managerAndSchedulerSetupWithTimeoutAdmission(
BlockAdmission: &blockAdmission,
Timeout: &metav1.Duration{Duration: value},
RequeuingStrategy: &config.RequeuingStrategy{
- Timestamp: ptr.To(requeuingTimestamp),
- BackoffLimitCount: requeuingBackoffLimitCount,
+ Timestamp: ptr.To(requeuingTimestamp),
+ BackoffLimitCount: requeuingBackoffLimitCount,
+ BackoffBaseSeconds: ptr.To[int32](1),
},
},
}
@@ -87,8 +88,7 @@ func managerAndSchedulerSetupWithTimeoutAdmission(
queue.WithPodsReadyRequeuingTimestamp(requeuingTimestamp),
)
- failedCtrl, err := core.SetupControllers(mgr, queues, cCache, cfg,
- core.WithControllerRequeuingBaseDelaySeconds(1))
+ failedCtrl, err := core.SetupControllers(mgr, queues, cCache, cfg)
gomega.Expect(err).ToNot(gomega.HaveOccurred(), "controller", failedCtrl)
failedWebhook, err := webhooks.Setup(mgr)
From a0290b3f67770b34b6169bcd10e74e07ad831945 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Wo=C5=BAniak?=
Date: Wed, 24 Apr 2024 12:01:10 +0200
Subject: [PATCH 22/49] Use clock in workload controller consistently (#2044)
# Conflicts:
# pkg/controller/core/workload_controller.go
# pkg/controller/core/workload_controller_test.go
---
pkg/controller/core/core.go | 1 +
pkg/controller/core/workload_controller.go | 15 +++++++++------
pkg/controller/core/workload_controller_test.go | 14 +++++++-------
3 files changed, 17 insertions(+), 13 deletions(-)
diff --git a/pkg/controller/core/core.go b/pkg/controller/core/core.go
index 4f1ac3f5b4..15096169bb 100644
--- a/pkg/controller/core/core.go
+++ b/pkg/controller/core/core.go
@@ -85,6 +85,7 @@ func waitForPodsReady(cfg *configapi.WaitForPodsReady) *waitForPodsReadyConfig {
if cfg.RequeuingStrategy != nil {
result.requeuingBackoffBaseSeconds = *cfg.RequeuingStrategy.BackoffBaseSeconds
result.requeuingBackoffLimitCount = cfg.RequeuingStrategy.BackoffLimitCount
+ result.requeuingBackoffJitter = 0.0001
}
return &result
}
diff --git a/pkg/controller/core/workload_controller.go b/pkg/controller/core/workload_controller.go
index f67b0b8dae..49c784a184 100644
--- a/pkg/controller/core/workload_controller.go
+++ b/pkg/controller/core/workload_controller.go
@@ -73,6 +73,7 @@ type waitForPodsReadyConfig struct {
timeout time.Duration
requeuingBackoffLimitCount *int32
requeuingBackoffBaseSeconds int32
+ requeuingBackoffJitter float64
}
type options struct {
@@ -112,6 +113,7 @@ type WorkloadReconciler struct {
watchers []WorkloadUpdateWatcher
waitForPodsReady *waitForPodsReadyConfig
recorder record.EventRecorder
+ clock clock.Clock
}
func NewWorkloadReconciler(client client.Client, queues *queue.Manager, cache *cache.Cache, recorder record.EventRecorder, opts ...Option) *WorkloadReconciler {
@@ -128,6 +130,7 @@ func NewWorkloadReconciler(client client.Client, queues *queue.Manager, cache *c
watchers: options.watchers,
waitForPodsReady: options.waitForPodsReadyConfig,
recorder: recorder,
+ clock: realClock,
}
}
@@ -352,7 +355,7 @@ func (r *WorkloadReconciler) reconcileNotReadyTimeout(ctx context.Context, req c
// the workload has already been evicted by the PodsReadyTimeout or been deactivated.
return ctrl.Result{}, nil
}
- countingTowardsTimeout, recheckAfter := r.admittedNotReadyWorkload(wl, realClock)
+ countingTowardsTimeout, recheckAfter := r.admittedNotReadyWorkload(wl)
if !countingTowardsTimeout {
return ctrl.Result{}, nil
}
@@ -396,14 +399,14 @@ func (r *WorkloadReconciler) triggerDeactivationOrBackoffRequeue(ctx context.Con
backoff := &wait.Backoff{
Duration: time.Duration(r.waitForPodsReady.requeuingBackoffBaseSeconds) * time.Second,
Factor: 2,
- Jitter: 0.0001,
+ Jitter: r.waitForPodsReady.requeuingBackoffJitter,
Steps: int(requeuingCount),
}
var waitDuration time.Duration
for backoff.Steps > 0 {
waitDuration = backoff.Step()
}
- wl.Status.RequeueState.RequeueAt = ptr.To(metav1.NewTime(time.Now().Add(waitDuration)))
+ wl.Status.RequeueState.RequeueAt = ptr.To(metav1.NewTime(r.clock.Now().Add(waitDuration)))
wl.Status.RequeueState.Count = &requeuingCount
return false, nil
}
@@ -627,7 +630,7 @@ func (r *WorkloadReconciler) SetupWithManager(mgr ctrl.Manager, cfg *config.Conf
// True (False or not set). The second value is the remaining time to exceed the
// specified timeout counted since max of the LastTransitionTime's for the
// Admitted and PodsReady conditions.
-func (r *WorkloadReconciler) admittedNotReadyWorkload(wl *kueue.Workload, clock clock.Clock) (bool, time.Duration) {
+func (r *WorkloadReconciler) admittedNotReadyWorkload(wl *kueue.Workload) (bool, time.Duration) {
if r.waitForPodsReady == nil {
// the timeout is not configured for the workload controller
return false, 0
@@ -642,9 +645,9 @@ func (r *WorkloadReconciler) admittedNotReadyWorkload(wl *kueue.Workload, clock
return false, 0
}
admittedCond := apimeta.FindStatusCondition(wl.Status.Conditions, kueue.WorkloadAdmitted)
- elapsedTime := clock.Since(admittedCond.LastTransitionTime.Time)
+ elapsedTime := r.clock.Since(admittedCond.LastTransitionTime.Time)
if podsReadyCond != nil && podsReadyCond.Status == metav1.ConditionFalse && podsReadyCond.LastTransitionTime.After(admittedCond.LastTransitionTime.Time) {
- elapsedTime = clock.Since(podsReadyCond.LastTransitionTime.Time)
+ elapsedTime = r.clock.Since(podsReadyCond.LastTransitionTime.Time)
}
waitFor := r.waitForPodsReady.timeout - elapsedTime
if waitFor < 0 {
diff --git a/pkg/controller/core/workload_controller_test.go b/pkg/controller/core/workload_controller_test.go
index 6df7f8edde..e776911b78 100644
--- a/pkg/controller/core/workload_controller_test.go
+++ b/pkg/controller/core/workload_controller_test.go
@@ -178,8 +178,8 @@ func TestAdmittedNotReadyWorkload(t *testing.T) {
for name, tc := range testCases {
t.Run(name, func(t *testing.T) {
- wRec := WorkloadReconciler{waitForPodsReady: tc.waitForPodsReady}
- countingTowardsTimeout, recheckAfter := wRec.admittedNotReadyWorkload(&tc.workload, fakeClock)
+ wRec := WorkloadReconciler{waitForPodsReady: tc.waitForPodsReady, clock: fakeClock}
+ countingTowardsTimeout, recheckAfter := wRec.admittedNotReadyWorkload(&tc.workload)
if tc.wantCountingTowardsTimeout != countingTowardsTimeout {
t.Errorf("Unexpected countingTowardsTimeout, want=%v, got=%v", tc.wantCountingTowardsTimeout, countingTowardsTimeout)
@@ -510,6 +510,7 @@ func TestReconcile(t *testing.T) {
timeout: 3 * time.Second,
requeuingBackoffLimitCount: ptr.To[int32](100),
requeuingBackoffBaseSeconds: 10,
+ requeuingBackoffJitter: 0,
}),
},
workload: utiltesting.MakeWorkload("wl", "ns").
@@ -553,6 +554,7 @@ func TestReconcile(t *testing.T) {
WithWaitForPodsReady(&waitForPodsReadyConfig{
timeout: 3 * time.Second,
requeuingBackoffLimitCount: ptr.To[int32](1),
+ requeuingBackoffJitter: 0,
}),
},
workload: utiltesting.MakeWorkload("wl", "ns").
@@ -599,6 +601,8 @@ func TestReconcile(t *testing.T) {
cqCache := cache.New(cl)
qManager := queue.NewManager(cl, cqCache)
reconciler := NewWorkloadReconciler(cl, qManager, cqCache, recorder, tc.reconcilerOpts...)
+ // use a fake clock with jitter = 0 to be able to assert on the requeueAt.
+ reconciler.clock = testingclock.NewFakeClock(testStartTime)
ctxWithLogger, _ := utiltesting.ContextWithLog(t)
ctx, ctxCancel := context.WithCancel(ctxWithLogger)
@@ -638,11 +642,7 @@ func TestReconcile(t *testing.T) {
if requeueState := tc.wantWorkload.Status.RequeueState; requeueState != nil && requeueState.RequeueAt != nil {
gotRequeueState := gotWorkload.Status.RequeueState
if gotRequeueState != nil && gotRequeueState.RequeueAt != nil {
- // We verify the got requeueAt if the got requeueAt is after the desired requeueAt
- // since the requeueAt is included in positive seconds of random jitter.
- // Additionally, we need to verify the requeueAt by "Equal" function
- // as the "After" function evaluates the nanoseconds despite the metav1.Time is seconds level precision.
- if !gotRequeueState.RequeueAt.After(requeueState.RequeueAt.Time) && !gotRequeueState.RequeueAt.Equal(requeueState.RequeueAt) {
+ if !gotRequeueState.RequeueAt.Equal(requeueState.RequeueAt) {
t.Errorf("Unexpected requeueState.requeueAt; gotRequeueAt %v needs to be after requeueAt %v", requeueState.RequeueAt, gotRequeueState.RequeueAt)
}
} else {
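The refactor above follows a common testing pattern: store a `clock.Clock` on the struct, default it to `clock.RealClock{}`, and swap in a fake clock in tests so time-based assertions become deterministic. A minimal sketch of that pattern outside the reconciler; the `deadliner` type is purely illustrative:

```go
package main

import (
	"fmt"
	"time"

	"k8s.io/utils/clock"
	testingclock "k8s.io/utils/clock/testing"
)

// deadliner reports whether a timeout has elapsed since `since`,
// using an injected clock instead of calling time.Now directly.
type deadliner struct {
	timeout time.Duration
	clock   clock.Clock
}

func (d *deadliner) expired(since time.Time) bool {
	return d.clock.Since(since) >= d.timeout
}

func main() {
	start := time.Now()

	// Production wiring: the real clock.
	prod := deadliner{timeout: 5 * time.Minute, clock: clock.RealClock{}}
	fmt.Println("real clock expired:", prod.expired(start)) // false

	// Test wiring: a fake clock that the test advances explicitly.
	fake := testingclock.NewFakeClock(start)
	test := deadliner{timeout: 5 * time.Minute, clock: fake}
	fake.Step(6 * time.Minute)
	fmt.Println("fake clock expired:", test.expired(start)) // true
}
```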
From ae3e551b3e0aacf39a075eedad267be3e2e87e64 Mon Sep 17 00:00:00 2001
From: Ryo Tozawa
Date: Wed, 24 Apr 2024 20:58:09 +0900
Subject: [PATCH 23/49] add: custom annotations on service and deployment
(#2030)
Signed-off-by: tozastation
---
charts/kueue/templates/manager/auth_proxy_service.yaml | 4 ++++
charts/kueue/templates/manager/manager.yaml | 3 +++
charts/kueue/values.yaml | 2 ++
3 files changed, 9 insertions(+)
diff --git a/charts/kueue/templates/manager/auth_proxy_service.yaml b/charts/kueue/templates/manager/auth_proxy_service.yaml
index 21352fcc4f..02942cab0b 100644
--- a/charts/kueue/templates/manager/auth_proxy_service.yaml
+++ b/charts/kueue/templates/manager/auth_proxy_service.yaml
@@ -5,6 +5,10 @@ metadata:
namespace: '{{ .Release.Namespace }}'
labels:
{{- include "kueue.labels" . | nindent 4 }}
+ {{- if .Values.metricsService.annotations }}
+ annotations:
+ {{- toYaml .Values.metricsService.annotations | nindent 4 }}
+ {{- end }}
spec:
type: {{ .Values.metricsService.type }}
selector:
diff --git a/charts/kueue/templates/manager/manager.yaml b/charts/kueue/templates/manager/manager.yaml
index 4c2277cdaf..4eaa06c8bc 100644
--- a/charts/kueue/templates/manager/manager.yaml
+++ b/charts/kueue/templates/manager/manager.yaml
@@ -16,6 +16,9 @@ spec:
{{- include "kueue.selectorLabels" . | nindent 8 }}
annotations:
kubectl.kubernetes.io/default-container: manager
+ {{- if .Values.controllerManager.manager.podAnnotations }}
+ {{- toYaml .Values.controllerManager.manager.podAnnotations | nindent 8 }}
+ {{- end }}
spec:
containers:
- args:
diff --git a/charts/kueue/values.yaml b/charts/kueue/values.yaml
index e592129ba0..a382870214 100644
--- a/charts/kueue/values.yaml
+++ b/charts/kueue/values.yaml
@@ -24,6 +24,7 @@ controllerManager:
repository: gcr.io/k8s-staging-kueue/kueue
# This should be set to 'IfNotPresent' for released version
pullPolicy: Always
+ podAnnotations: {}
resources:
limits:
cpu: 500m
@@ -111,6 +112,7 @@ metricsService:
protocol: TCP
targetPort: https
type: ClusterIP
+ annotations: {}
webhookService:
ipDualStack:
enabled: false
From 2bff8c345c9b69cd0da6d4f97cbc0a15c4c6435c Mon Sep 17 00:00:00 2001
From: Mykhailo Bobrovskyi
Date: Wed, 24 Apr 2024 17:43:55 +0300
Subject: [PATCH 24/49] [multikueue] Increase timeout for "Waiting for the
cluster do become active" test. (#2049)
---
test/e2e/multikueue/e2e_test.go | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/test/e2e/multikueue/e2e_test.go b/test/e2e/multikueue/e2e_test.go
index c2a9c1c675..e24fc09f03 100644
--- a/test/e2e/multikueue/e2e_test.go
+++ b/test/e2e/multikueue/e2e_test.go
@@ -416,7 +416,7 @@ var _ = ginkgo.Describe("MultiKueue", func() {
Message: "Connected",
},
util.IgnoreConditionTimestampsAndObservedGeneration)))
- }, util.Timeout, util.Interval).Should(gomega.Succeed())
+ }, util.LongTimeout, util.Interval).Should(gomega.Succeed())
})
})
})
From 78ccc865b71960d14e45c8db299f8210443a2148 Mon Sep 17 00:00:00 2001
From: Traian Schiau <55734665+trasc@users.noreply.github.com>
Date: Wed, 24 Apr 2024 18:00:34 +0300
Subject: [PATCH 25/49] Scalability scrape (#2018)
* [scalability] Scrape metrics
* Review remarks.
* Review Remarks
* Review Remarks
---
Makefile | 12 +-
test/scalability/README.md | 18 +++
test/scalability/minimalkueue/main.go | 10 ++
test/scalability/runner/main.go | 66 +++++++++--
test/scalability/runner/scraper/scraper.go | 124 +++++++++++++++++++++
5 files changed, 216 insertions(+), 14 deletions(-)
create mode 100644 test/scalability/runner/scraper/scraper.go
diff --git a/Makefile b/Makefile
index 83ce334469..691c5c011f 100644
--- a/Makefile
+++ b/Makefile
@@ -230,6 +230,14 @@ ifdef SCALABILITY_KUEUE_LOGS
SCALABILITY_EXTRA_ARGS += --withLogs=true --logToFile=true
endif
+ifdef SCALABILITY_SCRAPE_INTERVAL
+SCALABILITY_SCRAPE_ARGS += --metricsScrapeInterval=$(SCALABILITY_SCRAPE_INTERVAL)
+endif
+
+ifdef SCALABILITY_SCRAPE_URL
+SCALABILITY_SCRAPE_ARGS += --metricsScrapeURL=$(SCALABILITY_SCRAPE_URL)
+endif
+
SCALABILITY_GENERATOR_CONFIG ?= $(PROJECT_DIR)/test/scalability/default_generator_config.yaml
SCALABILITY_RUN_DIR := $(ARTIFACTS)/run-scalability
@@ -241,7 +249,7 @@ run-scalability: envtest scalability-runner minimalkueue
--o $(SCALABILITY_RUN_DIR) \
--crds=$(PROJECT_DIR)/config/components/crd/bases \
--generatorConfig=$(SCALABILITY_GENERATOR_CONFIG) \
- --minimalKueue=$(ARTIFACTS)/minimalkueue $(SCALABILITY_EXTRA_ARGS)
+ --minimalKueue=$(ARTIFACTS)/minimalkueue $(SCALABILITY_EXTRA_ARGS) $(SCALABILITY_SCRAPE_ARGS)
.PHONY: test-scalability
test-scalability: gotestsum run-scalability
@@ -257,7 +265,7 @@ run-scalability-in-cluster: envtest scalability-runner
$(SCALABILITY_RUNNER) \
--o $(ARTIFACTS)/run-scalability-in-cluster \
--generatorConfig=$(SCALABILITY_GENERATOR_CONFIG) \
- --qps=1000 --burst=2000 --timeout=15m
+ --qps=1000 --burst=2000 --timeout=15m $(SCALABILITY_SCRAPE_ARGS)
.PHONY: ci-lint
ci-lint: golangci-lint
diff --git a/test/scalability/README.md b/test/scalability/README.md
index 492a7edc47..817d9a167e 100644
--- a/test/scalability/README.md
+++ b/test/scalability/README.md
@@ -37,6 +37,8 @@ Will run a scalability scenario against an existing cluster (connectable by the
The generation config to be used can be set in `SCALABILITY_GENERATOR_CONFIG` by default using `$(PROJECT_DIR)/test/scalability/default_generator_config.yaml`
+Setting `SCALABILITY_SCRAPE_INTERVAL` to an interval value and `SCALABILITY_SCRAPE_URL` to a URL exposing kueue's metrics will cause the scalability runner to scrape that URL every interval and store the results in `$(PROJECT_DIR)/bin/run-scalability-in-cluster/metricsDump.tgz`.
+
Check [installation guide](https://kueue.sigs.k8s.io/docs/installation) for cluster and [observability](https://kueue.sigs.k8s.io/docs/installation/#add-metrics-scraping-for-prometheus-operator).
## Run with minimalkueue
@@ -55,6 +57,8 @@ Setting `SCALABILITY_CPU_PROFILE=1` will generate a cpuprofile of minimalkueue i
Setting `SCALABILITY_KUEUE_LOGS=1` will save the logs of minimalkueue in `$(PROJECT_DIR)/bin/run-scalability/minimalkueue.out.log` and `$(PROJECT_DIR)/bin/run-scalability/minimalkueue.err.log`
+Setting `SCALABILITY_SCRAPE_INTERVAL` to an interval value (e.g. `1s`) will expose the metrics of `minimalkueue` and have them collected by the scalability runner in `$(PROJECT_DIR)/bin/run-scalability/metricsDump.tgz` every interval.
+
## Run scalability test
```bash
@@ -62,3 +66,17 @@ make test-scalability
```
Runs the scalability with minimalkueue and checks the results against `$(PROJECT_DIR)/test/scalability/default_rangespec.yaml`
+
+## Scrape result
+
+The scrape result `metricsDump.tgz` contains a set of `<ts>.prometheus` files, where `<ts>` is the millisecond representation of the epoch time at the moment each scrape was started and can be used during the import in a visualization tool.
+
+If an instance of [VictoriaMetrics](https://docs.victoriametrics.com/) listening at `http://localhost:8428` is used, a metrics dump can be imported like:
+
+```bash
+ TMPDIR=$(mktemp -d)
+ tar -xf ./bin/run-scalability/metricsDump.tgz -C $TMPDIR
+ for file in ${TMPDIR}/*.prometheus; do timestamp=$(basename "$file" .prometheus); curl -vX POST -T "$file" http://localhost:8428/api/v1/import/prometheus?timestamp="$timestamp"; done
+ rm -r $TMPDIR
+
+```
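Besides importing into VictoriaMetrics, the dump can be inspected directly with the standard library. Here is a small Go sketch, assuming the `<ts>.prometheus` entry naming produced by the scraper introduced below, that lists every scrape in the archive:

```go
package main

import (
	"archive/tar"
	"compress/gzip"
	"errors"
	"fmt"
	"io"
	"log"
	"os"
)

func main() {
	f, err := os.Open("bin/run-scalability/metricsDump.tgz")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	gz, err := gzip.NewReader(f)
	if err != nil {
		log.Fatal(err)
	}
	defer gz.Close()

	tr := tar.NewReader(gz)
	for {
		hdr, err := tr.Next()
		if errors.Is(err, io.EOF) {
			break
		}
		if err != nil {
			log.Fatal(err)
		}
		// Each entry is one scrape; the name is the epoch timestamp in milliseconds.
		fmt.Printf("%s: %d bytes of Prometheus text exposition\n", hdr.Name, hdr.Size)
	}
}
```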
diff --git a/test/scalability/minimalkueue/main.go b/test/scalability/minimalkueue/main.go
index 6beaebcc85..34ef89188a 100644
--- a/test/scalability/minimalkueue/main.go
+++ b/test/scalability/minimalkueue/main.go
@@ -19,6 +19,7 @@ package main
import (
"context"
"flag"
+ "fmt"
"os"
"os/signal"
"runtime/pprof"
@@ -41,12 +42,15 @@ import (
"sigs.k8s.io/kueue/pkg/constants"
"sigs.k8s.io/kueue/pkg/controller/core"
"sigs.k8s.io/kueue/pkg/controller/core/indexer"
+ "sigs.k8s.io/kueue/pkg/metrics"
"sigs.k8s.io/kueue/pkg/queue"
"sigs.k8s.io/kueue/pkg/scheduler"
)
var (
cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`")
+
+ metricsPort = flag.Int("metricsPort", 0, "metrics serving port")
)
var (
@@ -119,6 +123,12 @@ func mainWithExitCode() int {
BindAddress: "0",
},
}
+
+ if *metricsPort > 0 {
+ options.Metrics.BindAddress = fmt.Sprintf(":%d", *metricsPort)
+ metrics.Register()
+ }
+
mgr, err := ctrl.NewManager(kubeConfig, options)
if err != nil {
log.Error(err, "Unable to create manager")
diff --git a/test/scalability/runner/main.go b/test/scalability/runner/main.go
index b134845506..08fcf6c7dc 100644
--- a/test/scalability/runner/main.go
+++ b/test/scalability/runner/main.go
@@ -24,6 +24,7 @@ import (
"os/exec"
"os/signal"
"path"
+ "strconv"
"sync"
"syscall"
"time"
@@ -34,6 +35,7 @@ import (
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/rest"
+ "k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
crconfig "sigs.k8s.io/controller-runtime/pkg/config"
@@ -49,6 +51,7 @@ import (
"sigs.k8s.io/kueue/test/scalability/runner/controller"
"sigs.k8s.io/kueue/test/scalability/runner/generator"
"sigs.k8s.io/kueue/test/scalability/runner/recorder"
+ "sigs.k8s.io/kueue/test/scalability/runner/scraper"
"sigs.k8s.io/kueue/test/scalability/runner/stats"
)
@@ -60,6 +63,10 @@ var (
qps = flag.Float64("qps", 0, "qps used by the runner clients, use default if 0")
burst = flag.Int("burst", 0, "qps used by the runner clients, use default if 0")
+ // metrics scraping
+ metricsScrapeInterval = flag.Duration("metricsScrapeInterval", 0, "the duration between two metrics scraping, if 0 the metrics scraping is disabled")
+ metricsScrapeURL = flag.String("metricsScrapeURL", "", "the URL to scrape metrics from, ignored when minimal kueue is used")
+
// related to minimalkueue
minimalKueuePath = flag.String("minimalKueue", "", "path to minimalkueue, run in the hosts default cluster if empty")
withCpuProfile = flag.Bool("withCPUProfile", false, "generate a CPU profile for minimalkueue")
@@ -131,9 +138,17 @@ func main() {
os.Exit(1)
}
+ metricsPort := 0
+ if *metricsScrapeInterval != 0 {
+ metricsPort, err = scraper.GetFreePort()
+ if err != nil {
+ log.Error(err, "getting a free port, metrics scraping disabled")
+ }
+ metricsScrapeURL = ptr.To(fmt.Sprintf("http://localhost:%d/metrics", metricsPort))
+ }
+
// start the minimal kueue manager process
- wg.Add(1)
- err = runCommand(ctx, *outputDir, *minimalKueuePath, "kubeconfig", *withCpuProfile, *withLogs, *logToFile, *logLevel, errCh, wg)
+ err = runCommand(ctx, *outputDir, *minimalKueuePath, "kubeconfig", *withCpuProfile, *withLogs, *logToFile, *logLevel, errCh, wg, metricsPort)
if err != nil {
log.Error(err, "MinimalKueue start")
os.Exit(1)
@@ -156,24 +171,31 @@ func main() {
}
generationDoneCh := make(chan struct{})
- wg.Add(1)
err := runGenerator(ctx, cfg, *generatorConfig, errCh, wg, generationDoneCh)
if err != nil {
log.Error(err, "Generator start")
os.Exit(1)
}
- wg.Add(1)
recorder, err := startRecorder(ctx, errCh, wg, generationDoneCh, *timeout)
if err != nil {
log.Error(err, "Recorder start")
os.Exit(1)
}
- wg.Add(1)
+ if *metricsScrapeInterval != 0 && *metricsScrapeURL != "" {
+ dumpTar := path.Join(*outputDir, "metricsDump.tgz")
+ err := runScraper(ctx, *metricsScrapeInterval, dumpTar, *metricsScrapeURL, errCh, wg)
+ if err != nil {
+ log.Error(err, "Scraper start")
+ os.Exit(1)
+ }
+
+ }
+
err = runManager(ctx, cfg, errCh, wg, recorder)
if err != nil {
- log.Error(err, "manager start")
+ log.Error(err, "Failed to start manager")
os.Exit(1)
}
@@ -222,8 +244,7 @@ func main() {
}
}
-func runCommand(ctx context.Context, workDir, cmdPath, kubeconfig string, withCPUProf, withLogs, logToFile bool, logLevel int, errCh chan<- error, wg *sync.WaitGroup) error {
- defer wg.Done()
+func runCommand(ctx context.Context, workDir, cmdPath, kubeconfig string, withCPUProf, withLogs, logToFile bool, logLevel int, errCh chan<- error, wg *sync.WaitGroup, metricsPort int) error {
log := ctrl.LoggerFrom(ctx).WithName("Run command")
cmd := exec.CommandContext(ctx, cmdPath, "--kubeconfig", path.Join(workDir, kubeconfig))
@@ -260,6 +281,10 @@ func runCommand(ctx context.Context, workDir, cmdPath, kubeconfig string, withCP
cmd.Stderr = errWriter
}
+ if metricsPort != 0 {
+ cmd.Args = append(cmd.Args, "--metricsPort", strconv.Itoa(metricsPort))
+ }
+
log.Info("Starting process", "path", cmd.Path, "args", cmd.Args)
err := cmd.Start()
if err != nil {
@@ -305,8 +330,6 @@ func runCommand(ctx context.Context, workDir, cmdPath, kubeconfig string, withCP
}
func runGenerator(ctx context.Context, cfg *rest.Config, generatorConfig string, errCh chan<- error, wg *sync.WaitGroup, genDone chan<- struct{}) error {
- defer wg.Done()
-
log := ctrl.LoggerFrom(ctx).WithName("Run generator")
c, err := client.New(cfg, client.Options{Scheme: scheme})
if err != nil {
@@ -341,7 +364,6 @@ func runGenerator(ctx context.Context, cfg *rest.Config, generatorConfig string,
}
func startRecorder(ctx context.Context, errCh chan<- error, wg *sync.WaitGroup, genDone <-chan struct{}, recordTimeout time.Duration) (*recorder.Recorder, error) {
- defer wg.Done()
log := ctrl.LoggerFrom(ctx).WithName("Start recorder")
recorder := recorder.New(recordTimeout)
wg.Add(1)
@@ -361,7 +383,6 @@ func startRecorder(ctx context.Context, errCh chan<- error, wg *sync.WaitGroup,
}
func runManager(ctx context.Context, cfg *rest.Config, errCh chan<- error, wg *sync.WaitGroup, r *recorder.Recorder) error {
- defer wg.Done()
log := ctrl.LoggerFrom(ctx).WithName("Run manager")
options := ctrl.Options{
@@ -401,3 +422,24 @@ func runManager(ctx context.Context, cfg *rest.Config, errCh chan<- error, wg *s
log.Info("Manager started")
return nil
}
+
+func runScraper(ctx context.Context, interval time.Duration, output, url string, errCh chan<- error, wg *sync.WaitGroup) error {
+ log := ctrl.LoggerFrom(ctx).WithName("Run metrics scraper")
+
+ s := scraper.NewScraper(interval, url, "%d.prometheus")
+
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ err := s.Run(ctx, output)
+ if err != nil {
+ log.Error(err, "Running the scraper")
+ errCh <- err
+ return
+ }
+ log.Info("Scrape done")
+ }()
+
+ log.Info("Scrape started")
+ return nil
+}
diff --git a/test/scalability/runner/scraper/scraper.go b/test/scalability/runner/scraper/scraper.go
new file mode 100644
index 0000000000..298aa7e722
--- /dev/null
+++ b/test/scalability/runner/scraper/scraper.go
@@ -0,0 +1,124 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package scraper
+
+import (
+ "archive/tar"
+ "bytes"
+ "compress/gzip"
+ "context"
+ "errors"
+ "fmt"
+ "io"
+ "net"
+ "net/http"
+ "os"
+ "time"
+)
+
+func GetFreePort() (int, error) {
+ l, err := net.Listen("tcp", ":0")
+ if err != nil {
+ return 0, err
+ }
+ defer l.Close()
+ l.Close()
+ if taddr, isTcp := l.Addr().(*net.TCPAddr); isTcp {
+ return taddr.Port, nil
+ }
+ return 0, errors.New("cannot get a free tcp address")
+}
+
+type Scraper struct {
+ interval time.Duration
+ url string
+ fileNameFormat string
+ c http.Client
+}
+
+func NewScraper(interval time.Duration, url, fileNameFormat string) *Scraper {
+ return &Scraper{
+ interval: interval,
+ url: url,
+ fileNameFormat: fileNameFormat,
+ c: http.Client{},
+ }
+}
+
+func (s *Scraper) doScrape(ctx context.Context, tw *tar.Writer) error {
+ req, err := http.NewRequestWithContext(ctx, http.MethodGet, s.url, nil)
+ if err != nil {
+ return err
+ }
+
+ start := time.Now()
+
+ resp, err := s.c.Do(req)
+ if err != nil {
+ return err
+ }
+ defer resp.Body.Close()
+
+ tmp := bytes.NewBuffer(nil)
+ contentLen, err := io.Copy(tmp, resp.Body)
+ if err != nil {
+ return err
+ }
+
+ hdr := &tar.Header{
+ Name: fmt.Sprintf(s.fileNameFormat, start.UnixMilli()),
+ Size: contentLen,
+ Mode: 0666,
+ ModTime: start,
+ }
+
+ err = tw.WriteHeader(hdr)
+ if err != nil {
+ return err
+ }
+
+ _, err = io.Copy(tw, tmp)
+ return err
+}
+
+func (s *Scraper) Run(ctx context.Context, output string) error {
+ ticker := time.NewTicker(s.interval)
+ defer ticker.Stop()
+
+ out, err := os.Create(output)
+ if err != nil {
+ return err
+ }
+
+ defer out.Close()
+ gw := gzip.NewWriter(out)
+ defer gw.Close()
+ tw := tar.NewWriter(gw)
+ defer tw.Close()
+
+ for {
+ select {
+ case <-ctx.Done():
+ return nil
+ case <-ticker.C:
+ err := s.doScrape(ctx, tw)
+ if err != nil {
+ return err
+ }
+ }
+ }
+}
From 13750338549f986efda121583e6261afedca8cc6 Mon Sep 17 00:00:00 2001
From: jiangjiang <86391540+googs1025@users.noreply.github.com>
Date: Wed, 24 Apr 2024 23:33:15 +0800
Subject: [PATCH 26/49] docs: web url (#2055)
---
site/content/en/docs/tasks/run/jobsets.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/site/content/en/docs/tasks/run/jobsets.md b/site/content/en/docs/tasks/run/jobsets.md
index 5d57953578..f9ab64b9a0 100644
--- a/site/content/en/docs/tasks/run/jobsets.md
+++ b/site/content/en/docs/tasks/run/jobsets.md
@@ -7,7 +7,7 @@ description: >
Run a Kueue scheduled JobSet.
---
-This document explains how you can use Kueue’s scheduling and resource management functionality when running [JobSet Operator](https://github.com/kubernetes-sigs/jobset) [JobSets](https://github.com/kubernetes-sigs/jobset/blob/main/docs/concepts/README.md).
+This document explains how you can use Kueue’s scheduling and resource management functionality when running [JobSet Operator](https://github.com/kubernetes-sigs/jobset) [JobSet](https://jobset.sigs.k8s.io/docs/concepts/).
This guide is for [batch users](/docs/tasks#batch-user) that have a basic understanding of Kueue. For more information, see [Kueue's overview](/docs/overview).
From 9890f41dfbdaef632ffcc17cfb99e61ae00d9ca7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Wo=C5=BAniak?=
Date: Wed, 24 Apr 2024 17:59:57 +0200
Subject: [PATCH 27/49] Add a note on PodsReady timeout requeuing (#2053)
Co-authored-by: Yuki Iwai
---
.../en/docs/tasks/manage/setup_sequential_admission.md | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/site/content/en/docs/tasks/manage/setup_sequential_admission.md b/site/content/en/docs/tasks/manage/setup_sequential_admission.md
index b2d63cc24a..70cc1f3333 100644
--- a/site/content/en/docs/tasks/manage/setup_sequential_admission.md
+++ b/site/content/en/docs/tasks/manage/setup_sequential_admission.md
@@ -47,6 +47,7 @@ fields:
requeuingStrategy:
timestamp: Eviction | Creation
backoffLimitCount: 5
+ backoffBaseSeconds: 10
```
{{% alert title="Note" color="primary" %}}
@@ -93,6 +94,14 @@ If you don't specify any value for `backoffLimitCount`,
a Workload is repeatedly and endlessly re-queued to the queue based on the `timestamp`.
Once the number of re-queues reaches the limit, Kueue [deactivates the Workload](/docs/concepts/workload/#active).
+{{% alert title="Note" color="primary" %}}
+_The `backoffBaseSeconds` is available in Kueue v0.7.0 and later_
+{{% /alert %}}
+The time to re-queue a workload after each consecutive timeout is increased
+exponentially, with the exponent of 2. The first delay is determined by the
+`backoffBaseSeconds` parameter (defaulting to 10). So, after the consecutive timeouts
+the evicted workload is re-queued after approximately `10, 20, 40, ...` seconds.
+
## Example
In this example we demonstrate the impact of enabling `waitForPodsReady` in Kueue.
From 9ea94ac20f4a4b5546ab899c60f3627b23bd0a74 Mon Sep 17 00:00:00 2001
From: jiangjiang <86391540+googs1025@users.noreply.github.com>
Date: Thu, 25 Apr 2024 12:09:41 +0800
Subject: [PATCH 28/49] cleanup useless comments, and fix receiver names are
different (#2060)
---
.../admissionchecks/multikueue/workload.go | 40 +++++++++----------
pkg/controller/jobframework/reconciler.go | 2 +-
pkg/queue/cluster_queue.go | 10 ++---
pkg/util/testingjobs/jobset/wrappers.go | 3 +-
pkg/util/testingjobs/mxjob/wrappers.go | 2 +-
pkg/util/testingjobs/pod/wrappers.go | 2 +-
pkg/util/testingjobs/raycluster/wrappers.go | 2 +-
7 files changed, 31 insertions(+), 30 deletions(-)
diff --git a/pkg/controller/admissionchecks/multikueue/workload.go b/pkg/controller/admissionchecks/multikueue/workload.go
index 35caa886ec..dd2d4f0f64 100644
--- a/pkg/controller/admissionchecks/multikueue/workload.go
+++ b/pkg/controller/admissionchecks/multikueue/workload.go
@@ -152,11 +152,11 @@ func (g *wlGroup) RemoveRemoteObjects(ctx context.Context, cluster string) error
return nil
}
-func (a *wlReconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) {
+func (w *wlReconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) {
log := ctrl.LoggerFrom(ctx)
log.V(2).Info("Reconcile Workload")
wl := &kueue.Workload{}
- if err := a.client.Get(ctx, req.NamespacedName, wl); err != nil {
+ if err := w.client.Get(ctx, req.NamespacedName, wl); err != nil {
return reconcile.Result{}, client.IgnoreNotFound(err)
}
// NOTE: the not found needs to be treated and should result in the deletion of all the remote workloads.
@@ -164,7 +164,7 @@ func (a *wlReconciler) Reconcile(ctx context.Context, req reconcile.Request) (re
// 1. use a finalizer
// 2. try to trigger the remote deletion from an event filter.
- mkAc, err := a.multikueueAC(ctx, wl)
+ mkAc, err := w.multikueueAC(ctx, wl)
if err != nil {
return reconcile.Result{}, err
}
@@ -174,7 +174,7 @@ func (a *wlReconciler) Reconcile(ctx context.Context, req reconcile.Request) (re
return reconcile.Result{}, nil
}
- adapter, owner := a.adapter(wl)
+ adapter, owner := w.adapter(wl)
if adapter == nil {
// Reject the workload since there is no chance for it to run.
var rejectionMessage string
@@ -183,24 +183,24 @@ func (a *wlReconciler) Reconcile(ctx context.Context, req reconcile.Request) (re
} else {
rejectionMessage = "No multikueue adapter found"
}
- return reconcile.Result{}, a.updateACS(ctx, wl, mkAc, kueue.CheckStateRejected, rejectionMessage)
+ return reconcile.Result{}, w.updateACS(ctx, wl, mkAc, kueue.CheckStateRejected, rejectionMessage)
}
- managed, unmanagedReason, err := adapter.IsJobManagedByKueue(ctx, a.client, types.NamespacedName{Name: owner.Name, Namespace: wl.Namespace})
+ managed, unmanagedReason, err := adapter.IsJobManagedByKueue(ctx, w.client, types.NamespacedName{Name: owner.Name, Namespace: wl.Namespace})
if err != nil {
return reconcile.Result{}, err
}
if !managed {
- return reconcile.Result{}, a.updateACS(ctx, wl, mkAc, kueue.CheckStateRejected, fmt.Sprintf("The owner is not managed by Kueue: %s", unmanagedReason))
+ return reconcile.Result{}, w.updateACS(ctx, wl, mkAc, kueue.CheckStateRejected, fmt.Sprintf("The owner is not managed by Kueue: %s", unmanagedReason))
}
- grp, err := a.readGroup(ctx, wl, mkAc.Name, adapter, owner.Name)
+ grp, err := w.readGroup(ctx, wl, mkAc.Name, adapter, owner.Name)
if err != nil {
return reconcile.Result{}, err
}
- return a.reconcileGroup(ctx, grp)
+ return w.reconcileGroup(ctx, grp)
}
func (w *wlReconciler) updateACS(ctx context.Context, wl *kueue.Workload, acs *kueue.AdmissionCheckState, status kueue.CheckState, message string) error {
@@ -252,8 +252,8 @@ func (w *wlReconciler) adapter(local *kueue.Workload) (jobAdapter, *metav1.Owner
return nil, nil
}
-func (a *wlReconciler) readGroup(ctx context.Context, local *kueue.Workload, acName string, adapter jobAdapter, controllerName string) (*wlGroup, error) {
- rClients, err := a.remoteClientsForAC(ctx, acName)
+func (w *wlReconciler) readGroup(ctx context.Context, local *kueue.Workload, acName string, adapter jobAdapter, controllerName string) (*wlGroup, error) {
+ rClients, err := w.remoteClientsForAC(ctx, acName)
if err != nil {
return nil, fmt.Errorf("admission check %q: %w", acName, err)
}
@@ -281,7 +281,7 @@ func (a *wlReconciler) readGroup(ctx context.Context, local *kueue.Workload, acN
return &grp, nil
}
-func (a *wlReconciler) reconcileGroup(ctx context.Context, group *wlGroup) (reconcile.Result, error) {
+func (w *wlReconciler) reconcileGroup(ctx context.Context, group *wlGroup) (reconcile.Result, error) {
log := ctrl.LoggerFrom(ctx).WithValues("op", "reconcileGroup")
log.V(3).Info("Reconcile Workload Group")
@@ -298,7 +298,7 @@ func (a *wlReconciler) reconcileGroup(ctx context.Context, group *wlGroup) (reco
}
if !workload.HasQuotaReservation(group.local) && acs.State == kueue.CheckStateRetry {
- errs = append(errs, a.updateACS(ctx, group.local, acs, kueue.CheckStatePending, "Requeued"))
+ errs = append(errs, w.updateACS(ctx, group.local, acs, kueue.CheckStatePending, "Requeued"))
}
return reconcile.Result{}, errors.Join(errs...)
@@ -309,7 +309,7 @@ func (a *wlReconciler) reconcileGroup(ctx context.Context, group *wlGroup) (reco
// it should not be problematic but the "From remote xxxx:" could be lost ....
if group.jobAdapter != nil {
- if err := group.jobAdapter.SyncJob(ctx, a.client, group.remoteClients[remote].client, group.controllerKey, group.local.Name, a.origin); err != nil {
+ if err := group.jobAdapter.SyncJob(ctx, w.client, group.remoteClients[remote].client, group.controllerKey, group.local.Name, w.origin); err != nil {
log.V(2).Error(err, "copying remote controller status", "workerCluster", remote)
// we should retry this
return reconcile.Result{}, err
@@ -326,7 +326,7 @@ func (a *wlReconciler) reconcileGroup(ctx context.Context, group *wlGroup) (reco
Reason: remoteFinishedCond.Reason,
Message: remoteFinishedCond.Message,
})
- return reconcile.Result{}, a.client.Status().Patch(ctx, wlPatch, client.Apply, client.FieldOwner(ControllerName+"-finish"), client.ForceOwnership)
+ return reconcile.Result{}, w.client.Status().Patch(ctx, wlPatch, client.Apply, client.FieldOwner(ControllerName+"-finish"), client.ForceOwnership)
}
// 2. delete all workloads that are out of sync or are not in the chosen worker
@@ -355,7 +355,7 @@ func (a *wlReconciler) reconcileGroup(ctx context.Context, group *wlGroup) (reco
}
acs := workload.FindAdmissionCheck(group.local.Status.AdmissionChecks, group.acName)
- if err := group.jobAdapter.SyncJob(ctx, a.client, group.remoteClients[reservingRemote].client, group.controllerKey, group.local.Name, a.origin); err != nil {
+ if err := group.jobAdapter.SyncJob(ctx, w.client, group.remoteClients[reservingRemote].client, group.controllerKey, group.local.Name, w.origin); err != nil {
log.V(2).Error(err, "creating remote controller object", "remote", reservingRemote)
// We'll retry this in the next reconcile.
return reconcile.Result{}, err
@@ -374,16 +374,16 @@ func (a *wlReconciler) reconcileGroup(ctx context.Context, group *wlGroup) (reco
wlPatch := workload.BaseSSAWorkload(group.local)
workload.SetAdmissionCheckState(&wlPatch.Status.AdmissionChecks, *acs)
- err := a.client.Status().Patch(ctx, wlPatch, client.Apply, client.FieldOwner(ControllerName), client.ForceOwnership)
+ err := w.client.Status().Patch(ctx, wlPatch, client.Apply, client.FieldOwner(ControllerName), client.ForceOwnership)
if err != nil {
return reconcile.Result{}, err
}
}
- return reconcile.Result{RequeueAfter: a.workerLostTimeout}, nil
+ return reconcile.Result{RequeueAfter: w.workerLostTimeout}, nil
} else if acs.State == kueue.CheckStateReady {
// If there is no reserving and the AC is ready, the connection with the reserving remote might
// be lost, keep the workload admitted for keepReadyTimeout and put it back in the queue after that.
- remainingWaitTime := a.workerLostTimeout - time.Since(acs.LastTransitionTime.Time)
+ remainingWaitTime := w.workerLostTimeout - time.Since(acs.LastTransitionTime.Time)
if remainingWaitTime > 0 {
log.V(3).Info("Reserving remote lost, retry", "retryAfter", remainingWaitTime)
return reconcile.Result{RequeueAfter: remainingWaitTime}, nil
@@ -393,7 +393,7 @@ func (a *wlReconciler) reconcileGroup(ctx context.Context, group *wlGroup) (reco
acs.LastTransitionTime = metav1.NewTime(time.Now())
wlPatch := workload.BaseSSAWorkload(group.local)
workload.SetAdmissionCheckState(&wlPatch.Status.AdmissionChecks, *acs)
- return reconcile.Result{}, a.client.Status().Patch(ctx, wlPatch, client.Apply, client.FieldOwner(ControllerName), client.ForceOwnership)
+ return reconcile.Result{}, w.client.Status().Patch(ctx, wlPatch, client.Apply, client.FieldOwner(ControllerName), client.ForceOwnership)
}
}
diff --git a/pkg/controller/jobframework/reconciler.go b/pkg/controller/jobframework/reconciler.go
index e61257e4ac..2e412cde88 100644
--- a/pkg/controller/jobframework/reconciler.go
+++ b/pkg/controller/jobframework/reconciler.go
@@ -142,7 +142,7 @@ func WithManagerName(n string) Option {
}
}
-// WithLabelKeysToCopy
+// WithLabelKeysToCopy sets the list of label keys to copy.
func WithLabelKeysToCopy(n []string) Option {
return func(o *Options) {
o.LabelKeysToCopy = n
diff --git a/pkg/queue/cluster_queue.go b/pkg/queue/cluster_queue.go
index d252e07591..8d5179d5f4 100644
--- a/pkg/queue/cluster_queue.go
+++ b/pkg/queue/cluster_queue.go
@@ -372,7 +372,7 @@ func (c *ClusterQueue) totalElements() []*workload.Info {
return elements
}
-// Returns true if the queue is active
+// Active returns true if the queue is active
func (c *ClusterQueue) Active() bool {
c.rwm.RLock()
defer c.rwm.RUnlock()
@@ -388,11 +388,11 @@ func (c *ClusterQueue) Active() bool {
// compete with other workloads, until cluster events free up quota.
// The workload should not be reinserted if it's already in the ClusterQueue.
// Returns true if the workload was inserted.
-func (cq *ClusterQueue) RequeueIfNotPresent(wInfo *workload.Info, reason RequeueReason) bool {
- if cq.queueingStrategy == kueue.StrictFIFO {
- return cq.requeueIfNotPresent(wInfo, reason != RequeueReasonNamespaceMismatch)
+func (c *ClusterQueue) RequeueIfNotPresent(wInfo *workload.Info, reason RequeueReason) bool {
+ if c.queueingStrategy == kueue.StrictFIFO {
+ return c.requeueIfNotPresent(wInfo, reason != RequeueReasonNamespaceMismatch)
}
- return cq.requeueIfNotPresent(wInfo, reason == RequeueReasonFailedAfterNomination || reason == RequeueReasonPendingPreemption)
+ return c.requeueIfNotPresent(wInfo, reason == RequeueReasonFailedAfterNomination || reason == RequeueReasonPendingPreemption)
}
// queueOrderingFunc returns a function used by the clusterQueue heap algorithm
diff --git a/pkg/util/testingjobs/jobset/wrappers.go b/pkg/util/testingjobs/jobset/wrappers.go
index 1b4a746f88..f1a167c196 100644
--- a/pkg/util/testingjobs/jobset/wrappers.go
+++ b/pkg/util/testingjobs/jobset/wrappers.go
@@ -109,7 +109,7 @@ func (j *JobSetWrapper) Label(k, v string) *JobSetWrapper {
return j
}
-// Annotation sets annotations to the JobSet.
+// Annotations sets the annotations of the JobSet.
func (j *JobSetWrapper) Annotations(annotations map[string]string) *JobSetWrapper {
j.ObjectMeta.Annotations = annotations
return j
@@ -162,6 +162,7 @@ func (j *JobSetWrapper) Condition(c metav1.Condition) *JobSetWrapper {
return j
}
+// ManagedBy sets the managedBy field of the JobSet.
func (j *JobSetWrapper) ManagedBy(c string) *JobSetWrapper {
j.Spec.ManagedBy = &c
return j
diff --git a/pkg/util/testingjobs/mxjob/wrappers.go b/pkg/util/testingjobs/mxjob/wrappers.go
index 5e06fe95bc..22fe790d25 100644
--- a/pkg/util/testingjobs/mxjob/wrappers.go
+++ b/pkg/util/testingjobs/mxjob/wrappers.go
@@ -185,7 +185,7 @@ func (j *MXJobWrapper) NodeSelector(k, v string) *MXJobWrapper {
RoleNodeSelector(kftraining.MXJobReplicaTypeWorker, k, v)
}
-// NodeSelector updates the nodeSelector of job.
+// RoleNodeSelector updates the nodeSelector of job.
func (j *MXJobWrapper) RoleNodeSelector(role kftraining.ReplicaType, k, v string) *MXJobWrapper {
if j.Spec.MXReplicaSpecs[role].Template.Spec.NodeSelector == nil {
j.Spec.MXReplicaSpecs[role].Template.Spec.NodeSelector = make(map[string]string)
diff --git a/pkg/util/testingjobs/pod/wrappers.go b/pkg/util/testingjobs/pod/wrappers.go
index f62390749c..a6e95ff337 100644
--- a/pkg/util/testingjobs/pod/wrappers.go
+++ b/pkg/util/testingjobs/pod/wrappers.go
@@ -84,7 +84,7 @@ func (p *PodWrapper) Queue(q string) *PodWrapper {
return p.Label(constants.QueueLabel, q)
}
-// Queue updates the queue name of the Pod
+// PriorityClass updates the priority class name of the Pod
func (p *PodWrapper) PriorityClass(pc string) *PodWrapper {
p.Spec.PriorityClassName = pc
return p
diff --git a/pkg/util/testingjobs/raycluster/wrappers.go b/pkg/util/testingjobs/raycluster/wrappers.go
index 7d55e93bdf..08aa3820d7 100644
--- a/pkg/util/testingjobs/raycluster/wrappers.go
+++ b/pkg/util/testingjobs/raycluster/wrappers.go
@@ -74,7 +74,7 @@ func MakeCluster(name, ns string) *ClusterWrapper {
}}
}
-// NodeSelector adds a node selector to the job's head.
+// NodeSelectorHeadGroup adds a node selector to the job's head.
func (j *ClusterWrapper) NodeSelectorHeadGroup(k, v string) *ClusterWrapper {
j.Spec.HeadGroupSpec.Template.Spec.NodeSelector[k] = v
return j
From 36486486c80d31e3435fdb56e8debe801dcf246d Mon Sep 17 00:00:00 2001
From: Traian Schiau <55734665+trasc@users.noreply.github.com>
Date: Thu, 25 Apr 2024 16:53:38 +0300
Subject: [PATCH 29/49] Adapt scalability range spec to the CI results (#2043)
* Adapt scalability rangespec to the CI results
* Round up
---
test/scalability/default_rangespec.yaml | 37 ++++++++++++++++++-------
1 file changed, 27 insertions(+), 10 deletions(-)
diff --git a/test/scalability/default_rangespec.yaml b/test/scalability/default_rangespec.yaml
index cd13b6714b..018c9472c7 100644
--- a/test/scalability/default_rangespec.yaml
+++ b/test/scalability/default_rangespec.yaml
@@ -1,15 +1,32 @@
-# Until we have a clear picture on how the setup
-# performs in CI keep the values "very relaxed"
+# The values are computed based on the result of 5 trial runs:
+# - #1782760671465705472
+# - #1782764439129296896
+# - #1782768037514973184
+# - #1782772615836864512
+# - #1782775995984515072
cmd:
- maxWallMs: 3600_000 #1h
- maxUserMs: 3600_000
- maxSysMs: 3600_000
- maxrss: 1024_000 #1000MiB
+ # Average value 351116.4 (+/- 0.9%), setting at +5%
+ maxWallMs: 368_000
+
+ # Average value 111500 (+/- 14%), setting at +20%
+ maxUserMs: 134_000
+
+ # Average value 27875 (+/- 16%), setting at +20%
+ maxSysMs: 34_000
+
+ # Average value 445012 (+/- 0.3%), setting at +5%
+ maxrss: 468_000
clusterQueueClassesMinUsage:
- cq: 10 #10%
+ # Average value 58.7 (+/- 1.2%), setting at -5%
+ cq: 56 #%
wlClassesMaxAvgTimeToAdmissionMs:
- large: 3600_000 #1h
- medium: 3600_000
- small: 3600_000
+ # Average value 6666 (+/- 14%), setting at +20%
+ large: 8_000
+
+ # Average value 76768 (+/- 2%), setting at +5%
+ medium: 81_000
+
+ # Average value 215468 (+/- 2%), setting at +5%
+ small: 227_000
From abc2a8b819f16310f684e470c00ca29ae3d9dbf5 Mon Sep 17 00:00:00 2001
From: Aldo Culquicondor <1299064+alculquicondor@users.noreply.github.com>
Date: Thu, 25 Apr 2024 10:21:23 -0400
Subject: [PATCH 30/49] Cleanup CQ and cohort resource stats (#2058)
* Cleanup CQ and cohort resource stats
Change-Id: I8831ccc0cf566058ac1dbd11591ad3d761121a75
* Remove unused types
Change-Id: Ib5afb1319644e05a008c84fbd90de0625c266a60
---
pkg/cache/cache_test.go | 386 ++++++++++++---------------------
pkg/cache/clusterqueue.go | 55 +----
pkg/cache/clusterqueue_test.go | 77 ++-----
pkg/cache/snapshot.go | 24 +-
pkg/cache/snapshot_test.go | 252 +++++++--------------
5 files changed, 251 insertions(+), 543 deletions(-)
diff --git a/pkg/cache/cache_test.go b/pkg/cache/cache_test.go
index 98c9d312cc..77c85dfadb 100644
--- a/pkg/cache/cache_test.go
+++ b/pkg/cache/cache_test.go
@@ -127,11 +127,8 @@ func TestCacheClusterQueueOperations(t *testing.T) {
AdmittedUsage: FlavorResourceQuantities{
"default": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 10_000,
- Lendable: 10_000,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 10_000,
},
Status: active,
Preemption: defaultPreemption,
@@ -159,11 +156,8 @@ func TestCacheClusterQueueOperations(t *testing.T) {
AdmittedUsage: FlavorResourceQuantities{
"default": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 15_000,
- Lendable: 15_000,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 15_000,
},
Status: active,
Preemption: defaultPreemption,
@@ -210,11 +204,8 @@ func TestCacheClusterQueueOperations(t *testing.T) {
AdmittedUsage: FlavorResourceQuantities{
"nonexistent-flavor": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 15_000,
- Lendable: 15_000,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 15_000,
},
Status: pending,
Preemption: defaultPreemption,
@@ -303,11 +294,8 @@ func TestCacheClusterQueueOperations(t *testing.T) {
AdmittedUsage: FlavorResourceQuantities{
"default": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 10_000,
- Lendable: 10_000,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 10_000,
},
Status: active,
Preemption: defaultPreemption,
@@ -335,11 +323,8 @@ func TestCacheClusterQueueOperations(t *testing.T) {
AdmittedUsage: FlavorResourceQuantities{
"default": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 15_000,
- Lendable: 15_000,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 15_000,
},
Status: active,
Preemption: defaultPreemption,
@@ -388,11 +373,8 @@ func TestCacheClusterQueueOperations(t *testing.T) {
AdmittedUsage: FlavorResourceQuantities{
"nonexistent-flavor": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 15_000,
- Lendable: 15_000,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 15_000,
},
Status: pending,
Preemption: defaultPreemption,
@@ -478,11 +460,8 @@ func TestCacheClusterQueueOperations(t *testing.T) {
AdmittedUsage: FlavorResourceQuantities{
"default": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 5_000,
- Lendable: 5_000,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 5_000,
},
Status: active,
Preemption: defaultPreemption,
@@ -547,11 +526,8 @@ func TestCacheClusterQueueOperations(t *testing.T) {
AdmittedUsage: FlavorResourceQuantities{
"default": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 5_000,
- Lendable: 4_000,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 4_000,
},
Status: active,
Preemption: defaultPreemption,
@@ -614,11 +590,8 @@ func TestCacheClusterQueueOperations(t *testing.T) {
AdmittedUsage: FlavorResourceQuantities{
"default": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 15_000,
- Lendable: 15_000,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 15_000,
},
Status: active,
Preemption: defaultPreemption,
@@ -657,11 +630,8 @@ func TestCacheClusterQueueOperations(t *testing.T) {
AdmittedUsage: FlavorResourceQuantities{
"nonexistent-flavor": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 15_000,
- Lendable: 15_000,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 15_000,
},
Status: pending,
Preemption: defaultPreemption,
@@ -722,11 +692,8 @@ func TestCacheClusterQueueOperations(t *testing.T) {
AdmittedUsage: FlavorResourceQuantities{
"default": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 10_000,
- Lendable: 10_000,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 10_000,
},
Status: active,
Preemption: defaultPreemption,
@@ -754,11 +721,8 @@ func TestCacheClusterQueueOperations(t *testing.T) {
AdmittedUsage: FlavorResourceQuantities{
"default": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 15_000,
- Lendable: 15_000,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 15_000,
},
Status: active,
Preemption: defaultPreemption,
@@ -805,11 +769,8 @@ func TestCacheClusterQueueOperations(t *testing.T) {
AdmittedUsage: FlavorResourceQuantities{
"nonexistent-flavor": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 15_000,
- Lendable: 15_000,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 15_000,
},
Status: active,
Preemption: defaultPreemption,
@@ -934,10 +895,10 @@ func TestCacheClusterQueueOperations(t *testing.T) {
"example.com/gpu": 0,
},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {},
- corev1.ResourceMemory: {},
- "example.com/gpu": {},
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 0,
+ corev1.ResourceMemory: 0,
+ "example.com/gpu": 0,
},
Status: pending,
Preemption: defaultPreemption,
@@ -1108,12 +1069,8 @@ func TestCacheClusterQueueOperations(t *testing.T) {
FlavorFungibility: defaultFlavorFungibility,
Usage: FlavorResourceQuantities{"f1": {corev1.ResourceCPU: 2000}},
AdmittedUsage: FlavorResourceQuantities{"f1": {corev1.ResourceCPU: 1000}},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: &QuotaStats{
- Nominal: 10_000,
- Lendable: 10_000,
- Usage: 2_000,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 10_000,
},
Workloads: map[string]*workload.Info{
"ns/reserving": {
@@ -1147,6 +1104,113 @@ func TestCacheClusterQueueOperations(t *testing.T) {
},
wantCohorts: map[string]sets.Set[string]{},
},
+ {
+ name: "add CQ with multiple resource groups and flavors",
+ enableLendingLimit: true,
+ operation: func(cache *Cache) error {
+ cq := utiltesting.MakeClusterQueue("foo").
+ ResourceGroup(
+ kueue.FlavorQuotas{
+ Name: "on-demand",
+ Resources: []kueue.ResourceQuota{
+ {
+ Name: corev1.ResourceCPU,
+ NominalQuota: resource.MustParse("10"),
+ LendingLimit: ptr.To(resource.MustParse("8")),
+ },
+ {
+ Name: corev1.ResourceMemory,
+ NominalQuota: resource.MustParse("10Gi"),
+ LendingLimit: ptr.To(resource.MustParse("8Gi")),
+ },
+ },
+ },
+ kueue.FlavorQuotas{
+ Name: "spot",
+ Resources: []kueue.ResourceQuota{
+ {
+ Name: corev1.ResourceCPU,
+ NominalQuota: resource.MustParse("20"),
+ LendingLimit: ptr.To(resource.MustParse("20")),
+ },
+ {
+ Name: corev1.ResourceMemory,
+ NominalQuota: resource.MustParse("20Gi"),
+ LendingLimit: ptr.To(resource.MustParse("20Gi")),
+ },
+ },
+ },
+ ).
+ ResourceGroup(
+ kueue.FlavorQuotas{
+ Name: "license",
+ Resources: []kueue.ResourceQuota{
+ {
+ Name: "license",
+ NominalQuota: resource.MustParse("8"),
+ LendingLimit: ptr.To(resource.MustParse("4")),
+ },
+ },
+ },
+ ).
+ Obj()
+ return cache.AddClusterQueue(context.Background(), cq)
+ },
+ wantClusterQueues: map[string]*ClusterQueue{
+ "foo": {
+ Name: "foo",
+ NamespaceSelector: labels.Everything(),
+ Status: pending,
+ Preemption: defaultPreemption,
+ AllocatableResourceGeneration: 1,
+ FlavorFungibility: defaultFlavorFungibility,
+ GuaranteedQuota: FlavorResourceQuantities{
+ "on-demand": {
+ corev1.ResourceCPU: 2_000,
+ corev1.ResourceMemory: 2 * utiltesting.Gi,
+ },
+ "spot": {
+ corev1.ResourceCPU: 0,
+ corev1.ResourceMemory: 0,
+ },
+ "license": {
+ "license": 4,
+ },
+ },
+ Usage: FlavorResourceQuantities{
+ "on-demand": {
+ corev1.ResourceCPU: 0,
+ corev1.ResourceMemory: 0,
+ },
+ "spot": {
+ corev1.ResourceCPU: 0,
+ corev1.ResourceMemory: 0,
+ },
+ "license": {
+ "license": 0,
+ },
+ },
+ AdmittedUsage: FlavorResourceQuantities{
+ "on-demand": {
+ corev1.ResourceCPU: 0,
+ corev1.ResourceMemory: 0,
+ },
+ "spot": {
+ corev1.ResourceCPU: 0,
+ corev1.ResourceMemory: 0,
+ },
+ "license": {
+ "license": 0,
+ },
+ },
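+					// Lendable sums the lending limits across flavors: 8 + 20 CPUs, 8Gi + 20Gi of memory, and 4 licenses.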
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 28_000,
+ corev1.ResourceMemory: 28 * utiltesting.Gi,
+ "license": 4,
+ },
+ },
+ },
+ },
}
for _, tc := range cases {
@@ -1250,7 +1314,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
type result struct {
Workloads sets.Set[string]
UsedResources FlavorResourceQuantities
- ResourceStats ResourceStats
}
steps := []struct {
@@ -1285,11 +1348,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 10},
"spot": {corev1.ResourceCPU: 15},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 25,
- },
- },
},
"two": {
Workloads: sets.New("/c", "/d"),
@@ -1297,11 +1355,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 0},
"spot": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 0,
- },
- },
},
},
},
@@ -1324,11 +1377,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 10},
"spot": {corev1.ResourceCPU: 15},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 25,
- },
- },
},
"two": {
Workloads: sets.New("/c"),
@@ -1336,11 +1384,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 0},
"spot": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 0,
- },
- },
},
},
},
@@ -1362,11 +1405,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 10},
"spot": {corev1.ResourceCPU: 15},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 25,
- },
- },
},
"two": {
Workloads: sets.New("/c"),
@@ -1374,11 +1412,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 0},
"spot": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 0,
- },
- },
},
},
},
@@ -1401,11 +1434,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 0},
"spot": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 0,
- },
- },
},
"two": {
Workloads: sets.New("/a", "/c"),
@@ -1413,11 +1441,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 10},
"spot": {corev1.ResourceCPU: 15},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 25,
- },
- },
},
},
},
@@ -1440,11 +1463,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 10},
"spot": {corev1.ResourceCPU: 15},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 25,
- },
- },
},
"two": {
Workloads: sets.New("/c"),
@@ -1452,11 +1470,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 0},
"spot": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 0,
- },
- },
},
},
},
@@ -1479,11 +1492,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 10},
"spot": {corev1.ResourceCPU: 15},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 25,
- },
- },
},
"two": {
Workloads: sets.New("/c"),
@@ -1491,11 +1499,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 0},
"spot": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 0,
- },
- },
},
},
},
@@ -1517,11 +1520,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 10},
"spot": {corev1.ResourceCPU: 15},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 25,
- },
- },
},
"two": {
Workloads: sets.New("/c", "/d"),
@@ -1529,11 +1527,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 0},
"spot": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 0,
- },
- },
},
},
},
@@ -1552,11 +1545,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 0},
"spot": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 0,
- },
- },
},
"two": {
Workloads: sets.New("/c"),
@@ -1564,11 +1552,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 0},
"spot": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 0,
- },
- },
},
},
},
@@ -1585,11 +1568,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 0},
"spot": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 0,
- },
- },
},
"two": {
Workloads: sets.New("/c"),
@@ -1597,11 +1575,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 0},
"spot": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 0,
- },
- },
},
},
},
@@ -1619,11 +1592,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 10},
"spot": {corev1.ResourceCPU: 15},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 25,
- },
- },
},
"two": {
Workloads: sets.New("/c"),
@@ -1631,11 +1599,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 0},
"spot": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 0,
- },
- },
},
},
},
@@ -1655,11 +1618,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 10},
"spot": {corev1.ResourceCPU: 15},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 25,
- },
- },
},
"two": {
Workloads: sets.New("/c"),
@@ -1667,11 +1625,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 0},
"spot": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 0,
- },
- },
},
},
},
@@ -1690,11 +1643,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 10},
"spot": {corev1.ResourceCPU: 15},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 25,
- },
- },
},
"two": {
Workloads: sets.New("/c"),
@@ -1702,11 +1650,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 0},
"spot": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 0,
- },
- },
},
},
},
@@ -1737,11 +1680,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 20},
"spot": {corev1.ResourceCPU: 30},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 50,
- },
- },
},
"two": {
Workloads: sets.New("/c", "/e"),
@@ -1749,11 +1687,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 10},
"spot": {corev1.ResourceCPU: 15},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 25,
- },
- },
},
},
wantAssumedWorkloads: map[string]string{
@@ -1780,11 +1713,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 10},
"spot": {corev1.ResourceCPU: 15},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 25,
- },
- },
},
"two": {
Workloads: sets.New("/c"),
@@ -1792,11 +1720,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 0},
"spot": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 0,
- },
- },
},
},
wantAssumedWorkloads: map[string]string{},
@@ -1830,11 +1753,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 10},
"spot": {corev1.ResourceCPU: 15},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 25,
- },
- },
},
"two": {
Workloads: sets.New("/c", "/e"),
@@ -1842,11 +1760,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 10},
"spot": {corev1.ResourceCPU: 15},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 25,
- },
- },
},
},
wantAssumedWorkloads: map[string]string{
@@ -1872,11 +1785,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 10},
"spot": {corev1.ResourceCPU: 15},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 25,
- },
- },
},
"two": {
Workloads: sets.New("/c"),
@@ -1884,11 +1792,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 0},
"spot": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 0,
- },
- },
},
},
},
@@ -1924,11 +1827,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 20},
"spot": {corev1.ResourceCPU: 30},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 50,
- },
- },
},
"two": {
Workloads: sets.New("/c", "/e"),
@@ -1936,11 +1834,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
"on-demand": {corev1.ResourceCPU: 10},
"spot": {corev1.ResourceCPU: 15},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Usage: 25,
- },
- },
},
},
wantAssumedWorkloads: map[string]string{
@@ -1967,7 +1860,6 @@ func TestCacheWorkloadOperations(t *testing.T) {
gotResult[name] = result{
Workloads: sets.KeySet(cq.Workloads),
UsedResources: cq.Usage,
- ResourceStats: cq.ResourceStats,
}
}
if diff := cmp.Diff(step.wantResults, gotResult); diff != "" {
diff --git a/pkg/cache/clusterqueue.go b/pkg/cache/clusterqueue.go
index 142ee5ad73..abccb9f8e3 100644
--- a/pkg/cache/clusterqueue.go
+++ b/pkg/cache/clusterqueue.go
@@ -40,16 +40,6 @@ var (
errQueueAlreadyExists = errors.New("queue already exists")
)
-// QuotaStats holds the nominal quota and usage for a resource.
-type QuotaStats struct {
- Nominal int64
- Lendable int64
- Usage int64
-}
-
-// ResourceStats holds QuotaStats for resources.
-type ResourceStats map[corev1.ResourceName]*QuotaStats
-
// ClusterQueue is the internal implementation of kueue.ClusterQueue that
// holds admitted workloads.
type ClusterQueue struct {
@@ -75,8 +65,8 @@ type ClusterQueue struct {
// deleted, or the resource groups are changed.
AllocatableResourceGeneration int64
- // ResourceStats holds nominal quota and usage for the resources of the ClusterQueue, independent of the flavor.
- ResourceStats ResourceStats
+ // Lendable holds the total lendable quota for the resources of the ClusterQueue, independent of the flavor.
+ Lendable map[corev1.ResourceName]int64
// The following fields are not populated in a snapshot.
@@ -101,7 +91,7 @@ type Cohort struct {
// RequestableResources equals to the sum of LendingLimit when feature LendingLimit enabled.
RequestableResources FlavorResourceQuantities
Usage FlavorResourceQuantities
- ResourceStats ResourceStats
+ Lendable map[corev1.ResourceName]int64
// AllocatableResourceGeneration equals to
// the sum of allocatable generation among its members.
AllocatableResourceGeneration int64
@@ -265,29 +255,10 @@ func filterFlavorQuantities(orig FlavorResourceQuantities, resourceGroups []kueu
return ret
}
-// resetResourceStatsFromResourceGroups maintains the Usage stats for the given resource groups
-// and resets Nominal and Lendable values. They are calculated again in updateResourceGroups.
-func (c *ClusterQueue) resetResourceStatsFromResourceGroups(resourceGroups []kueue.ResourceGroup) {
- updatedResourceStats := make(ResourceStats, len(resourceGroups))
- for _, rg := range resourceGroups {
- for _, res := range rg.CoveredResources {
- if oStats := c.ResourceStats[res]; oStats != nil {
- updatedResourceStats[res] = &QuotaStats{
- Usage: c.ResourceStats[res].Usage,
- // Reset Nominal and Lendable.
- }
- } else {
- updatedResourceStats[res] = &QuotaStats{}
- }
- }
- }
- c.ResourceStats = updatedResourceStats
-}
-
func (c *ClusterQueue) updateResourceGroups(in []kueue.ResourceGroup) {
oldRG := c.ResourceGroups
c.ResourceGroups = make([]ResourceGroup, len(in))
- c.resetResourceStatsFromResourceGroups(in)
+ c.Lendable = make(map[corev1.ResourceName]int64)
for i, rgIn := range in {
rg := &c.ResourceGroups[i]
*rg = ResourceGroup{
@@ -305,15 +276,14 @@ func (c *ClusterQueue) updateResourceGroups(in []kueue.ResourceGroup) {
rQuota := ResourceQuota{
Nominal: nominal,
}
- c.ResourceStats[rIn.Name].Nominal += nominal
if rIn.BorrowingLimit != nil {
rQuota.BorrowingLimit = ptr.To(workload.ResourceValue(rIn.Name, *rIn.BorrowingLimit))
}
if features.Enabled(features.LendingLimit) && rIn.LendingLimit != nil {
rQuota.LendingLimit = ptr.To(workload.ResourceValue(rIn.Name, *rIn.LendingLimit))
- c.ResourceStats[rIn.Name].Lendable += *rQuota.LendingLimit
+ c.Lendable[rIn.Name] += *rQuota.LendingLimit
} else {
- c.ResourceStats[rIn.Name].Lendable += nominal
+ c.Lendable[rIn.Name] += nominal
}
fQuotas.Resources[rIn.Name] = &rQuota
}
@@ -499,7 +469,6 @@ func (c *ClusterQueue) reportActiveWorkloads() {
func (c *ClusterQueue) updateWorkloadUsage(wi *workload.Info, m int64) {
admitted := workload.IsAdmitted(wi.Obj)
updateFlavorUsage(wi, c.Usage, m)
- updateResourceStats(wi, c.ResourceStats, m)
if admitted {
updateFlavorUsage(wi, c.AdmittedUsage, m)
c.admittedWorkloadsCount += int(m)
@@ -515,16 +484,6 @@ func (c *ClusterQueue) updateWorkloadUsage(wi *workload.Info, m int64) {
}
}
-func updateResourceStats(wi *workload.Info, rStats ResourceStats, m int64) {
- for _, ps := range wi.TotalRequests {
- for res, v := range ps.Requests {
- if _, exists := rStats[res]; exists {
- rStats[res].Usage += v * m
- }
- }
- }
-}
-
func updateFlavorUsage(wi *workload.Info, flvUsage FlavorResourceQuantities, m int64) {
for _, ps := range wi.TotalRequests {
for wlRes, wlResFlv := range ps.Flavors {
@@ -723,7 +682,7 @@ func (c *ClusterQueue) dominantResourceShare(wlReq FlavorResourceQuantities, m i
var drs int64 = -1
var dRes corev1.ResourceName
for rName, b := range borrowing {
- if lendable := c.Cohort.ResourceStats[rName].Lendable; lendable > 0 {
+ if lendable := c.Cohort.Lendable[rName]; lendable > 0 {
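+			// Share of the cohort's lendable quota that this ClusterQueue is borrowing, in permille.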
ratio := b * 1000 / lendable
// Use alphabetical order to get a deterministic resource name.
if ratio > drs || (ratio == drs && rName < dRes) {
diff --git a/pkg/cache/clusterqueue_test.go b/pkg/cache/clusterqueue_test.go
index 9ad9d5bd8b..79666a99a9 100644
--- a/pkg/cache/clusterqueue_test.go
+++ b/pkg/cache/clusterqueue_test.go
@@ -816,17 +816,9 @@ func TestDominantResourceShare(t *testing.T) {
},
},
Cohort: &Cohort{
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 10_000,
- Lendable: 10_000,
- Usage: 2_000,
- },
- "example.com/gpu": {
- Nominal: 10,
- Lendable: 10,
- Usage: 6,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 10_000,
+ "example.com/gpu": 10,
},
},
},
@@ -857,17 +849,9 @@ func TestDominantResourceShare(t *testing.T) {
},
},
Cohort: &Cohort{
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 10_000,
- Lendable: 10_000,
- Usage: 10_000,
- },
- "example.com/gpu": {
- Nominal: 10,
- Lendable: 10,
- Usage: 10,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 10_000,
+ "example.com/gpu": 10,
},
},
},
@@ -900,17 +884,9 @@ func TestDominantResourceShare(t *testing.T) {
},
},
Cohort: &Cohort{
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 10_000,
- Lendable: 10_000,
- Usage: 10_000,
- },
- "example.com/gpu": {
- Nominal: 10,
- Lendable: 10,
- Usage: 10,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 10_000,
+ "example.com/gpu": 10,
},
},
},
@@ -943,17 +919,9 @@ func TestDominantResourceShare(t *testing.T) {
},
},
Cohort: &Cohort{
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 10_000,
- Lendable: 10_000,
- Usage: 2_000,
- },
- "example.com/gpu": {
- Nominal: 10,
- Lendable: 10,
- Usage: 6,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 10_000,
+ "example.com/gpu": 10,
},
},
},
@@ -993,16 +961,9 @@ func TestDominantResourceShare(t *testing.T) {
},
},
Cohort: &Cohort{
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 10_000,
- Lendable: 10_000,
- Usage: 2_000,
- },
- "example.com/gpu": {
- Nominal: 10_000,
- Usage: 5_000,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 10_000,
+ "example.com/gpu": 0,
},
},
},
@@ -1048,12 +1009,8 @@ func TestDominantResourceShare(t *testing.T) {
},
},
Cohort: &Cohort{
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 200_000,
- Lendable: 200_000,
- Usage: 20_000,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 200_000,
},
},
},
diff --git a/pkg/cache/snapshot.go b/pkg/cache/snapshot.go
index de5fa3ec35..c7da94daaa 100644
--- a/pkg/cache/snapshot.go
+++ b/pkg/cache/snapshot.go
@@ -23,7 +23,6 @@ import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/klog/v2"
- "k8s.io/utils/ptr"
kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
"sigs.k8s.io/kueue/pkg/features"
@@ -55,14 +54,12 @@ func (s *Snapshot) AddWorkload(wl *workload.Info) {
func (c *ClusterQueue) addOrRemoveWorkload(wl *workload.Info, m int64) {
updateFlavorUsage(wl, c.Usage, m)
- updateResourceStats(wl, c.ResourceStats, m)
if c.Cohort != nil {
if features.Enabled(features.LendingLimit) {
updateCohortUsage(wl, c, m)
} else {
updateFlavorUsage(wl, c.Cohort.Usage, m)
}
- updateResourceStats(wl, c.Cohort.ResourceStats, m)
}
}
@@ -138,7 +135,7 @@ func (c *ClusterQueue) snapshot() *ClusterQueue {
FlavorFungibility: c.FlavorFungibility,
AllocatableResourceGeneration: c.AllocatableResourceGeneration,
Usage: make(FlavorResourceQuantities, len(c.Usage)),
- ResourceStats: make(ResourceStats, len(c.ResourceStats)),
+ Lendable: maps.Clone(c.Lendable),
Workloads: maps.Clone(c.Workloads),
Preemption: c.Preemption,
NamespaceSelector: c.NamespaceSelector,
@@ -152,9 +149,6 @@ func (c *ClusterQueue) snapshot() *ClusterQueue {
if features.Enabled(features.LendingLimit) {
cc.GuaranteedQuota = c.GuaranteedQuota
}
- for rName, rStats := range c.ResourceStats {
- cc.ResourceStats[rName] = ptr.To(*rStats)
- }
return cc
}
@@ -203,17 +197,11 @@ func (c *ClusterQueue) accumulateResources(cohort *Cohort) {
used[res] += val
}
}
- if cohort.ResourceStats == nil {
- cohort.ResourceStats = make(ResourceStats, len(c.ResourceStats))
- }
- for rName, rStats := range c.ResourceStats {
- cohortRStats := cohort.ResourceStats[rName]
- if cohortRStats == nil {
- cohort.ResourceStats[rName] = ptr.To(*rStats)
- continue
+ if cohort.Lendable == nil {
+ cohort.Lendable = maps.Clone(c.Lendable)
+ } else {
+ for res, v := range c.Lendable {
+ cohort.Lendable[res] += v
}
- cohortRStats.Nominal += rStats.Nominal
- cohortRStats.Lendable += rStats.Lendable
- cohortRStats.Usage += rStats.Usage
}
}
diff --git a/pkg/cache/snapshot_test.go b/pkg/cache/snapshot_test.go
index 787c33c6fa..3a869f3aa5 100644
--- a/pkg/cache/snapshot_test.go
+++ b/pkg/cache/snapshot_test.go
@@ -223,17 +223,9 @@ func TestSnapshot(t *testing.T) {
"example.com/gpu": 15,
},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 400_000,
- Lendable: 400_000,
- Usage: 20_000,
- },
- "example.com/gpu": {
- Nominal: 50,
- Lendable: 50,
- Usage: 15,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 400_000,
+ "example.com/gpu": 50,
},
}
return Snapshot{
@@ -267,12 +259,8 @@ func TestSnapshot(t *testing.T) {
"demand": {corev1.ResourceCPU: 10_000},
"spot": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 300_000,
- Lendable: 300_000,
- Usage: 10_000,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 300_000,
},
Workloads: map[string]*workload.Info{
"/alpha": workload.NewInfo(utiltesting.MakeWorkload("alpha", "").
@@ -322,17 +310,9 @@ func TestSnapshot(t *testing.T) {
"example.com/gpu": 15,
},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 100_000,
- Lendable: 100_000,
- Usage: 10_000,
- },
- "example.com/gpu": {
- Nominal: 50,
- Lendable: 50,
- Usage: 15,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 100_000,
+ "example.com/gpu": 50,
},
Workloads: map[string]*workload.Info{
"/beta": workload.NewInfo(utiltesting.MakeWorkload("beta", "").
@@ -383,11 +363,8 @@ func TestSnapshot(t *testing.T) {
corev1.ResourceCPU: 0,
},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 100_000,
- Lendable: 100_000,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 100_000,
},
Preemption: defaultPreemption,
NamespaceSelector: labels.Everything(),
@@ -492,12 +469,8 @@ func TestSnapshot(t *testing.T) {
corev1.ResourceCPU: 0,
},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 60_000,
- Lendable: 30_000,
- Usage: 25_000,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 30_000,
},
}
return Snapshot{
@@ -539,12 +512,8 @@ func TestSnapshot(t *testing.T) {
"arm": {corev1.ResourceCPU: 15_000},
"x86": {corev1.ResourceCPU: 10_000},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 30_000,
- Lendable: 15_000,
- Usage: 25_000,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 15_000,
},
Workloads: map[string]*workload.Info{
"/alpha": workload.NewInfo(utiltesting.MakeWorkload("alpha", "").
@@ -618,11 +587,8 @@ func TestSnapshot(t *testing.T) {
"arm": {corev1.ResourceCPU: 0},
"x86": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 30_000,
- Lendable: 15_000,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 15_000,
},
Preemption: defaultPreemption,
NamespaceSelector: labels.Everything(),
@@ -771,15 +737,9 @@ func TestSnapshotAddRemoveWorkload(t *testing.T) {
"alpha": {corev1.ResourceMemory: 0},
"beta": {corev1.ResourceMemory: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 12_000,
- Lendable: 12_000,
- },
- corev1.ResourceMemory: {
- Nominal: 12 * utiltesting.Gi,
- Lendable: 12 * utiltesting.Gi,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 12_000,
+ corev1.ResourceMemory: 12 * utiltesting.Gi,
},
}
return Snapshot{
@@ -796,15 +756,9 @@ func TestSnapshotAddRemoveWorkload(t *testing.T) {
"alpha": {corev1.ResourceMemory: 0},
"beta": {corev1.ResourceMemory: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 6_000,
- Lendable: 6_000,
- },
- corev1.ResourceMemory: {
- Nominal: 12 * utiltesting.Gi,
- Lendable: 12 * utiltesting.Gi,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 6_000,
+ corev1.ResourceMemory: 12 * utiltesting.Gi,
},
},
"c2": {
@@ -817,11 +771,8 @@ func TestSnapshotAddRemoveWorkload(t *testing.T) {
Usage: FlavorResourceQuantities{
"default": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 6_000,
- Lendable: 6_000,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 6_000,
},
},
},
@@ -840,17 +791,9 @@ func TestSnapshotAddRemoveWorkload(t *testing.T) {
"alpha": {corev1.ResourceMemory: utiltesting.Gi},
"beta": {corev1.ResourceMemory: utiltesting.Gi},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 12_000,
- Lendable: 12_000,
- Usage: 2_000,
- },
- corev1.ResourceMemory: {
- Nominal: 12 * utiltesting.Gi,
- Lendable: 12 * utiltesting.Gi,
- Usage: 2 * utiltesting.Gi,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 12_000,
+ corev1.ResourceMemory: 12 * utiltesting.Gi,
},
}
return Snapshot{
@@ -870,16 +813,9 @@ func TestSnapshotAddRemoveWorkload(t *testing.T) {
"alpha": {corev1.ResourceMemory: utiltesting.Gi},
"beta": {corev1.ResourceMemory: utiltesting.Gi},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 6_000,
- Lendable: 6_000,
- },
- corev1.ResourceMemory: {
- Nominal: 12 * utiltesting.Gi,
- Lendable: 12 * utiltesting.Gi,
- Usage: 2 * utiltesting.Gi,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 6_000,
+ corev1.ResourceMemory: 12 * utiltesting.Gi,
},
},
"c2": {
@@ -895,12 +831,8 @@ func TestSnapshotAddRemoveWorkload(t *testing.T) {
Usage: FlavorResourceQuantities{
"default": {corev1.ResourceCPU: 2_000},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 6_000,
- Lendable: 6_000,
- Usage: 2_000,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 6_000,
},
},
},
@@ -919,17 +851,9 @@ func TestSnapshotAddRemoveWorkload(t *testing.T) {
"alpha": {corev1.ResourceMemory: 0},
"beta": {corev1.ResourceMemory: utiltesting.Gi},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 12_000,
- Lendable: 12_000,
- Usage: 3_000,
- },
- corev1.ResourceMemory: {
- Nominal: 12 * utiltesting.Gi,
- Lendable: 12 * utiltesting.Gi,
- Usage: utiltesting.Gi,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 12_000,
+ corev1.ResourceMemory: 12 * utiltesting.Gi,
},
}
return Snapshot{
@@ -949,17 +873,9 @@ func TestSnapshotAddRemoveWorkload(t *testing.T) {
"alpha": {corev1.ResourceMemory: 0},
"beta": {corev1.ResourceMemory: utiltesting.Gi},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 6_000,
- Lendable: 6_000,
- Usage: 1_000,
- },
- corev1.ResourceMemory: {
- Nominal: 12 * utiltesting.Gi,
- Lendable: 12 * utiltesting.Gi,
- Usage: utiltesting.Gi,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 6_000,
+ corev1.ResourceMemory: 12 * utiltesting.Gi,
},
},
"c2": {
@@ -975,12 +891,8 @@ func TestSnapshotAddRemoveWorkload(t *testing.T) {
Usage: FlavorResourceQuantities{
"default": {corev1.ResourceCPU: 2_000},
},
- ResourceStats: ResourceStats{
- corev1.ResourceCPU: {
- Nominal: 6_000,
- Lendable: 6_000,
- Usage: 2_000,
- },
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 6_000,
},
},
},
@@ -1095,8 +1007,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) {
Usage: FlavorResourceQuantities{
"default": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- "cpu": {Nominal: 20_000, Lendable: 10_000},
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 10_000,
},
}
return Snapshot{
@@ -1116,8 +1028,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) {
corev1.ResourceCPU: 6_000,
},
},
- ResourceStats: ResourceStats{
- "cpu": {Nominal: 10_000, Lendable: 4_000},
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 4_000,
},
},
"lend-b": {
@@ -1135,8 +1047,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) {
corev1.ResourceCPU: 4_000,
},
},
- ResourceStats: ResourceStats{
- "cpu": {Nominal: 10_000, Lendable: 6_000},
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 6_000,
},
},
},
@@ -1153,8 +1065,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) {
Usage: FlavorResourceQuantities{
"default": {corev1.ResourceCPU: 1_000},
},
- ResourceStats: ResourceStats{
- "cpu": {Nominal: 20_000, Lendable: 10_000, Usage: 11_000},
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 10_000,
},
}
return Snapshot{
@@ -1174,8 +1086,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) {
corev1.ResourceCPU: 6_000,
},
},
- ResourceStats: ResourceStats{
- "cpu": {Nominal: 10_000, Lendable: 4_000, Usage: 7_000},
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 4_000,
},
},
"lend-b": {
@@ -1193,8 +1105,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) {
corev1.ResourceCPU: 4_000,
},
},
- ResourceStats: ResourceStats{
- "cpu": {Nominal: 10_000, Lendable: 6_000, Usage: 4_000},
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 6_000,
},
},
},
@@ -1211,8 +1123,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) {
Usage: FlavorResourceQuantities{
"default": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- "cpu": {Nominal: 20_000, Lendable: 10_000, Usage: 10_000},
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 10_000,
},
}
return Snapshot{
@@ -1232,8 +1144,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) {
corev1.ResourceCPU: 6_000,
},
},
- ResourceStats: ResourceStats{
- "cpu": {Nominal: 10_000, Lendable: 4_000, Usage: 6_000},
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 4_000,
},
},
"lend-b": {
@@ -1251,8 +1163,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) {
corev1.ResourceCPU: 4_000,
},
},
- ResourceStats: ResourceStats{
- "cpu": {Nominal: 10_000, Lendable: 6_000, Usage: 4_000},
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 6_000,
},
},
},
@@ -1269,8 +1181,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) {
Usage: FlavorResourceQuantities{
"default": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- "cpu": {Nominal: 20_000, Lendable: 10_000, Usage: 5_000},
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 10_000,
},
}
return Snapshot{
@@ -1290,8 +1202,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) {
corev1.ResourceCPU: 6_000,
},
},
- ResourceStats: ResourceStats{
- "cpu": {Nominal: 10_000, Lendable: 4_000, Usage: 1_000},
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 4_000,
},
},
"lend-b": {
@@ -1309,8 +1221,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) {
corev1.ResourceCPU: 4_000,
},
},
- ResourceStats: ResourceStats{
- "cpu": {Nominal: 10_000, Lendable: 6_000, Usage: 4_000},
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 6_000,
},
},
},
@@ -1328,8 +1240,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) {
Usage: FlavorResourceQuantities{
"default": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- "cpu": {Nominal: 20_000, Lendable: 10_000, Usage: 1_000},
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 10_000,
},
}
return Snapshot{
@@ -1349,8 +1261,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) {
corev1.ResourceCPU: 6_000,
},
},
- ResourceStats: ResourceStats{
- "cpu": {Nominal: 10_000, Lendable: 4_000, Usage: 1_000},
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 4_000,
},
},
"lend-b": {
@@ -1368,8 +1280,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) {
corev1.ResourceCPU: 4_000,
},
},
- ResourceStats: ResourceStats{
- "cpu": {Nominal: 10_000, Lendable: 6_000},
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 6_000,
},
},
},
@@ -1387,8 +1299,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) {
Usage: FlavorResourceQuantities{
"default": {corev1.ResourceCPU: 0},
},
- ResourceStats: ResourceStats{
- "cpu": {Nominal: 20_000, Lendable: 10_000, Usage: 6_000},
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 10_000,
},
}
return Snapshot{
@@ -1408,8 +1320,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) {
corev1.ResourceCPU: 6_000,
},
},
- ResourceStats: ResourceStats{
- "cpu": {Nominal: 10_000, Lendable: 4_000, Usage: 6_000},
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 4_000,
},
},
"lend-b": {
@@ -1427,8 +1339,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) {
corev1.ResourceCPU: 4_000,
},
},
- ResourceStats: ResourceStats{
- "cpu": {Nominal: 10_000, Lendable: 6_000},
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 6_000,
},
},
},
@@ -1446,8 +1358,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) {
Usage: FlavorResourceQuantities{
"default": {corev1.ResourceCPU: 3_000},
},
- ResourceStats: ResourceStats{
- "cpu": {Nominal: 20_000, Lendable: 10_000, Usage: 9_000},
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 10_000,
},
}
return Snapshot{
@@ -1467,8 +1379,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) {
corev1.ResourceCPU: 6_000,
},
},
- ResourceStats: ResourceStats{
- "cpu": {Nominal: 10_000, Lendable: 4_000, Usage: 9_000},
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 4_000,
},
},
"lend-b": {
@@ -1486,8 +1398,8 @@ func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) {
corev1.ResourceCPU: 4_000,
},
},
- ResourceStats: ResourceStats{
- "cpu": {Nominal: 10_000, Lendable: 6_000},
+ Lendable: map[corev1.ResourceName]int64{
+ corev1.ResourceCPU: 6_000,
},
},
},
From 8b4e4b34573d3257ce5db15425fe1c9aed7b1eee Mon Sep 17 00:00:00 2001
From: Patryk Bundyra <73306396+PBundyra@users.noreply.github.com>
Date: Thu, 25 Apr 2024 17:54:18 +0200
Subject: [PATCH 31/49] Add AdmissionCheckStrategy documentation (#1996)
* Add AdmissionCheckStrategy documentation
* Add an AdmissionCheck section in the ClusterQueue's site
* Update site/content/en/docs/concepts/admission_check.md
Co-authored-by: Yuki Iwai
* Apply suggestions from code review
Co-authored-by: Yuki Iwai
* Apply suggestions from code review
Co-authored-by: Yuki Iwai
* Link ClusterQueue documentation, improve the Usage section
---------
Co-authored-by: Yuki Iwai
---
.../en/docs/concepts/admission_check.md | 44 ++++++++++++++++++-
.../content/en/docs/concepts/cluster_queue.md | 4 ++
site/static/examples/admin/minimal-cq.yaml | 11 +++++
3 files changed, 57 insertions(+), 2 deletions(-)
create mode 100644 site/static/examples/admin/minimal-cq.yaml
diff --git a/site/content/en/docs/concepts/admission_check.md b/site/content/en/docs/concepts/admission_check.md
index 23058cb88d..d6f1d48e92 100644
--- a/site/content/en/docs/concepts/admission_check.md
+++ b/site/content/en/docs/concepts/admission_check.md
@@ -38,11 +38,51 @@ spec:
name: prov-test-config
```
-### ClusterQueue admissionChecks
+### Usage
-Once defined, an AdmissionCheck can be referenced in the ClusterQueues' spec. All Workloads associated with the queue need to be evaluated by the AdmissionCheck's controller before being admitted.
+Once defined, an AdmissionCheck can be referenced in the [ClusterQueue's spec](/docs/concepts/cluster_queue). All Workloads associated with the queue need to be evaluated by the AdmissionCheck's controller before being admitted.
Similarly to `ResourceFlavors`, if an `AdmissionCheck` is not found or its controller has not marked it as `Active`, the ClusterQueue will be marked as Inactive.
+There are two ways of referencing AdmissionChecks in the ClusterQueue's spec:
+
+- `.spec.admissionChecks` - the list of AdmissionChecks that will be run for all Workloads submitted to the ClusterQueue
+- `.spec.admissionChecksStrategy` - wraps a list of `admissionCheckStrategyRules`, which gives you more flexibility. It allows you to either run an AdmissionCheck for all Workloads or to associate an AdmissionCheck
+with a specific ResourceFlavor. To specify the ResourceFlavors that an AdmissionCheck should run for, use the `admissionCheckStrategyRule.onFlavors` field; if you want to run the AdmissionCheck for all Workloads, simply leave the field empty.
+
+Only one of the above-mentioned fields can be specified at a time.
+
+See examples below:
+
+Using `.spec.admissionChecks`
+
+```yaml
+apiVersion: kueue.x-k8s.io/v1beta1
+kind: ClusterQueue
+metadata:
+ name: "cluster-queue"
+spec:
+<...>
+ admissionChecks:
+ - sample-prov
+```
+
+Using `.spec.admissionCheckStrategy`
+
+```yaml
+apiVersion: kueue.x-k8s.io/v1beta1
+kind: ClusterQueue
+metadata:
+ name: "cluster-queue"
+spec:
+<...>
+ admissionChecksStrategy:
+ admissionChecks:
+ - name: "sample-prov" # Name of the AdmissionCheck to be run
+ onFlavors: ["default-flavor"] # This AdmissionCheck will only run for Workloads that use default-flavor
+ - name: "sample-prov-2" # This AdmissionCheck will run for all Workloads regardless of a used ResourceFlavor
+```
+
+
### AdmissionCheckState
AdmissionCheckState is the way the state of an AdmissionCheck for a specific Workload is tracked.
diff --git a/site/content/en/docs/concepts/cluster_queue.md b/site/content/en/docs/concepts/cluster_queue.md
index 3679d3b09a..0b3e38a1ea 100644
--- a/site/content/en/docs/concepts/cluster_queue.md
+++ b/site/content/en/docs/concepts/cluster_queue.md
@@ -527,6 +527,10 @@ The `HoldAndDrain` will have a similar effect but, in addition, it will trigger
If set to `None` or `spec.stopPolicy` is removed, the ClusterQueue will return to normal admission behavior.
+## AdmissionChecks
+
+AdmissionChecks are a mechanism that allows Kueue to consider additional criteria before admitting a Workload. See [Admission Checks](/docs/concepts/admission_check#usage) for an example ClusterQueue configuration.
+
## What's next?
- Create [local queues](/docs/concepts/local_queue)
diff --git a/site/static/examples/admin/minimal-cq.yaml b/site/static/examples/admin/minimal-cq.yaml
new file mode 100644
index 0000000000..4718cc3887
--- /dev/null
+++ b/site/static/examples/admin/minimal-cq.yaml
@@ -0,0 +1,11 @@
+apiVersion: kueue.x-k8s.io/v1beta1
+kind: ClusterQueue
+metadata:
+ name: "cluster-queue-2"
+spec:
+ namespaceSelector: {} # match all.
+ admissionChecksStrategy:
+ admissionChecks:
+ - name: "sample-prov" # Name of the AdmissionCheck to be run
+ onFlavors: ["default-flavor"] # This AdmissionCheck will only run for Workloads that use default-flavor
+ - name: "sample-prov-2" # This AdmissionCheck will run for all Workloads regardless of a used ResourceFlavor
From ab2dac1db7c3f265a56ce0b0d8ff06678fca0b3d Mon Sep 17 00:00:00 2001
From: Patryk Bundyra <73306396+PBundyra@users.noreply.github.com>
Date: Thu, 25 Apr 2024 17:54:36 +0200
Subject: [PATCH 32/49] Add the ProvisioningRequest's classname annotation
(#2052)
* Add the ProvisioningRequest's classname annotation
* Update integration tests
---
.../admissionchecks/provisioning/constants.go | 7 ++++---
.../admissionchecks/provisioning/controller.go | 6 ++++--
.../admissionchecks/provisioning/controller_test.go | 11 +++++++----
.../provisioning/provisioning_test.go | 12 ++++++++----
4 files changed, 23 insertions(+), 13 deletions(-)
diff --git a/pkg/controller/admissionchecks/provisioning/constants.go b/pkg/controller/admissionchecks/provisioning/constants.go
index 6b421232a9..cb6f5051e9 100644
--- a/pkg/controller/admissionchecks/provisioning/constants.go
+++ b/pkg/controller/admissionchecks/provisioning/constants.go
@@ -17,9 +17,10 @@ limitations under the License.
package provisioning
const (
- ConfigKind = "ProvisioningRequestConfig"
- ControllerName = "kueue.x-k8s.io/provisioning-request"
- ConsumesAnnotationKey = "cluster-autoscaler.kubernetes.io/consume-provisioning-request"
+ ConfigKind = "ProvisioningRequestConfig"
+ ControllerName = "kueue.x-k8s.io/provisioning-request"
+ ConsumesAnnotationKey = "cluster-autoscaler.kubernetes.io/consume-provisioning-request"
+ ClassNameAnnotationKey = "cluster-autoscaler.kubernetes.io/provisioning-class-name"
CheckInactiveMessage = "the check is not active"
NoRequestNeeded = "the provisioning request is not needed"
diff --git a/pkg/controller/admissionchecks/provisioning/controller.go b/pkg/controller/admissionchecks/provisioning/controller.go
index d2a828c142..6ec7cc7165 100644
--- a/pkg/controller/admissionchecks/provisioning/controller.go
+++ b/pkg/controller/admissionchecks/provisioning/controller.go
@@ -573,8 +573,10 @@ func podSetUpdates(wl *kueue.Workload, pr *autoscaling.ProvisioningRequest) []ku
})
return slices.Map(pr.Spec.PodSets, func(ps *autoscaling.PodSet) kueue.PodSetUpdate {
return kueue.PodSetUpdate{
- Name: refMap[ps.PodTemplateRef.Name],
- Annotations: map[string]string{ConsumesAnnotationKey: pr.Name},
+ Name: refMap[ps.PodTemplateRef.Name],
+ Annotations: map[string]string{
+ ConsumesAnnotationKey: pr.Name,
+ ClassNameAnnotationKey: pr.Spec.ProvisioningClassName},
}
})
}
diff --git a/pkg/controller/admissionchecks/provisioning/controller_test.go b/pkg/controller/admissionchecks/provisioning/controller_test.go
index 1ceccd38b4..2600a86db8 100644
--- a/pkg/controller/admissionchecks/provisioning/controller_test.go
+++ b/pkg/controller/admissionchecks/provisioning/controller_test.go
@@ -547,12 +547,15 @@ func TestReconcile(t *testing.T) {
State: kueue.CheckStateReady,
PodSetUpdates: []kueue.PodSetUpdate{
{
- Name: "ps1",
- Annotations: map[string]string{"cluster-autoscaler.kubernetes.io/consume-provisioning-request": "wl-check1-1"},
+ Name: "ps1",
+ Annotations: map[string]string{
+ "cluster-autoscaler.kubernetes.io/consume-provisioning-request": "wl-check1-1",
+ "cluster-autoscaler.kubernetes.io/provisioning-class-name": "class1"},
},
{
- Name: "ps2",
- Annotations: map[string]string{"cluster-autoscaler.kubernetes.io/consume-provisioning-request": "wl-check1-1"},
+ Name: "ps2",
+ Annotations: map[string]string{"cluster-autoscaler.kubernetes.io/consume-provisioning-request": "wl-check1-1",
+ "cluster-autoscaler.kubernetes.io/provisioning-class-name": "class1"},
},
},
}, kueue.AdmissionCheckState{
diff --git a/test/integration/controller/admissionchecks/provisioning/provisioning_test.go b/test/integration/controller/admissionchecks/provisioning/provisioning_test.go
index 554ed4b055..34b6e11719 100644
--- a/test/integration/controller/admissionchecks/provisioning/provisioning_test.go
+++ b/test/integration/controller/admissionchecks/provisioning/provisioning_test.go
@@ -376,13 +376,15 @@ var _ = ginkgo.Describe("Provisioning", ginkgo.Ordered, ginkgo.ContinueOnFailure
{
Name: "ps1",
Annotations: map[string]string{
- provisioning.ConsumesAnnotationKey: provReqKey.Name,
+ provisioning.ConsumesAnnotationKey: provReqKey.Name,
+ provisioning.ClassNameAnnotationKey: prc.Spec.ProvisioningClassName,
},
},
{
Name: "ps2",
Annotations: map[string]string{
- provisioning.ConsumesAnnotationKey: provReqKey.Name,
+ provisioning.ConsumesAnnotationKey: provReqKey.Name,
+ provisioning.ClassNameAnnotationKey: prc.Spec.ProvisioningClassName,
},
},
}))
@@ -641,13 +643,15 @@ var _ = ginkgo.Describe("Provisioning", ginkgo.Ordered, ginkgo.ContinueOnFailure
{
Name: "ps1",
Annotations: map[string]string{
- provisioning.ConsumesAnnotationKey: provReqKey.Name,
+ provisioning.ConsumesAnnotationKey: provReqKey.Name,
+ provisioning.ClassNameAnnotationKey: prc.Spec.ProvisioningClassName,
},
},
{
Name: "ps2",
Annotations: map[string]string{
- provisioning.ConsumesAnnotationKey: provReqKey.Name,
+ provisioning.ConsumesAnnotationKey: provReqKey.Name,
+ provisioning.ClassNameAnnotationKey: prc.Spec.ProvisioningClassName,
},
},
}))
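A minimal sketch of the behavior this patch adds, with names simplified and the constants inlined: every `PodSetUpdate` built from a ProvisioningRequest now carries the class-name annotation alongside the existing consume annotation. This is not the controller code itself, just the resulting annotation shape.

```go
package main

import "fmt"

// Annotation keys as defined in the provisioning constants above.
const (
	consumesAnnotationKey  = "cluster-autoscaler.kubernetes.io/consume-provisioning-request"
	classNameAnnotationKey = "cluster-autoscaler.kubernetes.io/provisioning-class-name"
)

// podSetUpdate is a stripped-down stand-in for kueue.PodSetUpdate.
type podSetUpdate struct {
	Name        string
	Annotations map[string]string
}

// buildUpdate shows the annotation pair each pod set receives after this change.
func buildUpdate(podSetName, provReqName, className string) podSetUpdate {
	return podSetUpdate{
		Name: podSetName,
		Annotations: map[string]string{
			consumesAnnotationKey:  provReqName,
			classNameAnnotationKey: className,
		},
	}
}

func main() {
	fmt.Printf("%+v\n", buildUpdate("ps1", "wl-check1-1", "class1"))
}
```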
From 8d5eba2c748778bd3529373f5a047cf1c1083d18 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Irving=20Mondrag=C3=B3n?=
Date: Thu, 25 Apr 2024 20:11:33 +0200
Subject: [PATCH 33/49] Validate admission check applies to all flavors (#2047)
* Add admission check to validate that it applies to all flavors
* Rename variables and constants
* Rename variables and constants
* Update unit test
* Rename variables and constants
* Rename variables and constants
---
apis/kueue/v1beta1/admissioncheck_types.go | 3 ++
pkg/cache/admissioncheck.go | 1 +
pkg/cache/cache.go | 1 +
pkg/cache/clusterqueue.go | 32 +++++++++++++------
pkg/cache/clusterqueue_test.go | 22 +++++++++++++
.../multikueue/admissioncheck.go | 19 +++++++++--
.../multikueue/admissioncheck_test.go | 6 ++++
pkg/util/testing/wrappers.go | 16 ++++++++++
.../integration/multikueue/multikueue_test.go | 6 ++++
9 files changed, 94 insertions(+), 12 deletions(-)
diff --git a/apis/kueue/v1beta1/admissioncheck_types.go b/apis/kueue/v1beta1/admissioncheck_types.go
index 0d5268fc9d..d22878ca1c 100644
--- a/apis/kueue/v1beta1/admissioncheck_types.go
+++ b/apis/kueue/v1beta1/admissioncheck_types.go
@@ -104,6 +104,9 @@ const (
// Having multiple AdmissionChecks managed by the same controller where at least one has this condition
// set to true will cause the ClusterQueue to be marked as Inactive.
AdmissionChecksSingleInstanceInClusterQueue string = "SingleInstanceInClusterQueue"
+
+ // FlavorIndependentAdmissionCheck indicates if the AdmissionCheck cannot be applied at ResourceFlavor level.
+ FlavorIndependentAdmissionCheck string = "FlavorIndependent"
)
// +genclient
diff --git a/pkg/cache/admissioncheck.go b/pkg/cache/admissioncheck.go
index 0e8d105390..85bc49a6eb 100644
--- a/pkg/cache/admissioncheck.go
+++ b/pkg/cache/admissioncheck.go
@@ -20,4 +20,5 @@ type AdmissionCheck struct {
Active bool
Controller string
SingleInstanceInClusterQueue bool
+ FlavorIndependent bool
}
diff --git a/pkg/cache/cache.go b/pkg/cache/cache.go
index a6ef5912e0..30ebff183f 100644
--- a/pkg/cache/cache.go
+++ b/pkg/cache/cache.go
@@ -211,6 +211,7 @@ func (c *Cache) AddOrUpdateAdmissionCheck(ac *kueue.AdmissionCheck) sets.Set[str
Active: apimeta.IsStatusConditionTrue(ac.Status.Conditions, kueue.AdmissionCheckActive),
Controller: ac.Spec.ControllerName,
SingleInstanceInClusterQueue: apimeta.IsStatusConditionTrue(ac.Status.Conditions, kueue.AdmissionChecksSingleInstanceInClusterQueue),
+ FlavorIndependent: apimeta.IsStatusConditionTrue(ac.Status.Conditions, kueue.FlavorIndependentAdmissionCheck),
}
return c.updateClusterQueues()
diff --git a/pkg/cache/clusterqueue.go b/pkg/cache/clusterqueue.go
index abccb9f8e3..341465a7fa 100644
--- a/pkg/cache/clusterqueue.go
+++ b/pkg/cache/clusterqueue.go
@@ -72,13 +72,14 @@ type ClusterQueue struct {
AdmittedUsage FlavorResourceQuantities
// localQueues by (namespace/name).
- localQueues map[string]*queue
- podsReadyTracking bool
- hasMissingFlavors bool
- hasMissingOrInactiveAdmissionChecks bool
- hasMultipleSingleInstanceControllersChecks bool
- admittedWorkloadsCount int
- isStopped bool
+ localQueues map[string]*queue
+ podsReadyTracking bool
+ hasMissingFlavors bool
+ hasMissingOrInactiveAdmissionChecks bool
+ hasMultipleSingleInstanceControllersChecks bool
+ hasFlavorIndependentAdmissionCheckAppliedPerFlavor bool
+ admittedWorkloadsCount int
+ isStopped bool
}
// Cohort is a set of ClusterQueues that can borrow resources from each other.
@@ -309,7 +310,7 @@ func (c *ClusterQueue) UpdateRGByResource() {
func (c *ClusterQueue) updateQueueStatus() {
status := active
- if c.hasMissingFlavors || c.hasMissingOrInactiveAdmissionChecks || c.isStopped || c.hasMultipleSingleInstanceControllersChecks {
+ if c.hasMissingFlavors || c.hasMissingOrInactiveAdmissionChecks || c.isStopped || c.hasMultipleSingleInstanceControllersChecks || c.hasFlavorIndependentAdmissionCheckAppliedPerFlavor {
status = pending
}
if c.Status == terminating {
@@ -341,6 +342,10 @@ func (c *ClusterQueue) inactiveReason() (string, string) {
reasons = append(reasons, "MultipleSingleInstanceControllerChecks")
}
+ if c.hasFlavorIndependentAdmissionCheckAppliedPerFlavor {
+ reasons = append(reasons, "FlavorIndependentAdmissionCheckAppliedPerFlavor")
+ }
+
if len(reasons) == 0 {
return "Unknown", "Can't admit new workloads."
}
@@ -387,9 +392,10 @@ func (c *ClusterQueue) updateLabelKeys(flavors map[kueue.ResourceFlavorReference
// updateWithAdmissionChecks updates a ClusterQueue based on the passed AdmissionChecks set.
func (c *ClusterQueue) updateWithAdmissionChecks(checks map[string]AdmissionCheck) {
hasMissing := false
+ hasSpecificChecks := false
checksPerController := make(map[string]int, len(c.AdmissionChecks))
singleInstanceControllers := sets.New[string]()
- for acName := range c.AdmissionChecks {
+ for acName, flavors := range c.AdmissionChecks {
if ac, found := checks[acName]; !found {
hasMissing = true
} else {
@@ -400,6 +406,9 @@ func (c *ClusterQueue) updateWithAdmissionChecks(checks map[string]AdmissionChec
if ac.SingleInstanceInClusterQueue {
singleInstanceControllers.Insert(ac.Controller)
}
+ if ac.FlavorIndependent && flavors.Len() != 0 {
+ hasSpecificChecks = true
+ }
}
}
@@ -421,6 +430,11 @@ func (c *ClusterQueue) updateWithAdmissionChecks(checks map[string]AdmissionChec
update = true
}
+ if c.hasFlavorIndependentAdmissionCheckAppliedPerFlavor != hasSpecificChecks {
+ c.hasFlavorIndependentAdmissionCheckAppliedPerFlavor = hasSpecificChecks
+ update = true
+ }
+
if update {
c.updateQueueStatus()
}
diff --git a/pkg/cache/clusterqueue_test.go b/pkg/cache/clusterqueue_test.go
index 79666a99a9..427cfa4b25 100644
--- a/pkg/cache/clusterqueue_test.go
+++ b/pkg/cache/clusterqueue_test.go
@@ -474,6 +474,12 @@ func TestClusterQueueUpdateWithAdmissionCheck(t *testing.T) {
*utiltesting.MakeAdmissionCheckStrategyRule("check3").Obj()).
Obj()
+ cqWithACPerFlavor := utiltesting.MakeClusterQueue("cq3").
+ AdmissionCheckStrategy(
+ *utiltesting.MakeAdmissionCheckStrategyRule("check1", "flavor1", "flavor2", "flavor3").Obj(),
+ ).
+ Obj()
+
testcases := []struct {
name string
cq *kueue.ClusterQueue
@@ -646,6 +652,20 @@ func TestClusterQueueUpdateWithAdmissionCheck(t *testing.T) {
wantStatus: pending,
wantReason: "MultipleSingleInstanceControllerChecks",
},
+ {
+ name: "Active clusterQueue with a FlavorIndependent AC applied per ResourceFlavor",
+ cq: cqWithACPerFlavor,
+ cqStatus: pending,
+ admissionChecks: map[string]AdmissionCheck{
+ "check1": {
+ Active: true,
+ Controller: "controller1",
+ FlavorIndependent: true,
+ },
+ },
+ wantStatus: pending,
+ wantReason: "FlavorIndependentAdmissionCheckAppliedPerFlavor",
+ },
{
name: "Terminating clusterQueue updated with valid AC list",
cq: cqWithAC,
@@ -738,9 +758,11 @@ func TestClusterQueueUpdateWithAdmissionCheck(t *testing.T) {
if tc.cqStatus == active {
cq.hasMultipleSingleInstanceControllersChecks = false
cq.hasMissingOrInactiveAdmissionChecks = false
+ cq.hasFlavorIndependentAdmissionCheckAppliedPerFlavor = false
} else {
cq.hasMultipleSingleInstanceControllersChecks = true
cq.hasMissingOrInactiveAdmissionChecks = true
+ cq.hasFlavorIndependentAdmissionCheckAppliedPerFlavor = true
}
cq.updateWithAdmissionChecks(tc.admissionChecks)
diff --git a/pkg/controller/admissionchecks/multikueue/admissioncheck.go b/pkg/controller/admissionchecks/multikueue/admissioncheck.go
index c41745eb7d..b1e67906cc 100644
--- a/pkg/controller/admissionchecks/multikueue/admissioncheck.go
+++ b/pkg/controller/admissionchecks/multikueue/admissioncheck.go
@@ -39,9 +39,11 @@ import (
)
const (
- ControllerName = "kueue.x-k8s.io/multikueue"
- SingleInstanceReason = "MultiKueue"
- SingleInstanceMessage = "only one multikueue managed admission check can be used in one ClusterQueue"
+ ControllerName = "kueue.x-k8s.io/multikueue"
+ SingleInstanceReason = "MultiKueue"
+ SingleInstanceMessage = "only one multikueue managed admission check can be used in one ClusterQueue"
+ FlavorIndependentCheckReason = "MultiKueue"
+ FlavorIndependentCheckMessage = "admission check cannot be applied at ResourceFlavor level"
)
type multiKueueStoreHelper = admissioncheck.ConfigHelper[*kueuealpha.MultiKueueConfig, kueuealpha.MultiKueueConfig]
@@ -140,6 +142,17 @@ func (a *ACReconciler) Reconcile(ctx context.Context, req reconcile.Request) (re
needsUpdate = true
}
+ if !apimeta.IsStatusConditionTrue(ac.Status.Conditions, kueue.FlavorIndependentAdmissionCheck) {
+ apimeta.SetStatusCondition(&ac.Status.Conditions, metav1.Condition{
+ Type: kueue.FlavorIndependentAdmissionCheck,
+ Status: metav1.ConditionTrue,
+ Reason: FlavorIndependentCheckReason,
+ Message: FlavorIndependentCheckMessage,
+ ObservedGeneration: ac.Generation,
+ })
+ needsUpdate = true
+ }
+
if needsUpdate {
err := a.client.Status().Update(ctx, ac)
if err != nil {
diff --git a/pkg/controller/admissionchecks/multikueue/admissioncheck_test.go b/pkg/controller/admissionchecks/multikueue/admissioncheck_test.go
index 65cd958b38..204984c3a4 100644
--- a/pkg/controller/admissionchecks/multikueue/admissioncheck_test.go
+++ b/pkg/controller/admissionchecks/multikueue/admissioncheck_test.go
@@ -57,6 +57,7 @@ func TestReconcile(t *testing.T) {
ControllerName(ControllerName).
Parameters(kueuealpha.GroupVersion.Group, "MultiKueueConfig", "config1").
SingleInstanceInClusterQueue(true, SingleInstanceReason, SingleInstanceMessage, 1).
+ ApplyToAllFlavors(true, FlavorIndependentCheckReason, FlavorIndependentCheckMessage, 1).
Condition(metav1.Condition{
Type: kueue.AdmissionCheckActive,
Status: metav1.ConditionFalse,
@@ -97,6 +98,7 @@ func TestReconcile(t *testing.T) {
ControllerName(ControllerName).
Parameters(kueuealpha.GroupVersion.Group, "MultiKueueConfig", "config1").
SingleInstanceInClusterQueue(true, SingleInstanceReason, SingleInstanceMessage, 1).
+ ApplyToAllFlavors(true, FlavorIndependentCheckReason, FlavorIndependentCheckMessage, 1).
Condition(metav1.Condition{
Type: kueue.AdmissionCheckActive,
Status: metav1.ConditionFalse,
@@ -129,6 +131,7 @@ func TestReconcile(t *testing.T) {
ControllerName(ControllerName).
Parameters(kueuealpha.GroupVersion.Group, "MultiKueueConfig", "config1").
SingleInstanceInClusterQueue(true, SingleInstanceReason, SingleInstanceMessage, 1).
+ ApplyToAllFlavors(true, FlavorIndependentCheckReason, FlavorIndependentCheckMessage, 1).
Condition(metav1.Condition{
Type: kueue.AdmissionCheckActive,
Status: metav1.ConditionFalse,
@@ -164,6 +167,7 @@ func TestReconcile(t *testing.T) {
ControllerName(ControllerName).
Parameters(kueuealpha.GroupVersion.Group, "MultiKueueConfig", "config1").
SingleInstanceInClusterQueue(true, SingleInstanceReason, SingleInstanceMessage, 1).
+ ApplyToAllFlavors(true, FlavorIndependentCheckReason, FlavorIndependentCheckMessage, 1).
Condition(metav1.Condition{
Type: kueue.AdmissionCheckActive,
Status: metav1.ConditionFalse,
@@ -199,6 +203,7 @@ func TestReconcile(t *testing.T) {
ControllerName(ControllerName).
Parameters(kueuealpha.GroupVersion.Group, "MultiKueueConfig", "config1").
SingleInstanceInClusterQueue(true, SingleInstanceReason, SingleInstanceMessage, 1).
+ ApplyToAllFlavors(true, FlavorIndependentCheckReason, FlavorIndependentCheckMessage, 1).
Condition(metav1.Condition{
Type: kueue.AdmissionCheckActive,
Status: metav1.ConditionTrue,
@@ -231,6 +236,7 @@ func TestReconcile(t *testing.T) {
ControllerName(ControllerName).
Parameters(kueuealpha.GroupVersion.Group, "MultiKueueConfig", "config1").
SingleInstanceInClusterQueue(true, SingleInstanceReason, SingleInstanceMessage, 1).
+ ApplyToAllFlavors(true, FlavorIndependentCheckReason, FlavorIndependentCheckMessage, 1).
Condition(metav1.Condition{
Type: kueue.AdmissionCheckActive,
Status: metav1.ConditionTrue,
diff --git a/pkg/util/testing/wrappers.go b/pkg/util/testing/wrappers.go
index 036b0af2b2..6efa611e7e 100644
--- a/pkg/util/testing/wrappers.go
+++ b/pkg/util/testing/wrappers.go
@@ -918,6 +918,22 @@ func (ac *AdmissionCheckWrapper) SingleInstanceInClusterQueue(singleInstance boo
return ac
}
+func (ac *AdmissionCheckWrapper) ApplyToAllFlavors(applyToAllFlavors bool, reason, message string, observedGeneration int64) *AdmissionCheckWrapper {
+ cond := metav1.Condition{
+ Type: kueue.FlavorIndependentAdmissionCheck,
+ Status: metav1.ConditionTrue,
+ Reason: reason,
+ Message: message,
+ ObservedGeneration: observedGeneration,
+ }
+ if !applyToAllFlavors {
+ cond.Status = metav1.ConditionFalse
+ }
+
+ apimeta.SetStatusCondition(&ac.Status.Conditions, cond)
+ return ac
+}
+
func (ac *AdmissionCheckWrapper) Obj() *kueue.AdmissionCheck {
return &ac.AdmissionCheck
}
diff --git a/test/integration/multikueue/multikueue_test.go b/test/integration/multikueue/multikueue_test.go
index 6ac25042f1..980620202f 100644
--- a/test/integration/multikueue/multikueue_test.go
+++ b/test/integration/multikueue/multikueue_test.go
@@ -199,6 +199,12 @@ var _ = ginkgo.Describe("Multikueue", func() {
Reason: multikueue.SingleInstanceReason,
Message: multikueue.SingleInstanceMessage,
}, util.IgnoreConditionTimestampsAndObservedGeneration),
+ gomega.BeComparableTo(metav1.Condition{
+ Type: kueue.FlavorIndependentAdmissionCheck,
+ Status: metav1.ConditionTrue,
+ Reason: multikueue.FlavorIndependentCheckReason,
+ Message: multikueue.FlavorIndependentCheckMessage,
+ }, util.IgnoreConditionTimestampsAndObservedGeneration),
))
}, util.Timeout, util.Interval).Should(gomega.Succeed())
})
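A simplified sketch of the new validation rule, with the cache types reduced to plain maps: a ClusterQueue becomes inactive (reason `FlavorIndependentAdmissionCheckAppliedPerFlavor`) when an AdmissionCheck marked `FlavorIndependent` is restricted to specific ResourceFlavors in the strategy. The names below are illustrative, not the actual cache structs.

```go
package cacheexample

// admissionCheck carries only the fields relevant to this rule.
type admissionCheck struct {
	Active            bool
	FlavorIndependent bool
}

// hasMisappliedFlavorIndependentCheck reports whether any FlavorIndependent
// AdmissionCheck is scoped to a non-empty set of ResourceFlavors, which is
// what marks the ClusterQueue as pending/inactive in the cache.
func hasMisappliedFlavorIndependentCheck(checkFlavors map[string][]string, known map[string]admissionCheck) bool {
	for name, flavors := range checkFlavors {
		if ac, found := known[name]; found && ac.FlavorIndependent && len(flavors) != 0 {
			return true
		}
	}
	return false
}
```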
From 3d764d5086ea156ae11f3d4b8bf60eef9fff7e3a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Wo=C5=BAniak?=
Date: Thu, 25 Apr 2024 20:11:42 +0200
Subject: [PATCH 34/49] Improve logging of workload status (#2062)
---
pkg/controller/core/workload_controller.go | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/pkg/controller/core/workload_controller.go b/pkg/controller/core/workload_controller.go
index 49c784a184..28fe8cd4b2 100644
--- a/pkg/controller/core/workload_controller.go
+++ b/pkg/controller/core/workload_controller.go
@@ -60,9 +60,10 @@ import (
const (
// statuses for logging purposes
- pending = "pending"
- admitted = "admitted"
- finished = "finished"
+ pending = "pending"
+ quotaReserved = "quotaReserved"
+ admitted = "admitted"
+ finished = "finished"
)
var (
@@ -539,12 +540,12 @@ func (r *WorkloadReconciler) Update(e event.UpdateEvent) bool {
log.V(2).Info("Queue for updated workload didn't exist; ignoring for now")
}
- case prevStatus == pending && status == admitted:
+ case prevStatus == pending && (status == quotaReserved || status == admitted):
r.queues.DeleteWorkload(oldWl)
if !r.cache.AddOrUpdateWorkload(wlCopy) {
log.V(2).Info("ClusterQueue for workload didn't exist; ignored for now")
}
- case prevStatus == admitted && status == pending:
+ case (prevStatus == quotaReserved || prevStatus == admitted) && status == pending:
// trigger the move of associated inadmissibleWorkloads, if there are any.
r.queues.QueueAssociatedInadmissibleWorkloadsAfter(ctx, wl, func() {
// Delete the workload from cache while holding the queues lock
@@ -660,9 +661,12 @@ func workloadStatus(w *kueue.Workload) string {
if apimeta.IsStatusConditionTrue(w.Status.Conditions, kueue.WorkloadFinished) {
return finished
}
- if workload.HasQuotaReservation(w) {
+ if workload.IsAdmitted(w) {
return admitted
}
+ if workload.HasQuotaReservation(w) {
+ return quotaReserved
+ }
return pending
}
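For reference, a condensed version of the status precedence the logging now follows: a workload whose quota is reserved but whose Admitted condition is not yet true is reported as `quotaReserved` rather than `admitted`. The function below is a hand-written illustration, not the reconciler's code.

```go
package loggingexample

// statusFor mirrors the ordering of workloadStatus above: finished wins over
// admitted, admitted over quotaReserved, and everything else is pending.
func statusFor(finished, admitted, quotaReserved bool) string {
	switch {
	case finished:
		return "finished"
	case admitted:
		return "admitted"
	case quotaReserved:
		return "quotaReserved"
	default:
		return "pending"
	}
}
```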
From ce4be24ea11edc26fa207b9247ffd23e62bbd633 Mon Sep 17 00:00:00 2001
From: vladikkuzn <51460778+vladikkuzn@users.noreply.github.com>
Date: Thu, 25 Apr 2024 22:47:11 +0300
Subject: [PATCH 35/49] * Enable gci (#2069)
* Add make lint-fix for dev use
---
.golangci.yaml | 18 ++++++++++++++++++
Makefile | 4 ++++
2 files changed, 22 insertions(+)
diff --git a/.golangci.yaml b/.golangci.yaml
index 6c0fdd3289..227d26731f 100644
--- a/.golangci.yaml
+++ b/.golangci.yaml
@@ -19,6 +19,24 @@ linters-settings:
govet:
enable:
- nilness
+ gci:
+ # Section configuration to compare against.
+ # Section names are case-insensitive and may contain parameters in ().
+ # The default order of sections is `standard > default > custom > blank > dot > alias`,
+ # If `custom-order` is `true`, it follows the order of `sections` option.
+ # Default: ["standard", "default"]
+ sections:
+ - standard # Standard section: captures all standard packages.
+ - default # Default section: contains all imports that could not be matched to another section type.
+ - prefix(sigs.k8s.io/kueue) # Custom section: groups all imports with the specified Prefix.
+ # Skip generated files.
+ # Default: true
+ skip-generated: true
+ # Enable custom order of sections.
+ # If `true`, make the section order the same as the order of `sections`.
+ # Default: false
+ custom-order: true
+
# Settings for enabling and disabling linters
linters:
diff --git a/Makefile b/Makefile
index 691c5c011f..3b8ab0b7b0 100644
--- a/Makefile
+++ b/Makefile
@@ -271,6 +271,10 @@ run-scalability-in-cluster: envtest scalability-runner
ci-lint: golangci-lint
$(GOLANGCI_LINT) run --timeout 15m0s
+.PHONY: lint-fix
+lint-fix: golangci-lint
+ $(GOLANGCI_LINT) run --fix --timeout 15m0s
+
.PHONY: verify
verify: gomod-verify ci-lint fmt-verify shell-lint toc-verify manifests generate update-helm generate-apiref prepare-release-branch
git --no-pager diff --exit-code config/components apis charts/kueue/templates client-go site/
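With the `gci` sections configured above, imports in this repository are expected to be grouped as: standard library, then other modules, then `sigs.k8s.io/kueue` packages. A hedged example of the resulting layout (the functions exist only to use the imports):

```go
package example

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"

	kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
)

// defaultRequests and workloadKey are only here to make the three import
// sections (standard, default, prefix(sigs.k8s.io/kueue)) meaningful.
func defaultRequests() map[corev1.ResourceName]string {
	return map[corev1.ResourceName]string{corev1.ResourceCPU: "1"}
}

func workloadKey(wl *kueue.Workload) string {
	return fmt.Sprintf("%s/%s", wl.Namespace, wl.Name)
}
```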
From e9915ae16962c0a26b5fcaf60e658f3f3a6290ce Mon Sep 17 00:00:00 2001
From: Traian Schiau <55734665+trasc@users.noreply.github.com>
Date: Fri, 26 Apr 2024 10:37:08 +0300
Subject: [PATCH 36/49] [Scalability] rename to performance/scheduler (#2065)
* Move `test/performance` to `test/performance/e2e`
* Move `test/scalability` to `test/performance/scheduler`.
* Rename scalability make targets and artifacts dirs.
* Add temporary test-scalability alias.
* Review
---
Makefile | 38 ++++++++++---------
test/performance/{ => e2e}/jobs/.env.example | 0
test/performance/{ => e2e}/jobs/.gitignore | 0
test/performance/{ => e2e}/jobs/README.md | 2 +-
test/performance/{ => e2e}/jobs/config.yaml | 0
test/performance/{ => e2e}/jobs/job.yaml | 0
.../{ => e2e}/jobs/local-queue.yaml | 0
.../jobs/prerequisites/cluster-queue.template | 0
.../jobs/prerequisites/resource-flavor.yaml | 0
test/performance/{ => e2e}/jobs/run-test.sh | 0
.../{ => e2e}/podgroups/.gitignore | 0
.../performance/{ => e2e}/podgroups/README.md | 2 +-
.../{ => e2e}/podgroups/manifest.diff | 0
.../{ => e2e}/podgroups/run-test.sh | 0
.../podgroups/templates/cluster-queue.yaml | 0
.../podgroups/templates/local-queue.yaml | 0
.../{ => e2e}/podgroups/templates/pod.yaml | 0
.../podgroups/templates/resource-flavor.yaml | 0
.../{ => e2e}/podgroups/test-config.yaml | 0
.../scheduler}/README.md | 32 ++++++++--------
.../scheduler}/checker/checker_test.go | 4 +-
.../scheduler}/default_generator_config.yaml | 0
.../scheduler}/default_rangespec.yaml | 0
.../scheduler}/minimalkueue/main.go | 0
.../runner/controller/controller.go | 4 +-
.../scheduler}/runner/generator/generator.go | 6 +--
.../runner/generator/generator_test.go | 0
.../scheduler}/runner/main.go | 10 ++---
.../scheduler}/runner/recorder/recorder.go | 2 +-
.../scheduler}/runner/scraper/scraper.go | 0
.../scheduler}/runner/stats/stats.go | 0
31 files changed, 52 insertions(+), 48 deletions(-)
rename test/performance/{ => e2e}/jobs/.env.example (100%)
rename test/performance/{ => e2e}/jobs/.gitignore (100%)
rename test/performance/{ => e2e}/jobs/README.md (98%)
rename test/performance/{ => e2e}/jobs/config.yaml (100%)
rename test/performance/{ => e2e}/jobs/job.yaml (100%)
rename test/performance/{ => e2e}/jobs/local-queue.yaml (100%)
rename test/performance/{ => e2e}/jobs/prerequisites/cluster-queue.template (100%)
rename test/performance/{ => e2e}/jobs/prerequisites/resource-flavor.yaml (100%)
rename test/performance/{ => e2e}/jobs/run-test.sh (100%)
rename test/performance/{ => e2e}/podgroups/.gitignore (100%)
rename test/performance/{ => e2e}/podgroups/README.md (97%)
rename test/performance/{ => e2e}/podgroups/manifest.diff (100%)
rename test/performance/{ => e2e}/podgroups/run-test.sh (100%)
rename test/performance/{ => e2e}/podgroups/templates/cluster-queue.yaml (100%)
rename test/performance/{ => e2e}/podgroups/templates/local-queue.yaml (100%)
rename test/performance/{ => e2e}/podgroups/templates/pod.yaml (100%)
rename test/performance/{ => e2e}/podgroups/templates/resource-flavor.yaml (100%)
rename test/performance/{ => e2e}/podgroups/test-config.yaml (100%)
rename test/{scalability => performance/scheduler}/README.md (61%)
rename test/{scalability => performance/scheduler}/checker/checker_test.go (96%)
rename test/{scalability => performance/scheduler}/default_generator_config.yaml (100%)
rename test/{scalability => performance/scheduler}/default_rangespec.yaml (100%)
rename test/{scalability => performance/scheduler}/minimalkueue/main.go (100%)
rename test/{scalability => performance/scheduler}/runner/controller/controller.go (97%)
rename test/{scalability => performance/scheduler}/runner/generator/generator.go (97%)
rename test/{scalability => performance/scheduler}/runner/generator/generator_test.go (100%)
rename test/{scalability => performance/scheduler}/runner/main.go (97%)
rename test/{scalability => performance/scheduler}/runner/recorder/recorder.go (99%)
rename test/{scalability => performance/scheduler}/runner/scraper/scraper.go (100%)
rename test/{scalability => performance/scheduler}/runner/stats/stats.go (100%)
diff --git a/Makefile b/Makefile
index 3b8ab0b7b0..5e07776412 100644
--- a/Makefile
+++ b/Makefile
@@ -213,14 +213,14 @@ run-test-multikueue-e2e-%: FORCE
@echo Running multikueue e2e for k8s ${K8S_VERSION}
E2E_KIND_VERSION="kindest/node:v$(K8S_VERSION)" KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) CREATE_KIND_CLUSTER=$(CREATE_KIND_CLUSTER) ARTIFACTS="$(ARTIFACTS)/$@" IMAGE_TAG=$(IMAGE_TAG) GINKGO_ARGS="$(GINKGO_ARGS)" JOBSET_VERSION=$(JOBSET_VERSION) ./hack/multikueue-e2e-test.sh
-SCALABILITY_RUNNER := $(ARTIFACTS)/scalability-runner
-.PHONY: scalability-runner
-scalability-runner:
- $(GO_BUILD_ENV) $(GO_CMD) build -ldflags="$(LD_FLAGS)" -o $(SCALABILITY_RUNNER) test/scalability/runner/main.go
+SCALABILITY_RUNNER := $(ARTIFACTS)/performance-scheduler-runner
+.PHONY: performance-scheduler-runner
+performance-scheduler-runner:
+ $(GO_BUILD_ENV) $(GO_CMD) build -ldflags="$(LD_FLAGS)" -o $(SCALABILITY_RUNNER) test/performance/scheduler/runner/main.go
.PHONY: minimalkueue
minimalkueue:
- $(GO_BUILD_ENV) $(GO_CMD) build -ldflags="$(LD_FLAGS)" -o $(ARTIFACTS)/minimalkueue test/scalability/minimalkueue/main.go
+ $(GO_BUILD_ENV) $(GO_CMD) build -ldflags="$(LD_FLAGS)" -o $(ARTIFACTS)/minimalkueue test/performance/scheduler/minimalkueue/main.go
ifdef SCALABILITY_CPU_PROFILE
SCALABILITY_EXTRA_ARGS += --withCPUProfile=true
@@ -238,11 +238,11 @@ ifdef SCALABILITY_SCRAPE_URL
SCALABILITY_SCRAPE_ARGS += --metricsScrapeURL=$(SCALABILITY_SCRAPE_URL)
endif
-SCALABILITY_GENERATOR_CONFIG ?= $(PROJECT_DIR)/test/scalability/default_generator_config.yaml
+SCALABILITY_GENERATOR_CONFIG ?= $(PROJECT_DIR)/test/performance/scheduler/default_generator_config.yaml
-SCALABILITY_RUN_DIR := $(ARTIFACTS)/run-scalability
-.PHONY: run-scalability
-run-scalability: envtest scalability-runner minimalkueue
+SCALABILITY_RUN_DIR := $(ARTIFACTS)/run-performance-scheduler
+.PHONY: run-performance-scheduler
+run-performance-scheduler: envtest performance-scheduler-runner minimalkueue
mkdir -p $(SCALABILITY_RUN_DIR)
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" \
$(SCALABILITY_RUNNER) \
@@ -251,19 +251,23 @@ run-scalability: envtest scalability-runner minimalkueue
--generatorConfig=$(SCALABILITY_GENERATOR_CONFIG) \
--minimalKueue=$(ARTIFACTS)/minimalkueue $(SCALABILITY_EXTRA_ARGS) $(SCALABILITY_SCRAPE_ARGS)
-.PHONY: test-scalability
-test-scalability: gotestsum run-scalability
- $(GOTESTSUM) --junitfile $(ARTIFACTS)/junit.xml -- $(GO_TEST_FLAGS) ./test/scalability/checker \
+.PHONY: test-performance-scheduler
+test-performance-scheduler: gotestsum run-performance-scheduler
+ $(GOTESTSUM) --junitfile $(ARTIFACTS)/junit.xml -- $(GO_TEST_FLAGS) ./test/performance/scheduler/checker \
--summary=$(SCALABILITY_RUN_DIR)/summary.yaml \
--cmdStats=$(SCALABILITY_RUN_DIR)/minimalkueue.stats.yaml \
- --range=$(PROJECT_DIR)/test/scalability/default_rangespec.yaml
+ --range=$(PROJECT_DIR)/test/performance/scheduler/default_rangespec.yaml
+
+# drop this once it is no longer used by CI
+.PHONY: test-scalability
+test-scalability: test-performance-scheduler
-.PHONY: run-scalability-in-cluster
-run-scalability-in-cluster: envtest scalability-runner
- mkdir -p $(ARTIFACTS)/run-scalability-in-cluster
+.PHONY: run-performance-scheduler-in-cluster
+run-performance-scheduler-in-cluster: envtest performance-scheduler-runner
+ mkdir -p $(ARTIFACTS)/run-performance-scheduler-in-cluster
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" \
$(SCALABILITY_RUNNER) \
- --o $(ARTIFACTS)/run-scalability-in-cluster \
+ --o $(ARTIFACTS)/run-performance-scheduler-in-cluster \
--generatorConfig=$(SCALABILITY_GENERATOR_CONFIG) \
--qps=1000 --burst=2000 --timeout=15m $(SCALABILITY_SCRAPE_ARGS)
diff --git a/test/performance/jobs/.env.example b/test/performance/e2e/jobs/.env.example
similarity index 100%
rename from test/performance/jobs/.env.example
rename to test/performance/e2e/jobs/.env.example
diff --git a/test/performance/jobs/.gitignore b/test/performance/e2e/jobs/.gitignore
similarity index 100%
rename from test/performance/jobs/.gitignore
rename to test/performance/e2e/jobs/.gitignore
diff --git a/test/performance/jobs/README.md b/test/performance/e2e/jobs/README.md
similarity index 98%
rename from test/performance/jobs/README.md
rename to test/performance/e2e/jobs/README.md
index 6fa1581041..37da22f0f1 100644
--- a/test/performance/jobs/README.md
+++ b/test/performance/e2e/jobs/README.md
@@ -1,4 +1,4 @@
-# Kueue Performance Testing
+# Kueue e2e Performance Testing
## Measurements
diff --git a/test/performance/jobs/config.yaml b/test/performance/e2e/jobs/config.yaml
similarity index 100%
rename from test/performance/jobs/config.yaml
rename to test/performance/e2e/jobs/config.yaml
diff --git a/test/performance/jobs/job.yaml b/test/performance/e2e/jobs/job.yaml
similarity index 100%
rename from test/performance/jobs/job.yaml
rename to test/performance/e2e/jobs/job.yaml
diff --git a/test/performance/jobs/local-queue.yaml b/test/performance/e2e/jobs/local-queue.yaml
similarity index 100%
rename from test/performance/jobs/local-queue.yaml
rename to test/performance/e2e/jobs/local-queue.yaml
diff --git a/test/performance/jobs/prerequisites/cluster-queue.template b/test/performance/e2e/jobs/prerequisites/cluster-queue.template
similarity index 100%
rename from test/performance/jobs/prerequisites/cluster-queue.template
rename to test/performance/e2e/jobs/prerequisites/cluster-queue.template
diff --git a/test/performance/jobs/prerequisites/resource-flavor.yaml b/test/performance/e2e/jobs/prerequisites/resource-flavor.yaml
similarity index 100%
rename from test/performance/jobs/prerequisites/resource-flavor.yaml
rename to test/performance/e2e/jobs/prerequisites/resource-flavor.yaml
diff --git a/test/performance/jobs/run-test.sh b/test/performance/e2e/jobs/run-test.sh
similarity index 100%
rename from test/performance/jobs/run-test.sh
rename to test/performance/e2e/jobs/run-test.sh
diff --git a/test/performance/podgroups/.gitignore b/test/performance/e2e/podgroups/.gitignore
similarity index 100%
rename from test/performance/podgroups/.gitignore
rename to test/performance/e2e/podgroups/.gitignore
diff --git a/test/performance/podgroups/README.md b/test/performance/e2e/podgroups/README.md
similarity index 97%
rename from test/performance/podgroups/README.md
rename to test/performance/e2e/podgroups/README.md
index 9bd75d3787..6831196fb7 100644
--- a/test/performance/podgroups/README.md
+++ b/test/performance/e2e/podgroups/README.md
@@ -1,4 +1,4 @@
-# Kueue Pod Integration Performance Testing
+# Kueue Pod Integration e2e Performance Testing
## Introduction
A minimal setup for performance testing Plain Pods integration using
clusterloader2.
diff --git a/test/performance/podgroups/manifest.diff b/test/performance/e2e/podgroups/manifest.diff
similarity index 100%
rename from test/performance/podgroups/manifest.diff
rename to test/performance/e2e/podgroups/manifest.diff
diff --git a/test/performance/podgroups/run-test.sh b/test/performance/e2e/podgroups/run-test.sh
similarity index 100%
rename from test/performance/podgroups/run-test.sh
rename to test/performance/e2e/podgroups/run-test.sh
diff --git a/test/performance/podgroups/templates/cluster-queue.yaml b/test/performance/e2e/podgroups/templates/cluster-queue.yaml
similarity index 100%
rename from test/performance/podgroups/templates/cluster-queue.yaml
rename to test/performance/e2e/podgroups/templates/cluster-queue.yaml
diff --git a/test/performance/podgroups/templates/local-queue.yaml b/test/performance/e2e/podgroups/templates/local-queue.yaml
similarity index 100%
rename from test/performance/podgroups/templates/local-queue.yaml
rename to test/performance/e2e/podgroups/templates/local-queue.yaml
diff --git a/test/performance/podgroups/templates/pod.yaml b/test/performance/e2e/podgroups/templates/pod.yaml
similarity index 100%
rename from test/performance/podgroups/templates/pod.yaml
rename to test/performance/e2e/podgroups/templates/pod.yaml
diff --git a/test/performance/podgroups/templates/resource-flavor.yaml b/test/performance/e2e/podgroups/templates/resource-flavor.yaml
similarity index 100%
rename from test/performance/podgroups/templates/resource-flavor.yaml
rename to test/performance/e2e/podgroups/templates/resource-flavor.yaml
diff --git a/test/performance/podgroups/test-config.yaml b/test/performance/e2e/podgroups/test-config.yaml
similarity index 100%
rename from test/performance/podgroups/test-config.yaml
rename to test/performance/e2e/podgroups/test-config.yaml
diff --git a/test/scalability/README.md b/test/performance/scheduler/README.md
similarity index 61%
rename from test/scalability/README.md
rename to test/performance/scheduler/README.md
index 817d9a167e..7daef3844f 100644
--- a/test/scalability/README.md
+++ b/test/performance/scheduler/README.md
@@ -23,49 +23,49 @@ It is designed to offer the Kueue scheduling capabilities without any additional
## Checker
-Checks the results of a scalability against a set of expected value defined as [default_rangespec](./default_rangespec.yaml).
+Checks the results of a performance-scheduler run against a set of expected values defined in [default_rangespec](./default_rangespec.yaml).
# Usage
## Run in an existing cluster
```bash
-make run-scalability-in-cluster
+make run-performance-scheduler-in-cluster
```
-Will run a scalability scenario against an existing cluster (connectable by the host's default kubeconfig), and store the resulting artifacts are stored in `$(PROJECT_DIR)/bin/run-scalability-in-cluster`.
+Will run a performance-scheduler scenario against an existing cluster (connectable by the host's default kubeconfig) and store the resulting artifacts in `$(PROJECT_DIR)/bin/run-performance-scheduler-in-cluster`.
-The generation config to be used can be set in `SCALABILITY_GENERATOR_CONFIG` by default using `$(PROJECT_DIR)/test/scalability/default_generator_config.yaml`
+The generation config to be used can be set in `SCALABILITY_GENERATOR_CONFIG`; by default, `$(PROJECT_DIR)/test/performance/scheduler/default_generator_config.yaml` is used.
-Setting `SCALABILITY_SCRAPE_INTERVAL` to an interval value and `SCALABILITY_SCRAPE_URL` to an URL exposing kueue's metrics will cause the scalability runner to scrape that URL every interval and store the results in `$(PROJECT_DIR)/bin/run-scalability-in-cluster/metricsDump.tgz`.
+Setting `SCALABILITY_SCRAPE_INTERVAL` to an interval value and `SCALABILITY_SCRAPE_URL` to a URL exposing kueue's metrics will cause the scalability runner to scrape that URL every interval and store the results in `$(PROJECT_DIR)/bin/run-performance-scheduler-in-cluster/metricsDump.tgz`.
Check [installation guide](https://kueue.sigs.k8s.io/docs/installation) for cluster and [observability](https://kueue.sigs.k8s.io/docs/installation/#add-metrics-scraping-for-prometheus-operator).
## Run with minimalkueue
```bash
-make run-scalability
+make run-performance-scheduler
```
-Will run a scalability scenario against an [envtest](https://book.kubebuilder.io/reference/envtest.html) environment
+Will run a performance-scheduler scenario against an [envtest](https://book.kubebuilder.io/reference/envtest.html) environment
and an instance of minimalkueue.
-The resulting artifacts are stored in `$(PROJECT_DIR)/bin/run-scalability`.
+The resulting artifacts are stored in `$(PROJECT_DIR)/bin/run-performance-scheduler`.
-The generation config to be used can be set in `SCALABILITY_GENERATOR_CONFIG` by default using `$(PROJECT_DIR)/test/scalability/default_generator_config.yaml`
+The generation config to be used can be set in `SCALABILITY_GENERATOR_CONFIG`; by default, `$(PROJECT_DIR)/test/performance/scheduler/default_generator_config.yaml` is used.
-Setting `SCALABILITY_CPU_PROFILE=1` will generate a cpuprofile of minimalkueue in `$(PROJECT_DIR)/bin/run-scalability/minimalkueue.cpu.prof`
+Setting `SCALABILITY_CPU_PROFILE=1` will generate a cpuprofile of minimalkueue in `$(PROJECT_DIR)/bin/run-performance-scheduler/minimalkueue.cpu.prof`
-Setting `SCALABILITY_KUEUE_LOGS=1` will save the logs of minimalkueue in `$(PROJECT_DIR)/bin/run-scalability/minimalkueue.out.log` and `$(PROJECT_DIR)/bin/run-scalability/minimalkueue.err.log`
+Setting `SCALABILITY_KUEUE_LOGS=1` will save the logs of minimalkueue in `$(PROJECT_DIR)/bin/run-performance-scheduler/minimalkueue.out.log` and `$(PROJECT_DIR)/bin/run-performance-scheduler/minimalkueue.err.log`
-Setting `SCALABILITY_SCRAPE_INTERVAL` to an interval value (e.g. `1s`) will expose the metrics of `minimalkueue` and have them collected by the scalability runner in `$(PROJECT_DIR)/bin/run-scalability/metricsDump.tgz` every interval.
+Setting `SCALABILITY_SCRAPE_INTERVAL` to an interval value (e.g. `1s`) will expose the metrics of `minimalkueue` and have them collected by the scalability runner in `$(PROJECT_DIR)/bin/run-performance-scheduler/metricsDump.tgz` every interval.
-## Run scalability test
+## Run performance-scheduler test
```bash
-make test-scalability
+make test-performance-scheduler
```
-Runs the scalability with minimalkueue and checks the results against `$(PROJECT_DIR)/test/scalability/default_rangespec.yaml`
+Runs the performance-scheduler with minimalkueue and checks the results against `$(PROJECT_DIR)/test/performance/scheduler/default_rangespec.yaml`
## Scrape result
@@ -75,7 +75,7 @@ If an instance of [VictoriaMetrics](https://docs.victoriametrics.com/) listening
```bash
TMPDIR=$(mktemp -d)
- tar -xf ./bin/run-scalability/metricsDump.tgz -C $TMPDIR
+ tar -xf ./bin/run-performance-scheduler/metricsDump.tgz -C $TMPDIR
for file in ${TMPDIR}/*.prometheus; do timestamp=$(basename "$file" .prometheus); curl -vX POST -T "$file" http://localhost:8428/api/v1/import/prometheus?timestamp="$timestamp"; done
rm -r $TMPDIR
diff --git a/test/scalability/checker/checker_test.go b/test/performance/scheduler/checker/checker_test.go
similarity index 96%
rename from test/scalability/checker/checker_test.go
rename to test/performance/scheduler/checker/checker_test.go
index 09a0d40d91..86feffc9f3 100644
--- a/test/scalability/checker/checker_test.go
+++ b/test/performance/scheduler/checker/checker_test.go
@@ -23,8 +23,8 @@ import (
"sigs.k8s.io/yaml"
- "sigs.k8s.io/kueue/test/scalability/runner/recorder"
- "sigs.k8s.io/kueue/test/scalability/runner/stats"
+ "sigs.k8s.io/kueue/test/performance/scheduler/runner/recorder"
+ "sigs.k8s.io/kueue/test/performance/scheduler/runner/stats"
)
var (
diff --git a/test/scalability/default_generator_config.yaml b/test/performance/scheduler/default_generator_config.yaml
similarity index 100%
rename from test/scalability/default_generator_config.yaml
rename to test/performance/scheduler/default_generator_config.yaml
diff --git a/test/scalability/default_rangespec.yaml b/test/performance/scheduler/default_rangespec.yaml
similarity index 100%
rename from test/scalability/default_rangespec.yaml
rename to test/performance/scheduler/default_rangespec.yaml
diff --git a/test/scalability/minimalkueue/main.go b/test/performance/scheduler/minimalkueue/main.go
similarity index 100%
rename from test/scalability/minimalkueue/main.go
rename to test/performance/scheduler/minimalkueue/main.go
diff --git a/test/scalability/runner/controller/controller.go b/test/performance/scheduler/runner/controller/controller.go
similarity index 97%
rename from test/scalability/runner/controller/controller.go
rename to test/performance/scheduler/runner/controller/controller.go
index 3e6cab6ca2..10b1492440 100644
--- a/test/scalability/runner/controller/controller.go
+++ b/test/performance/scheduler/runner/controller/controller.go
@@ -38,8 +38,8 @@ import (
kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
"sigs.k8s.io/kueue/pkg/constants"
"sigs.k8s.io/kueue/pkg/workload"
- "sigs.k8s.io/kueue/test/scalability/runner/generator"
- "sigs.k8s.io/kueue/test/scalability/runner/recorder"
+ "sigs.k8s.io/kueue/test/performance/scheduler/runner/generator"
+ "sigs.k8s.io/kueue/test/performance/scheduler/runner/recorder"
)
type reconciler struct {
diff --git a/test/scalability/runner/generator/generator.go b/test/performance/scheduler/runner/generator/generator.go
similarity index 97%
rename from test/scalability/runner/generator/generator.go
rename to test/performance/scheduler/runner/generator/generator.go
index b0914ee5e4..2450eae89b 100644
--- a/test/scalability/runner/generator/generator.go
+++ b/test/performance/scheduler/runner/generator/generator.go
@@ -35,9 +35,9 @@ import (
const (
resourceFlavorName = "rf"
- RunningTimeLabel = "kueue.x-k8s.io/scalability-running-ms"
- ClassLabel = "kueue.x-k8s.io/scalability-class"
- CleanupLabel = "kueue.x-k8s.io/scalability-cleanup"
+ RunningTimeLabel = "kueue.x-k8s.io/performance-scheduler-running-ms"
+ ClassLabel = "kueue.x-k8s.io/performance-scheduler-class"
+ CleanupLabel = "kueue.x-k8s.io/performance-scheduler-cleanup"
)
type WorkloadTemplate struct {
diff --git a/test/scalability/runner/generator/generator_test.go b/test/performance/scheduler/runner/generator/generator_test.go
similarity index 100%
rename from test/scalability/runner/generator/generator_test.go
rename to test/performance/scheduler/runner/generator/generator_test.go
diff --git a/test/scalability/runner/main.go b/test/performance/scheduler/runner/main.go
similarity index 97%
rename from test/scalability/runner/main.go
rename to test/performance/scheduler/runner/main.go
index 08fcf6c7dc..608eec6f91 100644
--- a/test/scalability/runner/main.go
+++ b/test/performance/scheduler/runner/main.go
@@ -48,11 +48,11 @@ import (
kueuealpha "sigs.k8s.io/kueue/apis/kueue/v1alpha1"
kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
utiltesting "sigs.k8s.io/kueue/pkg/util/testing"
- "sigs.k8s.io/kueue/test/scalability/runner/controller"
- "sigs.k8s.io/kueue/test/scalability/runner/generator"
- "sigs.k8s.io/kueue/test/scalability/runner/recorder"
- "sigs.k8s.io/kueue/test/scalability/runner/scraper"
- "sigs.k8s.io/kueue/test/scalability/runner/stats"
+ "sigs.k8s.io/kueue/test/performance/scheduler/runner/controller"
+ "sigs.k8s.io/kueue/test/performance/scheduler/runner/generator"
+ "sigs.k8s.io/kueue/test/performance/scheduler/runner/recorder"
+ "sigs.k8s.io/kueue/test/performance/scheduler/runner/scraper"
+ "sigs.k8s.io/kueue/test/performance/scheduler/runner/stats"
)
var (
diff --git a/test/scalability/runner/recorder/recorder.go b/test/performance/scheduler/runner/recorder/recorder.go
similarity index 99%
rename from test/scalability/runner/recorder/recorder.go
rename to test/performance/scheduler/runner/recorder/recorder.go
index 065addc4ac..bbc0d41cda 100644
--- a/test/scalability/runner/recorder/recorder.go
+++ b/test/performance/scheduler/runner/recorder/recorder.go
@@ -29,7 +29,7 @@ import (
"sigs.k8s.io/yaml"
kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
- "sigs.k8s.io/kueue/test/scalability/runner/generator"
+ "sigs.k8s.io/kueue/test/performance/scheduler/runner/generator"
)
type CQEvent struct {
diff --git a/test/scalability/runner/scraper/scraper.go b/test/performance/scheduler/runner/scraper/scraper.go
similarity index 100%
rename from test/scalability/runner/scraper/scraper.go
rename to test/performance/scheduler/runner/scraper/scraper.go
diff --git a/test/scalability/runner/stats/stats.go b/test/performance/scheduler/runner/stats/stats.go
similarity index 100%
rename from test/scalability/runner/stats/stats.go
rename to test/performance/scheduler/runner/stats/stats.go
From 944eea2f863a30c5077d8935d7489a42e8000ffa Mon Sep 17 00:00:00 2001
From: Tomas Tormo
Date: Fri, 26 Apr 2024 09:56:24 +0200
Subject: [PATCH 37/49] Conditions creation code cleanup (#2071)
---
pkg/workload/workload.go | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/pkg/workload/workload.go b/pkg/workload/workload.go
index cfce1ea7c8..ea93fcbf92 100644
--- a/pkg/workload/workload.go
+++ b/pkg/workload/workload.go
@@ -365,7 +365,6 @@ func UnsetQuotaReservationWithCondition(wl *kueue.Workload, reason, message stri
condition := metav1.Condition{
Type: kueue.WorkloadQuotaReserved,
Status: metav1.ConditionFalse,
- LastTransitionTime: metav1.Now(),
Reason: reason,
Message: api.TruncateConditionMessage(message),
ObservedGeneration: wl.Generation,
@@ -429,12 +428,12 @@ func BaseSSAWorkload(w *kueue.Workload) *kueue.Workload {
// The WorkloadAdmitted and WorkloadEvicted are added or updated if necessary.
func SetQuotaReservation(w *kueue.Workload, admission *kueue.Admission) {
w.Status.Admission = admission
+ message := fmt.Sprintf("Quota reserved in ClusterQueue %s", w.Status.Admission.ClusterQueue)
admittedCond := metav1.Condition{
Type: kueue.WorkloadQuotaReserved,
Status: metav1.ConditionTrue,
- LastTransitionTime: metav1.Now(),
Reason: "QuotaReserved",
- Message: fmt.Sprintf("Quota reserved in ClusterQueue %s", w.Status.Admission.ClusterQueue),
+ Message: api.TruncateConditionMessage(message),
ObservedGeneration: w.Generation,
}
apimeta.SetStatusCondition(&w.Status.Conditions, admittedCond)
@@ -443,14 +442,14 @@ func SetQuotaReservation(w *kueue.Workload, admission *kueue.Admission) {
if evictedCond := apimeta.FindStatusCondition(w.Status.Conditions, kueue.WorkloadEvicted); evictedCond != nil {
evictedCond.Status = metav1.ConditionFalse
evictedCond.Reason = "QuotaReserved"
- evictedCond.Message = "Previously: " + evictedCond.Message
+ evictedCond.Message = api.TruncateConditionMessage("Previously: " + evictedCond.Message)
evictedCond.LastTransitionTime = metav1.Now()
}
// reset Preempted condition if present.
if preemptedCond := apimeta.FindStatusCondition(w.Status.Conditions, kueue.WorkloadPreempted); preemptedCond != nil {
preemptedCond.Status = metav1.ConditionFalse
preemptedCond.Reason = "QuotaReserved"
- preemptedCond.Message = "Previously: " + preemptedCond.Message
+ preemptedCond.Message = api.TruncateConditionMessage("Previously: " + preemptedCond.Message)
preemptedCond.LastTransitionTime = metav1.Now()
}
}
@@ -460,7 +459,7 @@ func SetPreemptedCondition(w *kueue.Workload, reason string, message string) {
Type: kueue.WorkloadPreempted,
Status: metav1.ConditionTrue,
Reason: reason,
- Message: message,
+ Message: api.TruncateConditionMessage(message),
}
apimeta.SetStatusCondition(&w.Status.Conditions, condition)
}
@@ -469,9 +468,8 @@ func SetEvictedCondition(w *kueue.Workload, reason string, message string) {
condition := metav1.Condition{
Type: kueue.WorkloadEvicted,
Status: metav1.ConditionTrue,
- LastTransitionTime: metav1.Now(),
Reason: reason,
- Message: message,
+ Message: api.TruncateConditionMessage(message),
ObservedGeneration: w.Generation,
}
apimeta.SetStatusCondition(&w.Status.Conditions, condition)
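The explicit `LastTransitionTime: metav1.Now()` assignments could be dropped because `apimeta.SetStatusCondition` sets the transition time itself whenever a condition is added or its status changes, and keeps the previous timestamp otherwise. A minimal sketch of the call shape (the condition type is inlined as a string here to stay self-contained):

```go
package workloadexample

import (
	apimeta "k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// markEvicted sets an Evicted-style condition without filling LastTransitionTime;
// SetStatusCondition stamps it with metav1.Now() on a genuine transition.
func markEvicted(conditions *[]metav1.Condition, generation int64, reason, message string) {
	apimeta.SetStatusCondition(conditions, metav1.Condition{
		Type:               "Evicted",
		Status:             metav1.ConditionTrue,
		Reason:             reason,
		Message:            message,
		ObservedGeneration: generation,
	})
}
```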
From 62e0a81bb42b058f36462db192f5ddd52e1d1901 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Wo=C5=BAniak?=
Date: Fri, 26 Apr 2024 12:11:29 +0200
Subject: [PATCH 38/49] Adjust the flaky test for preemption (#2061)
---
pkg/controller/core/workload_controller.go | 23 +++++++++++++---------
pkg/queue/manager.go | 6 +++---
2 files changed, 17 insertions(+), 12 deletions(-)
diff --git a/pkg/controller/core/workload_controller.go b/pkg/controller/core/workload_controller.go
index 28fe8cd4b2..f7d09da624 100644
--- a/pkg/controller/core/workload_controller.go
+++ b/pkg/controller/core/workload_controller.go
@@ -546,6 +546,11 @@ func (r *WorkloadReconciler) Update(e event.UpdateEvent) bool {
log.V(2).Info("ClusterQueue for workload didn't exist; ignored for now")
}
case (prevStatus == quotaReserved || prevStatus == admitted) && status == pending:
+ var backoff time.Duration
+ if wlCopy.Status.RequeueState != nil && wlCopy.Status.RequeueState.RequeueAt != nil {
+ backoff = time.Until(wl.Status.RequeueState.RequeueAt.Time)
+ }
+ immediate := backoff <= 0
// trigger the move of associated inadmissibleWorkloads, if there are any.
r.queues.QueueAssociatedInadmissibleWorkloadsAfter(ctx, wl, func() {
// Delete the workload from cache while holding the queues lock
@@ -554,16 +559,16 @@ func (r *WorkloadReconciler) Update(e event.UpdateEvent) bool {
if err := r.cache.DeleteWorkload(wl); err != nil {
log.Error(err, "Failed to delete workload from cache")
}
- })
- var backoff time.Duration
- if wlCopy.Status.RequeueState != nil && wlCopy.Status.RequeueState.RequeueAt != nil {
- backoff = time.Until(wl.Status.RequeueState.RequeueAt.Time)
- }
- if backoff <= 0 {
- if !r.queues.AddOrUpdateWorkload(wlCopy) {
- log.V(2).Info("Queue for workload didn't exist; ignored for now")
+ // Here we don't take the lock as it is already taken by the wrapping
+ // function.
+ if immediate {
+ if !r.queues.AddOrUpdateWorkloadWithoutLock(wlCopy) {
+ log.V(2).Info("Queue for workload didn't exist; ignored for now")
+ }
}
- } else {
+ })
+
+ if !immediate {
log.V(3).Info("Workload to be requeued after backoff", "backoff", backoff, "requeueAt", wl.Status.RequeueState.RequeueAt.Time)
time.AfterFunc(backoff, func() {
updatedWl := kueue.Workload{}
diff --git a/pkg/queue/manager.go b/pkg/queue/manager.go
index c465cfcdf7..631dc492e5 100644
--- a/pkg/queue/manager.go
+++ b/pkg/queue/manager.go
@@ -293,10 +293,10 @@ func (m *Manager) ClusterQueueForWorkload(wl *kueue.Workload) (string, bool) {
func (m *Manager) AddOrUpdateWorkload(w *kueue.Workload) bool {
m.Lock()
defer m.Unlock()
- return m.addOrUpdateWorkload(w)
+ return m.AddOrUpdateWorkloadWithoutLock(w)
}
-func (m *Manager) addOrUpdateWorkload(w *kueue.Workload) bool {
+func (m *Manager) AddOrUpdateWorkloadWithoutLock(w *kueue.Workload) bool {
qKey := workload.QueueKey(w)
q := m.localQueues[qKey]
if q == nil {
@@ -453,7 +453,7 @@ func (m *Manager) UpdateWorkload(oldW, w *kueue.Workload) bool {
if oldW.Spec.QueueName != w.Spec.QueueName {
m.deleteWorkloadFromQueueAndClusterQueue(w, workload.QueueKey(oldW))
}
- return m.addOrUpdateWorkload(w)
+ return m.AddOrUpdateWorkloadWithoutLock(w)
}
// CleanUpOnContext tracks the context. When closed, it wakes routines waiting
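The core of the fix is the exported/WithoutLock split: the callback passed to `QueueAssociatedInadmissibleWorkloadsAfter` already runs under the manager's lock, so re-adding the workload there must use the variant that does not take the lock again. A stripped-down sketch of that pattern (not the real manager type):

```go
package queueexample

import "sync"

// manager is a stand-in for the queue manager; only the locking pattern matters.
type manager struct {
	sync.Mutex
	workloads map[string]struct{}
}

// AddOrUpdateWorkload takes the lock and delegates to the lock-free variant.
func (m *manager) AddOrUpdateWorkload(key string) bool {
	m.Lock()
	defer m.Unlock()
	return m.AddOrUpdateWorkloadWithoutLock(key)
}

// AddOrUpdateWorkloadWithoutLock must only be called while the lock is held,
// e.g. from a callback that the manager runs under its own lock.
func (m *manager) AddOrUpdateWorkloadWithoutLock(key string) bool {
	if m.workloads == nil {
		m.workloads = map[string]struct{}{}
	}
	m.workloads[key] = struct{}{}
	return true
}
```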
From 0446cb5e54655d3057e2e96a50de1f9da03b0a0e Mon Sep 17 00:00:00 2001
From: Traian Schiau <55734665+trasc@users.noreply.github.com>
Date: Fri, 26 Apr 2024 13:42:54 +0300
Subject: [PATCH 39/49] [makefile] Drop old `test-scalability` make target.
(#2078)
---
Makefile | 4 ----
1 file changed, 4 deletions(-)
diff --git a/Makefile b/Makefile
index 5e07776412..9f22c73847 100644
--- a/Makefile
+++ b/Makefile
@@ -258,10 +258,6 @@ test-performance-scheduler: gotestsum run-performance-scheduler
--cmdStats=$(SCALABILITY_RUN_DIR)/minimalkueue.stats.yaml \
--range=$(PROJECT_DIR)/test/performance/scheduler/default_rangespec.yaml
-# drop this once it is no longer used by CI
-.PHONY: test-scalability
-test-scalability: test-performance-scheduler
-
.PHONY: run-performance-scheduler-in-cluster
run-performance-scheduler-in-cluster: envtest performance-scheduler-runner
mkdir -p $(ARTIFACTS)/run-performance-scheduler-in-cluster
From 9d77f2fe59729435e05c1be29cb6373a2eabfa26 Mon Sep 17 00:00:00 2001
From: vladikkuzn <51460778+vladikkuzn@users.noreply.github.com>
Date: Fri, 26 Apr 2024 18:03:32 +0300
Subject: [PATCH 40/49] Include a Failed condition in Workloads (#2026)
* * Replace workload finished reason with succeeded and failed reasons
* * Replace workload finished interface `Finished() (metav1.Condition, bool)` with `Finished() (reason, message string, finished bool)`
* * Update comments for WorkloadFinishedReason in workload_types.go
* Update Finished() (message string, success, finished bool) to return success instead of reason
* * Copy message from underlying job condition
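For orientation, a sketch of what the new contract amounts to, modeled on the ReconcileGenericJob change below; the helper name finishWorkload is illustrative, not part of the commit:

    // finishWorkload maps the (message, success, finished) triple returned by a
    // GenericJob onto the reason and message of the Workload Finished condition.
    func finishWorkload(ctx context.Context, r *JobReconciler, job GenericJob, wl *kueue.Workload) error {
        message, success, finished := job.Finished()
        if !finished {
            return nil
        }
        reason := kueue.WorkloadFinishedReasonSucceeded
        if !success {
            reason = kueue.WorkloadFinishedReasonFailed
        }
        // The message is copied from the underlying job condition.
        return workload.UpdateStatus(ctx, r.client, wl, kueue.WorkloadFinished,
            metav1.ConditionTrue, reason, message, constants.JobControllerName)
    }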
---
apis/kueue/v1beta1/workload_types.go | 14 +++++++++
pkg/controller/core/workload_controller.go | 2 +-
.../core/workload_controller_test.go | 2 +-
pkg/controller/jobframework/interface.go | 3 +-
pkg/controller/jobframework/reconciler.go | 14 +++++----
pkg/controller/jobs/job/job_controller.go | 17 ++++-------
.../jobs/job/job_controller_test.go | 14 +++++++--
.../jobs/jobset/jobset_controller.go | 24 ++++-----------
.../kubeflowjob/kubeflowjob_controller.go | 23 ++++++--------
.../jobs/mpijob/mpijob_controller.go | 18 ++++-------
pkg/controller/jobs/pod/pod_controller.go | 30 +++++++++----------
.../jobs/pod/pod_controller_test.go | 22 +++++++++-----
.../jobs/raycluster/raycluster_controller.go | 13 ++++----
.../jobs/rayjob/rayjob_controller.go | 16 +++++-----
pkg/util/testingjobs/pod/wrappers.go | 6 ++++
test/e2e/multikueue/e2e_test.go | 13 ++++----
test/e2e/singlecluster/e2e_test.go | 2 +-
test/e2e/singlecluster/jobset_test.go | 7 +++--
.../core/workload_controller_test.go | 4 +--
.../jobs/job/job_controller_test.go | 4 +--
.../integration/multikueue/multikueue_test.go | 11 +++----
21 files changed, 131 insertions(+), 128 deletions(-)
diff --git a/apis/kueue/v1beta1/workload_types.go b/apis/kueue/v1beta1/workload_types.go
index fd96a53704..74470ec182 100644
--- a/apis/kueue/v1beta1/workload_types.go
+++ b/apis/kueue/v1beta1/workload_types.go
@@ -329,6 +329,20 @@ const (
WorkloadEvictedByDeactivation = "InactiveWorkload"
)
+const (
+ // WorkloadFinishedReasonSucceeded indicates that the workload's job finished successfully.
+ WorkloadFinishedReasonSucceeded = "Succeeded"
+
+ // WorkloadFinishedReasonFailed indicates that the workload's job finished with an error.
+ WorkloadFinishedReasonFailed = "Failed"
+
+ // WorkloadFinishedReasonAdmissionChecksRejected indicates that the workload was rejected by admission checks.
+ WorkloadFinishedReasonAdmissionChecksRejected = "AdmissionChecksRejected"
+
+ // WorkloadFinishedReasonOutOfSync indicates that the prebuilt workload is not in sync with its parent job.
+ WorkloadFinishedReasonOutOfSync = "OutOfSync"
+)
+
// +genclient
// +kubebuilder:object:root=true
// +kubebuilder:storageversion
diff --git a/pkg/controller/core/workload_controller.go b/pkg/controller/core/workload_controller.go
index f7d09da624..7cf592752c 100644
--- a/pkg/controller/core/workload_controller.go
+++ b/pkg/controller/core/workload_controller.go
@@ -207,7 +207,7 @@ func (r *WorkloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
log.V(3).Info("Workload has Rejected admission checks, Finish with failure")
err := workload.UpdateStatus(ctx, r.client, &wl, kueue.WorkloadFinished,
metav1.ConditionTrue,
- "AdmissionChecksRejected",
+ kueue.WorkloadFinishedReasonAdmissionChecksRejected,
fmt.Sprintf("Admission checks %v are rejected", rejectedChecks),
constants.KueueName)
if err == nil {
diff --git a/pkg/controller/core/workload_controller_test.go b/pkg/controller/core/workload_controller_test.go
index e776911b78..cd28f01638 100644
--- a/pkg/controller/core/workload_controller_test.go
+++ b/pkg/controller/core/workload_controller_test.go
@@ -466,7 +466,7 @@ func TestReconcile(t *testing.T) {
Condition(metav1.Condition{
Type: "Finished",
Status: "True",
- Reason: "AdmissionChecksRejected",
+ Reason: kueue.WorkloadFinishedReasonAdmissionChecksRejected,
Message: "Admission checks [check] are rejected",
}).
Obj(),
diff --git a/pkg/controller/jobframework/interface.go b/pkg/controller/jobframework/interface.go
index f264e7db5a..2408a054ea 100644
--- a/pkg/controller/jobframework/interface.go
+++ b/pkg/controller/jobframework/interface.go
@@ -16,7 +16,6 @@ package jobframework
import (
"context"
- metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
@@ -44,7 +43,7 @@ type GenericJob interface {
// Finished means whether the job is completed/failed or not,
// condition represents the workload finished condition.
// Observed generation of the workload is set by the jobframework.
- Finished() (condition metav1.Condition, finished bool)
+ Finished() (message string, success, finished bool)
// PodSets will build workload podSets corresponding to the job.
PodSets() []kueue.PodSet
// IsActive returns true if there are any running pods.
diff --git a/pkg/controller/jobframework/reconciler.go b/pkg/controller/jobframework/reconciler.go
index 2e412cde88..c18d1604de 100644
--- a/pkg/controller/jobframework/reconciler.go
+++ b/pkg/controller/jobframework/reconciler.go
@@ -256,7 +256,7 @@ func (r *JobReconciler) ReconcileGenericJob(ctx context.Context, req ctrl.Reques
// if this is a non-standalone job, suspend the job if its parent workload is not found or not admitted.
if !isStandaloneJob {
- _, finished := job.Finished()
+ _, _, finished := job.Finished()
if !finished && !job.IsSuspended() {
if parentWorkload, err := r.getParentWorkload(ctx, job, object); err != nil {
log.Error(err, "couldn't get the parent job workload")
@@ -307,9 +307,13 @@ func (r *JobReconciler) ReconcileGenericJob(ctx context.Context, req ctrl.Reques
}
// 2. handle job is finished.
- if condition, finished := job.Finished(); finished {
+ if message, success, finished := job.Finished(); finished {
if wl != nil && !apimeta.IsStatusConditionTrue(wl.Status.Conditions, kueue.WorkloadFinished) {
- err := workload.UpdateStatus(ctx, r.client, wl, condition.Type, condition.Status, condition.Reason, condition.Message, constants.JobControllerName)
+ reason := kueue.WorkloadFinishedReasonSucceeded
+ if !success {
+ reason = kueue.WorkloadFinishedReasonFailed
+ }
+ err := workload.UpdateStatus(ctx, r.client, wl, kueue.WorkloadFinished, metav1.ConditionTrue, reason, message, constants.JobControllerName)
if err != nil && !apierrors.IsNotFound(err) {
return ctrl.Result{}, err
}
@@ -541,7 +545,7 @@ func (r *JobReconciler) ensureOneWorkload(ctx context.Context, job GenericJob, o
w = toDelete[0]
}
- if _, finished := job.Finished(); !finished {
+ if _, _, finished := job.Finished(); !finished {
var msg string
if w == nil {
msg = "Missing Workload; unable to restore pod templates"
@@ -636,7 +640,7 @@ func (r *JobReconciler) ensurePrebuiltWorkloadInSync(ctx context.Context, wl *ku
err := workload.UpdateStatus(ctx, r.client, wl,
kueue.WorkloadFinished,
metav1.ConditionTrue,
- "OutOfSync",
+ kueue.WorkloadFinishedReasonOutOfSync,
"The prebuilt workload is out of sync with its user job",
constants.JobControllerName)
return false, err
diff --git a/pkg/controller/jobs/job/job_controller.go b/pkg/controller/jobs/job/job_controller.go
index b8a0d1ab53..5c05d70300 100644
--- a/pkg/controller/jobs/job/job_controller.go
+++ b/pkg/controller/jobs/job/job_controller.go
@@ -277,30 +277,23 @@ func (j *Job) RestorePodSetsInfo(podSetsInfo []podset.PodSetInfo) bool {
return changed
}
-func (j *Job) Finished() (metav1.Condition, bool) {
+func (j *Job) Finished() (message string, success, finished bool) {
var conditionType batchv1.JobConditionType
- var finished bool
-
for _, c := range j.Status.Conditions {
if (c.Type == batchv1.JobComplete || c.Type == batchv1.JobFailed) && c.Status == corev1.ConditionTrue {
conditionType = c.Type
finished = true
+ message = c.Message
break
}
}
- condition := metav1.Condition{
- Type: kueue.WorkloadFinished,
- Status: metav1.ConditionTrue,
- Reason: "JobFinished",
- Message: "Job finished successfully",
- ObservedGeneration: j.Generation,
- }
+ success = true
if conditionType == batchv1.JobFailed {
- condition.Message = "Job failed"
+ success = false
}
- return condition, finished
+ return message, success, finished
}
func (j *Job) PodsReady() bool {
diff --git a/pkg/controller/jobs/job/job_controller_test.go b/pkg/controller/jobs/job/job_controller_test.go
index a6eee4360e..b8ae8ff329 100644
--- a/pkg/controller/jobs/job/job_controller_test.go
+++ b/pkg/controller/jobs/job/job_controller_test.go
@@ -1700,7 +1700,11 @@ func TestReconciler(t *testing.T) {
},
"when job completes, workload is marked as finished": {
job: *baseJobWrapper.Clone().
- Condition(batchv1.JobCondition{Type: batchv1.JobComplete, Status: corev1.ConditionTrue}).
+ Condition(batchv1.JobCondition{
+ Type: batchv1.JobComplete,
+ Status: corev1.ConditionTrue,
+ Message: "Job finished successfully",
+ }).
Obj(),
workloads: []kueue.Workload{
*baseWorkloadWrapper.Clone().
@@ -1709,7 +1713,11 @@ func TestReconciler(t *testing.T) {
Obj(),
},
wantJob: *baseJobWrapper.Clone().
- Condition(batchv1.JobCondition{Type: batchv1.JobComplete, Status: corev1.ConditionTrue}).
+ Condition(batchv1.JobCondition{
+ Type: batchv1.JobComplete,
+ Status: corev1.ConditionTrue,
+ Message: "Job finished successfully",
+ }).
Obj(),
wantWorkloads: []kueue.Workload{
*baseWorkloadWrapper.Clone().
@@ -1717,7 +1725,7 @@ func TestReconciler(t *testing.T) {
Condition(metav1.Condition{
Type: kueue.WorkloadFinished,
Status: metav1.ConditionTrue,
- Reason: "JobFinished",
+ Reason: kueue.WorkloadFinishedReasonSucceeded,
Message: "Job finished successfully",
ObservedGeneration: 1,
}).
diff --git a/pkg/controller/jobs/jobset/jobset_controller.go b/pkg/controller/jobs/jobset/jobset_controller.go
index 6011ad56b7..959fd8c9cd 100644
--- a/pkg/controller/jobs/jobset/jobset_controller.go
+++ b/pkg/controller/jobs/jobset/jobset_controller.go
@@ -146,26 +146,14 @@ func (j *JobSet) RestorePodSetsInfo(podSetsInfo []podset.PodSetInfo) bool {
return changed
}
-func (j *JobSet) Finished() (metav1.Condition, bool) {
- if apimeta.IsStatusConditionTrue(j.Status.Conditions, string(jobsetapi.JobSetCompleted)) {
- condition := metav1.Condition{
- Type: kueue.WorkloadFinished,
- Status: metav1.ConditionTrue,
- Reason: "JobSetFinished",
- Message: "JobSet finished successfully",
- }
- return condition, true
+func (j *JobSet) Finished() (message string, success, finished bool) {
+ if c := apimeta.FindStatusCondition(j.Status.Conditions, string(jobsetapi.JobSetCompleted)); c != nil && c.Status == metav1.ConditionTrue {
+ return c.Message, true, true
}
- if apimeta.IsStatusConditionTrue(j.Status.Conditions, string(jobsetapi.JobSetFailed)) {
- condition := metav1.Condition{
- Type: kueue.WorkloadFinished,
- Status: metav1.ConditionTrue,
- Reason: "JobSetFinished",
- Message: "JobSet failed",
- }
- return condition, true
+ if c := apimeta.FindStatusCondition(j.Status.Conditions, string(jobsetapi.JobSetFailed)); c != nil && c.Status == metav1.ConditionTrue {
+ return c.Message, false, true
}
- return metav1.Condition{}, false
+ return message, success, false
}
func (j *JobSet) PodsReady() bool {
diff --git a/pkg/controller/jobs/kubeflow/kubeflowjob/kubeflowjob_controller.go b/pkg/controller/jobs/kubeflow/kubeflowjob/kubeflowjob_controller.go
index fd6bdcf1df..94cad2ee78 100644
--- a/pkg/controller/jobs/kubeflow/kubeflowjob/kubeflowjob_controller.go
+++ b/pkg/controller/jobs/kubeflow/kubeflowjob/kubeflowjob_controller.go
@@ -21,7 +21,6 @@ import (
kftraining "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1"
corev1 "k8s.io/api/core/v1"
- metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -82,30 +81,26 @@ func (j *KubeflowJob) RestorePodSetsInfo(podSetsInfo []podset.PodSetInfo) bool {
return changed
}
-func (j *KubeflowJob) Finished() (metav1.Condition, bool) {
- var conditionType kftraining.JobConditionType
- var finished bool
+func (j *KubeflowJob) Finished() (message string, success, finished bool) {
if j.KFJobControl.JobStatus() == nil {
- return metav1.Condition{}, false
+ return message, finished, false
}
+ var conditionType kftraining.JobConditionType
for _, c := range j.KFJobControl.JobStatus().Conditions {
if (c.Type == kftraining.JobSucceeded || c.Type == kftraining.JobFailed) && c.Status == corev1.ConditionTrue {
conditionType = c.Type
finished = true
+ message = c.Message
break
}
}
- message := "Job finished successfully"
+
+ success = true
if conditionType == kftraining.JobFailed {
- message = "Job failed"
+ success = false
}
- condition := metav1.Condition{
- Type: kueue.WorkloadFinished,
- Status: metav1.ConditionTrue,
- Reason: "JobFinished",
- Message: message,
- }
- return condition, finished
+
+ return message, success, finished
}
func (j *KubeflowJob) PodSets() []kueue.PodSet {
diff --git a/pkg/controller/jobs/mpijob/mpijob_controller.go b/pkg/controller/jobs/mpijob/mpijob_controller.go
index acc36b2416..cb5df91772 100644
--- a/pkg/controller/jobs/mpijob/mpijob_controller.go
+++ b/pkg/controller/jobs/mpijob/mpijob_controller.go
@@ -148,29 +148,23 @@ func (j *MPIJob) RestorePodSetsInfo(podSetsInfo []podset.PodSetInfo) bool {
return changed
}
-func (j *MPIJob) Finished() (metav1.Condition, bool) {
+func (j *MPIJob) Finished() (message string, success, finished bool) {
var conditionType kubeflow.JobConditionType
- var finished bool
for _, c := range j.Status.Conditions {
if (c.Type == kubeflow.JobSucceeded || c.Type == kubeflow.JobFailed) && c.Status == corev1.ConditionTrue {
conditionType = c.Type
finished = true
+ message = c.Message
break
}
}
- message := "Job finished successfully"
+ success = true
if conditionType == kubeflow.JobFailed {
- message = "Job failed"
+ success = false
}
- condition := metav1.Condition{
- Type: kueue.WorkloadFinished,
- Status: metav1.ConditionTrue,
- Reason: "JobFinished",
- Message: message,
- // ObservedGeneration is added via Update status by the job framework
- }
- return condition, finished
+
+ return message, success, finished
}
// PriorityClass calculates the priorityClass name needed for workload according to the following priorities:
diff --git a/pkg/controller/jobs/pod/pod_controller.go b/pkg/controller/jobs/pod/pod_controller.go
index eecf864c69..764e6402c7 100644
--- a/pkg/controller/jobs/pod/pod_controller.go
+++ b/pkg/controller/jobs/pod/pod_controller.go
@@ -316,25 +316,24 @@ func (p *Pod) RestorePodSetsInfo(_ []podset.PodSetInfo) bool {
// Finished means whether the job is completed/failed or not,
// condition represents the workload finished condition.
-func (p *Pod) Finished() (metav1.Condition, bool) {
- finished := true
-
- condition := metav1.Condition{
- Type: kueue.WorkloadFinished,
- Status: metav1.ConditionTrue,
- Reason: "JobFinished",
- Message: "Job finished successfully",
- }
+func (p *Pod) Finished() (message string, success, finished bool) {
+ finished = true
+ success = true
if !p.isGroup {
ph := p.pod.Status.Phase
finished = ph == corev1.PodSucceeded || ph == corev1.PodFailed
if ph == corev1.PodFailed {
- condition.Message = "Job failed"
+ message = p.pod.Status.Message
+ success = false
+ }
+
+ if ph == corev1.PodSucceeded {
+ message = p.pod.Status.Message
}
- return condition, finished
+ return message, success, finished
}
isActive := false
succeededCount := 0
@@ -342,7 +341,8 @@ func (p *Pod) Finished() (metav1.Condition, bool) {
groupTotalCount, err := p.groupTotalCount()
if err != nil {
ctrl.Log.V(2).Error(err, "failed to check if pod group is finished")
- return metav1.Condition{}, false
+ message = "failed to check if pod group is finished"
+ return message, success, false
}
for _, pod := range p.list.Items {
if pod.Status.Phase == corev1.PodSucceeded {
@@ -357,12 +357,12 @@ func (p *Pod) Finished() (metav1.Condition, bool) {
unretriableGroup := p.isUnretriableGroup()
if succeededCount == groupTotalCount || (!isActive && unretriableGroup) {
- condition.Message = fmt.Sprintf("Pods succeeded: %d/%d.", succeededCount, groupTotalCount)
+ message = fmt.Sprintf("Pods succeeded: %d/%d.", succeededCount, groupTotalCount)
} else {
- return metav1.Condition{}, false
+ return message, success, false
}
- return condition, finished
+ return message, success, finished
}
// PodSets will build workload podSets corresponding to the job.
diff --git a/pkg/controller/jobs/pod/pod_controller_test.go b/pkg/controller/jobs/pod/pod_controller_test.go
index ea08fd4174..5214f07b06 100644
--- a/pkg/controller/jobs/pod/pod_controller_test.go
+++ b/pkg/controller/jobs/pod/pod_controller_test.go
@@ -364,11 +364,13 @@ func TestReconciler(t *testing.T) {
Label("kueue.x-k8s.io/managed", "true").
KueueFinalizer().
StatusPhase(corev1.PodSucceeded).
+ StatusMessage("Job finished successfully").
Obj()},
wantPods: []corev1.Pod{*basePodWrapper.
Clone().
Label("kueue.x-k8s.io/managed", "true").
StatusPhase(corev1.PodSucceeded).
+ StatusMessage("Job finished successfully").
Obj()},
workloads: []kueue.Workload{
*utiltesting.MakeWorkload("unit-test", "ns").Finalizers(kueue.ResourceInUseFinalizerName).
@@ -387,7 +389,7 @@ func TestReconciler(t *testing.T) {
Condition(metav1.Condition{
Type: "Finished",
Status: "True",
- Reason: "JobFinished",
+ Reason: kueue.WorkloadFinishedReasonSucceeded,
Message: "Job finished successfully",
}).
Obj(),
@@ -416,11 +418,13 @@ func TestReconciler(t *testing.T) {
Clone().
Label("kueue.x-k8s.io/managed", "true").
StatusPhase(corev1.PodSucceeded).
+ StatusMessage("Job finished successfully").
Obj()},
wantPods: []corev1.Pod{*basePodWrapper.
Clone().
Label("kueue.x-k8s.io/managed", "true").
StatusPhase(corev1.PodSucceeded).
+ StatusMessage("Job finished successfully").
Obj()},
workloads: []kueue.Workload{
*utiltesting.MakeWorkload("unit-test", "ns").Finalizers(kueue.ResourceInUseFinalizerName).
@@ -439,7 +443,7 @@ func TestReconciler(t *testing.T) {
Condition(metav1.Condition{
Type: "Finished",
Status: "True",
- Reason: "JobFinished",
+ Reason: kueue.WorkloadFinishedReasonSucceeded,
Message: "Job finished successfully",
}).
Obj(),
@@ -906,7 +910,7 @@ func TestReconciler(t *testing.T) {
Condition(metav1.Condition{
Type: "Finished",
Status: "True",
- Reason: "JobFinished",
+ Reason: kueue.WorkloadFinishedReasonSucceeded,
Message: "Pods succeeded: 2/2.",
}).
Obj(),
@@ -1074,7 +1078,7 @@ func TestReconciler(t *testing.T) {
Condition(metav1.Condition{
Type: "Finished",
Status: "True",
- Reason: "JobFinished",
+ Reason: kueue.WorkloadFinishedReasonSucceeded,
Message: "Pods succeeded: 1/2. Pods failed: 1/2",
}).
Obj(),
@@ -1093,7 +1097,7 @@ func TestReconciler(t *testing.T) {
Condition(metav1.Condition{
Type: "Finished",
Status: "True",
- Reason: "JobFinished",
+ Reason: kueue.WorkloadFinishedReasonSucceeded,
Message: "Pods succeeded: 1/2. Pods failed: 1/2",
}).
Obj(),
@@ -1497,7 +1501,7 @@ func TestReconciler(t *testing.T) {
Condition(metav1.Condition{
Type: "Finished",
Status: "True",
- Reason: "JobFinished",
+ Reason: kueue.WorkloadFinishedReasonSucceeded,
Message: "Pods succeeded: 2/2.",
}).
Obj(),
@@ -2117,7 +2121,7 @@ func TestReconciler(t *testing.T) {
metav1.Condition{
Type: kueue.WorkloadFinished,
Status: metav1.ConditionTrue,
- Reason: "JobFinished",
+ Reason: kueue.WorkloadFinishedReasonSucceeded,
Message: "Pods succeeded: 1/3.",
},
).
@@ -3614,6 +3618,7 @@ func TestReconciler_ErrorFinalizingPod(t *testing.T) {
Label("kueue.x-k8s.io/managed", "true").
KueueFinalizer().
StatusPhase(corev1.PodSucceeded).
+ StatusMessage("Job finished successfully").
Obj()
wl := *utiltesting.MakeWorkload("unit-test", "ns").Finalizers(kueue.ResourceInUseFinalizerName).
@@ -3684,6 +3689,7 @@ func TestReconciler_ErrorFinalizingPod(t *testing.T) {
Clone().
Label("kueue.x-k8s.io/managed", "true").
StatusPhase(corev1.PodSucceeded).
+ StatusMessage("Job finished successfully").
Obj()
if diff := cmp.Diff(wantPod, gotPod, podCmpOpts...); diff != "" {
t.Errorf("Pod after second reconcile (-want,+got):\n%s", diff)
@@ -3704,7 +3710,7 @@ func TestReconciler_ErrorFinalizingPod(t *testing.T) {
metav1.Condition{
Type: kueue.WorkloadFinished,
Status: metav1.ConditionTrue,
- Reason: "JobFinished",
+ Reason: kueue.WorkloadFinishedReasonSucceeded,
Message: "Job finished successfully",
},
).
diff --git a/pkg/controller/jobs/raycluster/raycluster_controller.go b/pkg/controller/jobs/raycluster/raycluster_controller.go
index 2a44cc8e17..a3f74f1abf 100644
--- a/pkg/controller/jobs/raycluster/raycluster_controller.go
+++ b/pkg/controller/jobs/raycluster/raycluster_controller.go
@@ -160,16 +160,13 @@ func (j *RayCluster) RestorePodSetsInfo(podSetsInfo []podset.PodSetInfo) bool {
return changed
}
-func (j *RayCluster) Finished() (metav1.Condition, bool) {
- condition := metav1.Condition{
- Type: kueue.WorkloadFinished,
- Status: metav1.ConditionFalse,
- Reason: string(j.Status.State),
- Message: j.Status.Reason,
- ObservedGeneration: j.Generation,
+func (j *RayCluster) Finished() (message string, success, finished bool) {
+ success = true
+ if j.Status.State == rayv1.Failed {
+ success = false
}
// Technically a RayCluster is never "finished"
- return condition, false
+ return j.Status.Reason, success, false
}
func (j *RayCluster) PodsReady() bool {
diff --git a/pkg/controller/jobs/rayjob/rayjob_controller.go b/pkg/controller/jobs/rayjob/rayjob_controller.go
index 7037f0fb94..cce1dc0cb8 100644
--- a/pkg/controller/jobs/rayjob/rayjob_controller.go
+++ b/pkg/controller/jobs/rayjob/rayjob_controller.go
@@ -160,16 +160,14 @@ func (j *RayJob) RestorePodSetsInfo(podSetsInfo []podset.PodSetInfo) bool {
return changed
}
-func (j *RayJob) Finished() (metav1.Condition, bool) {
- condition := metav1.Condition{
- Type: kueue.WorkloadFinished,
- Status: metav1.ConditionTrue,
- Reason: string(j.Status.JobStatus),
- Message: j.Status.Message,
- // ObservedGeneration is added via Update status by the job framework
+func (j *RayJob) Finished() (message string, success, finished bool) {
+ success = true
+ if j.Status.JobStatus == rayv1.JobStatusFailed {
+ success = false
}
-
- return condition, j.Status.JobStatus == rayv1.JobStatusFailed || j.Status.JobStatus == rayv1.JobStatusSucceeded
+ message = j.Status.Message
+ finished = j.Status.JobStatus == rayv1.JobStatusFailed || j.Status.JobStatus == rayv1.JobStatusSucceeded
+ return message, success, finished
}
func (j *RayJob) PodsReady() bool {
diff --git a/pkg/util/testingjobs/pod/wrappers.go b/pkg/util/testingjobs/pod/wrappers.go
index a6e95ff337..4471d6d7d1 100644
--- a/pkg/util/testingjobs/pod/wrappers.go
+++ b/pkg/util/testingjobs/pod/wrappers.go
@@ -204,6 +204,12 @@ func (p *PodWrapper) StatusPhase(ph corev1.PodPhase) *PodWrapper {
return p
}
+// StatusMessage updates status message of the Pod.
+func (p *PodWrapper) StatusMessage(msg string) *PodWrapper {
+ p.Pod.Status.Message = msg
+ return p
+}
+
// CreationTimestamp sets a creation timestamp for the pod object
func (p *PodWrapper) CreationTimestamp(t time.Time) *PodWrapper {
timestamp := metav1.NewTime(t).Rfc3339Copy()
diff --git a/test/e2e/multikueue/e2e_test.go b/test/e2e/multikueue/e2e_test.go
index e24fc09f03..550c030a12 100644
--- a/test/e2e/multikueue/e2e_test.go
+++ b/test/e2e/multikueue/e2e_test.go
@@ -218,11 +218,10 @@ var _ = ginkgo.Describe("MultiKueue", func() {
g.Expect(k8sManagerClient.Get(ctx, wlLookupKey, createdLeaderWorkload)).To(gomega.Succeed())
g.Expect(apimeta.FindStatusCondition(createdLeaderWorkload.Status.Conditions, kueue.WorkloadFinished)).To(gomega.BeComparableTo(&metav1.Condition{
- Type: kueue.WorkloadFinished,
- Status: metav1.ConditionTrue,
- Reason: "JobFinished",
- Message: `Job finished successfully`,
- }, util.IgnoreConditionTimestampsAndObservedGeneration))
+ Type: kueue.WorkloadFinished,
+ Status: metav1.ConditionTrue,
+ Reason: kueue.WorkloadFinishedReasonSucceeded,
+ }, util.IgnoreConditionMessage, util.IgnoreConditionTimestampsAndObservedGeneration))
}, util.LongTimeout, util.Interval).Should(gomega.Succeed())
})
@@ -314,8 +313,8 @@ var _ = ginkgo.Describe("MultiKueue", func() {
g.Expect(apimeta.FindStatusCondition(createdLeaderWorkload.Status.Conditions, kueue.WorkloadFinished)).To(gomega.BeComparableTo(&metav1.Condition{
Type: kueue.WorkloadFinished,
Status: metav1.ConditionTrue,
- Reason: "JobSetFinished",
- Message: "JobSet finished successfully",
+ Reason: kueue.WorkloadFinishedReasonSucceeded,
+ Message: "jobset completed successfully",
}, util.IgnoreConditionTimestampsAndObservedGeneration))
}, util.LongTimeout, util.Interval).Should(gomega.Succeed())
})
diff --git a/test/e2e/singlecluster/e2e_test.go b/test/e2e/singlecluster/e2e_test.go
index 26af1cfc8b..029a53551e 100644
--- a/test/e2e/singlecluster/e2e_test.go
+++ b/test/e2e/singlecluster/e2e_test.go
@@ -207,7 +207,7 @@ var _ = ginkgo.Describe("Kueue", func() {
gomega.BeComparableTo(metav1.Condition{
Type: kueue.WorkloadFinished,
Status: metav1.ConditionTrue,
- Reason: "JobFinished",
+ Reason: kueue.WorkloadFinishedReasonFailed,
}, util.IgnoreConditionMessage, util.IgnoreConditionTimestampsAndObservedGeneration)))
}, util.LongTimeout, util.Interval).Should(gomega.Succeed())
})
diff --git a/test/e2e/singlecluster/jobset_test.go b/test/e2e/singlecluster/jobset_test.go
index aed06ee6fd..59197fa358 100644
--- a/test/e2e/singlecluster/jobset_test.go
+++ b/test/e2e/singlecluster/jobset_test.go
@@ -23,6 +23,7 @@ import (
apimeta "k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
+ "sigs.k8s.io/jobset/pkg/constants"
kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
workloadjobset "sigs.k8s.io/kueue/pkg/controller/jobs/jobset"
@@ -33,7 +34,7 @@ import (
// +kubebuilder:docs-gen:collapse=Imports
-var _ = ginkgo.Describe("Kueue", func() {
+var _ = ginkgo.Describe("Jobset", func() {
var ns *corev1.Namespace
ginkgo.BeforeEach(func() {
@@ -104,8 +105,8 @@ var _ = ginkgo.Describe("Kueue", func() {
g.Expect(apimeta.FindStatusCondition(createdLeaderWorkload.Status.Conditions, kueue.WorkloadFinished)).To(gomega.BeComparableTo(&metav1.Condition{
Type: kueue.WorkloadFinished,
Status: metav1.ConditionTrue,
- Reason: "JobSetFinished",
- Message: "JobSet finished successfully",
+ Reason: kueue.WorkloadFinishedReasonSucceeded,
+ Message: constants.AllJobsCompletedMessage,
}, util.IgnoreConditionTimestampsAndObservedGeneration))
}, util.LongTimeout, util.Interval).Should(gomega.Succeed())
})
diff --git a/test/integration/controller/core/workload_controller_test.go b/test/integration/controller/core/workload_controller_test.go
index e8cdf361b1..046efb6226 100644
--- a/test/integration/controller/core/workload_controller_test.go
+++ b/test/integration/controller/core/workload_controller_test.go
@@ -291,7 +291,7 @@ var _ = ginkgo.Describe("Workload controller", ginkgo.Ordered, ginkgo.ContinueOn
}, util.Timeout, util.Interval).Should(gomega.BeComparableTo(&metav1.Condition{
Type: kueue.WorkloadFinished,
Status: metav1.ConditionTrue,
- Reason: "AdmissionChecksRejected",
+ Reason: kueue.WorkloadFinishedReasonAdmissionChecksRejected,
Message: "Admission checks [check1] are rejected",
}, util.IgnoreConditionTimestampsAndObservedGeneration))
@@ -390,7 +390,7 @@ var _ = ginkgo.Describe("Workload controller", ginkgo.Ordered, ginkgo.ContinueOn
gomega.BeComparableTo(metav1.Condition{
Type: kueue.WorkloadFinished,
Status: metav1.ConditionTrue,
- Reason: "AdmissionChecksRejected",
+ Reason: kueue.WorkloadFinishedReasonAdmissionChecksRejected,
Message: "Admission checks [check1] are rejected",
}, util.IgnoreConditionTimestampsAndObservedGeneration),
gomega.BeComparableTo(metav1.Condition{
diff --git a/test/integration/controller/jobs/job/job_controller_test.go b/test/integration/controller/jobs/job/job_controller_test.go
index bbef383ba8..417a902da0 100644
--- a/test/integration/controller/jobs/job/job_controller_test.go
+++ b/test/integration/controller/jobs/job/job_controller_test.go
@@ -504,7 +504,7 @@ var _ = ginkgo.Describe("Job controller", ginkgo.Ordered, ginkgo.ContinueOnFailu
LastProbeTime: metav1.Now(),
LastTransitionTime: metav1.Now(),
Reason: "ByTest",
- Message: "by test",
+ Message: "Job finished successfully",
},
}
g.Expect(k8sClient.Status().Update(ctx, &createdJob)).To(gomega.Succeed())
@@ -519,7 +519,7 @@ var _ = ginkgo.Describe("Job controller", ginkgo.Ordered, ginkgo.ContinueOnFailu
gomega.BeComparableTo(metav1.Condition{
Type: kueue.WorkloadFinished,
Status: metav1.ConditionTrue,
- Reason: "JobFinished",
+ Reason: kueue.WorkloadFinishedReasonSucceeded,
Message: "Job finished successfully",
}, util.IgnoreConditionTimestampsAndObservedGeneration)))
}, util.Timeout, util.Interval).Should(gomega.Succeed())
diff --git a/test/integration/multikueue/multikueue_test.go b/test/integration/multikueue/multikueue_test.go
index 980620202f..9e558543d2 100644
--- a/test/integration/multikueue/multikueue_test.go
+++ b/test/integration/multikueue/multikueue_test.go
@@ -374,6 +374,7 @@ var _ = ginkgo.Describe("Multikueue", func() {
Status: corev1.ConditionTrue,
LastProbeTime: metav1.Now(),
LastTransitionTime: metav1.Now(),
+ Message: "Job finished successfully",
})
g.Expect(worker1TestCluster.client.Status().Update(worker1TestCluster.ctx, &createdJob)).To(gomega.Succeed())
}, util.Timeout, util.Interval).Should(gomega.Succeed())
@@ -384,8 +385,8 @@ var _ = ginkgo.Describe("Multikueue", func() {
g.Expect(apimeta.FindStatusCondition(createdWorkload.Status.Conditions, kueue.WorkloadFinished)).To(gomega.BeComparableTo(&metav1.Condition{
Type: kueue.WorkloadFinished,
Status: metav1.ConditionTrue,
- Reason: "JobFinished",
- Message: `Job finished successfully`,
+ Reason: kueue.WorkloadFinishedReasonSucceeded,
+ Message: "Job finished successfully",
}, util.IgnoreConditionTimestampsAndObservedGeneration))
}, util.LongTimeout, util.Interval).Should(gomega.Succeed())
@@ -495,7 +496,7 @@ var _ = ginkgo.Describe("Multikueue", func() {
Type: string(jobset.JobSetCompleted),
Status: metav1.ConditionTrue,
Reason: "ByTest",
- Message: "by test",
+ Message: "JobSet finished successfully",
})
g.Expect(worker2TestCluster.client.Status().Update(worker2TestCluster.ctx, &createdJobSet)).To(gomega.Succeed())
}, util.Timeout, util.Interval).Should(gomega.Succeed())
@@ -506,8 +507,8 @@ var _ = ginkgo.Describe("Multikueue", func() {
g.Expect(apimeta.FindStatusCondition(createdWorkload.Status.Conditions, kueue.WorkloadFinished)).To(gomega.BeComparableTo(&metav1.Condition{
Type: kueue.WorkloadFinished,
Status: metav1.ConditionTrue,
- Reason: "JobSetFinished",
- Message: `JobSet finished successfully`,
+ Reason: kueue.WorkloadFinishedReasonSucceeded,
+ Message: "JobSet finished successfully",
}, util.IgnoreConditionTimestampsAndObservedGeneration))
}, util.LongTimeout, util.Interval).Should(gomega.Succeed())
From 0cdb8fda4bdf07d6a702cf8160acd71e2a07053b Mon Sep 17 00:00:00 2001
From: Traian Schiau <55734665+trasc@users.noreply.github.com>
Date: Fri, 26 Apr 2024 18:03:38 +0300
Subject: [PATCH 41/49] [scalability] Relax the expectations. (#2067)
* [scalability] Relax the expectations.
- Extend the admission time for large workloads
- Only check the average mCPU usage of minimalkueue (not sys and user
times)
- Extend the runner's timeout to 8 min
* Second iteration:
- Revert the timeout extension, was not needed.
- Bring back the wallMs check.
- Extend the medium WL admission time.
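For reference, the new limit follows from the checker's formula, using the averages already quoted in the rangespec:

    mCPU = (sysMs + userMs) * 1000 / wallMs
         = (27875 + 111500) * 1000 / 351116 ≈ 397 mCPU

which matches the ~396 mCPU average noted in default_rangespec.yaml; roughly +25% headroom gives the 500 mCPU threshold.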
---
.../scheduler/checker/checker_test.go | 11 ++++-------
.../scheduler/default_rangespec.yaml | 19 ++++++++-----------
2 files changed, 12 insertions(+), 18 deletions(-)
diff --git a/test/performance/scheduler/checker/checker_test.go b/test/performance/scheduler/checker/checker_test.go
index 86feffc9f3..d870170cc5 100644
--- a/test/performance/scheduler/checker/checker_test.go
+++ b/test/performance/scheduler/checker/checker_test.go
@@ -36,8 +36,7 @@ var (
type RangeSpec struct {
Cmd struct {
MaxWallMs int64 `json:"maxWallMs"`
- MaxUserMs int64 `json:"maxUserMs"`
- MaxSysMs int64 `json:"maxSysMs"`
+ MCPU int64 `json:"mCPU"`
Maxrss uint64 `json:"maxrss"`
} `json:"cmd"`
ClusterQueueClassesMinUsage map[string]float64 `json:"clusterQueueClassesMinUsage"`
@@ -83,11 +82,9 @@ func TestScalability(t *testing.T) {
if cmdStats.WallMs > rangeSpec.Cmd.MaxWallMs {
t.Errorf("Wall time %dms is greater than maximum expected %dms", cmdStats.WallMs, rangeSpec.Cmd.MaxWallMs)
}
- if cmdStats.UserMs > rangeSpec.Cmd.MaxUserMs {
- t.Errorf("User time %dms is greater than maximum expected %dms", cmdStats.UserMs, rangeSpec.Cmd.MaxUserMs)
- }
- if cmdStats.SysMs > rangeSpec.Cmd.MaxSysMs {
- t.Errorf("Sys time %dms is greater than maximum expected %dms", cmdStats.SysMs, rangeSpec.Cmd.MaxSysMs)
+ mCPUUsed := (cmdStats.SysMs + cmdStats.UserMs) * 1000 / cmdStats.WallMs
+ if mCPUUsed > rangeSpec.Cmd.MCPU {
+ t.Errorf("Average CPU usage %dmCpu is greater than maximum expected %dmCPU", mCPUUsed, rangeSpec.Cmd.MCPU)
}
if cmdStats.Maxrss > int64(rangeSpec.Cmd.Maxrss) {
t.Errorf("Maxrss %dKib is greater than maximum expected %dKib", cmdStats.Maxrss, rangeSpec.Cmd.Maxrss)
diff --git a/test/performance/scheduler/default_rangespec.yaml b/test/performance/scheduler/default_rangespec.yaml
index 018c9472c7..ae8776f32e 100644
--- a/test/performance/scheduler/default_rangespec.yaml
+++ b/test/performance/scheduler/default_rangespec.yaml
@@ -5,14 +5,11 @@
# - #1782772615836864512
# - #1782775995984515072
cmd:
- # Average value 351116.4 (+/- 0.9%), setting at +5%
- maxWallMs: 368_000
+ # Average value 351116.4 (+/- 0.9%), setting at +20%
+ maxWallMs: 425_000
- # Average value 111500 (+/- 14%), setting at +20%
- maxUserMs: 134_000
-
- # Average value 27875 (+/- 16%), setting at +20%
- maxSysMs: 34_000
+ # Average value 396 mCPU (+/- 8%), setting at +25%
+ mCPU: 500
# Average value 445012 (+/- 0.3%), setting at +5%
maxrss: 468_000
@@ -22,11 +19,11 @@ clusterQueueClassesMinUsage:
cq: 56 #%
wlClassesMaxAvgTimeToAdmissionMs:
- # Average value 6666 (+/- 14%), setting at +20%
- large: 8_000
+ # Average value 6666 (+/- 14%), setting at +35%
+ large: 9_000
- # Average value 76768 (+/- 2%), setting at +5%
- medium: 81_000
+ # Average value 76768 (+/- 2%), setting at +20%
+ medium: 90_000
# Average value 215468 (+/- 2%), setting at +5%
small: 227_000
From 6516ada7611161b9d2193b6e910fb1909f90cbac Mon Sep 17 00:00:00 2001
From: Mykhailo Bobrovskyi
Date: Fri, 26 Apr 2024 19:55:57 +0300
Subject: [PATCH 42/49] Using patch/apply on update pods. (#2074)
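The pattern, sketched below with the finalizer path as an example (variable names follow the pod controller), is to snapshot the object before mutating it and send only the difference as a patch:

    // Keep a deep copy of the object as read from the cache, mutate the live
    // copy, then let the new clientutil helper compute and send a merge patch.
    podOriginal := pod.DeepCopy()
    if controllerutil.RemoveFinalizer(pod, PodFinalizer) {
        if err := clientutil.Patch(ctx, c, podOriginal, pod); err != nil {
            return err
        }
    }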
---
pkg/controller/jobs/pod/pod_controller.go | 32 +++++++-------
.../jobs/pod/pod_controller_test.go | 10 ++---
pkg/util/client/client.go | 43 +++++++++++++++++++
3 files changed, 63 insertions(+), 22 deletions(-)
create mode 100644 pkg/util/client/client.go
diff --git a/pkg/controller/jobs/pod/pod_controller.go b/pkg/controller/jobs/pod/pod_controller.go
index 764e6402c7..12ece98947 100644
--- a/pkg/controller/jobs/pod/pod_controller.go
+++ b/pkg/controller/jobs/pod/pod_controller.go
@@ -52,6 +52,7 @@ import (
"sigs.k8s.io/kueue/pkg/controller/jobframework"
"sigs.k8s.io/kueue/pkg/podset"
"sigs.k8s.io/kueue/pkg/util/admissioncheck"
+ clientutil "sigs.k8s.io/kueue/pkg/util/client"
"sigs.k8s.io/kueue/pkg/util/kubeversion"
"sigs.k8s.io/kueue/pkg/util/maps"
"sigs.k8s.io/kueue/pkg/util/parallelize"
@@ -240,6 +241,8 @@ func (p *Pod) Run(ctx context.Context, c client.Client, podSetsInfo []podset.Pod
return fmt.Errorf("%w: expecting 1 pod set got %d", podset.ErrInvalidPodsetInfo, len(podSetsInfo))
}
+ podOriginal := p.pod.DeepCopy()
+
if ungated := ungatePod(&p.pod); !ungated {
return nil
}
@@ -248,8 +251,7 @@ func (p *Pod) Run(ctx context.Context, c client.Client, podSetsInfo []podset.Pod
return err
}
- err := c.Update(ctx, &p.pod)
- if err != nil {
+ if err := clientutil.Patch(ctx, c, podOriginal, &p.pod); err != nil {
return err
}
if recorder != nil {
@@ -258,21 +260,14 @@ func (p *Pod) Run(ctx context.Context, c client.Client, podSetsInfo []podset.Pod
return nil
}
- var podsToUngate []*corev1.Pod
-
- for i := range p.list.Items {
+ return parallelize.Until(ctx, len(p.list.Items), func(i int) error {
pod := &p.list.Items[i]
+ podOriginal := pod.DeepCopy()
+
if ungated := ungatePod(pod); !ungated {
- continue
+ return nil
}
- podsToUngate = append(podsToUngate, pod)
- }
- if len(podsToUngate) == 0 {
- return nil
- }
- return parallelize.Until(ctx, len(podsToUngate), func(i int) error {
- pod := podsToUngate[i]
roleHash, err := getRoleHash(*pod)
if err != nil {
return err
@@ -291,7 +286,7 @@ func (p *Pod) Run(ctx context.Context, c client.Client, podSetsInfo []podset.Pod
}
log.V(3).Info("Starting pod in group", "podInGroup", klog.KObj(pod))
- if err := c.Update(ctx, pod); err != nil {
+ if err := clientutil.Patch(ctx, c, podOriginal, pod); err != nil {
return err
}
if recorder != nil {
@@ -514,8 +509,9 @@ func (p *Pod) Finalize(ctx context.Context, c client.Client) error {
return parallelize.Until(ctx, len(podsInGroup.Items), func(i int) error {
pod := &podsInGroup.Items[i]
+ podOriginal := pod.DeepCopy()
if controllerutil.RemoveFinalizer(pod, PodFinalizer) {
- return c.Update(ctx, pod)
+ return clientutil.Patch(ctx, c, podOriginal, pod)
}
return nil
})
@@ -823,9 +819,10 @@ func (p *Pod) removeExcessPods(ctx context.Context, c client.Client, r record.Ev
// Finalize and delete the active pods created last
err := parallelize.Until(ctx, len(extraPods), func(i int) error {
pod := extraPods[i]
+ podOriginal := pod.DeepCopy()
if controllerutil.RemoveFinalizer(&pod, PodFinalizer) {
log.V(3).Info("Finalizing excess pod in group", "excessPod", klog.KObj(&pod))
- if err := c.Update(ctx, &pod); err != nil {
+ if err := clientutil.Patch(ctx, c, podOriginal, &pod); err != nil {
// We won't observe this cleanup in the event handler.
p.excessPodExpectations.ObservedUID(log, p.key, pod.UID)
return err
@@ -861,9 +858,10 @@ func (p *Pod) finalizePods(ctx context.Context, c client.Client, extraPods []cor
err := parallelize.Until(ctx, len(extraPods), func(i int) error {
pod := extraPods[i]
+ podOriginal := pod.DeepCopy()
if controllerutil.RemoveFinalizer(&pod, PodFinalizer) {
log.V(3).Info("Finalizing pod in group", "Pod", klog.KObj(&pod))
- if err := c.Update(ctx, &pod); err != nil {
+ if err := clientutil.Patch(ctx, c, podOriginal, &pod); err != nil {
// We won't observe this cleanup in the event handler.
p.excessPodExpectations.ObservedUID(log, p.key, pod.UID)
return err
diff --git a/pkg/controller/jobs/pod/pod_controller_test.go b/pkg/controller/jobs/pod/pod_controller_test.go
index 5214f07b06..fa829d2de5 100644
--- a/pkg/controller/jobs/pod/pod_controller_test.go
+++ b/pkg/controller/jobs/pod/pod_controller_test.go
@@ -3634,7 +3634,7 @@ func TestReconciler_ErrorFinalizingPod(t *testing.T) {
WithObjects(&pod).
WithStatusSubresource(&wl).
WithInterceptorFuncs(interceptor.Funcs{
- Update: func(ctx context.Context, client client.WithWatch, obj client.Object, opts ...client.UpdateOption) error {
+ Patch: func(ctx context.Context, client client.WithWatch, obj client.Object, patch client.Patch, opts ...client.PatchOption) error {
_, isPod := obj.(*corev1.Pod)
if isPod {
defer func() { reqcount++ }()
@@ -3644,10 +3644,10 @@ func TestReconciler_ErrorFinalizingPod(t *testing.T) {
}
if reqcount == 1 {
// Exec a regular update operation for the second request
- return client.Update(ctx, obj, opts...)
+ return client.Patch(ctx, obj, patch, opts...)
}
}
- return client.Update(ctx, obj, opts...)
+ return client.Patch(ctx, obj, patch, opts...)
},
SubResourcePatch: utiltesting.TreatSSAAsStrategicMerge,
})
@@ -3859,11 +3859,11 @@ func TestReconciler_DeletePodAfterTransientErrorsOnUpdateOrDeleteOps(t *testing.
kcBuilder := clientBuilder.
WithStatusSubresource(&wl).
WithInterceptorFuncs(interceptor.Funcs{
- Update: func(ctx context.Context, client client.WithWatch, obj client.Object, opts ...client.UpdateOption) error {
+ Patch: func(ctx context.Context, client client.WithWatch, obj client.Object, patch client.Patch, opts ...client.PatchOption) error {
if triggerUpdateErr {
return connRefusedErrMock
}
- return client.Update(ctx, obj, opts...)
+ return client.Patch(ctx, obj, patch, opts...)
},
Delete: func(ctx context.Context, client client.WithWatch, obj client.Object, opts ...client.DeleteOption) error {
if triggerDeleteErr {
diff --git a/pkg/util/client/client.go b/pkg/util/client/client.go
new file mode 100644
index 0000000000..d7192e7f84
--- /dev/null
+++ b/pkg/util/client/client.go
@@ -0,0 +1,43 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package client
+
+import (
+ "context"
+
+ "sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+func CreatePatch(before, after client.Object) (client.Patch, error) {
+ patchBase := client.MergeFrom(before)
+ patchBytes, err := patchBase.Data(after)
+ if err != nil {
+ return nil, err
+ }
+ return client.RawPatch(patchBase.Type(), patchBytes), nil
+}
+
+func Patch(ctx context.Context, c client.Client, before, after client.Object) error {
+ patch, err := CreatePatch(before, after)
+ if err != nil {
+ return err
+ }
+ if err = c.Patch(ctx, before, patch); err != nil {
+ return err
+ }
+ return nil
+}
From 44b3055ad9369e1814782888e46388023a4517c1 Mon Sep 17 00:00:00 2001
From: David Grove
Date: Fri, 26 Apr 2024 13:30:41 -0400
Subject: [PATCH 43/49] Add support for registering externally managed
frameworks (#2059)
* support for externally managed frameworks
* address review comments
* make generate-apiref to pick up changes in config API
* second round of review comments
* allow RegisterExternalJobType to be called multiple times with the same kind
Parsing a kindArg into a GVK is deterministic, and unit tests run multiple
times, so flagging repeated registrations as errors would result in spurious
failures in main_test's TestValidateIntegrationsName.
* remove unnecessary WithExternalFrameworks option
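A minimal sketch of what the wiring amounts to for each entry in integrations.externalFrameworks (the AppWrapper kind is taken from the new unit test and used purely as an illustration):

    // Register an externally managed Kind; the argument must parse as
    // Kind.version.group, otherwise an error is returned.
    if err := jobframework.RegisterExternalJobType("AppWrapper.v1beta2.workload.codeflare.dev"); err != nil {
        return err
    }

    // Owner references of that kind are now recognized as managed by Kueue.
    owner := &metav1.OwnerReference{
        APIVersion: "workload.codeflare.dev/v1beta2",
        Kind:       "AppWrapper",
    }
    _ = jobframework.IsOwnerManagedByKueue(owner) // true after registration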
---
apis/config/v1beta1/configuration_types.go | 3 +
apis/config/v1beta1/zz_generated.deepcopy.go | 5 +
cmd/kueue/main.go | 22 ++++-
cmd/kueue/main_test.go | 27 +++++-
.../manager/controller_manager_config.yaml | 2 +
.../jobframework/integrationmanager.go | 60 ++++++++++--
.../jobframework/integrationmanager_test.go | 94 ++++++++++++++++---
.../en/docs/reference/kueue-config.v1beta1.md | 8 ++
8 files changed, 195 insertions(+), 26 deletions(-)
diff --git a/apis/config/v1beta1/configuration_types.go b/apis/config/v1beta1/configuration_types.go
index 3c8d27939e..90c4059e37 100644
--- a/apis/config/v1beta1/configuration_types.go
+++ b/apis/config/v1beta1/configuration_types.go
@@ -307,6 +307,9 @@ type Integrations struct {
// - "kubeflow.org/xgboostjob"
// - "pod"
Frameworks []string `json:"frameworks,omitempty"`
+ // List of GroupVersionKinds that are managed for Kueue by external controllers;
+ // the expected format is `Kind.version.group.com`.
+ ExternalFrameworks []string `json:"externalFrameworks,omitempty"`
// PodOptions defines kueue controller behaviour for pod objects
PodOptions *PodIntegrationOptions `json:"podOptions,omitempty"`
diff --git a/apis/config/v1beta1/zz_generated.deepcopy.go b/apis/config/v1beta1/zz_generated.deepcopy.go
index 1a19f9cc08..8749fcd8af 100644
--- a/apis/config/v1beta1/zz_generated.deepcopy.go
+++ b/apis/config/v1beta1/zz_generated.deepcopy.go
@@ -240,6 +240,11 @@ func (in *Integrations) DeepCopyInto(out *Integrations) {
*out = make([]string, len(*in))
copy(*out, *in)
}
+ if in.ExternalFrameworks != nil {
+ in, out := &in.ExternalFrameworks, &out.ExternalFrameworks
+ *out = make([]string, len(*in))
+ copy(*out, *in)
+ }
if in.PodOptions != nil {
in, out := &in.PodOptions, &out.PodOptions
*out = new(PodIntegrationOptions)
diff --git a/cmd/kueue/main.go b/cmd/kueue/main.go
index 4b0bd26bdd..fb9ef9671b 100644
--- a/cmd/kueue/main.go
+++ b/cmd/kueue/main.go
@@ -32,7 +32,9 @@ import (
corev1 "k8s.io/api/core/v1"
schedulingv1 "k8s.io/api/scheduling/v1"
"k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/apimachinery/pkg/runtime/schema"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
+ "k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/validation/field"
utilfeature "k8s.io/apiserver/pkg/util/feature"
autoscaling "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1beta1"
@@ -41,6 +43,7 @@ import (
"k8s.io/client-go/rest"
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
+ "sigs.k8s.io/controller-runtime/pkg/client/apiutil"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
@@ -363,13 +366,30 @@ func apply(configFile string) (ctrl.Options, configapi.Configuration, error) {
if cfg.Integrations != nil {
var errorlist field.ErrorList
+ managedKinds := make(sets.Set[string])
availableFrameworks := jobframework.GetIntegrationsList()
path := field.NewPath("integrations", "frameworks")
for _, framework := range cfg.Integrations.Frameworks {
- if _, found := jobframework.GetIntegration(framework); !found {
+ if cb, found := jobframework.GetIntegration(framework); !found {
errorlist = append(errorlist, field.NotSupported(path, framework, availableFrameworks))
+ } else {
+ if gvk, err := apiutil.GVKForObject(cb.JobType, scheme); err == nil {
+ managedKinds = managedKinds.Insert(gvk.String())
+ }
}
}
+
+ path = field.NewPath("integrations", "externalFrameworks")
+ for idx, name := range cfg.Integrations.ExternalFrameworks {
+ if err := jobframework.RegisterExternalJobType(name); err == nil {
+ gvk, _ := schema.ParseKindArg(name)
+ if managedKinds.Has(gvk.String()) {
+ errorlist = append(errorlist, field.Duplicate(path.Index(idx), name))
+ }
+ managedKinds = managedKinds.Insert(gvk.String())
+ }
+ }
+
if len(errorlist) > 0 {
err := errorlist.ToAggregate()
return options, cfg, err
diff --git a/cmd/kueue/main_test.go b/cmd/kueue/main_test.go
index 25d78524a3..882047e4be 100644
--- a/cmd/kueue/main_test.go
+++ b/cmd/kueue/main_test.go
@@ -41,6 +41,8 @@ kind: Configuration
integrations:
frameworks:
- batch/job
+ externalFrameworks:
+ - "Foo.v1.example.com"
`), os.FileMode(0600)); err != nil {
t.Fatal(err)
}
@@ -56,6 +58,19 @@ integrations:
t.Fatal(err)
}
+ badIntegrationsConfig2 := filepath.Join(tmpDir, "badIntegrations2.yaml")
+ if err := os.WriteFile(badIntegrationsConfig2, []byte(`
+apiVersion: config.kueue.x-k8s.io/v1beta1
+kind: Configuration
+integrations:
+ frameworks:
+ - batch/job
+ externalFrameworks:
+ - Job.v1.batch
+`), os.FileMode(0600)); err != nil {
+ t.Fatal(err)
+ }
+
enableDefaultInternalCertManagement := &config.InternalCertManagement{
Enable: ptr.To(true),
WebhookServiceName: ptr.To(config.DefaultWebhookServiceName),
@@ -92,7 +107,8 @@ integrations:
Integrations: &config.Integrations{
// referencing job.FrameworkName ensures the link of job package
// therefore the batch/framework should be registered
- Frameworks: []string{job.FrameworkName},
+ Frameworks: []string{job.FrameworkName},
+ ExternalFrameworks: []string{"Foo.v1.example.com"},
PodOptions: &config.PodIntegrationOptions{
NamespaceSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
@@ -124,6 +140,11 @@ integrations:
configFile: badIntegrationsConfig,
wantError: fmt.Errorf("integrations.frameworks: Unsupported value: \"unregistered/jobframework\": supported values: \"batch/job\", \"jobset.x-k8s.io/jobset\", \"kubeflow.org/mpijob\", \"kubeflow.org/mxjob\", \"kubeflow.org/paddlejob\", \"kubeflow.org/pytorchjob\", \"kubeflow.org/tfjob\", \"kubeflow.org/xgboostjob\", \"pod\", \"ray.io/raycluster\", \"ray.io/rayjob\""),
},
+ {
+ name: "bad integrations config 2",
+ configFile: badIntegrationsConfig2,
+ wantError: fmt.Errorf("integrations.externalFrameworks[0]: Duplicate value: \"Job.v1.batch\""),
+ },
}
for _, tc := range testcases {
@@ -137,7 +158,9 @@ integrations:
t.Errorf("Unexpected config (-want +got):\n%s", diff)
}
} else {
- if diff := cmp.Diff(tc.wantError.Error(), err.Error()); diff != "" {
+ if err == nil {
+ t.Errorf("Failed to get expected error")
+ } else if diff := cmp.Diff(tc.wantError.Error(), err.Error()); diff != "" {
t.Errorf("Unexpected error (-want +got):\n%s", diff)
}
}
diff --git a/config/components/manager/controller_manager_config.yaml b/config/components/manager/controller_manager_config.yaml
index e698ecf762..13e58ac5df 100644
--- a/config/components/manager/controller_manager_config.yaml
+++ b/config/components/manager/controller_manager_config.yaml
@@ -42,6 +42,8 @@ integrations:
- "kubeflow.org/tfjob"
- "kubeflow.org/xgboostjob"
# - "pod"
+# externalFrameworks:
+# - "Foo.v1.example.com"
# podOptions:
# namespaceSelector:
# matchExpressions:
diff --git a/pkg/controller/jobframework/integrationmanager.go b/pkg/controller/jobframework/integrationmanager.go
index a535077fb5..ec1af39992 100644
--- a/pkg/controller/jobframework/integrationmanager.go
+++ b/pkg/controller/jobframework/integrationmanager.go
@@ -24,6 +24,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -33,6 +34,7 @@ import (
var (
errDuplicateFrameworkName = errors.New("duplicate framework name")
errMissingMandatoryField = errors.New("mandatory field missing")
+ errFrameworkNameFormat = errors.New("misformatted external framework name")
)
type JobReconcilerInterface interface {
@@ -66,8 +68,9 @@ type IntegrationCallbacks struct {
}
type integrationManager struct {
- names []string
- integrations map[string]IntegrationCallbacks
+ names []string
+ integrations map[string]IntegrationCallbacks
+ externalIntegrations map[string]runtime.Object
}
var manager integrationManager
@@ -98,6 +101,28 @@ func (m *integrationManager) register(name string, cb IntegrationCallbacks) erro
return nil
}
+func (m *integrationManager) registerExternal(kindArg string) error {
+ if m.externalIntegrations == nil {
+ m.externalIntegrations = make(map[string]runtime.Object)
+ }
+
+ gvk, _ := schema.ParseKindArg(kindArg)
+ if gvk == nil {
+ return fmt.Errorf("%w %q", errFrameworkNameFormat, kindArg)
+ }
+ apiVersion, kind := gvk.ToAPIVersionAndKind()
+ jobType := &metav1.PartialObjectMetadata{
+ TypeMeta: metav1.TypeMeta{
+ APIVersion: apiVersion,
+ Kind: kind,
+ },
+ }
+
+ m.externalIntegrations[kindArg] = jobType
+
+ return nil
+}
+
func (m *integrationManager) forEach(f func(name string, cb IntegrationCallbacks) error) error {
for _, name := range m.names {
if err := f(name, m.integrations[name]); err != nil {
@@ -112,6 +137,11 @@ func (m *integrationManager) get(name string) (IntegrationCallbacks, bool) {
return cb, f
}
+func (m *integrationManager) getExternal(kindArg string) (runtime.Object, bool) {
+ jt, f := m.externalIntegrations[kindArg]
+ return jt, f
+}
+
func (m *integrationManager) getList() []string {
ret := make([]string, len(m.names))
copy(ret, m.names)
@@ -119,13 +149,19 @@ func (m *integrationManager) getList() []string {
return ret
}
-func (m *integrationManager) getCallbacksForOwner(ownerRef *metav1.OwnerReference) *IntegrationCallbacks {
- for _, name := range m.names {
- cbs := m.integrations[name]
+func (m *integrationManager) getJobTypeForOwner(ownerRef *metav1.OwnerReference) runtime.Object {
+ for _, cbs := range m.integrations {
if cbs.IsManagingObjectsOwner != nil && cbs.IsManagingObjectsOwner(ownerRef) {
- return &cbs
+ return cbs.JobType
+ }
+ }
+ for _, jt := range m.externalIntegrations {
+ apiVersion, kind := jt.GetObjectKind().GroupVersionKind().ToAPIVersionAndKind()
+ if ownerRef.Kind == kind && ownerRef.APIVersion == apiVersion {
+ return jt
}
}
+
return nil
}
@@ -136,6 +172,12 @@ func RegisterIntegration(name string, cb IntegrationCallbacks) error {
return manager.register(name, cb)
}
+// RegisterExternalJobType registers a new externally-managed Kind, returns an error
+// if kindArg cannot be parsed as a Kind.version.group.
+func RegisterExternalJobType(kindArg string) error {
+ return manager.registerExternal(kindArg)
+}
+
// ForEachIntegration loops through the registered list of frameworks calling f,
// if at any point f returns an error the loop is stopped and that error is returned.
func ForEachIntegration(f func(name string, cb IntegrationCallbacks) error) error {
@@ -156,14 +198,14 @@ func GetIntegrationsList() []string {
// IsOwnerManagedByKueue returns true if the provided owner can be managed by
// kueue.
func IsOwnerManagedByKueue(owner *metav1.OwnerReference) bool {
- return manager.getCallbacksForOwner(owner) != nil
+ return manager.getJobTypeForOwner(owner) != nil
}
// GetEmptyOwnerObject returns an empty object of the owner's type,
// returns nil if the owner is not manageable by kueue.
func GetEmptyOwnerObject(owner *metav1.OwnerReference) client.Object {
- if cbs := manager.getCallbacksForOwner(owner); cbs != nil {
- return cbs.JobType.DeepCopyObject().(client.Object)
+ if jt := manager.getJobTypeForOwner(owner); jt != nil {
+ return jt.DeepCopyObject().(client.Object)
}
return nil
}
diff --git a/pkg/controller/jobframework/integrationmanager_test.go b/pkg/controller/jobframework/integrationmanager_test.go
index 2a3a3ec397..c3f124599b 100644
--- a/pkg/controller/jobframework/integrationmanager_test.go
+++ b/pkg/controller/jobframework/integrationmanager_test.go
@@ -24,9 +24,11 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
+ batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -236,6 +238,58 @@ func compareCallbacks(x, y interface{}) bool {
return reflect.ValueOf(xcb.AddToScheme).Pointer() == reflect.ValueOf(ycb.AddToScheme).Pointer()
}
+func TestRegisterExternal(t *testing.T) {
+ cases := map[string]struct {
+ manager *integrationManager
+ kindArg string
+ wantError error
+ wantGVK *schema.GroupVersionKind
+ }{
+ "successful 1": {
+ manager: &integrationManager{
+ names: []string{"oldFramework"},
+ integrations: map[string]IntegrationCallbacks{
+ "oldFramework": testIntegrationCallbacks,
+ },
+ },
+ kindArg: "Job.v1.batch",
+ wantError: nil,
+ wantGVK: &schema.GroupVersionKind{Group: "batch", Version: "v1", Kind: "Job"},
+ },
+ "successful 2": {
+ manager: &integrationManager{
+ externalIntegrations: map[string]runtime.Object{
+ "Job.v1.batch": &batchv1.Job{TypeMeta: metav1.TypeMeta{Kind: "Job", APIVersion: "batch/v1"}},
+ },
+ },
+ kindArg: "AppWrapper.v1beta2.workload.codeflare.dev",
+ wantError: nil,
+ wantGVK: &schema.GroupVersionKind{Group: "workload.codeflare.dev", Version: "v1beta2", Kind: "AppWrapper"},
+ },
+ "malformed kind arg": {
+ manager: &integrationManager{},
+ kindArg: "batch/job",
+ wantError: errFrameworkNameFormat,
+ wantGVK: nil,
+ },
+ }
+
+ for tcName, tc := range cases {
+ t.Run(tcName, func(t *testing.T) {
+ gotError := tc.manager.registerExternal(tc.kindArg)
+ if diff := cmp.Diff(tc.wantError, gotError, cmpopts.EquateErrors()); diff != "" {
+ t.Errorf("Unexpected error (-want +got):\n%s", diff)
+ }
+ if gotJobType, found := tc.manager.getExternal(tc.kindArg); found {
+ gvk := gotJobType.GetObjectKind().GroupVersionKind()
+ if diff := cmp.Diff(tc.wantGVK, &gvk); diff != "" {
+ t.Errorf("Unexpected jobtypes (-want +got):\n%s", diff)
+ }
+ }
+ })
+ }
+}
+
func TestForEach(t *testing.T) {
foeEachError := errors.New("test error")
cases := map[string]struct {
@@ -286,7 +340,7 @@ func TestForEach(t *testing.T) {
}
}
-func TestGetCallbacksForOwner(t *testing.T) {
+func TestGetJobTypeForOwner(t *testing.T) {
dontManage := IntegrationCallbacks{
NewReconciler: func(client.Client, record.EventRecorder, ...Option) JobReconcilerInterface {
panic("not implemented")
@@ -297,13 +351,18 @@ func TestGetCallbacksForOwner(t *testing.T) {
manageK1 := func() IntegrationCallbacks {
ret := dontManage
ret.IsManagingObjectsOwner = func(owner *metav1.OwnerReference) bool { return owner.Kind == "K1" }
+ ret.JobType = &metav1.PartialObjectMetadata{TypeMeta: metav1.TypeMeta{Kind: "K1"}}
return ret
}()
manageK2 := func() IntegrationCallbacks {
ret := dontManage
ret.IsManagingObjectsOwner = func(owner *metav1.OwnerReference) bool { return owner.Kind == "K2" }
+ ret.JobType = &metav1.PartialObjectMetadata{TypeMeta: metav1.TypeMeta{Kind: "K2"}}
return ret
}()
+ externalK3 := func() runtime.Object {
+ return &metav1.PartialObjectMetadata{TypeMeta: metav1.TypeMeta{Kind: "K3"}}
+ }()
mgr := integrationManager{
names: []string{"manageK1", "dontManage", "manageK2"},
@@ -312,38 +371,45 @@ func TestGetCallbacksForOwner(t *testing.T) {
"manageK1": manageK1,
"manageK2": manageK2,
},
+ externalIntegrations: map[string]runtime.Object{
+ "externalK3": externalK3,
+ },
}
cases := map[string]struct {
- owner *metav1.OwnerReference
- wantCallbacks *IntegrationCallbacks
+ owner *metav1.OwnerReference
+ wantJobType runtime.Object
}{
"K1": {
- owner: &metav1.OwnerReference{Kind: "K1"},
- wantCallbacks: &manageK1,
+ owner: &metav1.OwnerReference{Kind: "K1"},
+ wantJobType: manageK1.JobType,
},
"K2": {
- owner: &metav1.OwnerReference{Kind: "K2"},
- wantCallbacks: &manageK2,
+ owner: &metav1.OwnerReference{Kind: "K2"},
+ wantJobType: manageK2.JobType,
},
"K3": {
- owner: &metav1.OwnerReference{Kind: "K3"},
- wantCallbacks: nil,
+ owner: &metav1.OwnerReference{Kind: "K3"},
+ wantJobType: externalK3,
+ },
+ "K4": {
+ owner: &metav1.OwnerReference{Kind: "K4"},
+ wantJobType: nil,
},
}
for tcName, tc := range cases {
t.Run(tcName, func(t *testing.T) {
- gotCallbacks := mgr.getCallbacksForOwner(tc.owner)
- if tc.wantCallbacks == nil {
- if gotCallbacks != nil {
+ wantJobType := mgr.getJobTypeForOwner(tc.owner)
+ if tc.wantJobType == nil {
+ if wantJobType != nil {
t.Errorf("This owner should be unmanaged")
}
} else {
- if gotCallbacks == nil {
+ if wantJobType == nil {
t.Errorf("This owner should be managed")
} else {
- if diff := cmp.Diff(*tc.wantCallbacks, *gotCallbacks, cmp.FilterValues(func(_, _ interface{}) bool { return true }, cmp.Comparer(compareCallbacks))); diff != "" {
+ if diff := cmp.Diff(tc.wantJobType, wantJobType); diff != "" {
t.Errorf("Unexpected callbacks (-want +got):\n%s", diff)
}
}
diff --git a/site/content/en/docs/reference/kueue-config.v1beta1.md b/site/content/en/docs/reference/kueue-config.v1beta1.md
index 040c2fecf9..da4d406598 100644
--- a/site/content/en/docs/reference/kueue-config.v1beta1.md
+++ b/site/content/en/docs/reference/kueue-config.v1beta1.md
@@ -429,6 +429,14 @@ Possible options:
+
externalFrameworks[Required]
+[]string
+
+
+
List of GroupVersionKinds that are managed for Kueue by external controllers;
+the expected format is Kind.version.group.com.