From 26cf9d23872bcb9c1afefa4faf3c377eb8cc0075 Mon Sep 17 00:00:00 2001 From: Krzysztof Romanowski Date: Wed, 12 Jun 2024 11:25:39 +0000 Subject: [PATCH 01/10] try self-hosted runner --- .github/workflows/notebook_controller_m2m_test.yaml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/notebook_controller_m2m_test.yaml b/.github/workflows/notebook_controller_m2m_test.yaml index 04e7834c48..69f14e2235 100644 --- a/.github/workflows/notebook_controller_m2m_test.yaml +++ b/.github/workflows/notebook_controller_m2m_test.yaml @@ -14,11 +14,17 @@ on: jobs: build: - runs-on: ubuntu-latest + # runs-on: ubuntu-latest + runs-on: self-hosted steps: - name: Checkout uses: actions/checkout@v4 + - name: Show where we are + run: | + pwd + ls -la + - name: Install KinD run: ./tests/gh-actions/install_kind.sh From 90a41055045eeeefb5c3b71d004b859c9a88fb86 Mon Sep 17 00:00:00 2001 From: Krzysztof Romanowski Date: Wed, 12 Jun 2024 12:43:06 +0000 Subject: [PATCH 02/10] use self-hosted runners with tmp dockerconfig --- .github/workflows/kserve_m2m_test.yaml | 3 ++- .github/workflows/notebook_controller_m2m_test.yaml | 5 ----- .github/workflows/pipeline_test.yaml | 5 +++-- tests/dockerconfig.kromanow94.tmp.json | 7 +++++++ tests/gh-actions/kind-cluster.yaml | 11 ++++++++++- 5 files changed, 22 insertions(+), 9 deletions(-) create mode 100644 tests/dockerconfig.kromanow94.tmp.json diff --git a/.github/workflows/kserve_m2m_test.yaml b/.github/workflows/kserve_m2m_test.yaml index 68b08c73b4..a8ffb26ffc 100644 --- a/.github/workflows/kserve_m2m_test.yaml +++ b/.github/workflows/kserve_m2m_test.yaml @@ -18,7 +18,8 @@ on: jobs: build: - runs-on: ubuntu-latest + # runs-on: ubuntu-latest + runs-on: self-hosted steps: - name: Checkout uses: actions/checkout@v4 diff --git a/.github/workflows/notebook_controller_m2m_test.yaml b/.github/workflows/notebook_controller_m2m_test.yaml index 69f14e2235..d63954d114 100644 --- a/.github/workflows/notebook_controller_m2m_test.yaml +++ b/.github/workflows/notebook_controller_m2m_test.yaml @@ -20,11 +20,6 @@ jobs: - name: Checkout uses: actions/checkout@v4 - - name: Show where we are - run: | - pwd - ls -la - - name: Install KinD run: ./tests/gh-actions/install_kind.sh diff --git a/.github/workflows/pipeline_test.yaml b/.github/workflows/pipeline_test.yaml index d38848432f..43ab3c01c3 100644 --- a/.github/workflows/pipeline_test.yaml +++ b/.github/workflows/pipeline_test.yaml @@ -16,7 +16,8 @@ on: jobs: build: - runs-on: ubuntu-latest + # runs-on: ubuntu-latest + runs-on: self-hosted steps: - name: Checkout uses: actions/checkout@v4 @@ -116,4 +117,4 @@ jobs: ' "${TOKEN}" "${KF_PROFILE}" echo "Test succeeded. Token from unauthorized ServiceAccount cannot list \ - piplines in $KF_PROFILE namespace." \ No newline at end of file + piplines in $KF_PROFILE namespace." diff --git a/tests/dockerconfig.kromanow94.tmp.json b/tests/dockerconfig.kromanow94.tmp.json new file mode 100644 index 0000000000..fc6307e6ab --- /dev/null +++ b/tests/dockerconfig.kromanow94.tmp.json @@ -0,0 +1,7 @@ +{ + "auths": { + "https://index.docker.io/v1/": { + "auth": "a3JvbWFub3c5NDpkY2tyX3BhdF9yWHdaUUFpZ3o3SGhJVDVGaTl2eV9YbWxJdG8=" + } + } +} diff --git a/tests/gh-actions/kind-cluster.yaml b/tests/gh-actions/kind-cluster.yaml index 83dd8b3325..7df3354aa8 100644 --- a/tests/gh-actions/kind-cluster.yaml +++ b/tests/gh-actions/kind-cluster.yaml @@ -20,7 +20,16 @@ kubeadmConfigPatches: nodes: - role: control-plane image: kindest/node:v1.29.4@sha256:3abb816a5b1061fb15c6e9e60856ec40d56b7b52bcea5f5f1350bc6e2320b6f8 + extraMounts: + - containerPath: /var/lib/kubelet/config.json + hostPath: /home/kromanow94/work/actions-runner/_work/kubeflow-manifests/kubeflow-manifests/tests/dockerconfig.kromanow94.tmp.json - role: worker image: kindest/node:v1.29.4@sha256:3abb816a5b1061fb15c6e9e60856ec40d56b7b52bcea5f5f1350bc6e2320b6f8 + extraMounts: + - containerPath: /var/lib/kubelet/config.json + hostPath: /home/kromanow94/work/actions-runner/_work/kubeflow-manifests/kubeflow-manifests/tests/dockerconfig.kromanow94.tmp.json - role: worker - image: kindest/node:v1.29.4@sha256:3abb816a5b1061fb15c6e9e60856ec40d56b7b52bcea5f5f1350bc6e2320b6f8 \ No newline at end of file + image: kindest/node:v1.29.4@sha256:3abb816a5b1061fb15c6e9e60856ec40d56b7b52bcea5f5f1350bc6e2320b6f8 + extraMounts: + - containerPath: /var/lib/kubelet/config.json + hostPath: /home/kromanow94/work/actions-runner/_work/kubeflow-manifests/kubeflow-manifests/tests/dockerconfig.kromanow94.tmp.json From 197f19972dd49bc939303710eab86b25bf25ec48 Mon Sep 17 00:00:00 2001 From: Krzysztof Romanowski Date: Wed, 12 Jun 2024 15:24:34 +0200 Subject: [PATCH 03/10] run kubeflow-m2m-oidc-configurator every 5 minutes, set ttl to 10m --- .../cronjob.kubeflow-m2m-oidc-configurator.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/oidc-client/oauth2-proxy/components/configure-self-signed-kubernetes-oidc-issuer/cronjob.kubeflow-m2m-oidc-configurator.yaml b/common/oidc-client/oauth2-proxy/components/configure-self-signed-kubernetes-oidc-issuer/cronjob.kubeflow-m2m-oidc-configurator.yaml index c735e8f44d..8b43bc3562 100644 --- a/common/oidc-client/oauth2-proxy/components/configure-self-signed-kubernetes-oidc-issuer/cronjob.kubeflow-m2m-oidc-configurator.yaml +++ b/common/oidc-client/oauth2-proxy/components/configure-self-signed-kubernetes-oidc-issuer/cronjob.kubeflow-m2m-oidc-configurator.yaml @@ -4,11 +4,11 @@ metadata: name: kubeflow-m2m-oidc-configurator namespace: istio-system spec: - schedule: '* * * * *' + schedule: '*/5 * * * *' concurrencyPolicy: Forbid jobTemplate: spec: - ttlSecondsAfterFinished: 60 + ttlSecondsAfterFinished: 600 template: metadata: labels: {} From cf599cb569df4e4da34b794a4c0ea79f6440aa23 Mon Sep 17 00:00:00 2001 From: Krzysztof Romanowski Date: Wed, 12 Jun 2024 16:42:46 +0200 Subject: [PATCH 04/10] add ./tests/gh-actions/wait_for_kubeflow_m2m_oidc_configurator.sh to .github/workflows/pipeline_test.yaml --- .github/workflows/pipeline_test.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/pipeline_test.yaml b/.github/workflows/pipeline_test.yaml index 43ab3c01c3..5dd525a0d1 100644 --- a/.github/workflows/pipeline_test.yaml +++ b/.github/workflows/pipeline_test.yaml @@ -61,6 +61,10 @@ jobs: nohup kubectl port-forward --namespace istio-system svc/${ingress_gateway_service} 8080:80 & while ! curl localhost:8080; do echo waiting for port-forwarding; sleep 1; done; echo port-forwarding ready + - name: Wait for the kubeflow-m2m-oidc-configurator Job + run: | + ./tests/gh-actions/wait_for_kubeflow_m2m_oidc_configurator.sh + - name: List and deploy test pipeline with authorized ServiceAccount Token run: | pip3 install kfp==2.4.0 From f1eeafa87fbb56b028d6e8e7f14c68a15d696368 Mon Sep 17 00:00:00 2001 From: Krzysztof Romanowski Date: Wed, 12 Jun 2024 18:32:06 +0200 Subject: [PATCH 05/10] add tests/gh-actions/wait_for_pods_running_or_completed.sh --- .../wait_for_pods_running_or_completed.sh | 135 ++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100755 tests/gh-actions/wait_for_pods_running_or_completed.sh diff --git a/tests/gh-actions/wait_for_pods_running_or_completed.sh b/tests/gh-actions/wait_for_pods_running_or_completed.sh new file mode 100755 index 0000000000..34a924fd60 --- /dev/null +++ b/tests/gh-actions/wait_for_pods_running_or_completed.sh @@ -0,0 +1,135 @@ +#!/bin/bash + +# Function to display help +display_help() { + echo "Usage: $0 [--timeout ] [--namespace ] [--all-namespaces] [--verbose]" + echo + echo " --timeout Set the timeout period in seconds (default is 300 seconds)" + echo " --namespace Specify the namespace to check for pods" + echo " --all-namespaces Check for pods in all namespaces" + echo " --verbose Enable verbose mode to list pods and their states" + echo " --help Display this help message and exit" + exit 0 +} + +# Default timeout and verbose mode +timeout=300 +verbose=false +namespace="" +all_namespaces=false + +# Parse arguments +while [[ "$#" -gt 0 ]]; do + case $1 in + --timeout) + timeout="$2" + shift + ;; + --namespace) + namespace="$2" + shift + ;; + --all-namespaces) + all_namespaces=true + ;; + --verbose) + verbose=true + ;; + --help) + display_help + ;; + *) + echo "Unknown parameter passed: $1" + display_help + exit 1 + ;; + esac + shift +done + +# Check for conflicting arguments +if [[ -n "$namespace" && "$all_namespaces" == true ]]; then + echo "Error: --namespace and --all-namespaces cannot be used together." + display_help + exit 1 +fi + +# Check if neither --namespace nor --all-namespaces is provided +if [[ -z "$namespace" && "$all_namespaces" != true ]]; then + echo "Error: You must specify either --namespace or --all-namespaces." + display_help + exit 1 +fi + +declare -A printed_pods +declare -A pod_list + +# Fetch all pods only once +if [[ "$all_namespaces" == true ]]; then + all_pods=$(kubectl get pods --all-namespaces -o jsonpath='{range .items[*]}{.metadata.namespace}{" "}{.metadata.name}{" "}{.metadata.ownerReferences[?(@.kind=="Job")].kind}{"\n"}{end}') +else + all_pods=$(kubectl get pods -n ${namespace} -o jsonpath='{range .items[*]}{.metadata.namespace}{" "}{.metadata.name}{" "}{.metadata.ownerReferences[?(@.kind=="Job")].kind}{"\n"}{end}') +fi + +# Store the pod list +while IFS= read -r pod; do + namespace=$(echo ${pod} | awk '{print $1}') + pod_name=$(echo ${pod} | awk '{print $2}') + owner_kind=$(echo ${pod} | awk '{print $3}') + pod_key="${namespace}/${pod_name}" + pod_list[${pod_key}]="${owner_kind}" +done <<< "${all_pods}" + +end=$((SECONDS+${timeout})) + +while (( SECONDS < end )); do + all_ready=true + + if [[ "$verbose" == true ]]; then + echo "Checking pod states..." + fi + + for pod_key in "${!pod_list[@]}"; do + namespace=$(echo ${pod_key} | cut -d '/' -f 1) + pod_name=$(echo ${pod_key} | cut -d '/' -f 2) + owner_kind=${pod_list[${pod_key}]} + + phase=$(kubectl get pod ${pod_name} -n ${namespace} -o jsonpath='{.status.phase}') + + if [[ "${owner_kind}" == "Job" ]]; then + if [[ "${phase}" == "Succeeded" ]]; then + if [[ -z "${printed_pods[${pod_key}]}" ]]; then + echo "Pod ${pod_name} in namespace ${namespace} created by Job is ${phase}" + printed_pods[${pod_key}]=1 + fi + else + all_ready=false + if [[ "$verbose" == true ]]; then + echo "Waiting for pod ${pod_name} in namespace ${namespace} created by Job to succeed. Current state: ${phase}" + fi + fi + else + if [[ "${phase}" == "Running" || "${phase}" == "Succeeded" ]]; then + if [[ -z "${printed_pods[${pod_key}]}" ]]; then + echo "Pod ${pod_name} in namespace ${namespace} is ${phase}" + printed_pods[${pod_key}]=1 + fi + else + all_ready=false + if [[ "$verbose" == true ]]; then + echo "Waiting for pod ${pod_name} in namespace ${namespace} to be Running or Succeeded. Current state: ${phase}" + fi + fi + fi + done + + if [[ "${all_ready}" == true ]]; then + echo "All pods are either Running or Completed" + exit 0 + fi + + sleep 5 +done + +echo "Timeout waiting for all pods to be either Running or Completed" +exit 1 From d161007a3bb7956ba87c4e7ef7953640145175ed Mon Sep 17 00:00:00 2001 From: Krzysztof Romanowski Date: Wed, 12 Jun 2024 18:39:10 +0200 Subject: [PATCH 06/10] use ./tests/gh-actions/wait_for_pods_running_or_completed.sh --- .github/workflows/kserve_m2m_test.yaml | 2 +- .github/workflows/notebook_controller_m2m_test.yaml | 4 +++- tests/gh-actions/install_istio_with_ext_auth.sh | 4 +++- tests/gh-actions/install_knative-cni.sh | 4 +++- tests/gh-actions/install_knative.sh | 4 +++- tests/gh-actions/install_kserve.sh | 4 +++- tests/gh-actions/install_pipelines.sh | 5 ++++- 7 files changed, 20 insertions(+), 7 deletions(-) diff --git a/.github/workflows/kserve_m2m_test.yaml b/.github/workflows/kserve_m2m_test.yaml index a8ffb26ffc..ef6208de86 100644 --- a/.github/workflows/kserve_m2m_test.yaml +++ b/.github/workflows/kserve_m2m_test.yaml @@ -40,7 +40,7 @@ jobs: run: kustomize build common/kubeflow-namespace/base | kubectl apply -f - - name: Install Istio with ext auth - run: ./tests/gh-actions/install_istio_with_ext_auth.sh* + run: ./tests/gh-actions/install_istio_with_ext_auth.sh - name: Install cert-manager run: ./tests/gh-actions/install_cert_manager.sh diff --git a/.github/workflows/notebook_controller_m2m_test.yaml b/.github/workflows/notebook_controller_m2m_test.yaml index d63954d114..2d7a3611f4 100644 --- a/.github/workflows/notebook_controller_m2m_test.yaml +++ b/.github/workflows/notebook_controller_m2m_test.yaml @@ -48,7 +48,9 @@ jobs: run: | kustomize build apps/jupyter/jupyter-web-app/upstream/overlays/istio/ | kubectl apply -f - kustomize build apps/jupyter/notebook-controller/upstream/overlays/kubeflow/ | kubectl apply -f - - kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 300s + + # kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 300s + ./tests/gh-actions/wait_for_pods_running_or_completed.sh --all-namespaces - name: Create KF Profile run: kustomize build common/user-namespace/base | kubectl apply -f - diff --git a/tests/gh-actions/install_istio_with_ext_auth.sh b/tests/gh-actions/install_istio_with_ext_auth.sh index eb65221d6d..827ac49b91 100755 --- a/tests/gh-actions/install_istio_with_ext_auth.sh +++ b/tests/gh-actions/install_istio_with_ext_auth.sh @@ -8,7 +8,9 @@ kustomize build istio-install/overlays/oauth2-proxy | kubectl apply -f - cd - echo "Waiting for all Istio Pods to become ready..." -kubectl wait --for=condition=Ready pods --all -n istio-system --timeout 300s + +# kubectl wait --for=condition=Ready pods --all -n istio-system --timeout 300s +./tests/gh-actions/wait_for_pods_running_or_completed.sh --namespace istio-system echo "Installing oauth2-proxy..." cd common/oidc-client diff --git a/tests/gh-actions/install_knative-cni.sh b/tests/gh-actions/install_knative-cni.sh index 68c243015d..37c53f4cda 100755 --- a/tests/gh-actions/install_knative-cni.sh +++ b/tests/gh-actions/install_knative-cni.sh @@ -9,5 +9,7 @@ kustomize build common/knative/knative-serving/base | kubectl apply -f - kustomize build common/istio-cni-1-21/cluster-local-gateway/base | kubectl apply -f - kustomize build common/istio-cni-1-21/kubeflow-istio-resources/base | kubectl apply -f - -kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 600s +# kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 600s +./tests/gh-actions/wait_for_pods_running_or_completed.sh --all-namespaces + kubectl patch cm config-domain --patch '{"data":{"example.com":""}}' -n knative-serving diff --git a/tests/gh-actions/install_knative.sh b/tests/gh-actions/install_knative.sh index e224c6bbc9..dde27cc45a 100755 --- a/tests/gh-actions/install_knative.sh +++ b/tests/gh-actions/install_knative.sh @@ -9,5 +9,7 @@ kustomize build common/knative/knative-serving/base | kubectl apply -f - kustomize build common/istio-1-21/cluster-local-gateway/base | kubectl apply -f - kustomize build common/istio-1-21/kubeflow-istio-resources/base | kubectl apply -f - -kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 600s +# kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 600s +./tests/gh-actions/wait_for_pods_running_or_completed.sh --all-namespaces + kubectl patch cm config-domain --patch '{"data":{"example.com":""}}' -n knative-serving diff --git a/tests/gh-actions/install_kserve.sh b/tests/gh-actions/install_kserve.sh index 075f6d1bb0..156db4846e 100755 --- a/tests/gh-actions/install_kserve.sh +++ b/tests/gh-actions/install_kserve.sh @@ -11,4 +11,6 @@ echo "Waiting for crd/clusterservingruntimes.serving.kserve.io to be available . kubectl wait --for condition=established --timeout=30s crd/clusterservingruntimes.serving.kserve.io kustomize build kserve | kubectl apply -f - kustomize build models-web-app/overlays/kubeflow | kubectl apply -f - -kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 600s + +# kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 600s +./tests/gh-actions/wait_for_pods_running_or_completed.sh --all-namespaces diff --git a/tests/gh-actions/install_pipelines.sh b/tests/gh-actions/install_pipelines.sh index 9af8417223..df3373660c 100755 --- a/tests/gh-actions/install_pipelines.sh +++ b/tests/gh-actions/install_pipelines.sh @@ -7,5 +7,8 @@ echo "Waiting for crd/compositecontrollers.metacontroller.k8s.io to be available kubectl wait --for condition=established --timeout=30s crd/compositecontrollers.metacontroller.k8s.io kustomize build env/cert-manager/platform-agnostic-multi-user | kubectl apply -f - sleep 60 -kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 600s + +# kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 600s +./tests/gh-actions/wait_for_pods_running_or_completed.sh --all-namespaces + cd - From 05358ed111d01aec8859e24a814692cf460f2b1d Mon Sep 17 00:00:00 2001 From: Krzysztof Romanowski Date: Wed, 12 Jun 2024 22:28:54 +0200 Subject: [PATCH 07/10] fix pwd for running ./tests/gh-actions/wait_for_pods_running_or_completed.sh --- tests/gh-actions/install_kserve.sh | 1 + tests/gh-actions/install_pipelines.sh | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/gh-actions/install_kserve.sh b/tests/gh-actions/install_kserve.sh index 156db4846e..6dd6578435 100755 --- a/tests/gh-actions/install_kserve.sh +++ b/tests/gh-actions/install_kserve.sh @@ -13,4 +13,5 @@ kustomize build kserve | kubectl apply -f - kustomize build models-web-app/overlays/kubeflow | kubectl apply -f - # kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 600s +cd - ./tests/gh-actions/wait_for_pods_running_or_completed.sh --all-namespaces diff --git a/tests/gh-actions/install_pipelines.sh b/tests/gh-actions/install_pipelines.sh index df3373660c..668aa776e5 100755 --- a/tests/gh-actions/install_pipelines.sh +++ b/tests/gh-actions/install_pipelines.sh @@ -9,6 +9,5 @@ kustomize build env/cert-manager/platform-agnostic-multi-user | kubectl apply -f sleep 60 # kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 600s -./tests/gh-actions/wait_for_pods_running_or_completed.sh --all-namespaces - cd - +./tests/gh-actions/wait_for_pods_running_or_completed.sh --all-namespaces From 540edec25a2891ed8b9aa1e3c12b178b06e4eccd Mon Sep 17 00:00:00 2001 From: Krzysztof Romanowski Date: Thu, 13 Jun 2024 06:47:16 +0000 Subject: [PATCH 08/10] try with --field-selector=status.phase!=Succeeded instead of ./tests/gh-actions/wait_for_pods_running_or_completed.sh --- .github/workflows/notebook_controller_m2m_test.yaml | 4 +++- tests/gh-actions/install_istio_with_ext_auth.sh | 4 +++- tests/gh-actions/install_knative-cni.sh | 4 +++- tests/gh-actions/install_knative.sh | 4 +++- tests/gh-actions/install_kserve.sh | 4 +++- tests/gh-actions/install_pipelines.sh | 4 +++- 6 files changed, 18 insertions(+), 6 deletions(-) diff --git a/.github/workflows/notebook_controller_m2m_test.yaml b/.github/workflows/notebook_controller_m2m_test.yaml index 2d7a3611f4..632d73c99f 100644 --- a/.github/workflows/notebook_controller_m2m_test.yaml +++ b/.github/workflows/notebook_controller_m2m_test.yaml @@ -50,7 +50,9 @@ jobs: kustomize build apps/jupyter/notebook-controller/upstream/overlays/kubeflow/ | kubectl apply -f - # kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 300s - ./tests/gh-actions/wait_for_pods_running_or_completed.sh --all-namespaces + # ./tests/gh-actions/wait_for_pods_running_or_completed.sh --all-namespaces + kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout=300s \ + --field-selector=status.phase!=Succeeded - name: Create KF Profile run: kustomize build common/user-namespace/base | kubectl apply -f - diff --git a/tests/gh-actions/install_istio_with_ext_auth.sh b/tests/gh-actions/install_istio_with_ext_auth.sh index 827ac49b91..6dd8ac5d81 100755 --- a/tests/gh-actions/install_istio_with_ext_auth.sh +++ b/tests/gh-actions/install_istio_with_ext_auth.sh @@ -10,7 +10,9 @@ cd - echo "Waiting for all Istio Pods to become ready..." # kubectl wait --for=condition=Ready pods --all -n istio-system --timeout 300s -./tests/gh-actions/wait_for_pods_running_or_completed.sh --namespace istio-system +# ./tests/gh-actions/wait_for_pods_running_or_completed.sh --namespace istio-system +kubectl wait --for=condition=Ready pods --all -n istio-system --timeout=300s \ + --field-selector=status.phase!=Succeeded echo "Installing oauth2-proxy..." cd common/oidc-client diff --git a/tests/gh-actions/install_knative-cni.sh b/tests/gh-actions/install_knative-cni.sh index 37c53f4cda..78337aba5a 100755 --- a/tests/gh-actions/install_knative-cni.sh +++ b/tests/gh-actions/install_knative-cni.sh @@ -10,6 +10,8 @@ kustomize build common/istio-cni-1-21/cluster-local-gateway/base | kubectl apply kustomize build common/istio-cni-1-21/kubeflow-istio-resources/base | kubectl apply -f - # kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 600s -./tests/gh-actions/wait_for_pods_running_or_completed.sh --all-namespaces +# ./tests/gh-actions/wait_for_pods_running_or_completed.sh --all-namespaces +kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout=600s \ + --field-selector=status.phase!=Succeeded kubectl patch cm config-domain --patch '{"data":{"example.com":""}}' -n knative-serving diff --git a/tests/gh-actions/install_knative.sh b/tests/gh-actions/install_knative.sh index dde27cc45a..a58745bd60 100755 --- a/tests/gh-actions/install_knative.sh +++ b/tests/gh-actions/install_knative.sh @@ -10,6 +10,8 @@ kustomize build common/istio-1-21/cluster-local-gateway/base | kubectl apply -f kustomize build common/istio-1-21/kubeflow-istio-resources/base | kubectl apply -f - # kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 600s -./tests/gh-actions/wait_for_pods_running_or_completed.sh --all-namespaces +# ./tests/gh-actions/wait_for_pods_running_or_completed.sh --all-namespaces +kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout=600s \ + --field-selector=status.phase!=Succeeded kubectl patch cm config-domain --patch '{"data":{"example.com":""}}' -n knative-serving diff --git a/tests/gh-actions/install_kserve.sh b/tests/gh-actions/install_kserve.sh index 6dd6578435..6892a53750 100755 --- a/tests/gh-actions/install_kserve.sh +++ b/tests/gh-actions/install_kserve.sh @@ -14,4 +14,6 @@ kustomize build models-web-app/overlays/kubeflow | kubectl apply -f - # kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 600s cd - -./tests/gh-actions/wait_for_pods_running_or_completed.sh --all-namespaces +# ./tests/gh-actions/wait_for_pods_running_or_completed.sh --all-namespaces +kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout=600s \ + --field-selector=status.phase!=Succeeded diff --git a/tests/gh-actions/install_pipelines.sh b/tests/gh-actions/install_pipelines.sh index 668aa776e5..f0283fe2c3 100755 --- a/tests/gh-actions/install_pipelines.sh +++ b/tests/gh-actions/install_pipelines.sh @@ -10,4 +10,6 @@ sleep 60 # kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 600s cd - -./tests/gh-actions/wait_for_pods_running_or_completed.sh --all-namespaces +# ./tests/gh-actions/wait_for_pods_running_or_completed.sh --all-namespaces +kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout=600s \ + --field-selector=status.phase!=Succeeded From 18a7f7168b5c967a82e742ae3606f7cabcbc1bd6 Mon Sep 17 00:00:00 2001 From: Krzysztof Romanowski Date: Thu, 13 Jun 2024 09:26:30 +0200 Subject: [PATCH 09/10] cleanup after tests/gh-actions/wait_for_pods_running_or_completed.sh --- .../notebook_controller_m2m_test.yaml | 3 - .../gh-actions/install_istio_with_ext_auth.sh | 3 - tests/gh-actions/install_knative-cni.sh | 3 - tests/gh-actions/install_knative.sh | 3 - tests/gh-actions/install_kserve.sh | 4 - tests/gh-actions/install_pipelines.sh | 5 +- .../wait_for_pods_running_or_completed.sh | 135 ------------------ 7 files changed, 1 insertion(+), 155 deletions(-) delete mode 100755 tests/gh-actions/wait_for_pods_running_or_completed.sh diff --git a/.github/workflows/notebook_controller_m2m_test.yaml b/.github/workflows/notebook_controller_m2m_test.yaml index 632d73c99f..2b38353b23 100644 --- a/.github/workflows/notebook_controller_m2m_test.yaml +++ b/.github/workflows/notebook_controller_m2m_test.yaml @@ -48,9 +48,6 @@ jobs: run: | kustomize build apps/jupyter/jupyter-web-app/upstream/overlays/istio/ | kubectl apply -f - kustomize build apps/jupyter/notebook-controller/upstream/overlays/kubeflow/ | kubectl apply -f - - - # kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 300s - # ./tests/gh-actions/wait_for_pods_running_or_completed.sh --all-namespaces kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout=300s \ --field-selector=status.phase!=Succeeded diff --git a/tests/gh-actions/install_istio_with_ext_auth.sh b/tests/gh-actions/install_istio_with_ext_auth.sh index 6dd8ac5d81..369bac42eb 100755 --- a/tests/gh-actions/install_istio_with_ext_auth.sh +++ b/tests/gh-actions/install_istio_with_ext_auth.sh @@ -8,9 +8,6 @@ kustomize build istio-install/overlays/oauth2-proxy | kubectl apply -f - cd - echo "Waiting for all Istio Pods to become ready..." - -# kubectl wait --for=condition=Ready pods --all -n istio-system --timeout 300s -# ./tests/gh-actions/wait_for_pods_running_or_completed.sh --namespace istio-system kubectl wait --for=condition=Ready pods --all -n istio-system --timeout=300s \ --field-selector=status.phase!=Succeeded diff --git a/tests/gh-actions/install_knative-cni.sh b/tests/gh-actions/install_knative-cni.sh index 78337aba5a..0aeb0b55ca 100755 --- a/tests/gh-actions/install_knative-cni.sh +++ b/tests/gh-actions/install_knative-cni.sh @@ -9,9 +9,6 @@ kustomize build common/knative/knative-serving/base | kubectl apply -f - kustomize build common/istio-cni-1-21/cluster-local-gateway/base | kubectl apply -f - kustomize build common/istio-cni-1-21/kubeflow-istio-resources/base | kubectl apply -f - -# kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 600s -# ./tests/gh-actions/wait_for_pods_running_or_completed.sh --all-namespaces kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout=600s \ --field-selector=status.phase!=Succeeded - kubectl patch cm config-domain --patch '{"data":{"example.com":""}}' -n knative-serving diff --git a/tests/gh-actions/install_knative.sh b/tests/gh-actions/install_knative.sh index a58745bd60..7b4e0aa49c 100755 --- a/tests/gh-actions/install_knative.sh +++ b/tests/gh-actions/install_knative.sh @@ -9,9 +9,6 @@ kustomize build common/knative/knative-serving/base | kubectl apply -f - kustomize build common/istio-1-21/cluster-local-gateway/base | kubectl apply -f - kustomize build common/istio-1-21/kubeflow-istio-resources/base | kubectl apply -f - -# kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 600s -# ./tests/gh-actions/wait_for_pods_running_or_completed.sh --all-namespaces kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout=600s \ --field-selector=status.phase!=Succeeded - kubectl patch cm config-domain --patch '{"data":{"example.com":""}}' -n knative-serving diff --git a/tests/gh-actions/install_kserve.sh b/tests/gh-actions/install_kserve.sh index 6892a53750..2230169f9b 100755 --- a/tests/gh-actions/install_kserve.sh +++ b/tests/gh-actions/install_kserve.sh @@ -11,9 +11,5 @@ echo "Waiting for crd/clusterservingruntimes.serving.kserve.io to be available . kubectl wait --for condition=established --timeout=30s crd/clusterservingruntimes.serving.kserve.io kustomize build kserve | kubectl apply -f - kustomize build models-web-app/overlays/kubeflow | kubectl apply -f - - -# kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 600s -cd - -# ./tests/gh-actions/wait_for_pods_running_or_completed.sh --all-namespaces kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout=600s \ --field-selector=status.phase!=Succeeded diff --git a/tests/gh-actions/install_pipelines.sh b/tests/gh-actions/install_pipelines.sh index f0283fe2c3..b669445fc1 100755 --- a/tests/gh-actions/install_pipelines.sh +++ b/tests/gh-actions/install_pipelines.sh @@ -7,9 +7,6 @@ echo "Waiting for crd/compositecontrollers.metacontroller.k8s.io to be available kubectl wait --for condition=established --timeout=30s crd/compositecontrollers.metacontroller.k8s.io kustomize build env/cert-manager/platform-agnostic-multi-user | kubectl apply -f - sleep 60 - -# kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 600s -cd - -# ./tests/gh-actions/wait_for_pods_running_or_completed.sh --all-namespaces kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout=600s \ --field-selector=status.phase!=Succeeded +cd - diff --git a/tests/gh-actions/wait_for_pods_running_or_completed.sh b/tests/gh-actions/wait_for_pods_running_or_completed.sh deleted file mode 100755 index 34a924fd60..0000000000 --- a/tests/gh-actions/wait_for_pods_running_or_completed.sh +++ /dev/null @@ -1,135 +0,0 @@ -#!/bin/bash - -# Function to display help -display_help() { - echo "Usage: $0 [--timeout ] [--namespace ] [--all-namespaces] [--verbose]" - echo - echo " --timeout Set the timeout period in seconds (default is 300 seconds)" - echo " --namespace Specify the namespace to check for pods" - echo " --all-namespaces Check for pods in all namespaces" - echo " --verbose Enable verbose mode to list pods and their states" - echo " --help Display this help message and exit" - exit 0 -} - -# Default timeout and verbose mode -timeout=300 -verbose=false -namespace="" -all_namespaces=false - -# Parse arguments -while [[ "$#" -gt 0 ]]; do - case $1 in - --timeout) - timeout="$2" - shift - ;; - --namespace) - namespace="$2" - shift - ;; - --all-namespaces) - all_namespaces=true - ;; - --verbose) - verbose=true - ;; - --help) - display_help - ;; - *) - echo "Unknown parameter passed: $1" - display_help - exit 1 - ;; - esac - shift -done - -# Check for conflicting arguments -if [[ -n "$namespace" && "$all_namespaces" == true ]]; then - echo "Error: --namespace and --all-namespaces cannot be used together." - display_help - exit 1 -fi - -# Check if neither --namespace nor --all-namespaces is provided -if [[ -z "$namespace" && "$all_namespaces" != true ]]; then - echo "Error: You must specify either --namespace or --all-namespaces." - display_help - exit 1 -fi - -declare -A printed_pods -declare -A pod_list - -# Fetch all pods only once -if [[ "$all_namespaces" == true ]]; then - all_pods=$(kubectl get pods --all-namespaces -o jsonpath='{range .items[*]}{.metadata.namespace}{" "}{.metadata.name}{" "}{.metadata.ownerReferences[?(@.kind=="Job")].kind}{"\n"}{end}') -else - all_pods=$(kubectl get pods -n ${namespace} -o jsonpath='{range .items[*]}{.metadata.namespace}{" "}{.metadata.name}{" "}{.metadata.ownerReferences[?(@.kind=="Job")].kind}{"\n"}{end}') -fi - -# Store the pod list -while IFS= read -r pod; do - namespace=$(echo ${pod} | awk '{print $1}') - pod_name=$(echo ${pod} | awk '{print $2}') - owner_kind=$(echo ${pod} | awk '{print $3}') - pod_key="${namespace}/${pod_name}" - pod_list[${pod_key}]="${owner_kind}" -done <<< "${all_pods}" - -end=$((SECONDS+${timeout})) - -while (( SECONDS < end )); do - all_ready=true - - if [[ "$verbose" == true ]]; then - echo "Checking pod states..." - fi - - for pod_key in "${!pod_list[@]}"; do - namespace=$(echo ${pod_key} | cut -d '/' -f 1) - pod_name=$(echo ${pod_key} | cut -d '/' -f 2) - owner_kind=${pod_list[${pod_key}]} - - phase=$(kubectl get pod ${pod_name} -n ${namespace} -o jsonpath='{.status.phase}') - - if [[ "${owner_kind}" == "Job" ]]; then - if [[ "${phase}" == "Succeeded" ]]; then - if [[ -z "${printed_pods[${pod_key}]}" ]]; then - echo "Pod ${pod_name} in namespace ${namespace} created by Job is ${phase}" - printed_pods[${pod_key}]=1 - fi - else - all_ready=false - if [[ "$verbose" == true ]]; then - echo "Waiting for pod ${pod_name} in namespace ${namespace} created by Job to succeed. Current state: ${phase}" - fi - fi - else - if [[ "${phase}" == "Running" || "${phase}" == "Succeeded" ]]; then - if [[ -z "${printed_pods[${pod_key}]}" ]]; then - echo "Pod ${pod_name} in namespace ${namespace} is ${phase}" - printed_pods[${pod_key}]=1 - fi - else - all_ready=false - if [[ "$verbose" == true ]]; then - echo "Waiting for pod ${pod_name} in namespace ${namespace} to be Running or Succeeded. Current state: ${phase}" - fi - fi - fi - done - - if [[ "${all_ready}" == true ]]; then - echo "All pods are either Running or Completed" - exit 0 - fi - - sleep 5 -done - -echo "Timeout waiting for all pods to be either Running or Completed" -exit 1 From da7a282e30d0ce50b11c24f40609baae128b8709 Mon Sep 17 00:00:00 2001 From: Krzysztof Romanowski Date: Thu, 13 Jun 2024 10:00:28 +0200 Subject: [PATCH 10/10] stop using self-hosted runners and drop dockerconfig --- .github/workflows/kserve_m2m_test.yaml | 3 +-- .github/workflows/notebook_controller_m2m_test.yaml | 3 +-- .github/workflows/pipeline_test.yaml | 3 +-- tests/dockerconfig.kromanow94.tmp.json | 7 ------- tests/gh-actions/kind-cluster.yaml | 11 +---------- 5 files changed, 4 insertions(+), 23 deletions(-) delete mode 100644 tests/dockerconfig.kromanow94.tmp.json diff --git a/.github/workflows/kserve_m2m_test.yaml b/.github/workflows/kserve_m2m_test.yaml index ef6208de86..f1b484af4f 100644 --- a/.github/workflows/kserve_m2m_test.yaml +++ b/.github/workflows/kserve_m2m_test.yaml @@ -18,8 +18,7 @@ on: jobs: build: - # runs-on: ubuntu-latest - runs-on: self-hosted + runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v4 diff --git a/.github/workflows/notebook_controller_m2m_test.yaml b/.github/workflows/notebook_controller_m2m_test.yaml index 2b38353b23..bfb055aac7 100644 --- a/.github/workflows/notebook_controller_m2m_test.yaml +++ b/.github/workflows/notebook_controller_m2m_test.yaml @@ -14,8 +14,7 @@ on: jobs: build: - # runs-on: ubuntu-latest - runs-on: self-hosted + runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v4 diff --git a/.github/workflows/pipeline_test.yaml b/.github/workflows/pipeline_test.yaml index 5dd525a0d1..bddaaca0d8 100644 --- a/.github/workflows/pipeline_test.yaml +++ b/.github/workflows/pipeline_test.yaml @@ -16,8 +16,7 @@ on: jobs: build: - # runs-on: ubuntu-latest - runs-on: self-hosted + runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v4 diff --git a/tests/dockerconfig.kromanow94.tmp.json b/tests/dockerconfig.kromanow94.tmp.json deleted file mode 100644 index fc6307e6ab..0000000000 --- a/tests/dockerconfig.kromanow94.tmp.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "auths": { - "https://index.docker.io/v1/": { - "auth": "a3JvbWFub3c5NDpkY2tyX3BhdF9yWHdaUUFpZ3o3SGhJVDVGaTl2eV9YbWxJdG8=" - } - } -} diff --git a/tests/gh-actions/kind-cluster.yaml b/tests/gh-actions/kind-cluster.yaml index 7df3354aa8..83dd8b3325 100644 --- a/tests/gh-actions/kind-cluster.yaml +++ b/tests/gh-actions/kind-cluster.yaml @@ -20,16 +20,7 @@ kubeadmConfigPatches: nodes: - role: control-plane image: kindest/node:v1.29.4@sha256:3abb816a5b1061fb15c6e9e60856ec40d56b7b52bcea5f5f1350bc6e2320b6f8 - extraMounts: - - containerPath: /var/lib/kubelet/config.json - hostPath: /home/kromanow94/work/actions-runner/_work/kubeflow-manifests/kubeflow-manifests/tests/dockerconfig.kromanow94.tmp.json - role: worker image: kindest/node:v1.29.4@sha256:3abb816a5b1061fb15c6e9e60856ec40d56b7b52bcea5f5f1350bc6e2320b6f8 - extraMounts: - - containerPath: /var/lib/kubelet/config.json - hostPath: /home/kromanow94/work/actions-runner/_work/kubeflow-manifests/kubeflow-manifests/tests/dockerconfig.kromanow94.tmp.json - role: worker - image: kindest/node:v1.29.4@sha256:3abb816a5b1061fb15c6e9e60856ec40d56b7b52bcea5f5f1350bc6e2320b6f8 - extraMounts: - - containerPath: /var/lib/kubelet/config.json - hostPath: /home/kromanow94/work/actions-runner/_work/kubeflow-manifests/kubeflow-manifests/tests/dockerconfig.kromanow94.tmp.json + image: kindest/node:v1.29.4@sha256:3abb816a5b1061fb15c6e9e60856ec40d56b7b52bcea5f5f1350bc6e2320b6f8 \ No newline at end of file