Skip to content

Commit

Permalink
ci: add e2e workflow for k8s 1.20, 1.24 and the latest
Browse files Browse the repository at this point in the history
Signed-off-by: saintube <saintube@foxmail.com>
  • Loading branch information
saintube committed Aug 26, 2023
1 parent 04c5ba8 commit 6eedbd3
Show file tree
Hide file tree
Showing 12 changed files with 382 additions and 26 deletions.
102 changes: 102 additions & 0 deletions .github/workflows/e2e-k8s-1.22.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
name: E2E-K8S-1.22

on:
push:
branches:
- master
- release-*
pull_request: {}
workflow_dispatch: {}

env:
# Common versions
GO_VERSION: '1.19'
KIND_ACTION_VERSION: 'v1.5.0'
KIND_VERSION: 'v0.20.0'
KIND_IMAGE: 'kindest/node:v1.22.17'
KIND_CLUSTER_NAME: 'ci-testing'
COMPONENT_NS: "koordinator-system"

jobs:

slo-controller:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v3
with:
submodules: true
- name: Setup Go
uses: actions/setup-go@v3
with:
go-version: ${{ env.GO_VERSION }}
- name: Setup Kind Cluster
uses: helm/kind-action@v1.5.0
with:
node_image: ${{ env.KIND_IMAGE }}
cluster_name: ${{ env.KIND_CLUSTER_NAME }}
config: ./test/kind-conf.yaml
version: ${{ env.KIND_VERSION }}
- name: Build image
run: |
export IMAGE="koordinator-sh/koord-manager:e2e-${GITHUB_RUN_ID}"
docker build --pull --no-cache . -t $IMAGE -f docker/koord-manager.dockerfile
kind load docker-image --name=${KIND_CLUSTER_NAME} $IMAGE || { echo >&2 "kind not installed or error loading image: $IMAGE"; exit 1; }
- name: Check host environment
run: |
set -ex
kubectl version --short
kubectl get pods -A
kubectl get nodes -o yaml
tree -L 2 /sys/
tree -L 2 /sys/fs/cgroup
cat /proc/cpuinfo
- name: Install Koordinator
run: |
set -ex
kubectl cluster-info
IMG=koordinator-sh/koord-manager:e2e-${GITHUB_RUN_ID} ./hack/deploy_kind.sh
NODES=$(kubectl get node | wc -l)
for ((i=1;i<10;i++));
do
set +e
PODS=$(kubectl get pod -n ${COMPONENT_NS} | grep "koord-manager\|koordlet" | grep '1/1' | wc -l)
set -e
if [ "$PODS" -ge "$NODES" ]; then
break
fi
sleep 6
done
set +e
PODS=$(kubectl get pod -n ${COMPONENT_NS} | grep "koord-manager\|koordlet" | grep '1/1' | wc -l)
kubectl get pod -A
kubectl get node -o yaml
kubectl get all -n ${COMPONENT_NS} -o wide
kubectl get pod -n ${COMPONENT_NS} --no-headers | grep koord-manager | head -n 1 | awk '{print $1}' | xargs kubectl logs -n ${COMPONENT_NS} --tail=100
kubectl get pod -n ${COMPONENT_NS} --no-headers | grep koord-scheduler | awk '{print $1}' | xargs kubectl logs -n ${COMPONENT_NS} --tail=100
kubectl get pod -n ${COMPONENT_NS} --no-headers | grep koordlet | head -n 1 | awk '{print $1}' | xargs -L 1 kubectl logs -n ${COMPONENT_NS}
kubectl get pod -n ${COMPONENT_NS} -o wide
set -e
if [ "$PODS" -ge "$NODES" ]; then
echo "Wait for koord-manager and koordlet ready successfully"
else
echo "Timeout to wait for koord-manager and koordlet ready"
exit 1
fi
- name: Run E2E Tests
run: |
export KUBECONFIG=/home/runner/.kube/config
make ginkgo
set +e
EXTRA_ARGS="-koordinator-component-namespace=${COMPONENT_NS} -allowed-not-ready-nodes=1 -system-pods-startup-timeout=10s"
./bin/ginkgo -timeout 60m -v --focus='slo-controller' test/e2e -- ${EXTRA_ARGS}
retVal=$?
restartCount=$(kubectl get pod -n ${COMPONENT_NS} -l koord-app=koord-manager --no-headers | head -n 1 | awk '{print $4}')
if [ "${restartCount}" -eq "0" ];then
echo "koord-manager has not restarted"
else
kubectl get pod -n ${COMPONENT_NS} -l koord-app=koord-manager --no-headers
echo "koord-manager has restarted, abort!!!"
kubectl get pod -n ${COMPONENT_NS} --no-headers -l koord-app=koord-manager | head -n 1 | awk '{print $1}' | xargs kubectl logs -p -n ${COMPONENT_NS}
exit 1
fi
exit $retVal
102 changes: 102 additions & 0 deletions .github/workflows/e2e-k8s-1.24.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
name: E2E-K8S-1.24

on:
push:
branches:
- master
- release-*
pull_request: {}
workflow_dispatch: {}

env:
# Common versions
GO_VERSION: '1.19'
KIND_ACTION_VERSION: 'v1.5.0'
KIND_VERSION: 'v0.20.0'
KIND_IMAGE: 'kindest/node:v1.24.15'
KIND_CLUSTER_NAME: 'ci-testing'
COMPONENT_NS: "koordinator-system"

jobs:

slo-controller:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v3
with:
submodules: true
- name: Setup Go
uses: actions/setup-go@v3
with:
go-version: ${{ env.GO_VERSION }}
- name: Setup Kind Cluster
uses: helm/kind-action@v1.5.0
with:
node_image: ${{ env.KIND_IMAGE }}
cluster_name: ${{ env.KIND_CLUSTER_NAME }}
config: ./test/kind-conf.yaml
version: ${{ env.KIND_VERSION }}
- name: Build image
run: |
export IMAGE="koordinator-sh/koord-manager:e2e-${GITHUB_RUN_ID}"
docker build --pull --no-cache . -t $IMAGE -f docker/koord-manager.dockerfile
kind load docker-image --name=${KIND_CLUSTER_NAME} $IMAGE || { echo >&2 "kind not installed or error loading image: $IMAGE"; exit 1; }
- name: Check host environment
run: |
set -ex
kubectl version --short
kubectl get pods -A
kubectl get nodes -o yaml
tree -L 2 /sys/
tree -L 2 /sys/fs/cgroup
cat /proc/cpuinfo
- name: Install Koordinator
run: |
set -ex
kubectl cluster-info
IMG=koordinator-sh/koord-manager:e2e-${GITHUB_RUN_ID} ./hack/deploy_kind.sh
NODES=$(kubectl get node | wc -l)
for ((i=1;i<10;i++));
do
set +e
PODS=$(kubectl get pod -n ${COMPONENT_NS} | grep "koord-manager\|koordlet" | grep '1/1' | wc -l)
set -e
if [ "$PODS" -ge "$NODES" ]; then
break
fi
sleep 6
done
set +e
PODS=$(kubectl get pod -n ${COMPONENT_NS} | grep "koord-manager\|koordlet" | grep '1/1' | wc -l)
kubectl get pod -A
kubectl get node -o yaml
kubectl get all -n ${COMPONENT_NS} -o wide
kubectl get pod -n ${COMPONENT_NS} --no-headers | grep koord-manager | head -n 1 | awk '{print $1}' | xargs kubectl logs -n ${COMPONENT_NS} --tail=100
kubectl get pod -n ${COMPONENT_NS} --no-headers | grep koord-scheduler | awk '{print $1}' | xargs kubectl logs -n ${COMPONENT_NS} --tail=100
kubectl get pod -n ${COMPONENT_NS} --no-headers | grep koordlet | head -n 1 | awk '{print $1}' | xargs -L 1 kubectl logs -n ${COMPONENT_NS}
kubectl get pod -n ${COMPONENT_NS} -o wide
set -e
if [ "$PODS" -ge "$NODES" ]; then
echo "Wait for koord-manager and koordlet ready successfully"
else
echo "Timeout to wait for koord-manager and koordlet ready"
exit 1
fi
- name: Run E2E Tests
run: |
export KUBECONFIG=/home/runner/.kube/config
make ginkgo
set +e
EXTRA_ARGS="-koordinator-component-namespace=${COMPONENT_NS} -allowed-not-ready-nodes=1 -system-pods-startup-timeout=10s"
./bin/ginkgo -timeout 60m -v --focus='slo-controller' test/e2e -- ${EXTRA_ARGS}
retVal=$?
restartCount=$(kubectl get pod -n ${COMPONENT_NS} -l koord-app=koord-manager --no-headers | head -n 1 | awk '{print $4}')
if [ "${restartCount}" -eq "0" ];then
echo "koord-manager has not restarted"
else
kubectl get pod -n ${COMPONENT_NS} -l koord-app=koord-manager --no-headers
echo "koord-manager has restarted, abort!!!"
kubectl get pod -n ${COMPONENT_NS} --no-headers -l koord-app=koord-manager | head -n 1 | awk '{print $1}' | xargs kubectl logs -p -n ${COMPONENT_NS}
exit 1
fi
exit $retVal
100 changes: 100 additions & 0 deletions .github/workflows/e2e-k8s-latest.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
name: E2E-K8S-latest

on:
push:
branches:
- master
- release-*
pull_request: {}
workflow_dispatch: {}

env:
# Common versions
GO_VERSION: '1.19'
KIND_ACTION_VERSION: 'v1.5.0'
KIND_VERSION: 'v0.20.0'
KIND_CLUSTER_NAME: 'ci-testing'
COMPONENT_NS: "koordinator-system"

jobs:

slo-controller:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v3
with:
submodules: true
- name: Setup Go
uses: actions/setup-go@v3
with:
go-version: ${{ env.GO_VERSION }}
- name: Setup Kind Cluster
uses: helm/kind-action@v1.5.0
with:
cluster_name: ${{ env.KIND_CLUSTER_NAME }}
config: ./test/kind-conf.yaml
version: ${{ env.KIND_VERSION }}
- name: Build image
run: |
export IMAGE="koordinator-sh/koord-manager:e2e-${GITHUB_RUN_ID}"
docker build --pull --no-cache . -t $IMAGE -f docker/koord-manager.dockerfile
kind load docker-image --name=${KIND_CLUSTER_NAME} $IMAGE || { echo >&2 "kind not installed or error loading image: $IMAGE"; exit 1; }
- name: Check host environment
run: |
set -ex
kubectl version --short
kubectl get pods -A
kubectl get nodes -o yaml
tree -L 2 /sys/
tree -L 2 /sys/fs/cgroup
cat /proc/cpuinfo
- name: Install Koordinator
run: |
set -ex
kubectl cluster-info
IMG=koordinator-sh/koord-manager:e2e-${GITHUB_RUN_ID} ./hack/deploy_kind.sh
NODES=$(kubectl get node | wc -l)
for ((i=1;i<10;i++));
do
set +e
PODS=$(kubectl get pod -n ${COMPONENT_NS} | grep "koord-manager\|koordlet" | grep '1/1' | wc -l)
set -e
if [ "$PODS" -ge "$NODES" ]; then
break
fi
sleep 6
done
set +e
PODS=$(kubectl get pod -n ${COMPONENT_NS} | grep "koord-manager\|koordlet" | grep '1/1' | wc -l)
kubectl get pod -A
kubectl get node -o yaml
kubectl get all -n ${COMPONENT_NS} -o wide
kubectl get pod -n ${COMPONENT_NS} --no-headers | grep koord-manager | head -n 1 | awk '{print $1}' | xargs kubectl logs -n ${COMPONENT_NS} --tail=100
kubectl get pod -n ${COMPONENT_NS} --no-headers | grep koord-scheduler | awk '{print $1}' | xargs kubectl logs -n ${COMPONENT_NS} --tail=100
kubectl get pod -n ${COMPONENT_NS} --no-headers | grep koordlet | head -n 1 | awk '{print $1}' | xargs -L 1 kubectl logs -n ${COMPONENT_NS}
kubectl get pod -n ${COMPONENT_NS} -o wide
set -e
if [ "$PODS" -ge "$NODES" ]; then
echo "Wait for koord-manager and koordlet ready successfully"
else
echo "Timeout to wait for koord-manager and koordlet ready"
exit 1
fi
- name: Run E2E Tests
run: |
export KUBECONFIG=/home/runner/.kube/config
make ginkgo
set +e
EXTRA_ARGS="-koordinator-component-namespace=${COMPONENT_NS} -allowed-not-ready-nodes=1 -system-pods-startup-timeout=10s"
./bin/ginkgo -timeout 60m -v --focus='slo-controller' test/e2e -- ${EXTRA_ARGS}
retVal=$?
restartCount=$(kubectl get pod -n ${COMPONENT_NS} -l koord-app=koord-manager --no-headers | head -n 1 | awk '{print $4}')
if [ "${restartCount}" -eq "0" ];then
echo "koord-manager has not restarted"
else
kubectl get pod -n ${COMPONENT_NS} -l koord-app=koord-manager --no-headers
echo "koord-manager has restarted, abort!!!"
kubectl get pod -n ${COMPONENT_NS} --no-headers -l koord-app=koord-manager | head -n 1 | awk '{print $1}' | xargs kubectl logs -p -n ${COMPONENT_NS}
exit 1
fi
exit $retVal
15 changes: 15 additions & 0 deletions config/manager/descheduler-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,18 @@ data:
- koordinator-system
evictQPS: "10"
evictBurst: 1
- name: LowNodeLoad
args:
apiVersion: descheduler/v1alpha2
kind: LowNodeLoadArgs
evictableNamespaces:
exclude:
- kube-system
- koordinator-system
useDeviationThresholds: false
lowThresholds:
cpu: 45
memory: 55
highThresholds:
cpu: 75
memory: 80
12 changes: 9 additions & 3 deletions config/manager/koordlet.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@ metadata:
namespace: system
spec:
updateStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 0
maxUnavailable: 20%
type: RollingUpdate
selector:
matchLabels:
koord-app: koordlet
Expand Down Expand Up @@ -71,6 +70,9 @@ spec:
- mountPath: /host-var-run-koordlet/
mountPropagation: Bidirectional
name: host-var-run-koordlet
- mountPath: /prediction-checkpoints
name: host-koordlet-checkpoint-dir
mountPropagation: Bidirectional
- mountPath: /host-sys/
name: host-sys
- mountPath: /etc/kubernetes/
Expand All @@ -82,7 +84,7 @@ spec:
- mountPath: /var/lib/kubelet
name: host-kubelet-rootdir
readOnly: true
- mountPaht: /dev
- mountPath: /dev
name: host-dev
mountPropagation: HostToContainer
- mountPath: /metric-data/
Expand Down Expand Up @@ -119,6 +121,10 @@ spec:
path: /var/run/koordlet/
type: DirectoryOrCreate
name: host-var-run-koordlet
- hostPath:
path: /var/run/koordlet/prediction-checkpoints
type: DirectoryOrCreate
name: host-koordlet-checkpoint-dir
- hostPath:
path: /sys/
type: ""
Expand Down
Loading

0 comments on commit 6eedbd3

Please sign in to comment.