Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci: initialize e2e workflow #1573

Merged
merged 1 commit into from
Aug 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 102 additions & 0 deletions .github/workflows/e2e-k8s-1.22.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# End-to-end workflow for the 'slo-controller' specs on a kind cluster whose
# node image is pinned to Kubernetes v1.22 (see KIND_IMAGE).
name: E2E-K8S-1.22

# Runs on pushes to master and release-* branches, on every pull request,
# and on manual dispatch.
on:
  push:
    branches: [master, 'release-*']
  pull_request: {}
  workflow_dispatch: {}

env:
  # Common versions
  GO_VERSION: '1.19'
  # Not referenced by any step below; the kind-action step pins its version
  # directly in its `uses:` line.
  KIND_ACTION_VERSION: 'v1.5.0'
  KIND_VERSION: 'v0.20.0'
  KIND_IMAGE: 'kindest/node:v1.22.17'
  KIND_CLUSTER_NAME: 'ci-testing'
  # Namespace queried for the koord-manager / koordlet / koord-scheduler pods.
  COMPONENT_NS: 'koordinator-system'

jobs:

  # Builds the koord-manager image from the checkout, loads it into a kind
  # cluster, deploys Koordinator via hack/deploy_kind.sh, waits for the
  # components to become ready and then runs the ginkgo e2e specs focused on
  # 'slo-controller'.
  slo-controller:
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: true
      - name: Setup Go
        uses: actions/setup-go@v3
        with:
          go-version: ${{ env.GO_VERSION }}
      - name: Setup Kind Cluster
        uses: helm/kind-action@v1.5.0
        with:
          node_image: ${{ env.KIND_IMAGE }}
          cluster_name: ${{ env.KIND_CLUSTER_NAME }}
          config: ./test/kind-conf.yaml
          version: ${{ env.KIND_VERSION }}
      - name: Build image
        run: |
          # The tag embeds the run id; the install step below rebuilds the same tag.
          export IMAGE="koordinator-sh/koord-manager:e2e-${GITHUB_RUN_ID}"
          docker build --pull --no-cache . -t $IMAGE -f docker/koord-manager.dockerfile
          kind load docker-image --name=${KIND_CLUSTER_NAME} $IMAGE || { echo >&2 "kind not installed or error loading image: $IMAGE"; exit 1; }
      - name: Check host environment
        run: |
          set -ex
          # `--short` was deprecated and later removed from kubectl; fall back to
          # the plain form so a newer kubectl does not abort this step under `set -e`.
          kubectl version --short || kubectl version
          kubectl get pods -A
          kubectl get nodes -o yaml
          tree -L 2 /sys/
          tree -L 2 /sys/fs/cgroup
          cat /proc/cpuinfo
      - name: Install Koordinator
        run: |
          set -ex
          kubectl cluster-info
          IMG=koordinator-sh/koord-manager:e2e-${GITHUB_RUN_ID} ./hack/deploy_kind.sh
          # `wc -l` also counts the header line printed by `kubectl get node`, so
          # NODES is (node count + 1). NOTE(review): confirm this threshold is intended.
          NODES=$(kubectl get node | wc -l)
          # Poll at most 9 times, 6 seconds apart, until enough koord-manager and
          # koordlet pods report 1/1.
          for ((i=1;i<10;i++));
          do
            # errexit is relaxed while counting so a failing pipeline cannot abort the loop.
            set +e
            PODS=$(kubectl get pod -n ${COMPONENT_NS} | grep "koord-manager\|koordlet" | grep '1/1' | wc -l)
            set -e
            if [ "$PODS" -ge "$NODES" ]; then
              break
            fi
            sleep 6
          done
          # Best-effort diagnostics: failures here must not fail the step.
          set +e
          PODS=$(kubectl get pod -n ${COMPONENT_NS} | grep "koord-manager\|koordlet" | grep '1/1' | wc -l)
          kubectl get pod -A
          kubectl get node -o yaml
          kubectl get all -n ${COMPONENT_NS} -o wide
          # `-r` skips kubectl when no pod matched; `-L 1` issues one `kubectl logs`
          # per pod, since a second positional argument is taken as a container name.
          kubectl get pod -n ${COMPONENT_NS} --no-headers | grep koord-manager | head -n 1 | awk '{print $1}' | xargs -r -L 1 kubectl logs -n ${COMPONENT_NS} --tail=100
          kubectl get pod -n ${COMPONENT_NS} --no-headers | grep koord-scheduler | awk '{print $1}' | xargs -r -L 1 kubectl logs -n ${COMPONENT_NS} --tail=100
          kubectl get pod -n ${COMPONENT_NS} --no-headers | grep koordlet | head -n 1 | awk '{print $1}' | xargs -r -L 1 kubectl logs -n ${COMPONENT_NS}
          kubectl get pod -n ${COMPONENT_NS} -o wide
          set -e
          if [ "$PODS" -ge "$NODES" ]; then
            echo "Wait for koord-manager and koordlet ready successfully"
          else
            echo "Timeout to wait for koord-manager and koordlet ready"
            exit 1
          fi
      - name: Run E2E Tests
        run: |
          export KUBECONFIG=/home/runner/.kube/config
          make ginkgo
          # From here on the exit status is handled manually so the restart check
          # runs whether or not the specs passed.
          set +e
          EXTRA_ARGS="-koordinator-component-namespace=${COMPONENT_NS} -allowed-not-ready-nodes=1 -system-pods-startup-timeout=10s"
          ./bin/ginkgo -timeout 60m -v --focus='slo-controller' test/e2e -- ${EXTRA_ARGS}
          retVal=$?
          # Column 4 of `kubectl get pod --no-headers` is RESTARTS.
          restartCount=$(kubectl get pod -n ${COMPONENT_NS} -l koord-app=koord-manager --no-headers | head -n 1 | awk '{print $4}')
          if [ -z "${restartCount}" ]; then
            # No pod matched the selector: report that instead of a bogus "restarted".
            echo "koord-manager pod not found, abort!!!"
            kubectl get pod -n ${COMPONENT_NS} -o wide
            exit 1
          elif [ "${restartCount}" -eq "0" ]; then
            echo "koord-manager has not restarted"
          else
            kubectl get pod -n ${COMPONENT_NS} -l koord-app=koord-manager --no-headers
            echo "koord-manager has restarted, abort!!!"
            # `-p` prints the logs of the previous (crashed) container instance.
            kubectl get pod -n ${COMPONENT_NS} --no-headers -l koord-app=koord-manager | head -n 1 | awk '{print $1}' | xargs kubectl logs -p -n ${COMPONENT_NS}
            exit 1
          fi
          exit $retVal
102 changes: 102 additions & 0 deletions .github/workflows/e2e-k8s-1.24.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# End-to-end workflow for the 'slo-controller' specs on a kind cluster whose
# node image is pinned to Kubernetes v1.24 (see KIND_IMAGE).
name: E2E-K8S-1.24

# Runs on pushes to master and release-* branches, on every pull request,
# and on manual dispatch.
on:
  push:
    branches: [master, 'release-*']
  pull_request: {}
  workflow_dispatch: {}

env:
  # Common versions
  GO_VERSION: '1.19'
  # Not referenced by any step below; the kind-action step pins its version
  # directly in its `uses:` line.
  KIND_ACTION_VERSION: 'v1.5.0'
  KIND_VERSION: 'v0.20.0'
  KIND_IMAGE: 'kindest/node:v1.24.15'
  KIND_CLUSTER_NAME: 'ci-testing'
  # Namespace queried for the koord-manager / koordlet / koord-scheduler pods.
  COMPONENT_NS: 'koordinator-system'

jobs:

  # Builds the koord-manager image from the checkout, loads it into a kind
  # cluster, deploys Koordinator via hack/deploy_kind.sh, waits for the
  # components to become ready and then runs the ginkgo e2e specs focused on
  # 'slo-controller'.
  slo-controller:
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: true
      - name: Setup Go
        uses: actions/setup-go@v3
        with:
          go-version: ${{ env.GO_VERSION }}
      - name: Setup Kind Cluster
        uses: helm/kind-action@v1.5.0
        with:
          node_image: ${{ env.KIND_IMAGE }}
          cluster_name: ${{ env.KIND_CLUSTER_NAME }}
          config: ./test/kind-conf.yaml
          version: ${{ env.KIND_VERSION }}
      - name: Build image
        run: |
          # The tag embeds the run id; the install step below rebuilds the same tag.
          export IMAGE="koordinator-sh/koord-manager:e2e-${GITHUB_RUN_ID}"
          docker build --pull --no-cache . -t $IMAGE -f docker/koord-manager.dockerfile
          kind load docker-image --name=${KIND_CLUSTER_NAME} $IMAGE || { echo >&2 "kind not installed or error loading image: $IMAGE"; exit 1; }
      - name: Check host environment
        run: |
          set -ex
          # `--short` was deprecated and later removed from kubectl; fall back to
          # the plain form so a newer kubectl does not abort this step under `set -e`.
          kubectl version --short || kubectl version
          kubectl get pods -A
          kubectl get nodes -o yaml
          tree -L 2 /sys/
          tree -L 2 /sys/fs/cgroup
          cat /proc/cpuinfo
      - name: Install Koordinator
        run: |
          set -ex
          kubectl cluster-info
          IMG=koordinator-sh/koord-manager:e2e-${GITHUB_RUN_ID} ./hack/deploy_kind.sh
          # `wc -l` also counts the header line printed by `kubectl get node`, so
          # NODES is (node count + 1). NOTE(review): confirm this threshold is intended.
          NODES=$(kubectl get node | wc -l)
          # Poll at most 9 times, 6 seconds apart, until enough koord-manager and
          # koordlet pods report 1/1.
          for ((i=1;i<10;i++));
          do
            # errexit is relaxed while counting so a failing pipeline cannot abort the loop.
            set +e
            PODS=$(kubectl get pod -n ${COMPONENT_NS} | grep "koord-manager\|koordlet" | grep '1/1' | wc -l)
            set -e
            if [ "$PODS" -ge "$NODES" ]; then
              break
            fi
            sleep 6
          done
          # Best-effort diagnostics: failures here must not fail the step.
          set +e
          PODS=$(kubectl get pod -n ${COMPONENT_NS} | grep "koord-manager\|koordlet" | grep '1/1' | wc -l)
          kubectl get pod -A
          kubectl get node -o yaml
          kubectl get all -n ${COMPONENT_NS} -o wide
          # `-r` skips kubectl when no pod matched; `-L 1` issues one `kubectl logs`
          # per pod, since a second positional argument is taken as a container name.
          kubectl get pod -n ${COMPONENT_NS} --no-headers | grep koord-manager | head -n 1 | awk '{print $1}' | xargs -r -L 1 kubectl logs -n ${COMPONENT_NS} --tail=100
          kubectl get pod -n ${COMPONENT_NS} --no-headers | grep koord-scheduler | awk '{print $1}' | xargs -r -L 1 kubectl logs -n ${COMPONENT_NS} --tail=100
          kubectl get pod -n ${COMPONENT_NS} --no-headers | grep koordlet | head -n 1 | awk '{print $1}' | xargs -r -L 1 kubectl logs -n ${COMPONENT_NS}
          kubectl get pod -n ${COMPONENT_NS} -o wide
          set -e
          if [ "$PODS" -ge "$NODES" ]; then
            echo "Wait for koord-manager and koordlet ready successfully"
          else
            echo "Timeout to wait for koord-manager and koordlet ready"
            exit 1
          fi
      - name: Run E2E Tests
        run: |
          export KUBECONFIG=/home/runner/.kube/config
          make ginkgo
          # From here on the exit status is handled manually so the restart check
          # runs whether or not the specs passed.
          set +e
          EXTRA_ARGS="-koordinator-component-namespace=${COMPONENT_NS} -allowed-not-ready-nodes=1 -system-pods-startup-timeout=10s"
          ./bin/ginkgo -timeout 60m -v --focus='slo-controller' test/e2e -- ${EXTRA_ARGS}
          retVal=$?
          # Column 4 of `kubectl get pod --no-headers` is RESTARTS.
          restartCount=$(kubectl get pod -n ${COMPONENT_NS} -l koord-app=koord-manager --no-headers | head -n 1 | awk '{print $4}')
          if [ -z "${restartCount}" ]; then
            # No pod matched the selector: report that instead of a bogus "restarted".
            echo "koord-manager pod not found, abort!!!"
            kubectl get pod -n ${COMPONENT_NS} -o wide
            exit 1
          elif [ "${restartCount}" -eq "0" ]; then
            echo "koord-manager has not restarted"
          else
            kubectl get pod -n ${COMPONENT_NS} -l koord-app=koord-manager --no-headers
            echo "koord-manager has restarted, abort!!!"
            # `-p` prints the logs of the previous (crashed) container instance.
            kubectl get pod -n ${COMPONENT_NS} --no-headers -l koord-app=koord-manager | head -n 1 | awk '{print $1}' | xargs kubectl logs -p -n ${COMPONENT_NS}
            exit 1
          fi
          exit $retVal
100 changes: 100 additions & 0 deletions .github/workflows/e2e-k8s-latest.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# End-to-end workflow for the 'slo-controller' specs on a kind cluster with
# no pinned node image.
name: E2E-K8S-latest

# Runs on pushes to master and release-* branches, on every pull request,
# and on manual dispatch.
on:
  push:
    branches: [master, 'release-*']
  pull_request: {}
  workflow_dispatch: {}

env:
  # Common versions
  GO_VERSION: '1.19'
  # Not referenced by any step below; the kind-action step pins its version
  # directly in its `uses:` line.
  KIND_ACTION_VERSION: 'v1.5.0'
  # No KIND_IMAGE is defined here and the kind-action step gets no node_image.
  # NOTE(review): presumably kind then falls back to its default node image
  # for this KIND_VERSION — confirm.
  KIND_VERSION: 'v0.20.0'
  KIND_CLUSTER_NAME: 'ci-testing'
  # Namespace queried for the koord-manager / koordlet / koord-scheduler pods.
  COMPONENT_NS: 'koordinator-system'

jobs:

  # Builds the koord-manager image from the checkout, loads it into a kind
  # cluster, deploys Koordinator via hack/deploy_kind.sh, waits for the
  # components to become ready and then runs the ginkgo e2e specs focused on
  # 'slo-controller'.
  slo-controller:
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: true
      - name: Setup Go
        uses: actions/setup-go@v3
        with:
          go-version: ${{ env.GO_VERSION }}
      - name: Setup Kind Cluster
        uses: helm/kind-action@v1.5.0
        with:
          # No node_image is passed in this workflow.
          cluster_name: ${{ env.KIND_CLUSTER_NAME }}
          config: ./test/kind-conf.yaml
          version: ${{ env.KIND_VERSION }}
      - name: Build image
        run: |
          # The tag embeds the run id; the install step below rebuilds the same tag.
          export IMAGE="koordinator-sh/koord-manager:e2e-${GITHUB_RUN_ID}"
          docker build --pull --no-cache . -t $IMAGE -f docker/koord-manager.dockerfile
          kind load docker-image --name=${KIND_CLUSTER_NAME} $IMAGE || { echo >&2 "kind not installed or error loading image: $IMAGE"; exit 1; }
      - name: Check host environment
        run: |
          set -ex
          # `--short` was deprecated and later removed from kubectl; fall back to
          # the plain form so a newer kubectl does not abort this step under `set -e`.
          kubectl version --short || kubectl version
          kubectl get pods -A
          kubectl get nodes -o yaml
          tree -L 2 /sys/
          tree -L 2 /sys/fs/cgroup
          cat /proc/cpuinfo
      - name: Install Koordinator
        run: |
          set -ex
          kubectl cluster-info
          IMG=koordinator-sh/koord-manager:e2e-${GITHUB_RUN_ID} ./hack/deploy_kind.sh
          # `wc -l` also counts the header line printed by `kubectl get node`, so
          # NODES is (node count + 1). NOTE(review): confirm this threshold is intended.
          NODES=$(kubectl get node | wc -l)
          # Poll at most 9 times, 6 seconds apart, until enough koord-manager and
          # koordlet pods report 1/1.
          for ((i=1;i<10;i++));
          do
            # errexit is relaxed while counting so a failing pipeline cannot abort the loop.
            set +e
            PODS=$(kubectl get pod -n ${COMPONENT_NS} | grep "koord-manager\|koordlet" | grep '1/1' | wc -l)
            set -e
            if [ "$PODS" -ge "$NODES" ]; then
              break
            fi
            sleep 6
          done
          # Best-effort diagnostics: failures here must not fail the step.
          set +e
          PODS=$(kubectl get pod -n ${COMPONENT_NS} | grep "koord-manager\|koordlet" | grep '1/1' | wc -l)
          kubectl get pod -A
          kubectl get node -o yaml
          kubectl get all -n ${COMPONENT_NS} -o wide
          # `-r` skips kubectl when no pod matched; `-L 1` issues one `kubectl logs`
          # per pod, since a second positional argument is taken as a container name.
          kubectl get pod -n ${COMPONENT_NS} --no-headers | grep koord-manager | head -n 1 | awk '{print $1}' | xargs -r -L 1 kubectl logs -n ${COMPONENT_NS} --tail=100
          kubectl get pod -n ${COMPONENT_NS} --no-headers | grep koord-scheduler | awk '{print $1}' | xargs -r -L 1 kubectl logs -n ${COMPONENT_NS} --tail=100
          kubectl get pod -n ${COMPONENT_NS} --no-headers | grep koordlet | head -n 1 | awk '{print $1}' | xargs -r -L 1 kubectl logs -n ${COMPONENT_NS}
          kubectl get pod -n ${COMPONENT_NS} -o wide
          set -e
          if [ "$PODS" -ge "$NODES" ]; then
            echo "Wait for koord-manager and koordlet ready successfully"
          else
            echo "Timeout to wait for koord-manager and koordlet ready"
            exit 1
          fi
      - name: Run E2E Tests
        run: |
          export KUBECONFIG=/home/runner/.kube/config
          make ginkgo
          # From here on the exit status is handled manually so the restart check
          # runs whether or not the specs passed.
          set +e
          EXTRA_ARGS="-koordinator-component-namespace=${COMPONENT_NS} -allowed-not-ready-nodes=1 -system-pods-startup-timeout=10s"
          ./bin/ginkgo -timeout 60m -v --focus='slo-controller' test/e2e -- ${EXTRA_ARGS}
          retVal=$?
          # Column 4 of `kubectl get pod --no-headers` is RESTARTS.
          restartCount=$(kubectl get pod -n ${COMPONENT_NS} -l koord-app=koord-manager --no-headers | head -n 1 | awk '{print $4}')
          if [ -z "${restartCount}" ]; then
            # No pod matched the selector: report that instead of a bogus "restarted".
            echo "koord-manager pod not found, abort!!!"
            kubectl get pod -n ${COMPONENT_NS} -o wide
            exit 1
          elif [ "${restartCount}" -eq "0" ]; then
            echo "koord-manager has not restarted"
          else
            kubectl get pod -n ${COMPONENT_NS} -l koord-app=koord-manager --no-headers
            echo "koord-manager has restarted, abort!!!"
            # `-p` prints the logs of the previous (crashed) container instance.
            kubectl get pod -n ${COMPONENT_NS} --no-headers -l koord-app=koord-manager | head -n 1 | awk '{print $1}' | xargs kubectl logs -p -n ${COMPONENT_NS}
            exit 1
          fi
          exit $retVal
15 changes: 15 additions & 0 deletions config/manager/descheduler-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,18 @@ data:
- koordinator-system
evictQPS: "10"
evictBurst: 1
- name: LowNodeLoad
args:
apiVersion: descheduler/v1alpha2
kind: LowNodeLoadArgs
evictableNamespaces:
exclude:
- kube-system
- koordinator-system
useDeviationThresholds: false
lowThresholds:
cpu: 45
memory: 55
highThresholds:
cpu: 75
memory: 80
12 changes: 9 additions & 3 deletions config/manager/koordlet.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@ metadata:
namespace: system
spec:
updateStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 0
maxUnavailable: 20%
type: RollingUpdate
selector:
matchLabels:
koord-app: koordlet
Expand Down Expand Up @@ -71,6 +70,9 @@ spec:
- mountPath: /host-var-run-koordlet/
mountPropagation: Bidirectional
name: host-var-run-koordlet
- mountPath: /prediction-checkpoints
name: host-koordlet-checkpoint-dir
mountPropagation: Bidirectional
- mountPath: /host-sys/
name: host-sys
- mountPath: /etc/kubernetes/
Expand All @@ -82,7 +84,7 @@ spec:
- mountPath: /var/lib/kubelet
name: host-kubelet-rootdir
readOnly: true
- mountPaht: /dev
- mountPath: /dev
name: host-dev
mountPropagation: HostToContainer
- mountPath: /metric-data/
Expand Down Expand Up @@ -119,6 +121,10 @@ spec:
path: /var/run/koordlet/
type: DirectoryOrCreate
name: host-var-run-koordlet
- hostPath:
path: /var/run/koordlet/prediction-checkpoints
type: DirectoryOrCreate
name: host-koordlet-checkpoint-dir
- hostPath:
path: /sys/
type: ""
Expand Down
Loading