Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix ci #981

Merged
merged 3 commits into from
Oct 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions .github/kind-cluster-1.28.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
image: "kindest/node:v1.28.9"
- role: control-plane
image: "kindest/node:v1.28.9"
- role: control-plane
image: "kindest/node:v1.28.9"
- role: worker
Expand Down
4 changes: 0 additions & 4 deletions .github/kind-cluster-1.29.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
image: "kindest/node:v1.29.4"
- role: control-plane
image: "kindest/node:v1.29.4"
- role: control-plane
image: "kindest/node:v1.29.4"
- role: worker
Expand Down
4 changes: 0 additions & 4 deletions .github/kind-cluster-1.30.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
image: "kindest/node:v1.30.2"
- role: control-plane
image: "kindest/node:v1.30.2"
- role: control-plane
image: "kindest/node:v1.30.2"
- role: worker
Expand Down
30 changes: 15 additions & 15 deletions .github/workflows/on-pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ jobs:
run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
id: tags
- name: Build image
run: VERSION="${{ steps.tags.outputs.sha_short }}" make image
run: VERSION="${{ steps.tags.outputs.sha_short }}" DH_ORG="${{ github.repository_owner }}" make image
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@6e7b7d1fd3e4fef0c5fa8cce1229c54b2c9bd0d8
with:
Expand Down Expand Up @@ -132,8 +132,8 @@ jobs:
id: tags
- name: Build artifacts
run: |
VERSION="${{ steps.tags.outputs.sha_short }}" make image
VERSION="${{ steps.tags.outputs.sha_short }}" make manifest
VERSION="${{ steps.tags.outputs.sha_short }}" DH_ORG="${{ github.repository_owner }}" make image
VERSION="${{ steps.tags.outputs.sha_short }}" DH_ORG="${{ github.repository_owner }}" make manifest

- name: Workaround "Failed to attach 1 to compat systemd cgroup /actions_job/..." on gh actions
run: |
Expand All @@ -145,7 +145,7 @@ jobs:
EOF

# Default name for helm/kind-action kind clusters is "chart-testing"
- name: Create kind cluster with 5 nodes
- name: Create kind cluster with 3 nodes
uses: helm/kind-action@v1.10.0
with:
config: .github/kind-cluster-${{ matrix.kubernetes }}.yaml
Expand All @@ -169,7 +169,7 @@ jobs:
max_attempts: 10
retry_wait_seconds: 60
# The DESIRED, CURRENT, READY, UP-TO-DATE and AVAILABLE columns should all be equal to cluster_size
command: "kubectl get ds -n kube-system kured | grep -E 'kured.*5.*5.*5.*5.*5'"
command: "kubectl get ds -n kube-system kured | grep -E 'kured.*3.*3.*3.*3.*3'"

- name: Create reboot sentinel files
run: |
Expand Down Expand Up @@ -217,8 +217,8 @@ jobs:
id: tags
- name: Build artifacts
run: |
VERSION="${{ steps.tags.outputs.sha_short }}" make image
VERSION="${{ steps.tags.outputs.sha_short }}" make manifest
VERSION="${{ steps.tags.outputs.sha_short }}" DH_ORG="${{ github.repository_owner }}" make image
VERSION="${{ steps.tags.outputs.sha_short }}" DH_ORG="${{ github.repository_owner }}" make manifest

- name: Workaround "Failed to attach 1 to compat systemd cgroup /actions_job/..." on gh actions
run: |
Expand All @@ -230,7 +230,7 @@ jobs:
EOF

# Default name for helm/kind-action kind clusters is "chart-testing"
- name: Create kind cluster with 5 nodes
- name: Create kind cluster with 3 nodes
uses: helm/kind-action@v1.10.0
with:
config: .github/kind-cluster-${{ matrix.kubernetes }}.yaml
Expand All @@ -241,7 +241,7 @@ jobs:

- name: Do not wait for an hour before detecting the rebootSentinel
run: |
sed -i 's/#\(.*\)--period=1h/\1--period=30s/g' kured-ds-signal.yaml
sed -i 's/#\(.*\)--period=1h/\1--period=15s/g' kured-ds-signal.yaml

- name: Install kured with kubectl
run: |
Expand All @@ -254,7 +254,7 @@ jobs:
max_attempts: 10
retry_wait_seconds: 60
# The DESIRED, CURRENT, READY, UP-TO-DATE and AVAILABLE columns should all be equal to cluster_size
command: "kubectl get ds -n kube-system kured | grep -E 'kured.*5.*5.*5.*5.*5'"
command: "kubectl get ds -n kube-system kured | grep -E 'kured.*3.*3.*3.*3.*3'"

- name: Create reboot sentinel files
run: |
Expand Down Expand Up @@ -303,8 +303,8 @@ jobs:
id: tags
- name: Build artifacts
run: |
VERSION="${{ steps.tags.outputs.sha_short }}" make image
VERSION="${{ steps.tags.outputs.sha_short }}" make manifest
VERSION="${{ steps.tags.outputs.sha_short }}" DH_ORG="${{ github.repository_owner }}" make image
VERSION="${{ steps.tags.outputs.sha_short }}" DH_ORG="${{ github.repository_owner }}" make manifest

- name: Workaround "Failed to attach 1 to compat systemd cgroup /actions_job/..." on gh actions
run: |
Expand All @@ -316,7 +316,7 @@ jobs:
EOF

# Default name for helm/kind-action kind clusters is "chart-testing"
- name: Create kind cluster with 5 nodes
- name: Create kind cluster with 3 nodes
uses: helm/kind-action@v1.10.0
with:
config: .github/kind-cluster-${{ matrix.kubernetes }}.yaml
Expand All @@ -327,7 +327,7 @@ jobs:

- name: Do not wait for an hour before detecting the rebootSentinel
run: |
sed -i 's/#\(.*\)--period=1h/\1--period=30s/g' kured-ds.yaml
sed -i 's/#\(.*\)--period=1h/\1--period=15s/g' kured-ds.yaml
sed -i 's/#\(.*\)--concurrency=1/\1--concurrency=2/g' kured-ds.yaml

- name: Install kured with kubectl
Expand All @@ -341,7 +341,7 @@ jobs:
max_attempts: 10
retry_wait_seconds: 60
# The DESIRED, CURRENT, READY, UP-TO-DATE and AVAILABLE columns should all be equal to cluster_size
command: "kubectl get ds -n kube-system kured | grep -E 'kured.*5.*5.*5.*5.*5'"
command: "kubectl get ds -n kube-system kured | grep -E 'kured.*3.*3.*3.*3.*3'"

- name: Create reboot sentinel files
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/periodics-daily.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ jobs:
run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
id: tags
- name: Build artifacts
run: VERSION="${{ steps.tags.outputs.sha_short }}" make image
run: VERSION="${{ steps.tags.outputs.sha_short }}" DH_ORG="${{ github.repository_owner }}" make image
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@6e7b7d1fd3e4fef0c5fa8cce1229c54b2c9bd0d8
with:
Expand Down
3 changes: 3 additions & 0 deletions .trivyignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# https://pkg.go.dev/vuln/GO-2024-3106
# Will be automatically fixed once we use golang 1.22.7
CVE-2024-34156
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

TEMPDIR=./.tmp
GORELEASER_CMD=$(TEMPDIR)/goreleaser
DH_ORG=kubereboot
DH_ORG ?= kubereboot
VERSION=$(shell git rev-parse --short HEAD)
SUDO=$(shell docker info >/dev/null 2>&1 || echo "sudo -E")

Expand Down
5 changes: 3 additions & 2 deletions tests/kind/create-reboot-sentinels.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
KUBECTL_CMD="${KUBECTL_CMD:-kubectl}"
SENTINEL_FILE="${SENTINEL_FILE:-/var/run/reboot-required}"

echo "Creating reboot sentinel on all nodes"
echo "Creating reboot sentinel on worker nodes"

for nodename in $("$KUBECTL_CMD" get nodes -o name); do
# To speed up the system, let's not kill the control plane.
for nodename in $("$KUBECTL_CMD" get nodes -o name | grep -v control-plane); do
docker exec "${nodename/node\//}" hostname
docker exec "${nodename/node\//}" touch "${SENTINEL_FILE}"
done
17 changes: 13 additions & 4 deletions tests/kind/follow-coordinated-reboot.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env bash

NODECOUNT=${NODECOUNT:-5}
NODECOUNT=${NODECOUNT:-2}
KUBECTL_CMD="${KUBECTL_CMD:-kubectl}"
DEBUG="${DEBUG:-false}"
CONTAINER_NAME_FORMAT=${CONTAINER_NAME_FORMAT:-"chart-testing-*"}
Expand Down Expand Up @@ -35,10 +35,12 @@ trap gather_logs_and_cleanup EXIT

declare -A was_unschedulable
declare -A has_recovered
max_attempts="60"
sleep_time=60
max_attempts="200"
sleep_time=5
attempt_num=1

# Check the docker status of each kind node container. If one has exited (crashed), restart it.

set +o errexit
echo "There are $NODECOUNT nodes in the cluster"
until [ ${#was_unschedulable[@]} == "$NODECOUNT" ] && [ ${#has_recovered[@]} == "$NODECOUNT" ]
Expand All @@ -52,13 +54,14 @@ do
# cat "$tmp_dir"/node_output
#fi

"$KUBECTL_CMD" get nodes -o custom-columns=NAME:.metadata.name,SCHEDULABLE:.spec.unschedulable --no-headers > "$tmp_dir"/node_output
"$KUBECTL_CMD" get nodes -o custom-columns=NAME:.metadata.name,SCHEDULABLE:.spec.unschedulable --no-headers | grep -v control-plane > "$tmp_dir"/node_output
if [[ "$DEBUG" == "true" ]]; then
# This is useful to see if a node gets stuck after drain, and doesn't
# come back up.
echo "Result of command $KUBECTL_CMD get nodes ... showing unschedulable nodes:"
cat "$tmp_dir"/node_output
fi

while read -r node; do
unschedulable=$(echo "$node" | grep true | cut -f 1 -d ' ')
if [ -n "$unschedulable" ] && [ -z ${was_unschedulable["$unschedulable"]+x} ] ; then
Expand All @@ -70,6 +73,12 @@ do
echo "$schedulable has recovered!"
has_recovered["$schedulable"]=1
fi

# If the container has crashed, restart it.
node_name=$(echo "$node" | cut -f 1 -d ' ')
stopped_container_id=$(docker container ls --filter=name="$node_name" --filter=status=exited -q)
if [ -n "$stopped_container_id" ]; then echo "Node $stopped_container_id needs restart"; docker start "$stopped_container_id"; echo "Container started."; fi

done < "$tmp_dir"/node_output

if [[ "${#has_recovered[@]}" == "$NODECOUNT" ]]; then
Expand Down
Loading