Skip to content

Commit

Permalink
Control pod deletion behaviour via annotation
Browse files Browse the repository at this point in the history
This change introduces `operator.redpanda.com/decommission-on-delete`
annotation that tells the operator to decommission a node when a pod
gets deleted.

Setting this annotation allows us to upgrade the k8s worker nodes of a
cluster without having to worry about launching or deleting instances
because we can trigger the cloud provider's native upgrade
functionality.

In essence, we set the annotation and then allow the cloud provider to
launch a new k8s worker node and drain the old one. Since we're now
decommissioning the redpanda pod when it gets deleted the new pod that
gets scheduled will no longer be stuck in Pending state due to PVC
attachments still on the old k8s worker node.
  • Loading branch information
Kyriakos Oikonomakos committed Apr 18, 2024
1 parent 0dfe3b1 commit 12086b8
Show file tree
Hide file tree
Showing 15 changed files with 272 additions and 1 deletion.
15 changes: 14 additions & 1 deletion src/go/k8s/internal/controller/redpanda/cluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ const (
SecretAnnotationExternalCAKey = "operator.redpanda.com/external-ca"

NotManaged = "false"

DecommissionOnDeleteAnnotation = "operator.redpanda.com/decommission-on-delete"
)

var (
Expand Down Expand Up @@ -334,6 +336,17 @@ func (r *ClusterReconciler) handlePodFinalizer(
if err != nil {
return fmt.Errorf("unable to fetch PodList: %w", err)
}

var decommissionOnDelete bool
decommissionOnDeleteVal, ok := rp.Annotations[DecommissionOnDeleteAnnotation]
if ok {
decommissionOnDelete, err = strconv.ParseBool(decommissionOnDeleteVal)
if err != nil {
//nolint:goerr113 // not going to use wrapped static error here this time
return fmt.Errorf("value of annotation operator.redpanda.com/decommission-on-delete must be convertable to boolean")
}
}

for i := range pods.Items {
pod := &pods.Items[i]
if pod.DeletionTimestamp.IsZero() {
Expand All @@ -357,7 +370,7 @@ func (r *ClusterReconciler) handlePodFinalizer(
// nor has a noexecute taint
untainted := true
for _, taint := range node.Spec.Taints {
if taint.Effect == corev1.TaintEffectNoExecute && taint.Key == corev1.TaintNodeUnreachable {
if (taint.Effect == corev1.TaintEffectNoExecute && taint.Key == corev1.TaintNodeUnreachable) || (decommissionOnDelete && taint.Effect == corev1.TaintEffectNoSchedule) {
untainted = false
}
}
Expand Down
37 changes: 37 additions & 0 deletions src/go/k8s/tests/e2e/decommission-on-delete/00-assert.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
apiVersion: redpanda.vectorized.io/v1alpha1
kind: Cluster
metadata:
name: decomm-on-delete
status:
replicas: 3
currentReplicas: 3
readyReplicas: 3
conditions:
- type: ClusterConfigured
status: "True"
---
apiVersion: v1
kind: Pod
metadata:
name: decomm-on-delete-0
annotations:
operator.redpanda.com/node-id: "0"
---
apiVersion: v1
kind: Pod
metadata:
name: decomm-on-delete-1
annotations:
operator.redpanda.com/node-id: "1"
---
apiVersion: v1
kind: Pod
metadata:
name: decomm-on-delete-2
annotations:
operator.redpanda.com/node-id: "2"
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
collectors:
- command: ../../../hack/get-redpanda-info.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
apiVersion: redpanda.vectorized.io/v1alpha1
kind: Cluster
metadata:
name: decomm-on-delete
spec:
image: "localhost/redpanda"
version: "dev"
replicas: 3
resources:
requests:
cpu: 1
memory: 1Gi
limits:
cpu: 1
memory: 1Gi
configuration:
rpcServer:
port: 33145
kafkaApi:
- port: 9092
adminApi:
- port: 9644
pandaproxyApi:
- port: 8082
developerMode: true
additionalCommandlineArguments:
dump-memory-diagnostics-on-alloc-failure-kind: all
abort-on-seastar-bad-alloc: ''
18 changes: 18 additions & 0 deletions src/go/k8s/tests/e2e/decommission-on-delete/01-assert.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
apiVersion: redpanda.vectorized.io/v1alpha1
kind: Cluster
metadata:
name: decomm-on-delete
annotations:
operator.redpanda.com/decommission-on-delete: "true"
status:
replicas: 3
currentReplicas: 3
readyReplicas: 3
conditions:
- type: ClusterConfigured
status: "True"
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
collectors:
- command: ../../../hack/get-redpanda-info.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
apiVersion: redpanda.vectorized.io/v1alpha1
kind: Cluster
metadata:
name: decomm-on-delete
annotations:
operator.redpanda.com/decommission-on-delete: "true"
18 changes: 18 additions & 0 deletions src/go/k8s/tests/e2e/decommission-on-delete/02-assert.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
apiVersion: v1
kind: Pod
metadata:
labels:
job-name: get-broker-count-managed-decom
status:
containerStatuses:
- name: curl
state:
terminated:
exitCode: 0
reason: Completed
phase: Succeeded
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
collectors:
- command: ../../../hack/get-redpanda-info.sh
30 changes: 30 additions & 0 deletions src/go/k8s/tests/e2e/decommission-on-delete/02-probe.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
apiVersion: batch/v1
kind: Job
metadata:
name: get-broker-count-managed-decom
spec:
backoffLimit: 10
template:
spec:
activeDeadlineSeconds: 90
containers:
- name: curl
image: apteno/alpine-jq:latest
env:
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
command:
- /bin/sh
- -c
- -ex
args:
- |
url=http://decomm-on-delete-0.decomm-on-delete.$NAMESPACE.svc.cluster.local:9644/v1/brokers
res=$(curl --silent -L $url | jq '. | length')
if [[ "$res" != "3" ]]; then
exit 1;
fi
restartPolicy: Never
23 changes: 23 additions & 0 deletions src/go/k8s/tests/e2e/decommission-on-delete/03-assert.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
apiVersion: redpanda.vectorized.io/v1alpha1
kind: Cluster
metadata:
name: decomm-on-delete
status:
replicas: 3
currentReplicas: 3
readyReplicas: 3
conditions:
- type: ClusterConfigured
status: "True"
---
apiVersion: v1
kind: Pod
metadata:
name: decomm-on-delete-0
annotations:
operator.redpanda.com/node-id: "3"
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
collectors:
- command: ../../../hack/get-redpanda-info.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
apiVersion: kuttl.dev/v1beta1
kind: TestStep
commands:
- command: kubectl delete pod decomm-on-delete-0
namespaced: true
# NOTE: Using the builtin `delete` functionality here will not work
# because the STS replaces the pod very quickly and the step times out.
18 changes: 18 additions & 0 deletions src/go/k8s/tests/e2e/decommission-on-delete/04-assert.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
apiVersion: redpanda.vectorized.io/v1alpha1
kind: Cluster
metadata:
name: decomm-on-delete
annotations:
operator.redpanda.com/decommission-on-delete: "false"
status:
replicas: 3
currentReplicas: 3
readyReplicas: 3
conditions:
- type: ClusterConfigured
status: "True"
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
collectors:
- command: ../../../hack/get-redpanda-info.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
apiVersion: redpanda.vectorized.io/v1alpha1
kind: Cluster
metadata:
name: decomm-on-delete
annotations:
operator.redpanda.com/decommission-on-delete: "false"
23 changes: 23 additions & 0 deletions src/go/k8s/tests/e2e/decommission-on-delete/05-assert.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
apiVersion: redpanda.vectorized.io/v1alpha1
kind: Cluster
metadata:
name: decomm-on-delete
status:
replicas: 3
currentReplicas: 3
readyReplicas: 3
conditions:
- type: ClusterConfigured
status: "True"
---
apiVersion: v1
kind: Pod
metadata:
name: decomm-on-delete-0
annotations:
operator.redpanda.com/node-id: "3"
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
collectors:
- command: ../../../hack/get-redpanda-info.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
apiVersion: kuttl.dev/v1beta1
kind: TestStep
commands:
- command: kubectl delete pod decomm-on-delete-0
namespaced: true
# NOTE: Using the builtin `delete` functionality here will not work
# because the STS replaces the pod very quickly and the step times out.
28 changes: 28 additions & 0 deletions src/go/k8s/tests/e2e/decommission-on-delete/06-clean.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
apiVersion: kuttl.dev/v1beta1
kind: TestStep
delete:
- apiVersion: redpanda.vectorized.io/v1alpha1
kind: Cluster
name: decomm-on-delete
namespace: redpanda-system
- apiVersion: v1
kind: PersistentVolumeClaim
name: datadir-decommission-0
namespace: redpanda-system
- apiVersion: v1
kind: PersistentVolumeClaim
name: datadir-decommission-1
namespace: redpanda-system
- apiVersion: v1
kind: PersistentVolumeClaim
name: datadir-decommission-2
namespace: redpanda-system
- apiVersion: batch/v1
kind: Job
name: get-broker-count-managed-decom
namespace: redpanda-system
- apiVersion: v1
kind: Pod
labels:
job-name: get-broker-count-managed-decom
namespace: redpanda-system
7 changes: 7 additions & 0 deletions src/go/k8s/tests/e2e/decommission-on-delete/README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
This test

0. creates a 3 node Redpanda cluster
1. enables decommission on pod deletion
2. deletes one pod at a time and checks that its node id increases as expected
3. disables decommission on pod deletion
4. deletes a pod and checks that its node id did not change

0 comments on commit 12086b8

Please sign in to comment.