Merge pull request #154 from projectsyn/feat/restart-holder-ds
Add support for replacing outdated CSI holder pods
simu authored Feb 26, 2024
2 parents 718e309 + 7ef7cf3 commit fe79022
Showing 5 changed files with 636 additions and 1 deletion.
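
The new `syn-holder-updater` DaemonSet identifies outdated Ceph CSI holder pods by comparing each holder DaemonSet's `metadata.generation` with the `pod-template-generation` label on its pods. A minimal sketch of running that check by hand (the namespace and the `-cluster` DaemonSet suffix are assumptions taken from the golden test output further down):

# Sketch only: list RBD holder pods whose pod-template-generation lags
# behind the DaemonSet's generation.
NS=syn-rook-ceph-operator   # assumed namespace, see golden output below
wanted=$(kubectl -n "$NS" get ds csi-rbdplugin-holder-cluster -ojsonpath='{.metadata.generation}')
kubectl -n "$NS" get pods -l app=csi-rbdplugin-holder -ojson |
  jq --arg wanted "$wanted" -r \
    '.items[] | select(.metadata.labels."pod-template-generation" != $wanted) | .metadata.name'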
4 changes: 3 additions & 1 deletion component/main.jsonnet
@@ -48,7 +48,7 @@ local namespaces =
else [];

local common_labels(name) = {
'app.kubernetes.io/name': name,
'app.kubernetes.io/name': std.strReplace(name, ':', '-'),
'app.kubernetes.io/managed-by': 'commodore',
'app.kubernetes.io/component': 'rook-ceph',
};
@@ -92,6 +92,7 @@ local add_labels(manifests) = [
for manifest in manifests
];


std.mapWithKey(
function(field, value)
if std.isArray(value) then
@@ -126,6 +127,7 @@ std.mapWithKey(
csi_metrics.servicemonitor,
[if params.ceph_cluster.monitoring_enabled then '40_alertrules']:
alert_rules.rules,
'50_restart_holder_ds': (import 'restart-holder-ds.libsonnet'),
'99_cleanup': (import 'cleanup.libsonnet'),
}
)
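
The `std.strReplace()` change above accounts for the fact that Kubernetes label values may not contain `:`: objects keep their colon-separated names, but the `app.kubernetes.io/name` label gets a dash-separated value. A minimal jsonnet illustration (the example name is taken from the golden output further down):

// evaluates to "syn-rook-ceph-holder-updater-cluster-reader"
std.strReplace('syn:rook-ceph:holder-updater-cluster-reader', ':', '-')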
141 changes: 141 additions & 0 deletions component/restart-holder-ds.libsonnet
@@ -0,0 +1,141 @@
local kap = import 'lib/kapitan.libjsonnet';
local kube = import 'lib/kube.libjsonnet';

local inv = kap.inventory();
local params = inv.parameters.rook_ceph;

local serviceaccount = kube.ServiceAccount('holder-updater') {
metadata+: {
namespace: params.namespace,
},
};

local rbac = [
serviceaccount,
kube.RoleBinding('holder-updater-admin') {
metadata+: {
namespace: params.namespace,
},
roleRef: {
kind: 'ClusterRole',
name: 'admin',
},
subjects_: [ serviceaccount ],
},
kube.ClusterRoleBinding('syn:rook-ceph:holder-updater-cluster-reader') {
roleRef: {
kind: 'ClusterRole',
name: 'cluster-reader',
},
subjects_: [ serviceaccount ],
},
];

local script = |||
#!/bin/sh
trap : TERM INT
sleep infinity &
while true; do
# assumption: holder plugin daemonset is called
# `csi-cephfsplugin-holder-${cephcluster:name}`
cephfs_holder_wanted_gen=$(kubectl get ds csi-cephfsplugin-holder-%(cephcluster_name)s -ojsonpath='{.metadata.generation}')
rbd_holder_wanted_gen=$(kubectl get ds csi-rbdplugin-holder-%(cephcluster_name)s -ojsonpath='{.metadata.generation}')
needs_update=$( (\
kubectl get pods -l app=csi-cephfsplugin-holder --field-selector spec.nodeName=${NODE_NAME} -ojson |\
jq --arg wanted_gen ${cephfs_holder_wanted_gen} \
-r '.items[] | select(.metadata.labels."pod-template-generation" != $wanted_gen) | .metadata.name'
kubectl get pods -l app=csi-rbdplugin-holder --field-selector spec.nodeName=${NODE_NAME} -ojson |\
jq --arg wanted_gen ${rbd_holder_wanted_gen} \
-r '.items[] | select(.metadata.labels."pod-template-generation" != $wanted_gen) | .metadata.name'
) | wc -l)
if [ $needs_update -eq 0 ]; then
echo "No holder pods with outdated pod generation, nothing to do"
break
fi
non_ds_pods=$(kubectl get pods -A --field-selector spec.nodeName=${NODE_NAME} -ojson | \
jq -r '.items[] | select(.metadata.ownerReferences[0].kind!="DaemonSet") | .metadata.name' | wc -l)
if [ $non_ds_pods -eq 0 ]; then
echo "node ${NODE_NAME} drained, deleting Ceph CSI holder pods"
kubectl delete pods -l app=csi-cephfsplugin-holder --field-selector=spec.nodeName=${NODE_NAME}
kubectl delete pods -l app=csi-rbdplugin-holder --field-selector=spec.nodeName=${NODE_NAME}
break
else
echo "${non_ds_pods} non-daemonset pods still on node ${NODE_NAME}, sleeping for 5s"
fi
sleep 5
done
echo "script completed, sleeping"
wait
||| % { cephcluster_name: params.ceph_cluster.name };

local configmap = kube.ConfigMap('holder-restart-script') {
metadata+: {
namespace: params.namespace,
},
data: {
'wait-and-delete-holder-pods.sh': script,
},
};

local daemonset = kube.DaemonSet('syn-holder-updater') {
metadata+: {
annotations+: {
'syn.tools/description':
'DaemonSet which waits for node to be drained (by waiting until no ' +
'non-daemonset pods are running on the node) and then deletes any ' +
'outdated csi holder pods. Outdated holder pods are identified by ' +
'comparing the DaemonSet generation with the pod generation.',
},
namespace: params.namespace,
},
spec+: {
template+: {
spec+: {
serviceAccountName: serviceaccount.metadata.name,
containers_: {
update: kube.Container('update') {
image: '%(registry)s/%(image)s:%(tag)s' % params.images.kubectl,
command: [ '/scripts/wait-and-delete-holder-pods.sh' ],
env_: {
NODE_NAME: {
fieldRef: {
fieldPath: 'spec.nodeName',
},
},
},
// The script doesn't consume any resources once it's determined
// that nothing is left to do, but before then, we do consume a
// bit of resources. We're setting modest requests, and no limits
// to avoid running into issues because the script gets throttled
// due to CPU limits.
resources: {
requests: {
cpu: '5m',
memory: '20Mi',
},
},
volumeMounts_: {
scripts: {
mountPath: '/scripts',
},
},
},
},
volumes_: {
scripts: {
configMap: {
name: configmap.metadata.name,
defaultMode: 504, // 0770
},
},
},
},
},
},
};

rbac + [
configmap,
daemonset,
]
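
The script above treats a node as drained once no non-DaemonSet pods remain on it. The same check can be run standalone; `worker-1` is a hypothetical node name, the rest mirrors the script:

NODE_NAME=worker-1   # hypothetical node name
kubectl get pods -A --field-selector spec.nodeName=${NODE_NAME} -ojson | \
  jq -r '.items[] | select(.metadata.ownerReferences[0].kind!="DaemonSet") | .metadata.name' | wc -l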
164 changes: 164 additions & 0 deletions tests/golden/cephfs/rook-ceph/rook-ceph/50_restart_holder_ds.yaml
@@ -0,0 +1,164 @@
apiVersion: v1
kind: ServiceAccount
metadata:
annotations: {}
labels:
app.kubernetes.io/component: rook-ceph
app.kubernetes.io/managed-by: commodore
app.kubernetes.io/name: holder-updater
name: holder-updater
name: holder-updater
namespace: syn-rook-ceph-operator
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
annotations: {}
labels:
app.kubernetes.io/component: rook-ceph
app.kubernetes.io/managed-by: commodore
app.kubernetes.io/name: holder-updater-admin
name: holder-updater-admin
name: holder-updater-admin
namespace: syn-rook-ceph-operator
roleRef:
kind: ClusterRole
name: admin
subjects:
- kind: ServiceAccount
name: holder-updater
namespace: syn-rook-ceph-operator
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
annotations: {}
labels:
app.kubernetes.io/component: rook-ceph
app.kubernetes.io/managed-by: commodore
app.kubernetes.io/name: syn-rook-ceph-holder-updater-cluster-reader
name: syn-rook-ceph-holder-updater-cluster-reader
name: syn:rook-ceph:holder-updater-cluster-reader
roleRef:
kind: ClusterRole
name: cluster-reader
subjects:
- kind: ServiceAccount
name: holder-updater
namespace: syn-rook-ceph-operator
---
apiVersion: v1
data:
wait-and-delete-holder-pods.sh: |
#!/bin/sh
trap : TERM INT
sleep infinity &
while true; do
# assumption: holder plugin daemonset is called
# `csi-cephfsplugin-holder-${cephcluster:name}`
cephfs_holder_wanted_gen=$(kubectl get ds csi-cephfsplugin-holder-cluster -ojsonpath='{.metadata.generation}')
rbd_holder_wanted_gen=$(kubectl get ds csi-rbdplugin-holder-cluster -ojsonpath='{.metadata.generation}')
needs_update=$( (\
kubectl get pods -l app=csi-cephfsplugin-holder --field-selector spec.nodeName=${NODE_NAME} -ojson |\
jq --arg wanted_gen ${cephfs_holder_wanted_gen} \
-r '.items[] | select(.metadata.labels."pod-template-generation" != $wanted_gen) | .metadata.name'
kubectl get pods -l app=csi-rbdplugin-holder --field-selector spec.nodeName=${NODE_NAME} -ojson |\
jq --arg wanted_gen ${rbd_holder_wanted_gen} \
-r '.items[] | select(.metadata.labels."pod-template-generation" != $wanted_gen) | .metadata.name'
) | wc -l)
if [ $needs_update -eq 0 ]; then
echo "No holder pods with outdated pod generation, nothing to do"
break
fi
non_ds_pods=$(kubectl get pods -A --field-selector spec.nodeName=${NODE_NAME} -ojson | \
jq -r '.items[] | select(.metadata.ownerReferences[0].kind!="DaemonSet") | .metadata.name' | wc -l)
if [ $non_ds_pods -eq 0 ]; then
echo "node ${NODE_NAME} drained, deleting Ceph CSI holder pods"
kubectl delete pods -l app=csi-cephfsplugin-holder --field-selector=spec.nodeName=${NODE_NAME}
kubectl delete pods -l app=csi-rbdplugin-holder --field-selector=spec.nodeName=${NODE_NAME}
break
else
echo "${non_ds_pods} non-daemonset pods still on node ${NODE_NAME}, sleeping for 5s"
fi
sleep 5
done
echo "script completed, sleeping"
wait
kind: ConfigMap
metadata:
annotations: {}
labels:
app.kubernetes.io/component: rook-ceph
app.kubernetes.io/managed-by: commodore
app.kubernetes.io/name: holder-restart-script
name: holder-restart-script
name: holder-restart-script
namespace: syn-rook-ceph-operator
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
annotations:
syn.tools/description: DaemonSet which waits for node to be drained (by waiting
until no non-daemonset pods are running on the node) and then deletes any outdated
csi holder pods. Outdated holder pods are identified by comparing the DaemonSet
generation with the pod generation.
labels:
app.kubernetes.io/component: rook-ceph
app.kubernetes.io/managed-by: commodore
app.kubernetes.io/name: syn-holder-updater
name: syn-holder-updater
name: syn-holder-updater
namespace: syn-rook-ceph-operator
spec:
selector:
matchLabels:
app.kubernetes.io/component: rook-ceph
app.kubernetes.io/managed-by: commodore
app.kubernetes.io/name: syn-holder-updater
name: syn-holder-updater
template:
metadata:
annotations: {}
labels:
app.kubernetes.io/component: rook-ceph
app.kubernetes.io/managed-by: commodore
app.kubernetes.io/name: syn-holder-updater
name: syn-holder-updater
spec:
containers:
- args: []
command:
- /scripts/wait-and-delete-holder-pods.sh
env:
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
image: docker.io/bitnami/kubectl:1.28.4@sha256:6485a923f6f4ff3d42d871ce5bd45ee8f25a303c44972a4ad31ddd895082fc22
imagePullPolicy: IfNotPresent
name: update
ports: []
resources:
requests:
cpu: 5m
memory: 20Mi
stdin: false
tty: false
volumeMounts:
- mountPath: /scripts
name: scripts
imagePullSecrets: []
initContainers: []
serviceAccountName: holder-updater
terminationGracePeriodSeconds: 30
volumes:
- configMap:
defaultMode: 504
name: holder-restart-script
name: scripts
updateStrategy:
rollingUpdate:
maxUnavailable: 1
type: RollingUpdate