From e91d6b723f3cb86ac0d38121ed59cbcf67bcb0e6 Mon Sep 17 00:00:00 2001 From: Toshikuni Fukaya Date: Mon, 15 May 2023 02:45:26 +0000 Subject: [PATCH 1/5] Add a finalizer to pods Since controller-runtime does not provide a pod name if the pod is deleted, we can not delete an item storing event times from maps. To solve this, add a finalizer to pods to get their names. Signed-off-by: Toshikuni Fukaya --- constants/constants.go | 1 + controllers/node_controller.go | 12 ++++++++++++ controllers/pod_controller.go | 15 ++++++++++++++- 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/constants/constants.go b/constants/constants.go index ac59b96..5d59f61 100644 --- a/constants/constants.go +++ b/constants/constants.go @@ -3,6 +3,7 @@ package constants const ( ProbeNamePrefix = "pie-probe" ProbeContainerName = "probe" + PodFinalizerName = "pie.topolvm.io/pod" NodeFinalizerName = "pie.topolvm.io/node" StorageClassFinalizerName = "pie.topolvm.io/storage-class" diff --git a/controllers/node_controller.go b/controllers/node_controller.go index 2384d7d..07219a6 100644 --- a/controllers/node_controller.go +++ b/controllers/node_controller.go @@ -186,6 +186,16 @@ func makeCronSchedule(storageClass, nodeName string, period int) string { return fmt.Sprintf("%d-59/%d * * * *", h.Sum32()%uint32(period), period) } +func addPodFinalizer(spec *corev1.PodTemplateSpec) { + finalizers := spec.GetFinalizers() + for _, finalizer := range finalizers { + if finalizer == constants.PodFinalizerName { + return + } + } + spec.SetFinalizers(append(finalizers, constants.PodFinalizerName)) +} + func (r *NodeReconciler) createOrUpdateJob(ctx context.Context, storageClass, nodeName string) error { nodeCtrlLogger.Info("createOrUpdateJob") defer nodeCtrlLogger.Info("createOrUpdateJob Finished") @@ -212,6 +222,8 @@ func (r *NodeReconciler) createOrUpdateJob(ctx context.Context, storageClass, no cronjob.Spec.JobTemplate.Spec.Template.SetLabels(label) + addPodFinalizer(&cronjob.Spec.JobTemplate.Spec.Template) + if len(cronjob.Spec.JobTemplate.Spec.Template.Spec.Containers) != 1 { cronjob.Spec.JobTemplate.Spec.Template.Spec.Containers = []corev1.Container{{}} } diff --git a/controllers/pod_controller.go b/controllers/pod_controller.go index 5363b0d..86e81a1 100644 --- a/controllers/pod_controller.go +++ b/controllers/pod_controller.go @@ -11,6 +11,7 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/log" ) @@ -68,12 +69,15 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R err := r.client.Get(ctx, client.ObjectKey{Namespace: req.Namespace, Name: req.Name}, &pod) if err != nil { if apierrors.IsNotFound(err) { - r.po.deleteEventTime(pod.Name) return ctrl.Result{}, nil } return ctrl.Result{}, err } + if !controllerutil.ContainsFinalizer(&pod, constants.PodFinalizerName) { + return ctrl.Result{}, nil + } + r.po.setPodRegisteredTime(pod.Name, pod.CreationTimestamp.Time) for _, status := range pod.Status.ContainerStatuses { @@ -87,6 +91,15 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R } } + if !pod.DeletionTimestamp.IsZero() { + r.po.deleteEventTime(pod.Name) + controllerutil.RemoveFinalizer(&pod, constants.PodFinalizerName) + err := r.client.Update(ctx, &pod) + if err != nil { + return ctrl.Result{}, err + } + } + return ctrl.Result{}, nil } From 8ce866ace25dd5cd763e288574f094bebcad5922 Mon Sep 17 00:00:00 2001 From: Toshikuni Fukaya Date: Mon, 15 May 2023 07:29:22 +0000 Subject: [PATCH 2/5] Set deletion policy for job deletion To make sure to delete a pod from job, set deletion policy to background for job deletion. Signed-off-by: Toshikuni Fukaya --- controllers/provision_observer.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/controllers/provision_observer.go b/controllers/provision_observer.go index 43a54f5..8ee06b0 100644 --- a/controllers/provision_observer.go +++ b/controllers/provision_observer.go @@ -108,7 +108,8 @@ func (p *provisionObserver) deleteOwnerJobOfPod(ctx context.Context, podName str return err } - err = p.client.Delete(ctx, &job) + policy := metav1.DeletePropagationBackground + err = p.client.Delete(ctx, &job, &client.DeleteOptions{PropagationPolicy: &policy}) if err != nil { if apierrors.IsNotFound(err) { continue From 9566e1a81a08fca4561e680e59f2b7cae64de97a Mon Sep 17 00:00:00 2001 From: Toshikuni Fukaya Date: Mon, 15 May 2023 02:54:58 +0000 Subject: [PATCH 3/5] Ensure check function working before pod deletion Since check function runs peridically, it may miss a deleting pod event. To prevent it, run the function before deleting pod events from maps. Signed-off-by: Toshikuni Fukaya --- controllers/provision_observer.go | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/controllers/provision_observer.go b/controllers/provision_observer.go index 8ee06b0..1b28b20 100644 --- a/controllers/provision_observer.go +++ b/controllers/provision_observer.go @@ -28,7 +28,10 @@ type provisionObserver struct { podRegisteredTime map[string]time.Time podStartedTime map[string]time.Time countedFlag map[string]struct{} - mu sync.Mutex + // mu protects above maps + mu sync.Mutex + makeCheckCh chan struct{} + checkDoneCh chan struct{} } func newProvisionObserver( @@ -45,6 +48,8 @@ func newProvisionObserver( podRegisteredTime: make(map[string]time.Time), podStartedTime: make(map[string]time.Time), countedFlag: make(map[string]struct{}), + makeCheckCh: make(chan struct{}), + checkDoneCh: make(chan struct{}), } } @@ -63,6 +68,9 @@ func (p *provisionObserver) setPodStartedTime(podName string, eventTime time.Tim } func (p *provisionObserver) deleteEventTime(podName string) { + p.makeCheckCh <- struct{}{} + <-p.checkDoneCh + p.mu.Lock() defer p.mu.Unlock() @@ -170,12 +178,19 @@ func (p *provisionObserver) Start(ctx context.Context) error { defer ticker.Stop() for { + needNotify := false select { + case <-p.makeCheckCh: + needNotify = true case <-ticker.C: case <-ctx.Done(): return nil } p.check(ctx) + + if needNotify { + p.checkDoneCh <- struct{}{} + } } } From 25e6fbe49a3a9eacba3d215ba84ac147c07fe4d0 Mon Sep 17 00:00:00 2001 From: Toshikuni Fukaya Date: Mon, 15 May 2023 06:46:18 +0000 Subject: [PATCH 4/5] Fix group name The group name seems not to accept version. Signed-off-by: Toshikuni Fukaya --- charts/pie/templates/role.yaml | 2 +- config/rbac/role.yaml | 2 +- controllers/provision_observer.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/charts/pie/templates/role.yaml b/charts/pie/templates/role.yaml index faf6b02..9a11e7b 100644 --- a/charts/pie/templates/role.yaml +++ b/charts/pie/templates/role.yaml @@ -76,7 +76,7 @@ rules: - update - watch - apiGroups: - - batch/v1 + - batch resources: - jobs verbs: diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index ef7dff2..d5727d8 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -79,7 +79,7 @@ rules: - update - watch - apiGroups: - - batch/v1 + - batch resources: - jobs verbs: diff --git a/controllers/provision_observer.go b/controllers/provision_observer.go index 1b28b20..fdcbc6c 100644 --- a/controllers/provision_observer.go +++ b/controllers/provision_observer.go @@ -171,7 +171,7 @@ func (p *provisionObserver) check(ctx context.Context) { } } -//+kubebuilder:rbac:namespace=default,groups=batch/v1,resources=jobs,verbs=get;list;watch;delete +//+kubebuilder:rbac:namespace=default,groups=batch,resources=jobs,verbs=get;list;watch;delete func (p *provisionObserver) Start(ctx context.Context) error { ticker := time.NewTicker(time.Second) From 96b253fe30a62e6d7b29c799de25f934368bc53c Mon Sep 17 00:00:00 2001 From: Toshikuni Fukaya Date: Mon, 15 May 2023 07:13:53 +0000 Subject: [PATCH 5/5] Fix e2e to wait two target SCs The previous code checks standard or dummy SC. But we actually want to check both. Signed-off-by: Toshikuni Fukaya --- e2e/suite_test.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/e2e/suite_test.go b/e2e/suite_test.go index 2ce1d94..40a3653 100644 --- a/e2e/suite_test.go +++ b/e2e/suite_test.go @@ -158,15 +158,17 @@ var _ = Describe("pie", func() { } By("checking pie_create_probe_total have on_time=true for standard SC or on_time=false for dummy SC") - Expect("pie_create_probe_total").Should(BeKeyOf(metricFamilies)) - for _, metric := range metricFamilies["pie_create_probe_total"].Metric { - Expect(metric.Label).Should(Or(ContainElement(&standardSCLabelPair), ContainElement(&dummySCLabelPair))) + g.Expect("pie_create_probe_total").Should(BeKeyOf(metricFamilies)) + metrics := metricFamilies["pie_create_probe_total"].Metric + g.Expect(metrics).Should(ContainElement(HaveField("Label", ContainElement(&standardSCLabelPair)))) + g.Expect(metrics).Should(ContainElement(HaveField("Label", ContainElement(&dummySCLabelPair)))) + for _, metric := range metrics { for _, label := range metric.Label { switch { case reflect.DeepEqual(label, &standardSCLabelPair): - Expect(metric.Label).Should(ContainElement(&onTimeTrueLabelPair)) + g.Expect(metric.Label).Should(ContainElement(&onTimeTrueLabelPair)) case reflect.DeepEqual(label, &dummySCLabelPair): - Expect(metric.Label).Should(ContainElement(&onTimeFalseLabelPair)) + g.Expect(metric.Label).Should(ContainElement(&onTimeFalseLabelPair)) } } }