Ensure that etcd health check errors are logged.
Update controlplane/kubeadm/controllers/controller.go

Co-authored-by: Vince Prignano <vince@vincepri.com>
Arvinderpal and vincepri committed Jul 28, 2020
1 parent 2d74380 commit 93797cb
Showing 3 changed files with 13 additions and 18 deletions.
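
The core of the change is swapping the silent requeue (ctrl.Result{RequeueAfter: healthCheckFailedRequeueAfter} with a nil error) for a returned, wrapped error. Below is a minimal, self-contained sketch of why that matters; it is not the project code, and oldStyle/newStyle are illustrative names. When a reconciler returns a non-nil error, controller-runtime logs it and requeues with backoff, whereas a RequeueAfter result with a nil error requeues without logging anything.

package main

import (
	"fmt"
	"time"

	"github.com/pkg/errors"
	ctrl "sigs.k8s.io/controller-runtime"
)

// oldStyle mirrors the previous behavior: the health-check error is dropped
// and only a fixed-delay requeue is requested, so nothing is logged upstream.
func oldStyle(_ error) (ctrl.Result, error) {
	return ctrl.Result{RequeueAfter: 20 * time.Second}, nil
}

// newStyle mirrors this commit: the error is wrapped and returned, so
// controller-runtime logs it and retries with backoff.
func newStyle(healthErr error) (ctrl.Result, error) {
	return ctrl.Result{}, errors.Wrap(healthErr, "waiting for control plane to pass health check")
}

func main() {
	err := errors.New("etcd cluster is not healthy")

	res, _ := oldStyle(err)
	fmt.Printf("old: requeue after %s, error hidden from the controller logs\n", res.RequeueAfter)

	if _, e := newStyle(err); e != nil {
		fmt.Printf("new: error surfaced to the caller: %v\n", e)
	}
}

Note that the diffs below still emit the ControlPlaneUnhealthy warning events; the returned error is what additionally makes the failure show up in the controller logs.
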
21 changes: 9 additions & 12 deletions controlplane/kubeadm/controllers/controller.go
@@ -421,32 +421,29 @@ func (r *KubeadmControlPlaneReconciler) ClusterToKubeadmControlPlane(o handler.M
 // It removes any etcd members that do not have a corresponding node.
 // Also, as a final step, checks if there is any machines that is being deleted.
 func (r *KubeadmControlPlaneReconciler) reconcileHealth(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
-	logger := controlPlane.Logger()
-
 	// Do a health check of the Control Plane components
 	if err := r.managementCluster.TargetClusterControlPlaneIsHealthy(ctx, util.ObjectKey(cluster)); err != nil {
-		logger.V(2).Info("Waiting for control plane to pass control plane health check to continue reconciliation", "cause", err)
 		r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy",
 			"Waiting for control plane to pass control plane health check to continue reconciliation: %v", err)
-		return ctrl.Result{RequeueAfter: healthCheckFailedRequeueAfter}, nil
+		return ctrl.Result{}, errors.Wrap(err, "Waiting for control plane to pass control plane health check to continue reconciliation")
 	}
 
 	// Ensure etcd is healthy
 	if err := r.managementCluster.TargetClusterEtcdIsHealthy(ctx, util.ObjectKey(cluster)); err != nil {
+		errList := []error{}
+		errList = append(errList, errors.Wrap(err, "Waiting for control plane to pass etcd health check to continue reconciliation"))
+		r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy",
+			"Waiting for control plane to pass etcd health check to continue reconciliation: %v", err)
 		// If there are any etcd members that do not have corresponding nodes, remove them from etcd and from the kubeadm configmap.
 		// This will solve issues related to manual control-plane machine deletion.
 		workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster))
 		if err != nil {
-			return ctrl.Result{}, err
-		}
-		if err := workloadCluster.ReconcileEtcdMembers(ctx); err != nil {
-			logger.V(2).Info("Failed attempt to remove potential hanging etcd members to pass etcd health check to continue reconciliation", "cause", err)
+			errList = append(errList, errors.Wrap(err, "cannot get remote client to workload cluster"))
+		} else if err := workloadCluster.ReconcileEtcdMembers(ctx); err != nil {
+			errList = append(errList, errors.Wrap(err, "Failed attempt to remove potential hanging etcd members to pass etcd health check to continue reconciliation"))
		}
 
-		logger.V(2).Info("Waiting for control plane to pass etcd health check to continue reconciliation", "cause", err)
-		r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy",
-			"Waiting for control plane to pass etcd health check to continue reconciliation: %v", err)
-		return ctrl.Result{RequeueAfter: healthCheckFailedRequeueAfter}, nil
+		return ctrl.Result{}, kerrors.NewAggregate(errList)
 	}
 
 	// We need this check for scale up as well as down to avoid scaling up when there is a machine being deleted.
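
The rewritten etcd branch collects every failure, including a failed attempt to clean up hanging etcd members, into errList and returns one aggregated error. Here is a self-contained sketch of that pattern, assuming the github.com/pkg/errors and k8s.io/apimachinery modules are available; it is an illustration, not the controller code.

package main

import (
	"fmt"

	"github.com/pkg/errors"
	kerrors "k8s.io/apimachinery/pkg/util/errors"
)

func main() {
	// Pretend both the etcd health check and the member-remediation attempt failed.
	healthErr := errors.New("etcd cluster is not healthy")
	remediationErr := errors.New("cannot get remote client to workload cluster")

	errList := []error{}
	errList = append(errList, errors.Wrap(healthErr, "waiting for control plane to pass etcd health check to continue reconciliation"))
	errList = append(errList, remediationErr)

	// NewAggregate folds all collected failures into one error value (and
	// returns nil for an empty slice), so a single return statement lets the
	// caller log the full picture.
	fmt.Println(kerrors.NewAggregate(errList))
}
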
5 changes: 2 additions & 3 deletions controlplane/kubeadm/controllers/scale_test.go
@@ -170,9 +170,8 @@ func TestKubeadmControlPlaneReconciler_scaleUpControlPlane(t *testing.T) {
 		Machines: beforeMachines,
 	}
 
-	result, err := r.scaleUpControlPlane(context.Background(), cluster.DeepCopy(), kcp.DeepCopy(), controlPlane)
-	g.Expect(result).To(Equal(ctrl.Result{RequeueAfter: healthCheckFailedRequeueAfter}))
-	g.Expect(err).To(BeNil())
+	_, err := r.scaleUpControlPlane(context.Background(), cluster.DeepCopy(), kcp.DeepCopy(), controlPlane)
+	g.Expect(err).To(HaveOccurred())
 
 	controlPlaneMachines := &clusterv1.MachineList{}
 	g.Expect(fakeClient.List(context.Background(), controlPlaneMachines)).To(Succeed())
5 changes: 2 additions & 3 deletions controlplane/kubeadm/controllers/upgrade_test.go
@@ -89,9 +89,8 @@ func TestKubeadmControlPlaneReconciler_upgradeControlPlane(t *testing.T) {
 
 	// run upgrade a second time, simulate that the node has not appeared yet but the machine exists
 	r.managementCluster.(*fakeManagementCluster).ControlPlaneHealthy = false
-	result, err = r.upgradeControlPlane(context.Background(), cluster, kcp, controlPlane, needingUpgrade)
-	g.Expect(result).To(Equal(ctrl.Result{RequeueAfter: healthCheckFailedRequeueAfter}))
-	g.Expect(err).To(BeNil())
+	_, err = r.upgradeControlPlane(context.Background(), cluster, kcp, controlPlane, needingUpgrade)
+	g.Expect(err).To(HaveOccurred())
 	g.Expect(fakeClient.List(context.Background(), bothMachines, client.InNamespace(cluster.Namespace))).To(Succeed())
 	g.Expect(bothMachines.Items).To(HaveLen(2))
 
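
Both test updates follow the same pattern: rather than expecting a RequeueAfter result with a nil error, they only assert that an error is returned. A stripped-down Gomega sketch of that assertion style follows, with fakeScaleUp as a hypothetical stand-in for the reconciler call rather than part of the real suite.

package sketch

import (
	"errors"
	"testing"

	. "github.com/onsi/gomega"
)

// fakeScaleUp stands in for calls such as r.scaleUpControlPlane when a health
// check fails; it is hypothetical scaffolding for this example.
func fakeScaleUp() (int, error) {
	return 0, errors.New("control plane health check failed")
}

func TestScaleUpReturnsErrorWhenHealthCheckFails(t *testing.T) {
	g := NewWithT(t)

	_, err := fakeScaleUp()
	g.Expect(err).To(HaveOccurred())
}
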
