Skip to content

Commit

Permalink
Merge pull request #4471 from JoelSpeed/disable-node-startup-timeout
Browse files Browse the repository at this point in the history
✨  Allow users to disable MHC NodeStartupTimeout
  • Loading branch information
k8s-ci-robot authored May 19, 2021
2 parents e7fb8ee + 2be6be2 commit 06f58a6
Show file tree
Hide file tree
Showing 9 changed files with 108 additions and 29 deletions.
6 changes: 6 additions & 0 deletions api/v1alpha4/common_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package v1alpha4

import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

const (
Expand Down Expand Up @@ -93,6 +94,11 @@ const (
ManagedByAnnotation = "cluster.x-k8s.io/managed-by"
)

var (
	// ZeroDuration is a zero value of the metav1.Duration type.
	// It is used as a sentinel for comparisons against durations that may be
	// explicitly set to 0; e.g. a MachineHealthCheck nodeStartupTimeout of 0
	// disables the node startup timeout check.
	ZeroDuration = metav1.Duration{}
)

// MachineAddressType describes a valid MachineAddress type.
type MachineAddressType string

Expand Down
2 changes: 2 additions & 0 deletions api/v1alpha4/machinehealthcheck_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ type MachineHealthCheckSpec struct {

// Machines older than this duration without a node will be considered to have
// failed and will be remediated.
// If not set, this value is defaulted to 10 minutes.
// If you wish to disable this feature, set the value explicitly to 0.
// +optional
NodeStartupTimeout *metav1.Duration `json:"nodeStartupTimeout,omitempty"`

Expand Down
14 changes: 9 additions & 5 deletions api/v1alpha4/machinehealthcheck_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,15 @@ import (
)

var (
// Default time allowed for a node to start up. Can be made longer as part of
// spec if required for particular provider.
// DefaultNodeStartupTimeout is the time allowed for a node to start up.
// Can be made longer as part of spec if required for particular provider.
// 10 minutes should allow the instance to start and the node to join the
// cluster on most providers.
defaultNodeStartupTimeout = metav1.Duration{Duration: 10 * time.Minute}
DefaultNodeStartupTimeout = metav1.Duration{Duration: 10 * time.Minute}
// Minimum time allowed for a node to start up.
minNodeStartupTimeout = metav1.Duration{Duration: 30 * time.Second}
// We allow users to disable the nodeStartupTimeout by setting the duration to 0.
disabledNodeStartupTimeout = ZeroDuration
)

// SetMinNodeStartupTimeout allows users to optionally set a custom timeout
Expand Down Expand Up @@ -73,7 +75,7 @@ func (m *MachineHealthCheck) Default() {
}

if m.Spec.NodeStartupTimeout == nil {
m.Spec.NodeStartupTimeout = &defaultNodeStartupTimeout
m.Spec.NodeStartupTimeout = &DefaultNodeStartupTimeout
}
}

Expand Down Expand Up @@ -129,7 +131,9 @@ func (m *MachineHealthCheck) validate(old *MachineHealthCheck) error {
)
}

if m.Spec.NodeStartupTimeout != nil && m.Spec.NodeStartupTimeout.Seconds() < minNodeStartupTimeout.Seconds() {
if m.Spec.NodeStartupTimeout != nil &&
m.Spec.NodeStartupTimeout.Seconds() != disabledNodeStartupTimeout.Seconds() &&
m.Spec.NodeStartupTimeout.Seconds() < minNodeStartupTimeout.Seconds() {
allErrs = append(
allErrs,
field.Invalid(field.NewPath("spec", "nodeStartupTimeout"), m.Spec.NodeStartupTimeout.Seconds(), "must be at least 30s"),
Expand Down
4 changes: 2 additions & 2 deletions api/v1alpha4/machinehealthcheck_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,9 +177,9 @@ func TestMachineHealthCheckNodeStartupTimeout(t *testing.T) {
expectErr: true,
},
{
name: "when the nodeStartupTimeout is 0",
name: "when the nodeStartupTimeout is 0 (disabled)",
timeout: &zero,
expectErr: true,
expectErr: false,
},
}

Expand Down
4 changes: 3 additions & 1 deletion config/crd/bases/cluster.x-k8s.io_machinehealthchecks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,9 @@ spec:
x-kubernetes-int-or-string: true
nodeStartupTimeout:
description: Machines older than this duration without a node will
be considered to have failed and will be remediated.
be considered to have failed and will be remediated. If not set,
this value is defaulted to 10 minutes. If you wish to disable this
feature, set the value explicitly to 0.
type: string
remediationTemplate:
description: "RemediationTemplate is a reference to a remediation
Expand Down
7 changes: 6 additions & 1 deletion controllers/machinehealthcheck_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,13 @@ func (r *MachineHealthCheckReconciler) reconcile(ctx context.Context, logger log
// do sort to avoid keep changing m.Status as the returned machines are not in order
sort.Strings(m.Status.Targets)

nodeStartupTimeout := m.Spec.NodeStartupTimeout
if nodeStartupTimeout == nil {
nodeStartupTimeout = &clusterv1.DefaultNodeStartupTimeout
}

// health check all targets and reconcile mhc status
healthy, unhealthy, nextCheckTimes := r.healthCheckTargets(targets, logger, m.Spec.NodeStartupTimeout.Duration)
healthy, unhealthy, nextCheckTimes := r.healthCheckTargets(targets, logger, *nodeStartupTimeout)
m.Status.CurrentHealthy = int32(len(healthy))

var unhealthyLimitKey, unhealthyLimitValue interface{}
Expand Down
21 changes: 16 additions & 5 deletions controllers/machinehealthcheck_targets.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ const (
EventDetectedUnhealthy string = "DetectedUnhealthy"
)

var (
// We allow users to disable the nodeStartupTimeout by setting the duration to 0.
disabledNodeStartupTimeout = clusterv1.ZeroDuration
)

// healthCheckTarget contains the information required to perform a health check
// on the node to determine if any remediation is required.
type healthCheckTarget struct {
Expand Down Expand Up @@ -81,7 +86,7 @@ func (t *healthCheckTarget) nodeName() string {
// If the target doesn't currently need remediation, provide a duration after
// which the target should next be checked.
// The target should be requeued after this duration.
func (t *healthCheckTarget) needsRemediation(logger logr.Logger, timeoutForMachineToHaveNode time.Duration) (bool, time.Duration) {
func (t *healthCheckTarget) needsRemediation(logger logr.Logger, timeoutForMachineToHaveNode metav1.Duration) (bool, time.Duration) {
var nextCheckTimes []time.Duration
now := time.Now()

Expand Down Expand Up @@ -120,6 +125,12 @@ func (t *healthCheckTarget) needsRemediation(logger logr.Logger, timeoutForMachi

// the node has not been set yet
if t.Node == nil {
if timeoutForMachineToHaveNode == disabledNodeStartupTimeout {
// Startup timeout is disabled so no need to go any further.
// No node yet to check conditions, can return early here.
return false, 0
}

controlPlaneInitializedTime := conditions.GetLastTransitionTime(t.Cluster, clusterv1.ControlPlaneInitializedCondition).Time
clusterInfraReadyTime := conditions.GetLastTransitionTime(t.Cluster, clusterv1.InfrastructureReadyCondition).Time
machineCreationTime := t.Machine.CreationTimestamp.Time
Expand All @@ -135,14 +146,14 @@ func (t *healthCheckTarget) needsRemediation(logger logr.Logger, timeoutForMachi
}
logger.V(3).Info("Using comparison time", "time", comparisonTime)

if comparisonTime.Add(timeoutForMachineToHaveNode).Before(now) {
if comparisonTime.Add(timeoutForMachineToHaveNode.Duration).Before(now) {
conditions.MarkFalse(t.Machine, clusterv1.MachineHealthCheckSuccededCondition, clusterv1.NodeStartupTimeoutReason, clusterv1.ConditionSeverityWarning, "Node failed to report startup in %s", timeoutForMachineToHaveNode.String())
logger.V(3).Info("Target is unhealthy: machine has no node", "duration", timeoutForMachineToHaveNode.String())
return true, time.Duration(0)
}

durationUnhealthy := now.Sub(comparisonTime)
nextCheck := timeoutForMachineToHaveNode - durationUnhealthy + time.Second
nextCheck := timeoutForMachineToHaveNode.Duration - durationUnhealthy + time.Second

return false, nextCheck
}
Expand Down Expand Up @@ -261,7 +272,7 @@ func (r *MachineHealthCheckReconciler) getNodeFromMachine(ctx context.Context, c

// healthCheckTargets health checks a slice of targets
// and gives a data to measure the average health.
func (r *MachineHealthCheckReconciler) healthCheckTargets(targets []healthCheckTarget, logger logr.Logger, timeoutForMachineToHaveNode time.Duration) ([]healthCheckTarget, []healthCheckTarget, []time.Duration) {
func (r *MachineHealthCheckReconciler) healthCheckTargets(targets []healthCheckTarget, logger logr.Logger, timeoutForMachineToHaveNode metav1.Duration) ([]healthCheckTarget, []healthCheckTarget, []time.Duration) {
var nextCheckTimes []time.Duration
var unhealthy []healthCheckTarget
var healthy []healthCheckTarget
Expand Down Expand Up @@ -290,7 +301,7 @@ func (r *MachineHealthCheckReconciler) healthCheckTargets(targets []healthCheckT
continue
}

if t.Machine.DeletionTimestamp.IsZero() {
if t.Machine.DeletionTimestamp.IsZero() && t.Node != nil {
conditions.MarkTrue(t.Machine, clusterv1.MachineHealthCheckSuccededCondition)
healthy = append(healthy, t)
}
Expand Down
71 changes: 57 additions & 14 deletions controllers/machinehealthcheck_targets_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,9 +197,19 @@ func TestHealthCheckTargets(t *testing.T) {
conditions.MarkTrue(cluster, clusterv1.InfrastructureReadyCondition)
conditions.MarkTrue(cluster, clusterv1.ControlPlaneInitializedCondition)

// Ensure the control plane was initialized earlier to prevent it interfering with
// NodeStartupTimeout testing.
conds := clusterv1.Conditions{}
for _, condition := range cluster.GetConditions() {
condition.LastTransitionTime = metav1.NewTime(condition.LastTransitionTime.Add(-1 * time.Hour))
conds = append(conds, condition)
}
cluster.SetConditions(conds)

mhcSelector := map[string]string{"cluster": clusterName, "machine-group": "foo"}

timeoutForMachineToHaveNode := 10 * time.Minute
disabledTimeoutForMachineToHaveNode := time.Duration(0)

// Create a test MHC
testMHC := &clusterv1.MachineHealthCheck{
Expand Down Expand Up @@ -229,15 +239,26 @@ func TestHealthCheckTargets(t *testing.T) {

testMachine := newTestMachine("machine1", namespace, clusterName, "node1", mhcSelector)

// Target for when the node has not yet been seen by the Machine controller
testMachineLastUpdated400s := testMachine.DeepCopy()
// Targets for when the node has not yet been seen by the Machine controller
testMachineCreated1200s := testMachine.DeepCopy()
nowMinus1200s := metav1.NewTime(time.Now().Add(-1200 * time.Second))
testMachineCreated1200s.ObjectMeta.CreationTimestamp = nowMinus1200s

nodeNotYetStartedTarget1200s := healthCheckTarget{
Cluster: cluster,
MHC: testMHC,
Machine: testMachineCreated1200s,
Node: nil,
}

testMachineCreated400s := testMachine.DeepCopy()
nowMinus400s := metav1.NewTime(time.Now().Add(-400 * time.Second))
testMachineLastUpdated400s.Status.LastUpdated = &nowMinus400s
testMachineCreated400s.ObjectMeta.CreationTimestamp = nowMinus400s

nodeNotYetStartedTarget := healthCheckTarget{
nodeNotYetStartedTarget400s := healthCheckTarget{
Cluster: cluster,
MHC: testMHC,
Machine: testMachineLastUpdated400s,
Machine: testMachineCreated400s,
Node: nil,
}

Expand Down Expand Up @@ -292,18 +313,26 @@ func TestHealthCheckTargets(t *testing.T) {
}

testCases := []struct {
desc string
targets []healthCheckTarget
expectedHealthy []healthCheckTarget
expectedNeedsRemediation []healthCheckTarget
expectedNextCheckTimes []time.Duration
desc string
targets []healthCheckTarget
timeoutForMachineToHaveNode *time.Duration
expectedHealthy []healthCheckTarget
expectedNeedsRemediation []healthCheckTarget
expectedNextCheckTimes []time.Duration
}{
{
desc: "when the node has not yet started",
targets: []healthCheckTarget{nodeNotYetStartedTarget},
desc: "when the node has not yet started for shorter than the timeout",
targets: []healthCheckTarget{nodeNotYetStartedTarget400s},
expectedHealthy: []healthCheckTarget{},
expectedNeedsRemediation: []healthCheckTarget{},
expectedNextCheckTimes: []time.Duration{timeoutForMachineToHaveNode},
expectedNextCheckTimes: []time.Duration{timeoutForMachineToHaveNode - 400*time.Second},
},
{
desc: "when the node has not yet started for longer than the timeout",
targets: []healthCheckTarget{nodeNotYetStartedTarget1200s},
expectedHealthy: []healthCheckTarget{},
expectedNeedsRemediation: []healthCheckTarget{nodeNotYetStartedTarget1200s},
expectedNextCheckTimes: []time.Duration{},
},
{
desc: "when the node has gone away",
Expand Down Expand Up @@ -340,6 +369,14 @@ func TestHealthCheckTargets(t *testing.T) {
expectedNeedsRemediation: []healthCheckTarget{nodeUnknown400},
expectedNextCheckTimes: []time.Duration{200 * time.Second, 100 * time.Second},
},
{
desc: "when the node has not started for a long time but the startup timeout is disabled",
targets: []healthCheckTarget{nodeNotYetStartedTarget400s},
timeoutForMachineToHaveNode: &disabledTimeoutForMachineToHaveNode,
expectedHealthy: []healthCheckTarget{}, // The machine is not counted healthy as it does not yet have a node
expectedNeedsRemediation: []healthCheckTarget{},
expectedNextCheckTimes: []time.Duration{}, // We don't have a timeout so no way to know when to re-check
},
}

for _, tc := range testCases {
Expand All @@ -351,7 +388,13 @@ func TestHealthCheckTargets(t *testing.T) {
recorder: record.NewFakeRecorder(5),
}

healthy, unhealthy, nextCheckTimes := reconciler.healthCheckTargets(tc.targets, ctrl.LoggerFrom(ctx), timeoutForMachineToHaveNode)
// Allow individual test cases to override the timeoutForMachineToHaveNode.
timeout := metav1.Duration{Duration: timeoutForMachineToHaveNode}
if tc.timeoutForMachineToHaveNode != nil {
timeout.Duration = *tc.timeoutForMachineToHaveNode
}

healthy, unhealthy, nextCheckTimes := reconciler.healthCheckTargets(tc.targets, ctrl.LoggerFrom(ctx), timeout)

// Round durations down to the nearest second to account for minute differences
// in timing when running tests
Expand Down
8 changes: 7 additions & 1 deletion docs/book/src/tasks/healthcheck.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,13 @@ spec:
# (Optional) maxUnhealthy prevents further remediation if the cluster is already partially unhealthy
maxUnhealthy: 40%
# (Optional) nodeStartupTimeout determines how long a MachineHealthCheck should wait for
# a Node to join the cluster, before considering a Machine unhealthy
# a Node to join the cluster, before considering a Machine unhealthy.
# Defaults to 10 minutes if not specified.
# Set to 0 to disable the node startup timeout.
# Disabling this timeout will prevent a Machine from being considered unhealthy when
# the Node it created has not yet registered with the cluster. This can be useful when
# Nodes take a long time to start up or when you only want condition based checks for
# Machine health.
nodeStartupTimeout: 10m
# selector is used to determine which Machines should be health checked
selector:
Expand Down

0 comments on commit 06f58a6

Please sign in to comment.