OCPBUGS-23435: workloadctl: account for terminating pods #1732

Open · wants to merge 1 commit into master
19 changes: 19 additions & 0 deletions pkg/operator/apiserver/controller/workload/workload.go
@@ -262,6 +262,21 @@ func (c *Controller) updateOperatorStatus(ctx context.Context, previousStatus *o
desiredReplicas = *(workload.Spec.Replicas)
}

selector, err := metav1.LabelSelectorAsSelector(workload.Spec.Selector)
if err != nil {
return fmt.Errorf("failed to construct label selector: %v", err)
}
matchingPods, err := c.podsLister.List(selector)
if err != nil {
return err
}
// Terminating pods are not counted in any of the other status fields, but
// they can still be in a state where they accept connections and would
// contribute to unexpected behavior if we reported Progressing=False.
// Too many matching pods can occur, for example, when `TerminationGracePeriodSeconds`
// is set.
tooManyMatchingPods := int32(len(matchingPods)) > desiredReplicas
Member:

Safer would be to test if all the pods have the same hash (pod-template-hash label). This would work well in combination with workloadIsBeingUpdated.
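
For illustration, a minimal sketch of such a check, assuming the pods carry the standard `pod-template-hash` label that the Deployment controller copies from its ReplicaSets (the helper name and wiring are hypothetical, not part of this PR):

```go
package workload

import (
	corev1 "k8s.io/api/core/v1"
)

// allPodsShareTemplateHash reports whether every listed pod carries the same
// pod-template-hash label. Pods left over from an older ReplicaSet (for
// example pods still terminating after a rollout) carry a different hash.
func allPodsShareTemplateHash(pods []*corev1.Pod) bool {
	const hashLabel = "pod-template-hash"
	var hash string
	for _, pod := range pods {
		h, ok := pod.Labels[hashLabel]
		if !ok {
			// Pod is not managed by a ReplicaSet; treat it as a mismatch.
			return false
		}
		if hash == "" {
			hash = h
		} else if h != hash {
			return false
		}
	}
	return true
}
```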

Contributor Author:

is pod-template-hash set on 100% of our deployments?

Contributor Author:

I think the pod spec can be the same, only the underlying config changed. Would that still work?

Member:

The deployment rollout has to be triggered somehow and it seems that you are triggering it with the resource revisions of the dependencies. So yes it should work.

https://github.com/openshift/cluster-authentication-operator/blob/b415439ebab2829c8da1ea17c05f2ac75fe5dbe8/pkg/controllers/deployment/default_deployment.go#L54
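
The general pattern is roughly the following; this is an illustrative sketch only (the annotation key and helper name are made up here, not taken from the linked file). Hashing the dependencies' resourceVersions into the pod template means any config change yields a new ReplicaSet, and with it a new `pod-template-hash`:

```go
package workload

import (
	"crypto/sha256"
	"fmt"
	"sort"
	"strings"

	appsv1 "k8s.io/api/apps/v1"
)

// applyDependencyHash stamps a hash of the dependencies' resourceVersions onto
// the pod template. Any change to a dependency changes the pod template, which
// makes the Deployment controller create a new ReplicaSet whose pods carry a
// new pod-template-hash.
func applyDependencyHash(deployment *appsv1.Deployment, dependencyVersions map[string]string) {
	keys := make([]string, 0, len(dependencyVersions))
	for k := range dependencyVersions {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	parts := make([]string, 0, len(keys))
	for _, k := range keys {
		parts = append(parts, k+"="+dependencyVersions[k])
	}
	sum := sha256.Sum256([]byte(strings.Join(parts, ",")))

	if deployment.Spec.Template.Annotations == nil {
		deployment.Spec.Template.Annotations = map[string]string{}
	}
	// Hypothetical annotation key, used here only for illustration.
	deployment.Spec.Template.Annotations["example.openshift.io/dependencies-hash"] = fmt.Sprintf("%x", sum)
}
```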


// If the workload is up to date, then we are no longer progressing
workloadAtHighestGeneration := workload.ObjectMeta.Generation == workload.Status.ObservedGeneration
workloadIsBeingUpdated := workload.Status.UpdatedReplicas < desiredReplicas
@@ -274,6 +289,10 @@ func (c *Controller) updateOperatorStatus(ctx context.Context, previousStatus *o
deploymentProgressingCondition.Status = operatorv1.ConditionTrue
deploymentProgressingCondition.Reason = "PodsUpdating"
deploymentProgressingCondition.Message = fmt.Sprintf("deployment/%s.%s: %d/%d pods have been updated to the latest generation", workload.Name, c.targetNamespace, workload.Status.UpdatedReplicas, desiredReplicas)
} else if tooManyMatchingPods {
deploymentProgressingCondition.Status = operatorv1.ConditionTrue
deploymentProgressingCondition.Reason = "PreviousGenPodsPresent"
deploymentProgressingCondition.Message = fmt.Sprintf("deployment/%s.%s: %d pod(s) from the previous generation are still present", workload.Name, c.targetNamespace, len(matchingPods)-int(desiredReplicas))
Member:

This may not always be correct. There may be extra pods for different reasons. E.g. if they are disrupted/evicted for some reason, the deployment controller will create extra pods to account for that.

Contributor Author:

How would you word it, then? Just too many pods?

Member (@atiratree, May 7, 2024):

In k8s, this is considered a complete deployment, so it depends on what you want to convey. There can also be extra pods during a rollout with maxSurge, but in that case the pods will have different revisions/hashes.
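
As a rough illustration of the maxSurge point (the field values below are hypothetical, not taken from this operator): with a strategy like this, a rollout briefly runs one pod above `spec.replicas`, and that surge pod carries the new `pod-template-hash` while the old-hash pods drain.

```go
package workload

import (
	appsv1 "k8s.io/api/apps/v1"
	"k8s.io/apimachinery/pkg/util/intstr"
	"k8s.io/utils/ptr"
)

// withMaxSurge configures a rolling update that may run one extra pod during a
// rollout. While the surge pod exists, the number of matching pods exceeds
// spec.replicas even though the Deployment is healthy; the surge pod has the
// new pod-template-hash, the pods being replaced still have the old one.
func withMaxSurge(deployment *appsv1.Deployment) {
	deployment.Spec.Replicas = ptr.To[int32](2)
	deployment.Spec.Strategy = appsv1.DeploymentStrategy{
		Type: appsv1.RollingUpdateDeploymentStrategyType,
		RollingUpdate: &appsv1.RollingUpdateDeployment{
			MaxSurge:       ptr.To(intstr.FromInt32(1)),
			MaxUnavailable: ptr.To(intstr.FromInt32(0)),
		},
	}
}
```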

Contributor Author:

I suppose the message should be about pods of a different revision still existing. Would you be able to think of any other case where extra pods might cause unexpected behavior?

Member:

Apart from the terminating pods, which are the subject of this bug, no.

I guess, there might be some exotic cases where the pods can be owned by another controller, but that can be safely ignored here.
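
If one did want to guard against that exotic case, a minimal, illustrative filter (not part of this PR) could keep only pods whose controlling owner is a ReplicaSet:

```go
package workload

import (
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// ownedByReplicaSet drops pods whose controlling owner is not an apps/v1
// ReplicaSet, e.g. pods created by some other controller that merely happen
// to match the deployment's label selector.
func ownedByReplicaSet(pods []*corev1.Pod) []*corev1.Pod {
	var out []*corev1.Pod
	for _, pod := range pods {
		ref := metav1.GetControllerOf(pod)
		if ref != nil && ref.Kind == "ReplicaSet" && ref.APIVersion == "apps/v1" {
			out = append(out, pod)
		}
	}
	return out
}
```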

} else {
deploymentProgressingCondition.Status = operatorv1.ConditionFalse
deploymentProgressingCondition.Reason = "AsExpected"
102 changes: 102 additions & 0 deletions pkg/operator/apiserver/controller/workload/workload_test.go
@@ -543,6 +543,108 @@ func TestUpdateOperatorStatus(t *testing.T) {
return areCondidtionsEqual(expectedConditions, actualStatus.Conditions)
},
},
{
name: "all pods rolled out but there is an old terminating pod",
workload: &appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{
Name: "apiserver",
Namespace: "openshift-apiserver",
},
Spec: appsv1.DeploymentSpec{
Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}},
Template: corev1.PodTemplateSpec{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "bar"}}},
Replicas: ptr.To[int32](2),
},
Status: appsv1.DeploymentStatus{
AvailableReplicas: 2,
ReadyReplicas: 2,
UpdatedReplicas: 2,
},
},
pods: []*corev1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Name: "apiserver-0", Namespace: "openshift-apiserver",
Labels: map[string]string{"foo": "bar"},
},
Status: corev1.PodStatus{
Phase: corev1.PodRunning,
ContainerStatuses: []corev1.ContainerStatus{
{
Name: "test",
Ready: true,
State: corev1.ContainerState{
Running: &corev1.ContainerStateRunning{},
},
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "apiserver-1", Namespace: "openshift-apiserver",
Labels: map[string]string{"foo": "bar"},
},
Status: corev1.PodStatus{
Phase: corev1.PodRunning,
ContainerStatuses: []corev1.ContainerStatus{
{
Name: "test",
Ready: true,
State: corev1.ContainerState{
Running: &corev1.ContainerStateRunning{},
},
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "apiserver-2", Namespace: "openshift-apiserver",
Labels: map[string]string{"foo": "bar"},
},
Status: corev1.PodStatus{
Phase: corev1.PodRunning,
ContainerStatuses: []corev1.ContainerStatus{
{
Name: "test",
Ready: true,
State: corev1.ContainerState{
Running: &corev1.ContainerStateRunning{},
},
},
},
},
},
},
validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error {
expectedConditions := []operatorv1.OperatorCondition{
{
Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable),
Status: operatorv1.ConditionTrue,
Reason: "AsExpected",
Message: "",
},
{
Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName),
Status: operatorv1.ConditionFalse,
},
{
Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName),
Status: operatorv1.ConditionFalse,
Reason: "AsExpected",
Message: "",
},
{
Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing),
Status: operatorv1.ConditionTrue,
Reason: "PreviousGenPodsPresent",
Message: "deployment/apiserver.openshift-apiserver: 1 pod(s) from the previous generation are still present",
},
}
return areCondidtionsEqual(expectedConditions, actualStatus.Conditions)
},
},
}

for _, scenario := range scenarios {