Skip to content

Commit

Permalink
workloadctl: account for terminating pods
Browse files Browse the repository at this point in the history
Don't set Progressing=False if some pods from the previous generation
are still running.
  • Loading branch information
stlaz committed May 6, 2024
1 parent 52527b8 commit c238cda
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 0 deletions.
19 changes: 19 additions & 0 deletions pkg/operator/apiserver/controller/workload/workload.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,21 @@ func (c *Controller) updateOperatorStatus(ctx context.Context, previousStatus *o
desiredReplicas = *(workload.Spec.Replicas)
}

selector, err := metav1.LabelSelectorAsSelector(workload.Spec.Selector)
if err != nil {
return fmt.Errorf("failed to construct label selector: %v", err)
}
matchingPods, err := c.podsLister.List(selector)
if err != nil {
return err
}
// Terminatig pods don't account for any of the other status fields but
// still can exist in a state when they are accepting connections and would
// contribute to unexpected behavior if we report Progressing=False.
// The case of too many pods might occur for example if `TerminationGracePeriodSeconds`
// is set.
tooManyMatchingPods := int32(len(matchingPods)) > desiredReplicas

// If the workload is up to date, then we are no longer progressing
workloadAtHighestGeneration := workload.ObjectMeta.Generation == workload.Status.ObservedGeneration
workloadIsBeingUpdated := workload.Status.UpdatedReplicas < desiredReplicas
Expand All @@ -274,6 +289,10 @@ func (c *Controller) updateOperatorStatus(ctx context.Context, previousStatus *o
deploymentProgressingCondition.Status = operatorv1.ConditionTrue
deploymentProgressingCondition.Reason = "PodsUpdating"
deploymentProgressingCondition.Message = fmt.Sprintf("deployment/%s.%s: %d/%d pods have been updated to the latest generation", workload.Name, c.targetNamespace, workload.Status.UpdatedReplicas, desiredReplicas)
} else if tooManyMatchingPods {
deploymentProgressingCondition.Status = operatorv1.ConditionTrue
deploymentProgressingCondition.Reason = "PreviousGenPodsPresent"
deploymentProgressingCondition.Message = fmt.Sprintf("deployment/%s.%s: %d pod(s) from the previous generation are still present", workload.Name, c.targetNamespace, len(matchingPods)-int(desiredReplicas))
} else {
deploymentProgressingCondition.Status = operatorv1.ConditionFalse
deploymentProgressingCondition.Reason = "AsExpected"
Expand Down
102 changes: 102 additions & 0 deletions pkg/operator/apiserver/controller/workload/workload_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,108 @@ func TestUpdateOperatorStatus(t *testing.T) {
return areCondidtionsEqual(expectedConditions, actualStatus.Conditions)
},
},
{
name: "all pods rolled out but there is an old terminating pod",
workload: &appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{
Name: "apiserver",
Namespace: "openshift-apiserver",
},
Spec: appsv1.DeploymentSpec{
Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}},
Template: corev1.PodTemplateSpec{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "bar"}}},
Replicas: ptr.To[int32](2),
},
Status: appsv1.DeploymentStatus{
AvailableReplicas: 2,
ReadyReplicas: 2,
UpdatedReplicas: 2,
},
},
pods: []*corev1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Name: "apiserver-0", Namespace: "openshift-apiserver",
Labels: map[string]string{"foo": "bar"},
},
Status: corev1.PodStatus{
Phase: corev1.PodRunning,
ContainerStatuses: []corev1.ContainerStatus{
{
Name: "test",
Ready: true,
State: corev1.ContainerState{
Running: &corev1.ContainerStateRunning{},
},
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "apiserver-1", Namespace: "openshift-apiserver",
Labels: map[string]string{"foo": "bar"},
},
Status: corev1.PodStatus{
Phase: corev1.PodRunning,
ContainerStatuses: []corev1.ContainerStatus{
{
Name: "test",
Ready: true,
State: corev1.ContainerState{
Running: &corev1.ContainerStateRunning{},
},
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "apiserver-2", Namespace: "openshift-apiserver",
Labels: map[string]string{"foo": "bar"},
},
Status: corev1.PodStatus{
Phase: corev1.PodRunning,
ContainerStatuses: []corev1.ContainerStatus{
{
Name: "test",
Ready: true,
State: corev1.ContainerState{
Running: &corev1.ContainerStateRunning{},
},
},
},
},
},
},
validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error {
expectedConditions := []operatorv1.OperatorCondition{
{
Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable),
Status: operatorv1.ConditionTrue,
Reason: "AsExpected",
Message: "",
},
{
Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName),
Status: operatorv1.ConditionFalse,
},
{
Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName),
Status: operatorv1.ConditionFalse,
Reason: "AsExpected",
Message: "",
},
{
Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing),
Status: operatorv1.ConditionTrue,
Reason: "PreviousGenPodsPresent",
Message: "deployment/apiserver.openshift-apiserver: 1 pod(s) from the previous generation are still present",
},
}
return areCondidtionsEqual(expectedConditions, actualStatus.Conditions)
},
},
}

for _, scenario := range scenarios {
Expand Down

0 comments on commit c238cda

Please sign in to comment.