Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

apps: deployment config stuck in the new state should respect timeoutSeconds #17000

Merged
merged 1 commit into from
Oct 25, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/apps/apis/apps/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ const (
DeploymentCancelledNewerDeploymentExists = "newer deployment was found running"
DeploymentFailedUnrelatedDeploymentExists = "unrelated pod with the same name as this deployment is already running"
DeploymentFailedDeployerPodNoLongerExists = "deployer pod no longer exists"
DeploymentFailedUnableToCreateDeployerPod = "unable to create deployer pod"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this looks nice in oc status:

dc/test deploys docker.io/library/centos:7
  deployment #4 failed 44 minutes ago: unable to create deployer pod
  deployment #3 deployed 2 hours ago - 1 pod
  deployment #2 deployed 2 hours ago

)

// DeploymentStatus describes the possible states a deployment can be in.
Expand Down
15 changes: 15 additions & 0 deletions pkg/apps/controller/deployer/deployer_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,21 @@ func (c *DeploymentController) handle(deployment *v1.ReplicationController, will
}
break
}
// In case the deployment is stuck in "new" state because we fail to create
// deployer pod (quota, etc..) we should respect the timeoutSeconds in the
// config strategy and transition the rollout to failed instead of waiting for
// the deployment pod forever.
config, err := deployutil.DecodeDeploymentConfig(deployment, c.codec)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

long term, we want to get rid of this... we should copy the timeoutSecond into annotation so we don't have to decode here... this also apply to other fields we check i guess.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if err != nil {
return err
}
if deployutil.RolloutExceededTimeoutSeconds(config, deployment) {
nextStatus = deployapi.DeploymentStatusFailed
updatedAnnotations[deployapi.DeploymentStatusReasonAnnotation] = deployapi.DeploymentFailedUnableToCreateDeployerPod
c.emitDeploymentEvent(deployment, v1.EventTypeWarning, "RolloutTimeout", fmt.Sprintf("Rollout for %q failed to create deployer pod (timeoutSeconds: %ds)", deployutil.LabelForDeploymentV1(deployment), deployutil.GetTimeoutSecondsForStrategy(config)))
glog.V(4).Infof("Failing deployment %s/%s as we reached timeout while waiting for the deployer pod to be created", deployment.Namespace, deployment.Name)
break
}

switch {
case kerrors.IsNotFound(deployerErr):
Expand Down
16 changes: 16 additions & 0 deletions pkg/apps/controller/deployer/deployer_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ func TestHandle_createPodOk(t *testing.T) {
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
deployment.Spec.Template.Spec.NodeSelector = map[string]string{"labelKey1": "labelValue1", "labelKey2": "labelValue2"}
deployment.CreationTimestamp = metav1.Now()

controller := okDeploymentController(client, nil, nil, true, v1.PodUnknown)

Expand Down Expand Up @@ -227,6 +228,7 @@ func TestHandle_createPodFail(t *testing.T) {
config := deploytest.OkDeploymentConfig(1)
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
deployment.CreationTimestamp = metav1.Now()

controller := okDeploymentController(client, nil, nil, true, v1.PodUnknown)

Expand Down Expand Up @@ -282,6 +284,7 @@ func TestHandle_deployerPodAlreadyExists(t *testing.T) {
config := deploytest.OkDeploymentConfig(1)
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
deployment.CreationTimestamp = metav1.Now()
deployerPodName := deployutil.DeployerPodNameForDeployment(deployment.Name)

client := &fake.Clientset{}
Expand Down Expand Up @@ -321,6 +324,7 @@ func TestHandle_unrelatedPodAlreadyExists(t *testing.T) {

config := deploytest.OkDeploymentConfig(1)
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.CreationTimestamp = metav1.Now()
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)

client := &fake.Clientset{}
Expand Down Expand Up @@ -362,6 +366,7 @@ func TestHandle_unrelatedPodAlreadyExistsTestScaled(t *testing.T) {
config := deploytest.TestDeploymentConfig(deploytest.OkDeploymentConfig(1))
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
deployment.CreationTimestamp = metav1.Now()
one := int32(1)
deployment.Spec.Replicas = &one

Expand Down Expand Up @@ -433,6 +438,7 @@ func TestHandle_noop(t *testing.T) {

deployment, _ := deployutil.MakeDeploymentV1(deploytest.OkDeploymentConfig(1), codec)
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(test.deploymentPhase)
deployment.CreationTimestamp = metav1.Now()

controller := okDeploymentController(client, deployment, nil, true, test.podPhase)

Expand Down Expand Up @@ -476,6 +482,7 @@ func TestHandle_failedTest(t *testing.T) {
// Verify successful cleanup
config := deploytest.TestDeploymentConfig(deploytest.OkDeploymentConfig(1))
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.CreationTimestamp = metav1.Now()
one := int32(1)
deployment.Spec.Replicas = &one
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusRunning)
Expand Down Expand Up @@ -519,6 +526,7 @@ func TestHandle_cleanupPodOk(t *testing.T) {
config := deploytest.OkDeploymentConfig(1)
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusComplete)
deployment.CreationTimestamp = metav1.Now()

controller := okDeploymentController(client, deployment, hookPods, true, v1.PodSucceeded)
hookPods = append(hookPods, deployment.Name)
Expand Down Expand Up @@ -562,6 +570,7 @@ func TestHandle_cleanupPodOkTest(t *testing.T) {
// Verify successful cleanup
config := deploytest.TestDeploymentConfig(deploytest.OkDeploymentConfig(1))
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.CreationTimestamp = metav1.Now()
one := int32(1)
deployment.Spec.Replicas = &one
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusRunning)
Expand Down Expand Up @@ -606,6 +615,7 @@ func TestHandle_cleanupPodNoop(t *testing.T) {
// Verify no-op
config := deploytest.OkDeploymentConfig(1)
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.CreationTimestamp = metav1.Now()
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusComplete)

controller := okDeploymentController(client, deployment, nil, true, v1.PodSucceeded)
Expand Down Expand Up @@ -637,6 +647,7 @@ func TestHandle_cleanupPodFail(t *testing.T) {
// Verify error
config := deploytest.OkDeploymentConfig(1)
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.CreationTimestamp = metav1.Now()
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusComplete)

controller := okDeploymentController(client, deployment, nil, true, v1.PodSucceeded)
Expand Down Expand Up @@ -667,6 +678,7 @@ func TestHandle_cancelNew(t *testing.T) {
})

deployment, _ := deployutil.MakeDeploymentV1(deploytest.OkDeploymentConfig(1), codec)
deployment.CreationTimestamp = metav1.Now()
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
deployment.Annotations[deployapi.DeploymentCancelledAnnotation] = deployapi.DeploymentCancelledAnnotationValue

Expand All @@ -688,6 +700,7 @@ func TestHandle_cleanupNewWithDeployers(t *testing.T) {
deletedDeployer := false

deployment, _ := deployutil.MakeDeploymentV1(deploytest.OkDeploymentConfig(1), codec)
deployment.CreationTimestamp = metav1.Now()
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
deployment.Annotations[deployapi.DeploymentCancelledAnnotation] = deployapi.DeploymentCancelledAnnotationValue

Expand Down Expand Up @@ -782,6 +795,7 @@ func TestHandle_cleanupPostNew(t *testing.T) {
})

deployment, _ := deployutil.MakeDeploymentV1(deploytest.OkDeploymentConfig(1), codec)
deployment.CreationTimestamp = metav1.Now()
deployment.Annotations[deployapi.DeploymentCancelledAnnotation] = deployapi.DeploymentCancelledAnnotationValue
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(test.deploymentPhase)

Expand Down Expand Up @@ -845,6 +859,7 @@ func TestHandle_deployerPodDisappeared(t *testing.T) {
continue
}
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(test.phase)
deployment.CreationTimestamp = metav1.Now()
updatedDeployment = deployment

controller := okDeploymentController(client, nil, nil, true, v1.PodUnknown)
Expand Down Expand Up @@ -980,6 +995,7 @@ func TestHandle_transitionFromDeployer(t *testing.T) {

deployment, _ := deployutil.MakeDeploymentV1(deploytest.OkDeploymentConfig(1), codec)
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(test.deploymentPhase)
deployment.CreationTimestamp = metav1.Now()

controller := okDeploymentController(client, deployment, nil, true, test.podPhase)

Expand Down
42 changes: 42 additions & 0 deletions pkg/apps/util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,12 @@ func IsTerminatedDeployment(deployment runtime.Object) bool {
return IsCompleteDeployment(deployment) || IsFailedDeployment(deployment)
}

// IsNewDeployment returns true if the passed deployment is in new state.
func IsNewDeployment(deployment runtime.Object) bool {
current := DeploymentStatusFor(deployment)
return current == deployapi.DeploymentStatusNew
}

// IsCompleteDeployment returns true if the passed deployment is in state complete.
func IsCompleteDeployment(deployment runtime.Object) bool {
current := DeploymentStatusFor(deployment)
Expand Down Expand Up @@ -782,6 +788,42 @@ func DeploymentsForCleanup(configuration *deployapi.DeploymentConfig, deployment
return relevantDeployments
}

// GetTimeoutSecondsForStrategy returns the timeout in seconds defined in the
// deployment config strategy.
func GetTimeoutSecondsForStrategy(config *deployapi.DeploymentConfig) int64 {
var timeoutSeconds int64
switch config.Spec.Strategy.Type {
case deployapi.DeploymentStrategyTypeRolling:
timeoutSeconds = deployapi.DefaultRollingTimeoutSeconds
if t := config.Spec.Strategy.RollingParams.TimeoutSeconds; t != nil {
timeoutSeconds = *t
}
case deployapi.DeploymentStrategyTypeRecreate:
timeoutSeconds = deployapi.DefaultRecreateTimeoutSeconds
if t := config.Spec.Strategy.RecreateParams.TimeoutSeconds; t != nil {
timeoutSeconds = *t
}
case deployapi.DeploymentStrategyTypeCustom:
timeoutSeconds = deployapi.DefaultRecreateTimeoutSeconds
}
return timeoutSeconds
}

// RolloutExceededTimeoutSeconds returns true if the current deployment exceeded
// the timeoutSeconds defined for its strategy.
// Note that this is different than activeDeadlineSeconds which is the timeout
// set for the deployer pod. In some cases, the deployer pod cannot be created
// (like quota, etc...). In that case deployer controller use this function to
// measure if the created deployment (RC) exceeded the timeout.
func RolloutExceededTimeoutSeconds(config *deployapi.DeploymentConfig, latestRC *v1.ReplicationController) bool {
timeoutSeconds := GetTimeoutSecondsForStrategy(config)
// If user set the timeoutSeconds to 0, we assume there should be no timeout.
if timeoutSeconds <= 0 {
return false
}
return int64(time.Since(latestRC.CreationTimestamp.Time).Seconds()) > timeoutSeconds
}

// WaitForRunningDeployerPod waits a given period of time until the deployer pod
// for given replication controller is not running.
func WaitForRunningDeployerPod(podClient kcoreclient.PodsGetter, rc *api.ReplicationController, timeout time.Duration) error {
Expand Down
108 changes: 108 additions & 0 deletions pkg/apps/util/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -586,3 +586,111 @@ func TestRemoveCondition(t *testing.T) {
}
}
}

func TestRolloutExceededTimeoutSeconds(t *testing.T) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like to see a test table containing setting different strategies and not specified timeouts mixed with creationTimestamps values

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done.

now := time.Now()
tests := []struct {
name string
config *deployapi.DeploymentConfig
deploymentCreationTime time.Time
expectTimeout bool
}{
// Recreate strategy with deployment running for 20s (exceeding 10s timeout)
{
name: "recreate timeout",
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
config := deploytest.OkDeploymentConfig(1)
config.Spec.Strategy.RecreateParams.TimeoutSeconds = &timeoutSeconds
return config
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fill in timeout and creationTime

}(int64(10)),
deploymentCreationTime: now.Add(-20 * time.Second),
expectTimeout: true,
},
// Recreate strategy with no timeout
{
name: "recreate no timeout",
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
config := deploytest.OkDeploymentConfig(1)
config.Spec.Strategy.RecreateParams.TimeoutSeconds = &timeoutSeconds
return config
}(int64(0)),
deploymentCreationTime: now.Add(-700 * time.Second),
expectTimeout: false,
},

// Rolling strategy with deployment running for 20s (exceeding 10s timeout)
{
name: "rolling timeout",
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
config := deploytest.OkDeploymentConfig(1)
config.Spec.Strategy = deploytest.OkRollingStrategy()
config.Spec.Strategy.RollingParams.TimeoutSeconds = &timeoutSeconds
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would assign int64ptr(10) directly here without the whole taking is as argument thing

return config
}(int64(10)),
deploymentCreationTime: now.Add(-20 * time.Second),
expectTimeout: true,
},
// Rolling strategy with deployment with no timeout specified.
{
name: "rolling using default timeout",
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
config := deploytest.OkDeploymentConfig(1)
config.Spec.Strategy = deploytest.OkRollingStrategy()
config.Spec.Strategy.RollingParams.TimeoutSeconds = nil
return config
}(0),
deploymentCreationTime: now.Add(-20 * time.Second),
expectTimeout: false,
},
// Recreate strategy with deployment with no timeout specified.
{
name: "recreate using default timeout",
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
config := deploytest.OkDeploymentConfig(1)
config.Spec.Strategy.RecreateParams.TimeoutSeconds = nil
return config
}(0),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't take it as you have to fix it because I told you, you don't have to but check how the argument is pointless here :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤷‍♂️

deploymentCreationTime: now.Add(-20 * time.Second),
expectTimeout: false,
},
// Custom strategy with deployment with no timeout specified.
{
name: "custom using default timeout",
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
config := deploytest.OkDeploymentConfig(1)
config.Spec.Strategy = deploytest.OkCustomStrategy()
return config
}(0),
deploymentCreationTime: now.Add(-20 * time.Second),
expectTimeout: false,
},
// Custom strategy use default timeout exceeding it.
{
name: "custom using default timeout timing out",
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
config := deploytest.OkDeploymentConfig(1)
config.Spec.Strategy = deploytest.OkCustomStrategy()
return config
}(0),
deploymentCreationTime: now.Add(-700 * time.Second),
expectTimeout: true,
},
}

for _, tc := range tests {
config := tc.config
deployment, err := MakeDeploymentV1(config, kapi.Codecs.LegacyCodec(deployv1.SchemeGroupVersion))
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
deployment.ObjectMeta.CreationTimestamp = metav1.Time{Time: tc.deploymentCreationTime}
gotTimeout := RolloutExceededTimeoutSeconds(config, deployment)
if tc.expectTimeout && !gotTimeout {
t.Errorf("[%s]: expected timeout, but got no timeout", tc.name)
}
if !tc.expectTimeout && gotTimeout {
t.Errorf("[%s]: expected no timeout, but got timeout", tc.name)
}

}
}