Skip to content

Commit

Permalink
apps: deployment config stuck in the new state should respect timeout…
Browse files Browse the repository at this point in the history
…Secods
  • Loading branch information
mfojtik committed Oct 25, 2017
1 parent 05d2e14 commit 81ab37e
Show file tree
Hide file tree
Showing 5 changed files with 182 additions and 0 deletions.
1 change: 1 addition & 0 deletions pkg/apps/apis/apps/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ const (
DeploymentCancelledNewerDeploymentExists = "newer deployment was found running"
DeploymentFailedUnrelatedDeploymentExists = "unrelated pod with the same name as this deployment is already running"
DeploymentFailedDeployerPodNoLongerExists = "deployer pod no longer exists"
DeploymentFailedUnableToCreateDeployerPod = "unable to create deployer pod"
)

// DeploymentStatus describes the possible states a deployment can be in.
Expand Down
15 changes: 15 additions & 0 deletions pkg/apps/controller/deployer/deployer_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,21 @@ func (c *DeploymentController) handle(deployment *v1.ReplicationController, will
}
break
}
// In case the deployment is stuck in "new" state because we fail to create
// deployer pod (quota, etc..) we should respect the timeoutSeconds in the
// config strategy and transition the rollout to failed instead of waiting for
// the deployment pod forever.
config, err := deployutil.DecodeDeploymentConfig(deployment, c.codec)
if err != nil {
return err
}
if deployutil.RolloutExceededTimeoutSeconds(config, deployment) {
nextStatus = deployapi.DeploymentStatusFailed
updatedAnnotations[deployapi.DeploymentStatusReasonAnnotation] = deployapi.DeploymentFailedUnableToCreateDeployerPod
c.emitDeploymentEvent(deployment, v1.EventTypeWarning, "RolloutTimeout", fmt.Sprintf("Rollout for %q failed to create deployer pod (timeoutSeconds: %ds)", deployutil.LabelForDeploymentV1(deployment), deployutil.GetTimeoutSecondsForStrategy(config)))
glog.V(4).Infof("Failing deployment %s/%s as we reached timeout while waiting for the deployer pod to be created", deployment.Namespace, deployment.Name)
break
}

switch {
case kerrors.IsNotFound(deployerErr):
Expand Down
16 changes: 16 additions & 0 deletions pkg/apps/controller/deployer/deployer_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ func TestHandle_createPodOk(t *testing.T) {
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
deployment.Spec.Template.Spec.NodeSelector = map[string]string{"labelKey1": "labelValue1", "labelKey2": "labelValue2"}
deployment.CreationTimestamp = metav1.Now()

controller := okDeploymentController(client, nil, nil, true, v1.PodUnknown)

Expand Down Expand Up @@ -227,6 +228,7 @@ func TestHandle_createPodFail(t *testing.T) {
config := deploytest.OkDeploymentConfig(1)
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
deployment.CreationTimestamp = metav1.Now()

controller := okDeploymentController(client, nil, nil, true, v1.PodUnknown)

Expand Down Expand Up @@ -282,6 +284,7 @@ func TestHandle_deployerPodAlreadyExists(t *testing.T) {
config := deploytest.OkDeploymentConfig(1)
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
deployment.CreationTimestamp = metav1.Now()
deployerPodName := deployutil.DeployerPodNameForDeployment(deployment.Name)

client := &fake.Clientset{}
Expand Down Expand Up @@ -321,6 +324,7 @@ func TestHandle_unrelatedPodAlreadyExists(t *testing.T) {

config := deploytest.OkDeploymentConfig(1)
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.CreationTimestamp = metav1.Now()
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)

client := &fake.Clientset{}
Expand Down Expand Up @@ -362,6 +366,7 @@ func TestHandle_unrelatedPodAlreadyExistsTestScaled(t *testing.T) {
config := deploytest.TestDeploymentConfig(deploytest.OkDeploymentConfig(1))
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
deployment.CreationTimestamp = metav1.Now()
one := int32(1)
deployment.Spec.Replicas = &one

Expand Down Expand Up @@ -433,6 +438,7 @@ func TestHandle_noop(t *testing.T) {

deployment, _ := deployutil.MakeDeploymentV1(deploytest.OkDeploymentConfig(1), codec)
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(test.deploymentPhase)
deployment.CreationTimestamp = metav1.Now()

controller := okDeploymentController(client, deployment, nil, true, test.podPhase)

Expand Down Expand Up @@ -476,6 +482,7 @@ func TestHandle_failedTest(t *testing.T) {
// Verify successful cleanup
config := deploytest.TestDeploymentConfig(deploytest.OkDeploymentConfig(1))
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.CreationTimestamp = metav1.Now()
one := int32(1)
deployment.Spec.Replicas = &one
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusRunning)
Expand Down Expand Up @@ -519,6 +526,7 @@ func TestHandle_cleanupPodOk(t *testing.T) {
config := deploytest.OkDeploymentConfig(1)
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusComplete)
deployment.CreationTimestamp = metav1.Now()

controller := okDeploymentController(client, deployment, hookPods, true, v1.PodSucceeded)
hookPods = append(hookPods, deployment.Name)
Expand Down Expand Up @@ -562,6 +570,7 @@ func TestHandle_cleanupPodOkTest(t *testing.T) {
// Verify successful cleanup
config := deploytest.TestDeploymentConfig(deploytest.OkDeploymentConfig(1))
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.CreationTimestamp = metav1.Now()
one := int32(1)
deployment.Spec.Replicas = &one
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusRunning)
Expand Down Expand Up @@ -606,6 +615,7 @@ func TestHandle_cleanupPodNoop(t *testing.T) {
// Verify no-op
config := deploytest.OkDeploymentConfig(1)
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.CreationTimestamp = metav1.Now()
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusComplete)

controller := okDeploymentController(client, deployment, nil, true, v1.PodSucceeded)
Expand Down Expand Up @@ -637,6 +647,7 @@ func TestHandle_cleanupPodFail(t *testing.T) {
// Verify error
config := deploytest.OkDeploymentConfig(1)
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.CreationTimestamp = metav1.Now()
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusComplete)

controller := okDeploymentController(client, deployment, nil, true, v1.PodSucceeded)
Expand Down Expand Up @@ -667,6 +678,7 @@ func TestHandle_cancelNew(t *testing.T) {
})

deployment, _ := deployutil.MakeDeploymentV1(deploytest.OkDeploymentConfig(1), codec)
deployment.CreationTimestamp = metav1.Now()
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
deployment.Annotations[deployapi.DeploymentCancelledAnnotation] = deployapi.DeploymentCancelledAnnotationValue

Expand All @@ -688,6 +700,7 @@ func TestHandle_cleanupNewWithDeployers(t *testing.T) {
deletedDeployer := false

deployment, _ := deployutil.MakeDeploymentV1(deploytest.OkDeploymentConfig(1), codec)
deployment.CreationTimestamp = metav1.Now()
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
deployment.Annotations[deployapi.DeploymentCancelledAnnotation] = deployapi.DeploymentCancelledAnnotationValue

Expand Down Expand Up @@ -782,6 +795,7 @@ func TestHandle_cleanupPostNew(t *testing.T) {
})

deployment, _ := deployutil.MakeDeploymentV1(deploytest.OkDeploymentConfig(1), codec)
deployment.CreationTimestamp = metav1.Now()
deployment.Annotations[deployapi.DeploymentCancelledAnnotation] = deployapi.DeploymentCancelledAnnotationValue
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(test.deploymentPhase)

Expand Down Expand Up @@ -845,6 +859,7 @@ func TestHandle_deployerPodDisappeared(t *testing.T) {
continue
}
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(test.phase)
deployment.CreationTimestamp = metav1.Now()
updatedDeployment = deployment

controller := okDeploymentController(client, nil, nil, true, v1.PodUnknown)
Expand Down Expand Up @@ -980,6 +995,7 @@ func TestHandle_transitionFromDeployer(t *testing.T) {

deployment, _ := deployutil.MakeDeploymentV1(deploytest.OkDeploymentConfig(1), codec)
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(test.deploymentPhase)
deployment.CreationTimestamp = metav1.Now()

controller := okDeploymentController(client, deployment, nil, true, test.podPhase)

Expand Down
42 changes: 42 additions & 0 deletions pkg/apps/util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,12 @@ func IsTerminatedDeployment(deployment runtime.Object) bool {
return IsCompleteDeployment(deployment) || IsFailedDeployment(deployment)
}

// IsNewDeployment returns true if the passed deployment is in new state.
func IsNewDeployment(deployment runtime.Object) bool {
current := DeploymentStatusFor(deployment)
return current == deployapi.DeploymentStatusNew
}

// IsCompleteDeployment returns true if the passed deployment is in state complete.
func IsCompleteDeployment(deployment runtime.Object) bool {
current := DeploymentStatusFor(deployment)
Expand Down Expand Up @@ -782,6 +788,42 @@ func DeploymentsForCleanup(configuration *deployapi.DeploymentConfig, deployment
return relevantDeployments
}

// GetTimeoutSecondsForStrategy returns the timeout in seconds defined in the
// deployment config strategy.
func GetTimeoutSecondsForStrategy(config *deployapi.DeploymentConfig) int64 {
var timeoutSeconds int64
switch config.Spec.Strategy.Type {
case deployapi.DeploymentStrategyTypeRolling:
timeoutSeconds = deployapi.DefaultRollingTimeoutSeconds
if t := config.Spec.Strategy.RollingParams.TimeoutSeconds; t != nil {
timeoutSeconds = *t
}
case deployapi.DeploymentStrategyTypeRecreate:
timeoutSeconds = deployapi.DefaultRecreateTimeoutSeconds
if t := config.Spec.Strategy.RecreateParams.TimeoutSeconds; t != nil {
timeoutSeconds = *t
}
case deployapi.DeploymentStrategyTypeCustom:
timeoutSeconds = deployapi.DefaultRecreateTimeoutSeconds
}
return timeoutSeconds
}

// RolloutExceededTimeoutSeconds returns true if the current deployment exceeded
// the timeoutSeconds defined for its strategy.
// Note that this is different than activeDeadlineSeconds which is the timeout
// set for the deployer pod. In some cases, the deployer pod cannot be created
// (like quota, etc...). In that case deployer controller use this function to
// measure if the created deployment (RC) exceeded the timeout.
func RolloutExceededTimeoutSeconds(config *deployapi.DeploymentConfig, latestRC *v1.ReplicationController) bool {
timeoutSeconds := GetTimeoutSecondsForStrategy(config)
// If user set the timeoutSeconds to 0, we assume there should be no timeout.
if timeoutSeconds <= 0 {
return false
}
return int64(time.Since(latestRC.CreationTimestamp.Time).Seconds()) > timeoutSeconds
}

// WaitForRunningDeployerPod waits a given period of time until the deployer pod
// for given replication controller is not running.
func WaitForRunningDeployerPod(podClient kcoreclient.PodsGetter, rc *api.ReplicationController, timeout time.Duration) error {
Expand Down
108 changes: 108 additions & 0 deletions pkg/apps/util/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -586,3 +586,111 @@ func TestRemoveCondition(t *testing.T) {
}
}
}

func TestRolloutExceededTimeoutSeconds(t *testing.T) {
now := time.Now()
tests := []struct {
name string
config *deployapi.DeploymentConfig
deploymentCreationTime time.Time
expectTimeout bool
}{
// Recreate strategy with deployment running for 20s (exceeding 10s timeout)
{
name: "recreate timeout",
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
config := deploytest.OkDeploymentConfig(1)
config.Spec.Strategy.RecreateParams.TimeoutSeconds = &timeoutSeconds
return config
}(int64(10)),
deploymentCreationTime: now.Add(-20 * time.Second),
expectTimeout: true,
},
// Recreate strategy with no timeout
{
name: "recreate no timeout",
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
config := deploytest.OkDeploymentConfig(1)
config.Spec.Strategy.RecreateParams.TimeoutSeconds = &timeoutSeconds
return config
}(int64(0)),
deploymentCreationTime: now.Add(-700 * time.Second),
expectTimeout: false,
},

// Rolling strategy with deployment running for 20s (exceeding 10s timeout)
{
name: "rolling timeout",
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
config := deploytest.OkDeploymentConfig(1)
config.Spec.Strategy = deploytest.OkRollingStrategy()
config.Spec.Strategy.RollingParams.TimeoutSeconds = &timeoutSeconds
return config
}(int64(10)),
deploymentCreationTime: now.Add(-20 * time.Second),
expectTimeout: true,
},
// Rolling strategy with deployment with no timeout specified.
{
name: "rolling using default timeout",
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
config := deploytest.OkDeploymentConfig(1)
config.Spec.Strategy = deploytest.OkRollingStrategy()
config.Spec.Strategy.RollingParams.TimeoutSeconds = nil
return config
}(0),
deploymentCreationTime: now.Add(-20 * time.Second),
expectTimeout: false,
},
// Recreate strategy with deployment with no timeout specified.
{
name: "recreate using default timeout",
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
config := deploytest.OkDeploymentConfig(1)
config.Spec.Strategy.RecreateParams.TimeoutSeconds = nil
return config
}(0),
deploymentCreationTime: now.Add(-20 * time.Second),
expectTimeout: false,
},
// Custom strategy with deployment with no timeout specified.
{
name: "custom using default timeout",
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
config := deploytest.OkDeploymentConfig(1)
config.Spec.Strategy = deploytest.OkCustomStrategy()
return config
}(0),
deploymentCreationTime: now.Add(-20 * time.Second),
expectTimeout: false,
},
// Custom strategy use default timeout exceeding it.
{
name: "custom using default timeout timing out",
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
config := deploytest.OkDeploymentConfig(1)
config.Spec.Strategy = deploytest.OkCustomStrategy()
return config
}(0),
deploymentCreationTime: now.Add(-700 * time.Second),
expectTimeout: true,
},
}

for _, tc := range tests {
config := tc.config
deployment, err := MakeDeploymentV1(config, kapi.Codecs.LegacyCodec(deployv1.SchemeGroupVersion))
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
deployment.ObjectMeta.CreationTimestamp = metav1.Time{Time: tc.deploymentCreationTime}
gotTimeout := RolloutExceededTimeoutSeconds(config, deployment)
if tc.expectTimeout && !gotTimeout {
t.Errorf("[%s]: expected timeout, but got no timeout", tc.name)
}
if !tc.expectTimeout && gotTimeout {
t.Errorf("[%s]: expected no timeout, but got timeout", tc.name)
}

}
}

0 comments on commit 81ab37e

Please sign in to comment.