Skip to content

Commit

Permalink
fix(timeouts): some tasks inherit stage timeout override
Browse files Browse the repository at this point in the history
  • Loading branch information
emjburns committed Sep 22, 2017
1 parent d7b8150 commit 5567824
Show file tree
Hide file tree
Showing 15 changed files with 94 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
package com.netflix.spinnaker.orca.bakery.tasks

import com.netflix.spinnaker.orca.ExecutionStatus
import com.netflix.spinnaker.orca.RetryableTask
import com.netflix.spinnaker.orca.OverridableTimeoutRetryableTask
import com.netflix.spinnaker.orca.TaskResult
import com.netflix.spinnaker.orca.bakery.api.BakeStatus
import com.netflix.spinnaker.orca.bakery.api.BakeryService
Expand All @@ -31,7 +31,7 @@ import retrofit.RetrofitError
@Slf4j
@Component
@CompileStatic
class MonitorBakeTask implements RetryableTask {
class MonitorBakeTask implements OverridableTimeoutRetryableTask {

long backoffPeriod = 30000
long timeout = 3600000 // 1hr
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ package com.netflix.spinnaker.orca.clouddriver.tasks.cluster

import com.netflix.frigga.Names
import com.netflix.spinnaker.orca.ExecutionStatus
import com.netflix.spinnaker.orca.RetryableTask
import com.netflix.spinnaker.orca.OverridableTimeoutRetryableTask
import com.netflix.spinnaker.orca.TaskResult
import com.netflix.spinnaker.orca.clouddriver.pipeline.servergroup.support.TargetServerGroup
import com.netflix.spinnaker.orca.clouddriver.tasks.AbstractCloudProviderAwareTask
Expand All @@ -30,7 +30,7 @@ import groovy.util.logging.Slf4j
import org.springframework.beans.factory.annotation.Autowired

@Slf4j
abstract class AbstractWaitForClusterWideClouddriverTask extends AbstractCloudProviderAwareTask implements RetryableTask {
abstract class AbstractWaitForClusterWideClouddriverTask extends AbstractCloudProviderAwareTask implements OverridableTimeoutRetryableTask {
@Override
public long getBackoffPeriod() { 10000 }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import java.util.concurrent.TimeUnit
import com.fasterxml.jackson.databind.ObjectMapper
import com.netflix.frigga.Names
import com.netflix.spinnaker.orca.ExecutionStatus
import com.netflix.spinnaker.orca.RetryableTask
import com.netflix.spinnaker.orca.OverridableTimeoutRetryableTask
import com.netflix.spinnaker.orca.TaskResult
import com.netflix.spinnaker.orca.clouddriver.OortService
import com.netflix.spinnaker.orca.clouddriver.tasks.AbstractCloudProviderAwareTask
Expand All @@ -34,7 +34,7 @@ import org.springframework.beans.factory.annotation.Autowired
import retrofit.RetrofitError

@Slf4j
abstract class AbstractInstancesCheckTask extends AbstractCloudProviderAwareTask implements RetryableTask {
abstract class AbstractInstancesCheckTask extends AbstractCloudProviderAwareTask implements OverridableTimeoutRetryableTask {
long backoffPeriod = TimeUnit.SECONDS.toMillis(10)
long timeout = TimeUnit.HOURS.toMillis(2)
long serverGroupWaitTime = TimeUnit.MINUTES.toMillis(10)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@ package com.netflix.spinnaker.orca.clouddriver.tasks.instance

import com.fasterxml.jackson.databind.ObjectMapper
import com.netflix.spinnaker.orca.ExecutionStatus
import com.netflix.spinnaker.orca.RetryableTask
import com.netflix.spinnaker.orca.OverridableTimeoutRetryableTask
import com.netflix.spinnaker.orca.TaskResult
import com.netflix.spinnaker.orca.clouddriver.OortService
import com.netflix.spinnaker.orca.pipeline.model.Stage
import org.springframework.beans.factory.annotation.Autowired

abstract class AbstractWaitForInstanceHealthChangeTask implements RetryableTask {
abstract class AbstractWaitForInstanceHealthChangeTask implements OverridableTimeoutRetryableTask {
long backoffPeriod = 5000
long timeout = 3600000

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
package com.netflix.spinnaker.orca.clouddriver.tasks.instance

import com.netflix.spinnaker.orca.ExecutionStatus
import com.netflix.spinnaker.orca.RetryableTask
import com.netflix.spinnaker.orca.OverridableTimeoutRetryableTask
import com.netflix.spinnaker.orca.TaskResult
import com.netflix.spinnaker.orca.clouddriver.pipeline.instance.TerminatingInstance
import com.netflix.spinnaker.orca.clouddriver.pipeline.instance.TerminatingInstanceSupport
Expand All @@ -29,7 +29,7 @@ import org.springframework.stereotype.Component

@Slf4j
@Component
class WaitForTerminatedInstancesTask extends AbstractCloudProviderAwareTask implements RetryableTask {
class WaitForTerminatedInstancesTask extends AbstractCloudProviderAwareTask implements OverridableTimeoutRetryableTask {

long backoffPeriod = 10000
long timeout = 3600000
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* Copyright 2017 Netflix, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.netflix.spinnaker.orca;

/**
* A retryable task whose timeout is taken from the top level stage
* if that value has been overridden.
*
* These are typically wait/monitor stages
*/
public interface OverridableTimeoutRetryableTask extends RetryableTask {

}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ package com.netflix.spinnaker.orca.front50.tasks

import java.util.concurrent.TimeUnit
import com.netflix.spinnaker.orca.ExecutionStatus
import com.netflix.spinnaker.orca.RetryableTask
import com.netflix.spinnaker.orca.OverridableTimeoutRetryableTask
import com.netflix.spinnaker.orca.TaskResult
import com.netflix.spinnaker.orca.pipeline.model.Execution
import com.netflix.spinnaker.orca.pipeline.model.Stage
Expand All @@ -29,7 +29,7 @@ import org.springframework.stereotype.Component

@Slf4j
@Component
class MonitorPipelineTask implements RetryableTask {
class MonitorPipelineTask implements OverridableTimeoutRetryableTask {

@Autowired
ExecutionRepository executionRepository
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ package com.netflix.spinnaker.orca.igor.tasks

import java.util.concurrent.TimeUnit
import com.netflix.spinnaker.orca.ExecutionStatus
import com.netflix.spinnaker.orca.RetryableTask
import com.netflix.spinnaker.orca.OverridableTimeoutRetryableTask
import com.netflix.spinnaker.orca.TaskResult
import com.netflix.spinnaker.orca.igor.BuildArtifactFilter
import com.netflix.spinnaker.orca.igor.BuildService
Expand All @@ -30,7 +30,7 @@ import retrofit.RetrofitError

@Slf4j
@Component
class MonitorJenkinsJobTask implements RetryableTask {
class MonitorJenkinsJobTask implements OverridableTimeoutRetryableTask {

long backoffPeriod = 10000
long timeout = TimeUnit.HOURS.toMillis(2)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ package com.netflix.spinnaker.orca.igor.tasks

import java.util.concurrent.TimeUnit
import com.netflix.spinnaker.orca.ExecutionStatus
import com.netflix.spinnaker.orca.RetryableTask
import com.netflix.spinnaker.orca.OverridableTimeoutRetryableTask
import com.netflix.spinnaker.orca.TaskResult
import com.netflix.spinnaker.orca.igor.BuildService
import com.netflix.spinnaker.orca.pipeline.model.Stage
Expand All @@ -29,7 +29,7 @@ import retrofit.RetrofitError

@Slf4j
@Component
class MonitorQueuedJenkinsJobTask implements RetryableTask {
class MonitorQueuedJenkinsJobTask implements OverridableTimeoutRetryableTask {

long backoffPeriod = 10000
long timeout = TimeUnit.HOURS.toMillis(2)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
package com.netflix.spinnaker.orca.kayenta.tasks

import com.netflix.spinnaker.orca.ExecutionStatus
import com.netflix.spinnaker.orca.RetryableTask
import com.netflix.spinnaker.orca.OverridableTimeoutRetryableTask
import com.netflix.spinnaker.orca.TaskResult
import com.netflix.spinnaker.orca.kayenta.KayentaService
import com.netflix.spinnaker.orca.pipeline.model.Pipeline
Expand All @@ -32,7 +32,7 @@ import java.util.concurrent.TimeUnit

@Slf4j
@Component
class MonitorCanaryTask implements RetryableTask {
class MonitorCanaryTask implements OverridableTimeoutRetryableTask {

long backoffPeriod = 1000
long timeout = TimeUnit.HOURS.toMillis(12)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ package com.netflix.spinnaker.orca.mine.tasks

import java.util.concurrent.TimeUnit
import com.netflix.spinnaker.orca.ExecutionStatus
import com.netflix.spinnaker.orca.RetryableTask
import com.netflix.spinnaker.orca.OverridableTimeoutRetryableTask
import com.netflix.spinnaker.orca.TaskResult
import com.netflix.spinnaker.orca.clouddriver.tasks.AbstractCloudProviderAwareTask
import com.netflix.spinnaker.orca.mine.MineService
Expand All @@ -30,7 +30,7 @@ import retrofit.RetrofitError

@Component
@Slf4j
class MonitorAcaTaskTask extends AbstractCloudProviderAwareTask implements RetryableTask {
class MonitorAcaTaskTask extends AbstractCloudProviderAwareTask implements OverridableTimeoutRetryableTask {
long backoffPeriod = 10000
long timeout = TimeUnit.DAYS.toMillis(2)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ package com.netflix.spinnaker.orca.mine.tasks

import java.util.concurrent.TimeUnit
import com.netflix.spinnaker.orca.ExecutionStatus
import com.netflix.spinnaker.orca.RetryableTask
import com.netflix.spinnaker.orca.OverridableTimeoutRetryableTask
import com.netflix.spinnaker.orca.TaskResult
import com.netflix.spinnaker.orca.clouddriver.KatoService
import com.netflix.spinnaker.orca.clouddriver.tasks.AbstractCloudProviderAwareTask
Expand All @@ -31,7 +31,7 @@ import retrofit.RetrofitError

@Component
@Slf4j
class MonitorCanaryTask extends AbstractCloudProviderAwareTask implements RetryableTask {
class MonitorCanaryTask extends AbstractCloudProviderAwareTask implements OverridableTimeoutRetryableTask {
long backoffPeriod = TimeUnit.MINUTES.toMillis(1)
long timeout = TimeUnit.DAYS.toMillis(2)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,8 @@ package com.netflix.spinnaker.orca.q.handler

import com.netflix.spectator.api.Registry
import com.netflix.spectator.api.histogram.BucketCounter
import com.netflix.spinnaker.orca.ExecutionStatus
import com.netflix.spinnaker.orca.*
import com.netflix.spinnaker.orca.ExecutionStatus.*
import com.netflix.spinnaker.orca.RetryableTask
import com.netflix.spinnaker.orca.Task
import com.netflix.spinnaker.orca.TaskResult
import com.netflix.spinnaker.orca.exceptions.ExceptionHandler
import com.netflix.spinnaker.orca.exceptions.TimeoutException
import com.netflix.spinnaker.orca.pipeline.model.Execution
Expand Down Expand Up @@ -179,7 +176,13 @@ class RunTaskHandler(
val pausedDuration = stage.getExecution().pausedDurationRelativeTo(startTime)
val elapsedTime = Duration.between(startTime, clock.instant())
val throttleTime = message.getAttribute<TotalThrottleTimeAttribute>()?.totalThrottleTimeMs ?: 0
if (elapsedTime.minus(pausedDuration).minusMillis(throttleTime) > timeout.toDuration()) {
val actualTimeout = (
if (this is OverridableTimeoutRetryableTask && stage.getTopLevelTimeout().isPresent)
stage.getTopLevelTimeout().get().toDuration()
else
timeout.toDuration()
)
if (elapsedTime.minus(pausedDuration).minusMillis(throttleTime) > actualTimeout) {
val durationString = formatTimeout(elapsedTime.toMillis())
val msg = StringBuilder("${javaClass.simpleName} of stage ${stage.getName()} timed out after $durationString. ")
msg.append("pausedDuration: ${formatTimeout(pausedDuration.toMillis())}, ")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@

package com.netflix.spinnaker.orca.q

import com.netflix.spinnaker.orca.OverridableTimeoutRetryableTask
import com.netflix.spinnaker.orca.RetryableTask
import com.netflix.spinnaker.orca.Task

interface DummyTask : RetryableTask
interface InvalidTask : Task
interface DummyTimeoutOverrideTask : OverridableTimeoutRetryableTask
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ object RunTaskHandlerTest : SubjectSpek<RunTaskHandler>({
val repository: ExecutionRepository = mock()
val stageNavigator: StageNavigator = mock()
val task: DummyTask = mock()
val timeoutOverrideTask: DummyTimeoutOverrideTask = mock()
val exceptionHandler: ExceptionHandler = mock()
val clock = fixedClock()
val contextParameterProcessor = ContextParameterProcessor()
Expand All @@ -59,14 +60,14 @@ object RunTaskHandlerTest : SubjectSpek<RunTaskHandler>({
repository,
stageNavigator,
contextParameterProcessor,
listOf(task),
listOf(task, timeoutOverrideTask),
clock,
listOf(exceptionHandler),
NoopRegistry()
)
}

fun resetMocks() = reset(queue, repository, task, exceptionHandler)
fun resetMocks() = reset(queue, repository, task, timeoutOverrideTask, exceptionHandler)

describe("running a task") {

Expand Down Expand Up @@ -743,7 +744,7 @@ object RunTaskHandlerTest : SubjectSpek<RunTaskHandler>({

timeoutOverride.toMillis().let { listOf(it.toInt(), it, it.toDouble()) }.forEach { stageTimeoutMs ->
and("the override is a ${stageTimeoutMs.javaClass.simpleName}") {
and("the task is between the default and overridden duration") {
and("the stage is between the default and overridden duration") {
val pipeline = pipeline {
stage {
type = "whatever"
Expand Down Expand Up @@ -925,6 +926,38 @@ object RunTaskHandlerTest : SubjectSpek<RunTaskHandler>({
}
}
}

and("the task is an overridabletimeout task that shouldn't time out") {
val pipeline = pipeline {
stage {
type = "whatever"
context["stageTimeoutMs"] = stageTimeoutMs
startTime = clock.instant().minusMillis(timeout.toMillis() + 1).toEpochMilli() //started 5.1 minutes ago
task {
id = "1"
implementingClass = DummyTimeoutOverrideTask::class.jvmName
status = RUNNING
startTime = clock.instant().minusMillis(timeout.toMillis() + 1).toEpochMilli() //started 5.1 minutes ago
}
}
}
val message = RunTask(Pipeline::class.java, pipeline.id, "foo", pipeline.stages.first().id, "1", DummyTimeoutOverrideTask::class.java)

beforeGroup {
whenever(repository.retrievePipeline(message.executionId)) doReturn pipeline
whenever(timeoutOverrideTask.timeout) doReturn timeout.toMillis()
}

afterGroup(::resetMocks)

on("receiving $message") {
subject.handle(message)
}

it("executes the task") {
verify(timeoutOverrideTask).execute(any())
}
}
}
}
}
Expand Down

0 comments on commit 5567824

Please sign in to comment.