diff --git a/orca-clouddriver/src/main/groovy/com/netflix/spinnaker/orca/clouddriver/tasks/instance/WaitForUpInstancesTask.groovy b/orca-clouddriver/src/main/groovy/com/netflix/spinnaker/orca/clouddriver/tasks/instance/WaitForUpInstancesTask.groovy index 82ca958c08..b688b07cf2 100644 --- a/orca-clouddriver/src/main/groovy/com/netflix/spinnaker/orca/clouddriver/tasks/instance/WaitForUpInstancesTask.groovy +++ b/orca-clouddriver/src/main/groovy/com/netflix/spinnaker/orca/clouddriver/tasks/instance/WaitForUpInstancesTask.groovy @@ -20,8 +20,11 @@ import com.netflix.spinnaker.orca.clouddriver.utils.HealthHelper import com.netflix.spinnaker.orca.clouddriver.utils.HealthHelper.HealthCountSnapshot import com.netflix.spinnaker.orca.pipeline.model.Stage import groovy.util.logging.Slf4j +import org.slf4j.MDC import org.springframework.stereotype.Component +import java.util.concurrent.TimeUnit + @Component @Slf4j class WaitForUpInstancesTask extends AbstractWaitingForInstancesTask { @@ -182,6 +185,20 @@ class WaitForUpInstancesTask extends AbstractWaitingForInstancesTask { def cloudProvider = stage.context.cloudProvider + Optional taskStartTime = Optional.ofNullable(MDC.get("taskStartTime")); + if (taskStartTime.isPresent()) { + if (System.currentTimeMillis() - TimeUnit.MINUTES.toMillis(10) > Long.valueOf(taskStartTime.get())) { + // expectation is reconciliation has happened within 10 minutes and that the + // current server group capacity should be preferred + log.error( + "Short circuiting initial target capacity determination after 10 minutes (serverGroup: {}, executionId: {})", + "${cloudProvider}:${serverGroup.region}:${serverGroup.name}", + stage.execution.id + ) + return serverGroupCapacity + } + } + def initialTargetCapacity = getInitialTargetCapacity(stage, serverGroup) if (!initialTargetCapacity) { log.debug( @@ -192,7 +209,8 @@ class WaitForUpInstancesTask extends AbstractWaitingForInstancesTask { return serverGroupCapacity } - if (serverGroup.capacity.max == 0 && initialTargetCapacity.max != 0) { + if ((serverGroup.capacity.max == 0 && initialTargetCapacity.max != 0) || + (serverGroup.capacity.desired == 0 && initialTargetCapacity.desired > 0)) { log.info( "Overriding server group capacity (serverGroup: {}, initialTargetCapacity: {}, executionId: {})", "${cloudProvider}:${serverGroup.region}:${serverGroup.name}", diff --git a/orca-clouddriver/src/test/groovy/com/netflix/spinnaker/orca/clouddriver/tasks/instance/WaitForUpInstancesTaskSpec.groovy b/orca-clouddriver/src/test/groovy/com/netflix/spinnaker/orca/clouddriver/tasks/instance/WaitForUpInstancesTaskSpec.groovy index c7aba61744..fbb1923327 100644 --- a/orca-clouddriver/src/test/groovy/com/netflix/spinnaker/orca/clouddriver/tasks/instance/WaitForUpInstancesTaskSpec.groovy +++ b/orca-clouddriver/src/test/groovy/com/netflix/spinnaker/orca/clouddriver/tasks/instance/WaitForUpInstancesTaskSpec.groovy @@ -21,12 +21,15 @@ import com.netflix.spinnaker.orca.clouddriver.OortService import com.netflix.spinnaker.orca.jackson.OrcaObjectMapper import com.netflix.spinnaker.orca.pipeline.model.Execution import com.netflix.spinnaker.orca.pipeline.model.Stage +import org.slf4j.MDC import retrofit.client.Response import retrofit.mime.TypedString import spock.lang.Specification import spock.lang.Subject import spock.lang.Unroll +import java.util.concurrent.TimeUnit + import static com.netflix.spinnaker.orca.test.model.ExecutionBuilder.stage class WaitForUpInstancesTaskSpec extends Specification { @@ -40,6 +43,10 @@ class WaitForUpInstancesTaskSpec extends Specification { def mapper = OrcaObjectMapper.newInstance() + void cleanup() { + MDC.clear() + } + void "should check cluster to get server groups"() { given: def pipeline = Execution.newPipeline("orca") @@ -504,14 +511,19 @@ class WaitForUpInstancesTaskSpec extends Specification { def serverGroup = [name: "app-v001", region: "us-west-2", capacity: serverGroupCapacity] + and: + MDC.put("taskStartTime", taskStartTime.toString()) + expect: WaitForUpInstancesTask.getServerGroupCapacity(stage, serverGroup) == expectedServerGroupCapacity where: - katoTasks | serverGroupCapacity || expectedServerGroupCapacity - null | [min: 0, max: 0, desired: 0] || [min: 0, max: 0, desired: 0] - [[resultObjects: [[deployments: [deployment("app-v001", "us-west-2", 0, 1, 1)]]]]] | [min: 0, max: 0, desired: 0] || [min: 0, max: 1, desired: 1] // should take initial capacity b/c max = 0 - [[resultObjects: [[deployments: [deployment("app-v001", "us-west-2", 0, 1, 1)]]]]] | [min: 0, max: 2, desired: 2] || [min: 0, max: 2, desired: 2] // should take current capacity b/c max > 0 + katoTasks | taskStartTime | serverGroupCapacity || expectedServerGroupCapacity + null | startTime(0) | [min: 0, max: 0, desired: 0] || [min: 0, max: 0, desired: 0] + [[resultObjects: [[deployments: [deployment("app-v001", "us-west-2", 0, 1, 1)]]]]] | startTime(9) | [min: 0, max: 0, desired: 0] || [min: 0, max: 1, desired: 1] // should take initial capacity b/c max = 0 + [[resultObjects: [[deployments: [deployment("app-v001", "us-west-2", 0, 1, 1)]]]]] | startTime(9) | [min: 0, max: 400, desired: 0] || [min: 0, max: 1, desired: 1] // should take initial capacity b/c desired = 0 + [[resultObjects: [[deployments: [deployment("app-v001", "us-west-2", 0, 1, 1)]]]]] | startTime(9) | [min: 0, max: 2, desired: 2] || [min: 0, max: 2, desired: 2] // should take current capacity b/c max > 0 + [[resultObjects: [[deployments: [deployment("app-v001", "us-west-2", 0, 1, 1)]]]]] | startTime(11) | [min: 0, max: 0, desired: 0] || [min: 0, max: 0, desired: 0] // should take current capacity b/c timeout } static Map deployment(String serverGroupName, String location, int min, int max, int desired) { @@ -519,4 +531,8 @@ class WaitForUpInstancesTaskSpec extends Specification { serverGroupName: serverGroupName, location: location, capacity: [min: min, max: max, desired: desired] ] } + + static Long startTime(int minutesOld) { + return System.currentTimeMillis() - TimeUnit.MINUTES.toMillis(minutesOld) + } }