Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retry ECS RunTask on AGENT error which can happen as its normal operation #1723

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import com.amazonaws.services.ecs.model.DescribeTaskDefinitionResult;
import com.amazonaws.services.ecs.model.DescribeTasksRequest;
import com.amazonaws.services.ecs.model.DescribeTasksResult;
import com.amazonaws.services.ecs.model.Failure;
import com.amazonaws.services.ecs.model.InvalidParameterException;
import com.amazonaws.services.ecs.model.ListTagsForResourceRequest;
import com.amazonaws.services.ecs.model.ListTagsForResourceResult;
Expand Down Expand Up @@ -55,13 +56,14 @@ public class DefaultEcsClient
private final long rateLimitMaxJitterSecs; // 0.0 <= jitterSecs < rateLimitBaseJitterSecs
private final long rateLimitMaxBaseWaitSecs; // Max baseWaitSecs.
private final long rateLimitBaseIncrementalSecs;
private final int retryDelayOnAgentErrorSecs;

protected DefaultEcsClient(
final EcsClientConfig config,
final AmazonECSClient client,
final AWSLogs logs)
{
this(config, client, logs, 60, 10, 50, 10);
this(config, client, logs, 60, 10, 50, 10, 3);
}

protected DefaultEcsClient(
Expand All @@ -71,7 +73,8 @@ protected DefaultEcsClient(
final int rateLimitMaxRetry,
final long rateLimitMaxJitterSecs,
final long rateLimitMaxBaseWaitSecs,
final long rateLimitBaseIncrementalSecs
final long rateLimitBaseIncrementalSecs,
final int retryDelayOnAgentErrorSecs
)
{
this.config = config;
Expand All @@ -81,6 +84,7 @@ protected DefaultEcsClient(
this.rateLimitMaxJitterSecs = rateLimitMaxJitterSecs;
this.rateLimitMaxBaseWaitSecs = rateLimitMaxBaseWaitSecs;
this.rateLimitBaseIncrementalSecs = rateLimitBaseIncrementalSecs;
this.retryDelayOnAgentErrorSecs = retryDelayOnAgentErrorSecs;
}

@Override
Expand All @@ -89,15 +93,7 @@ public EcsClientConfig getConfig()
return config;
}

/**
* Run task on AWS ECS.
* https://docs.aws.amazon.com/cli/latest/reference/ecs/run-task.html
*
* @param request
* @return
*/
@Override
public RunTaskResult submitTask(final RunTaskRequest request)
private RunTaskResult runTask(final RunTaskRequest request)
throws ConfigException
{
try {
Expand All @@ -117,6 +113,37 @@ public RunTaskResult submitTask(final RunTaskRequest request)
}
}

private boolean isAgentError(final RunTaskResult result)
{
for (final Failure f : result.getFailures()) {
if (f.getReason().equals("AGENT")) {
return true;
}
}
return false;
}

/**
* Run task on AWS ECS.
* Retry once on AGENT error, which can happen as a part of the agent's normal operation.
* https://docs.aws.amazon.com/cli/latest/reference/ecs/run-task.html
* https://docs.aws.amazon.com/AmazonECS/latest/developerguide/api_failures_messages.html
*
* @param request
* @return
*/
@Override
public RunTaskResult submitTask(final RunTaskRequest request)
{
RunTaskResult result = runTask(request);
if (isAgentError(result)) {
logger.debug("Submitting a task failed with AGENT error. Will be retried in {} sec.", String.valueOf(retryDelayOnAgentErrorSecs));
waitWithRandomJitter(retryDelayOnAgentErrorSecs, 0);
result = runTask(request);
}
return result;
}

@Override
public TaskDefinition getTaskDefinition(final String taskDefinitionArn)
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
package io.digdag.standards.command.ecs;

import com.amazonaws.services.ecs.AmazonECSClient;
import com.amazonaws.services.ecs.model.Failure;
import com.amazonaws.services.ecs.model.ListTagsForResourceRequest;
import com.amazonaws.services.ecs.model.ListTagsForResourceResult;
import com.amazonaws.services.ecs.model.ListTaskDefinitionsRequest;
import com.amazonaws.services.ecs.model.ListTaskDefinitionsResult;
import com.amazonaws.services.ecs.model.RunTaskRequest;
import com.amazonaws.services.ecs.model.RunTaskResult;
import com.amazonaws.services.ecs.model.Tag;
import com.amazonaws.services.ecs.model.TaskDefinition;
import com.amazonaws.services.logs.AWSLogs;
Expand Down Expand Up @@ -32,8 +35,10 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyLong;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.when;
Expand All @@ -57,7 +62,7 @@ public class DefaultEcsClientTest
@Before
public void setUp()
{
ecsClient = spy(new DefaultEcsClient(ecsClientConfig, rawEcsClient, logs, 10, 2, 1, 5));
ecsClient = spy(new DefaultEcsClient(ecsClientConfig, rawEcsClient, logs, 10, 2, 1, 5, 1));
}

@Test
Expand Down Expand Up @@ -134,6 +139,42 @@ public void testGetTaskDefinitionByTagsWithPredicateAndDidNotFindMatchedOne()
assertFalse(actual.isPresent());
}

@Test
public void testSubmitTask()
{
final RunTaskResult expectedResult = mock(RunTaskResult.class);
doReturn(expectedResult).when(rawEcsClient).runTask(any());

final RunTaskRequest request = mock(RunTaskRequest.class);
final RunTaskResult actualResult = ecsClient.submitTask(request);

assertThat(actualResult, is(expectedResult));
Mockito.verify(rawEcsClient, times(1)).runTask(any());
Mockito.verify(ecsClient, times(0)).waitWithRandomJitter(anyLong(), anyLong());
}

@Test
public void testSumbitTaskRetryOnceOnAgentError()
{
final RunTaskResult failureResult = mock(RunTaskResult.class);
final Failure failure = mock(Failure.class);
doReturn("AGENT").when(failure).getReason();
doReturn(Arrays.asList(failure)).when(failureResult).getFailures();

final RunTaskResult expectedResult = mock(RunTaskResult.class);

when(rawEcsClient.runTask(any()))
.thenReturn(failureResult)
.thenReturn(expectedResult);

final RunTaskRequest request = mock(RunTaskRequest.class);
final RunTaskResult actualResult = ecsClient.submitTask(request);

assertThat(actualResult, is(expectedResult));
Mockito.verify(rawEcsClient, times(2)).runTask(any());
Mockito.verify(ecsClient, times(1)).waitWithRandomJitter(anyLong(), anyLong());
}

@Test
public void testRetryOnRateLimit()
{
Expand Down