From adbf4a93b8a98e3b4a30124ccb5ae4b2de57bf8b Mon Sep 17 00:00:00 2001 From: fenxiong Date: Tue, 22 Oct 2019 13:11:47 -0700 Subject: [PATCH] Issue a stop when start container failed with EOF error. When start container failed with EOF there's a chance that the container is started anyway, so issue a stop for this case. --- agent/dockerclient/dockerapi/errors.go | 4 +++- agent/engine/task_manager.go | 14 +++++++++++++- agent/engine/task_manager_test.go | 12 ++++++++++++ 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/agent/dockerclient/dockerapi/errors.go b/agent/dockerclient/dockerapi/errors.go index 88552d8b68b..9d0acc8f038 100644 --- a/agent/dockerclient/dockerapi/errors.go +++ b/agent/dockerclient/dockerapi/errors.go @@ -24,6 +24,8 @@ const ( DockerTimeoutErrorName = "DockerTimeoutError" // CannotInspectContainerErrorName is the name of container inspect error. CannotInspectContainerErrorName = "CannotInspectContainerError" + // CannotStartContainerErrorName is the name of container start error. + CannotStartContainerErrorName = "CannotStartContainerError" // CannotDescribeContainerErrorName is the name of describe container error. CannotDescribeContainerErrorName = "CannotDescribeContainerError" ) @@ -201,7 +203,7 @@ func (err CannotStartContainerError) Error() string { // ErrorName returns name of the CannotStartContainerError func (err CannotStartContainerError) ErrorName() string { - return "CannotStartContainerError" + return CannotStartContainerErrorName } // CannotInspectContainerError indicates any error when trying to inspect a container diff --git a/agent/engine/task_manager.go b/agent/engine/task_manager.go index 141432872f5..39a3a961f6c 100644 --- a/agent/engine/task_manager.go +++ b/agent/engine/task_manager.go @@ -15,6 +15,8 @@ package engine import ( "context" + "io" + "strings" "sync" "time" @@ -666,10 +668,20 @@ func (mtask *managedTask) handleEventError(containerChange dockerContainerChange container.SetKnownStatus(currentKnownStatus) container.SetDesiredStatus(apicontainerstatus.ContainerStopped) errorName := event.Error.ErrorName() + errorStr := event.Error.Error() + shouldForceStop := false if errorName == dockerapi.DockerTimeoutErrorName || errorName == dockerapi.CannotInspectContainerErrorName { // If there's an error with inspecting the container or in case of timeout error, - // we'll also assume that the container has transitioned to RUNNING and issue + // we'll assume that the container has transitioned to RUNNING and issue // a stop. See #1043 for details + shouldForceStop = true + } else if errorName == dockerapi.CannotStartContainerErrorName && strings.HasSuffix(errorStr, io.EOF.Error()) { + // If we get an EOF error from Docker when starting the container, we don't really know whether the + // container is started anyway. So issuing a stop here as well. See #1708. + shouldForceStop = true + } + + if shouldForceStop { seelog.Warnf("Managed task [%s]: forcing container [%s] to stop", mtask.Arn, container.Name) go mtask.engine.transitionContainer(mtask.Task, container, apicontainerstatus.ContainerStopped) diff --git a/agent/engine/task_manager_test.go b/agent/engine/task_manager_test.go index 514d5781c28..6c2c2bb374c 100644 --- a/agent/engine/task_manager_test.go +++ b/agent/engine/task_manager_test.go @@ -139,6 +139,18 @@ func TestHandleEventError(t *testing.T) { ExpectedContainerDesiredStatusStopped: true, ExpectedOK: false, }, + { + Name: "Start failed with EOF error", + EventStatus: apicontainerstatus.ContainerRunning, + CurrentContainerKnownStatus: apicontainerstatus.ContainerCreated, + Error: &dockerapi.CannotStartContainerError{ + FromError: errors.New("error during connect: Post http://%2Fvar%2Frun%2Fdocker.sock/v1.19/containers/containerid/start: EOF"), + }, + ExpectedContainerKnownStatusSet: true, + ExpectedContainerKnownStatus: apicontainerstatus.ContainerCreated, + ExpectedContainerDesiredStatusStopped: true, + ExpectedOK: false, + }, { Name: "Inspect failed during create", EventStatus: apicontainerstatus.ContainerCreated,