diff --git a/agent/engine/dependencygraph/graph.go b/agent/engine/dependencygraph/graph.go index 73276068170..7ef8d64f1ea 100644 --- a/agent/engine/dependencygraph/graph.go +++ b/agent/engine/dependencygraph/graph.go @@ -47,37 +47,42 @@ const ( var ( // CredentialsNotResolvedErr is the error where a container needs to wait for // credentials before it can process by agent - CredentialsNotResolvedErr = &DependencyError{err: errors.New("dependency graph: container execution credentials not available")} + CredentialsNotResolvedErr = &dependencyError{err: errors.New("dependency graph: container execution credentials not available")} // DependentContainerNotResolvedErr is the error where a dependent container isn't in expected state - DependentContainerNotResolvedErr = &DependencyError{err: errors.New("dependency graph: dependent container not in expected state")} + DependentContainerNotResolvedErr = &dependencyError{err: errors.New("dependency graph: dependent container not in expected state")} // ContainerPastDesiredStatusErr is the error where the container status is bigger than desired status - ContainerPastDesiredStatusErr = &DependencyError{err: errors.New("container transition: container status is equal or greater than desired status")} + ContainerPastDesiredStatusErr = &dependencyError{err: errors.New("container transition: container status is equal or greater than desired status")} // ErrContainerDependencyNotResolved is when the container's dependencies // on other containers are not resolved - ErrContainerDependencyNotResolved = &DependencyError{err: errors.New("dependency graph: dependency on containers not resolved")} + ErrContainerDependencyNotResolved = &dependencyError{err: errors.New("dependency graph: dependency on containers not resolved")} // ErrResourceDependencyNotResolved is when the container's dependencies // on task resources are not resolved - ErrResourceDependencyNotResolved = &DependencyError{err: errors.New("dependency graph: dependency on resources not resolved")} + ErrResourceDependencyNotResolved = &dependencyError{err: errors.New("dependency graph: dependency on resources not resolved")} // ResourcePastDesiredStatusErr is the error where the task resource known status is bigger than desired status - ResourcePastDesiredStatusErr = &DependencyError{err: errors.New("task resource transition: task resource status is equal or greater than desired status")} + ResourcePastDesiredStatusErr = &dependencyError{err: errors.New("task resource transition: task resource status is equal or greater than desired status")} // ErrContainerDependencyNotResolvedForResource is when the resource's dependencies // on other containers are not resolved - ErrContainerDependencyNotResolvedForResource = &DependencyError{err: errors.New("dependency graph: resource's dependency on containers not resolved")} + ErrContainerDependencyNotResolvedForResource = &dependencyError{err: errors.New("dependency graph: resource's dependency on containers not resolved")} ) // DependencyError represents an error of a container dependency. These errors can be either terminal or non-terminal. // Terminal dependency errors indicate that a given dependency can never be fulfilled (e.g. a container with a SUCCESS // dependency has stopped with an exit code other than zero). -type DependencyError struct { +type DependencyError interface { + Error() string + IsTerminal() bool +} + +type dependencyError struct { err error isTerminal bool } -func (de *DependencyError) Error() string { +func (de *dependencyError) Error() string { return de.err.Error() } -func (de *DependencyError) IsTerminal() bool { +func (de *dependencyError) IsTerminal() bool { return de.isTerminal } @@ -140,7 +145,7 @@ func DependenciesAreResolved(target *apicontainer.Container, id string, manager credentials.Manager, resources []taskresource.TaskResource, - cfg *config.Config) (*apicontainer.DependsOn, *DependencyError) { + cfg *config.Config) (*apicontainer.DependsOn, DependencyError) { if !executionCredentialsResolved(target, id, manager) { return nil, CredentialsNotResolvedErr } @@ -260,7 +265,7 @@ func verifyStatusResolvable(target *apicontainer.Container, existingContainers m // (map from name to container). The `resolves` function passed should return true if the named container is resolved. func verifyContainerOrderingStatusResolvable(target *apicontainer.Container, existingContainers map[string]*apicontainer.Container, - cfg *config.Config, resolves func(*apicontainer.Container, *apicontainer.Container, string, *config.Config) bool) (*apicontainer.DependsOn, *DependencyError) { + cfg *config.Config, resolves func(*apicontainer.Container, *apicontainer.Container, string, *config.Config) bool) (*apicontainer.DependsOn, DependencyError) { targetGoal := target.GetDesiredStatus() targetKnown := target.GetKnownStatus() @@ -276,7 +281,7 @@ func verifyContainerOrderingStatusResolvable(target *apicontainer.Container, exi for _, dependency := range targetDependencies { dependencyContainer, ok := existingContainers[dependency.ContainerName] if !ok { - return nil, &DependencyError{err: fmt.Errorf("dependency graph: container ordering dependency [%v] for target [%v] does not exist.", dependencyContainer, target), isTerminal: true} + return nil, &dependencyError{err: fmt.Errorf("dependency graph: container ordering dependency [%v] for target [%v] does not exist.", dependencyContainer, target), isTerminal: true} } // We want to check whether the dependency container has timed out only if target has not been created yet. @@ -284,7 +289,7 @@ func verifyContainerOrderingStatusResolvable(target *apicontainer.Container, exi // However, if dependency container has already stopped, then it cannot time out. if targetKnown < apicontainerstatus.ContainerCreated && dependencyContainer.GetKnownStatus() != apicontainerstatus.ContainerStopped { if hasDependencyTimedOut(dependencyContainer, dependency.Condition) { - return nil, &DependencyError{err: fmt.Errorf("dependency graph: container ordering dependency [%v] for target [%v] has timed out.", dependencyContainer, target), isTerminal: true} + return nil, &dependencyError{err: fmt.Errorf("dependency graph: container ordering dependency [%v] for target [%v] has timed out.", dependencyContainer, target), isTerminal: true} } } @@ -292,13 +297,13 @@ func verifyContainerOrderingStatusResolvable(target *apicontainer.Container, exi // can then never progress to its desired state when the dependency condition is 'SUCCESS' if dependency.Condition == successCondition && dependencyContainer.GetKnownStatus() == apicontainerstatus.ContainerStopped && !hasDependencyStoppedSuccessfully(dependencyContainer) { - return nil, &DependencyError{err: fmt.Errorf("dependency graph: failed to resolve container ordering dependency [%v] for target [%v] as dependency did not exit successfully.", dependencyContainer, target), isTerminal: true} + return nil, &dependencyError{err: fmt.Errorf("dependency graph: failed to resolve container ordering dependency [%v] for target [%v] as dependency did not exit successfully.", dependencyContainer, target), isTerminal: true} } // For any of the dependency conditions - START/COMPLETE/SUCCESS/HEALTHY, if the dependency container has // not started and will not start in the future, this dependency can never be resolved. if dependencyContainer.HasNotAndWillNotStart() { - return nil, &DependencyError{err: fmt.Errorf("dependency graph: failed to resolve container ordering dependency [%v] for target [%v] because dependency will never start", dependencyContainer, target), isTerminal: true} + return nil, &dependencyError{err: fmt.Errorf("dependency graph: failed to resolve container ordering dependency [%v] for target [%v] because dependency will never start", dependencyContainer, target), isTerminal: true} } if !resolves(target, dependencyContainer, dependency.Condition, cfg) { @@ -306,14 +311,14 @@ func verifyContainerOrderingStatusResolvable(target *apicontainer.Container, exi } } if blockedDependency != nil { - return blockedDependency, &DependencyError{err: fmt.Errorf("dependency graph: failed to resolve the container ordering dependency [%v] for target [%v]", blockedDependency, target)} + return blockedDependency, &dependencyError{err: fmt.Errorf("dependency graph: failed to resolve the container ordering dependency [%v] for target [%v]", blockedDependency, target)} } return nil, nil } func verifyTransitionDependenciesResolved(target *apicontainer.Container, existingContainers map[string]*apicontainer.Container, - existingResources map[string]taskresource.TaskResource) *DependencyError { + existingResources map[string]taskresource.TaskResource) DependencyError { if !verifyContainerDependenciesResolved(target, existingContainers) { return ErrContainerDependencyNotResolved @@ -487,7 +492,7 @@ func verifyContainerOrderingStatus(dependsOnContainer *apicontainer.Container) b dependsOnContainerDesiredStatus == dependsOnContainer.GetSteadyStateStatus() } -func verifyShutdownOrder(target *apicontainer.Container, existingContainers map[string]*apicontainer.Container) *DependencyError { +func verifyShutdownOrder(target *apicontainer.Container, existingContainers map[string]*apicontainer.Container) DependencyError { // We considered adding this to the task state, but this will be at most 45 loops, // so we err'd on the side of having less state. missingShutdownDependencies := []string{} @@ -509,7 +514,7 @@ func verifyShutdownOrder(target *apicontainer.Container, existingContainers map[ return nil } - return &DependencyError{err: fmt.Errorf("dependency graph: target %s needs other containers stopped before it can stop: [%s]", + return &dependencyError{err: fmt.Errorf("dependency graph: target %s needs other containers stopped before it can stop: [%s]", target.Name, strings.Join(missingShutdownDependencies, "], ["))} } diff --git a/agent/engine/task_manager.go b/agent/engine/task_manager.go index 55e781f6b01..7030caf5911 100644 --- a/agent/engine/task_manager.go +++ b/agent/engine/task_manager.go @@ -87,7 +87,7 @@ type containerTransition struct { nextState apicontainerstatus.ContainerStatus actionRequired bool blockedOn *apicontainer.DependsOn - reason *dependencygraph.DependencyError + reason dependencygraph.DependencyError } // resourceTransition defines the struct for a resource to transition. @@ -1142,7 +1142,7 @@ func (mtask *managedTask) startContainerTransitions(transitionFunc containerTran return anyCanTransition, blocked, transitions, reasons } -func (mtask *managedTask) handleTerminalDependencyError(container *apicontainer.Container, error *dependencygraph.DependencyError) { +func (mtask *managedTask) handleTerminalDependencyError(container *apicontainer.Container, error dependencygraph.DependencyError) { container.SetDesiredStatus(apicontainerstatus.ContainerStopped) exitCode := 143 container.SetKnownExitCode(&exitCode) @@ -1419,11 +1419,19 @@ func (mtask *managedTask) handleContainersUnableToTransitionState() { // are unable to start. Therefore, if there are essential containers that haven't started yet, we need to // stop the task since they are not going to start anymore. stopTask := false + containersRunning := 0 for _, c := range mtask.Containers { if c.IsEssential() && !c.IsKnownSteadyState() { stopTask = true break } + if c.IsKnownSteadyState() { + containersRunning++ + } + } + + if containersRunning == 0 { + stopTask = true } if stopTask { diff --git a/agent/engine/task_manager_test.go b/agent/engine/task_manager_test.go index 1ab79ddfcc2..9c92377da49 100644 --- a/agent/engine/task_manager_test.go +++ b/agent/engine/task_manager_test.go @@ -240,7 +240,7 @@ func TestContainerNextState(t *testing.T) { containerDesiredStatus apicontainerstatus.ContainerStatus expectedContainerStatus apicontainerstatus.ContainerStatus expectedTransitionActionable bool - reason error + reason dependencygraph.DependencyError }{ // NONE -> RUNNING transition is allowed and actionable, when desired is Running // The expected next status is Pulled @@ -928,8 +928,8 @@ func TestOnContainersUnableToTransitionStateForDesiredRunningTask(t *testing.T) } task.handleContainersUnableToTransitionState() - assert.Equal(t, task.GetDesiredStatus(), apitaskstatus.TaskStopped) - assert.Equal(t, task.Containers[0].GetDesiredStatus(), apicontainerstatus.ContainerStopped) + assert.Equal(t, apitaskstatus.TaskStopped, task.GetDesiredStatus()) + assert.Equal(t, apicontainerstatus.ContainerStopped, task.Containers[0].GetDesiredStatus()) } // TODO: Test progressContainers workflow