Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement transition function for MANIFEST_PULLED state #4152

Merged
merged 4 commits into from
Apr 29, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ additional details on each available environment variable.
| `ECS_APPARMOR_CAPABLE` | `true` | Whether AppArmor is available on the container instance. | `false` | `false` |
| `ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION` | 10m | Default time to wait to delete containers for a stopped task (see also `ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION_JITTER`). If set to less than 1 second, the value is ignored. | 3h | 3h |
| `ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION_JITTER` | 1h | Jitter value for the task engine cleanup wait duration. When specified, the actual cleanup wait duration time for each task will be the duration specified in `ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION` plus a random duration between 0 and the jitter duration. | blank | blank |
| `ECS_MANIFEST_PULL_TIMEOUT` | 10m | Timeout before giving up on fetching image manifest for a container image. | 1m | 1m |
| `ECS_CONTAINER_STOP_TIMEOUT` | 10m | Instance scoped configuration for time to wait for the container to exit normally before being forcibly killed. | 30s | 30s |
| `ECS_CONTAINER_START_TIMEOUT` | 10m | Timeout before giving up on starting a container. | 3m | 8m |
| `ECS_CONTAINER_CREATE_TIMEOUT` | 10m | Timeout before giving up on creating a container. Minimum value is 1m. If user sets a value below minimum it will be set to min. | 4m | 4m |
Expand Down
3 changes: 3 additions & 0 deletions agent/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,7 @@ func environmentConfig() (Config, error) {
DeleteNonECSImagesEnabled: parseBooleanDefaultFalseConfig("ECS_ENABLE_UNTRACKED_IMAGE_CLEANUP"),
TaskCPUMemLimit: parseBooleanDefaultTrueConfig("ECS_ENABLE_TASK_CPU_MEM_LIMIT"),
DockerStopTimeout: parseDockerStopTimeout(),
ManifestPullTimeout: parseManifestPullTimeout(),
ContainerStartTimeout: parseContainerStartTimeout(),
ContainerCreateTimeout: parseContainerCreateTimeout(),
DependentContainersPullUpfront: parseBooleanDefaultFalseConfig("ECS_PULL_DEPENDENT_CONTAINERS_UPFRONT"),
Expand Down Expand Up @@ -625,6 +626,7 @@ func (cfg *Config) String() string {
"PollingMetricsWaitDuration: %v, "+
"ReservedMem: %v, "+
"TaskCleanupWaitDuration: %v, "+
"ManifestPullTimeout: %v, "+
"DockerStopTimeout: %v, "+
"ContainerStartTimeout: %v, "+
"ContainerCreateTimeout: %v, "+
Expand All @@ -644,6 +646,7 @@ func (cfg *Config) String() string {
cfg.PollingMetricsWaitDuration,
cfg.ReservedMemory,
cfg.TaskCleanupWaitDuration,
cfg.ManifestPullTimeout,
cfg.DockerStopTimeout,
cfg.ContainerStartTimeout,
cfg.ContainerCreateTimeout,
Expand Down
26 changes: 26 additions & 0 deletions agent/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ func TestEnvironmentConfig(t *testing.T) {
defer setTestEnv("ECS_CLUSTER", "myCluster")()
defer setTestEnv("ECS_RESERVED_PORTS_UDP", "[42,99]")()
defer setTestEnv("ECS_RESERVED_MEMORY", "20")()
defer setTestEnv("ECS_MANIFEST_PULL_TIMEOUT", "85s")()
defer setTestEnv("ECS_CONTAINER_STOP_TIMEOUT", "60s")()
defer setTestEnv("ECS_CONTAINER_START_TIMEOUT", "5m")()
defer setTestEnv("ECS_CONTAINER_CREATE_TIMEOUT", "4m")()
Expand Down Expand Up @@ -176,6 +177,7 @@ func TestEnvironmentConfig(t *testing.T) {
assert.Contains(t, conf.ReservedPortsUDP, uint16(42))
assert.Contains(t, conf.ReservedPortsUDP, uint16(99))
assert.Equal(t, uint16(20), conf.ReservedMemory)
assert.Equal(t, 85*time.Second, conf.ManifestPullTimeout)
expectedDurationDockerStopTimeout, _ := time.ParseDuration("60s")
assert.Equal(t, expectedDurationDockerStopTimeout, conf.DockerStopTimeout)
expectedDurationContainerStartTimeout, _ := time.ParseDuration("5m")
Expand Down Expand Up @@ -361,6 +363,14 @@ func TestInvalidFormatContainerCreateTimeout(t *testing.T) {
assert.Equal(t, defaultContainerCreateTimeout, conf.ContainerCreateTimeout, "Wrong value for ContainerCreateTimeout")
}

func TestInvalidFormatManifestPullTimeout(t *testing.T) {
defer setTestRegion()()
defer setTestEnv("ECS_MANIFEST_PULL_TIMEOUT", "invalid")()
conf, err := NewConfig(ec2.NewBlackholeEC2MetadataClient())
assert.NoError(t, err)
assert.Equal(t, defaultManifestPullTimeout, conf.ManifestPullTimeout)
}

func TestInvalidFormatDockerInactivityTimeout(t *testing.T) {
defer setTestRegion()()
defer setTestEnv("ECS_IMAGE_PULL_INACTIVITY_TIMEOUT", "invalid")()
Expand Down Expand Up @@ -402,6 +412,14 @@ func TestZeroValueContainerCreateTimeout(t *testing.T) {
assert.Equal(t, defaultContainerCreateTimeout, conf.ContainerCreateTimeout, "Wrong value for ContainerCreateTimeout")
}

func TestZeroValueManifestPullTimeout(t *testing.T) {
defer setTestRegion()()
defer setTestEnv("ECS_MANIFEST_PULL_TIMEOUT", "0s")()
conf, err := NewConfig(ec2.NewBlackholeEC2MetadataClient())
assert.NoError(t, err)
assert.Equal(t, defaultManifestPullTimeout, conf.ManifestPullTimeout)
}

func TestInvalidValueContainerStartTimeout(t *testing.T) {
defer setTestRegion()()
defer setTestEnv("ECS_CONTAINER_START_TIMEOUT", "-10s")()
Expand All @@ -418,6 +436,14 @@ func TestInvalidValueContainerCreateTimeout(t *testing.T) {
assert.Equal(t, minimumContainerCreateTimeout, conf.ContainerCreateTimeout, "Wrong value for ContainerCreataeTimeout")
}

func TestInvalidValueManifestPullTimeout(t *testing.T) {
defer setTestRegion()()
defer setTestEnv("ECS_MANIFEST_PULL_TIMEOUT", "-10s")()
conf, err := NewConfig(ec2.NewBlackholeEC2MetadataClient())
assert.NoError(t, err)
assert.Equal(t, minimumManifestPullTimeout, conf.ManifestPullTimeout)
}

func TestZeroValueDockerPullInactivityTimeout(t *testing.T) {
defer setTestRegion()()
defer setTestEnv("ECS_DOCKER_PULL_INACTIVITY_TIMEOUT", "0s")()
Expand Down
5 changes: 5 additions & 0 deletions agent/config/config_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ const (
// Default cgroup memory system root path, this is the default used if the
// path has not been configured through ECS_CGROUP_PATH
defaultCgroupPath = "/sys/fs/cgroup"
// minimumManifestPullTimeout is the minimum timeout allowed for manifest pulls
minimumManifestPullTimeout = 30 * time.Second
// defaultManifestPullTimeout is the default timeout for manifest pulls
defaultManifestPullTimeout = 1 * time.Minute
// defaultContainerStartTimeout specifies the value for container start timeout duration
defaultContainerStartTimeout = 3 * time.Minute
// minimumContainerStartTimeout specifies the minimum value for starting a container
Expand All @@ -70,6 +74,7 @@ func DefaultConfig() Config {
ReservedMemory: 0,
AvailableLoggingDrivers: []dockerclient.LoggingDriver{dockerclient.JSONFileDriver, dockerclient.NoneDriver},
TaskCleanupWaitDuration: DefaultTaskCleanupWaitDuration,
ManifestPullTimeout: defaultManifestPullTimeout,
DockerStopTimeout: defaultDockerStopTimeout,
ContainerStartTimeout: defaultContainerStartTimeout,
ContainerCreateTimeout: defaultContainerCreateTimeout,
Expand Down
1 change: 1 addition & 0 deletions agent/config/config_unix_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ func TestConfigDefault(t *testing.T) {
assert.Equal(t, 30*time.Second, cfg.DockerStopTimeout, "Default docker stop container timeout set incorrectly")
assert.Equal(t, 3*time.Minute, cfg.ContainerStartTimeout, "Default docker start container timeout set incorrectly")
assert.Equal(t, 4*time.Minute, cfg.ContainerCreateTimeout, "Default docker create container timeout set incorrectly")
assert.Equal(t, 1*time.Minute, cfg.ManifestPullTimeout, "Default manifest pull timeout set incorrectly")
assert.False(t, cfg.PrivilegedDisabled.Enabled(), "Default PrivilegedDisabled set incorrectly")
assert.Equal(t, []dockerclient.LoggingDriver{dockerclient.JSONFileDriver, dockerclient.NoneDriver},
cfg.AvailableLoggingDrivers, "Default logging drivers set incorrectly")
Expand Down
5 changes: 5 additions & 0 deletions agent/config/config_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ const (
dnsPort = 53
// NetBIOS over TCP/IP
netBIOSPort = 139
// minimumManifestPullTimeout is the minimum timeout allowed for manifest pulls
minimumManifestPullTimeout = 30 * time.Second
// defaultManifestPullTimeout is the default timeout for manifest pulls
defaultManifestPullTimeout = 1 * time.Minute
// defaultContainerStartTimeout specifies the value for container start timeout duration
defaultContainerStartTimeout = 8 * time.Minute
// minimumContainerStartTimeout specifies the minimum value for starting a container
Expand Down Expand Up @@ -121,6 +125,7 @@ func DefaultConfig() Config {
ReservedMemory: 0,
AvailableLoggingDrivers: []dockerclient.LoggingDriver{dockerclient.JSONFileDriver, dockerclient.NoneDriver, dockerclient.AWSLogsDriver},
TaskCleanupWaitDuration: DefaultTaskCleanupWaitDuration,
ManifestPullTimeout: defaultManifestPullTimeout,
DockerStopTimeout: defaultDockerStopTimeout,
ContainerStartTimeout: defaultContainerStartTimeout,
ContainerCreateTimeout: defaultContainerCreateTimeout,
Expand Down
13 changes: 13 additions & 0 deletions agent/config/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,19 @@ func parseDockerStopTimeout() time.Duration {
return dockerStopTimeout
}

func parseManifestPullTimeout() time.Duration {
var timeout time.Duration
parsedTimeout := parseEnvVariableDuration("ECS_MANIFEST_PULL_TIMEOUT")
if parsedTimeout >= minimumManifestPullTimeout {
timeout = parsedTimeout
} else if parsedTimeout != 0 {
// Parsed timeout too low
timeout = minimumManifestPullTimeout
seelog.Warnf("Discarded invalid value for manifest pull timeout, parsed as: %v", parsedTimeout)
}
return timeout
}

func parseContainerStartTimeout() time.Duration {
var containerStartTimeout time.Duration
parsedStartTimeout := parseEnvVariableDuration("ECS_CONTAINER_START_TIMEOUT")
Expand Down
3 changes: 3 additions & 0 deletions agent/config/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ type Config struct {
// This doesn't reserve memory usage on the instance
ReservedMemory uint16

// ManifestPullTimeout is the amount of time to wait for a manifest pull
ManifestPullTimeout time.Duration

// DockerStopTimeout specifies the amount of time before a SIGKILL is issued to
// containers managed by ECS
DockerStopTimeout time.Duration
Expand Down
6 changes: 6 additions & 0 deletions agent/dockerclient/dockerapi/docker_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,9 @@ func (dg *dockerGoClient) WithVersion(version dockerclient.DockerVersion) (Docke
versionedClient := &dockerGoClient{
sdkClientFactory: dg.sdkClientFactory,
version: version,
ecrClientFactory: dg.ecrClientFactory,
auth: dg.auth,
ecrTokenCache: dg.ecrTokenCache,
config: dg.config,
context: dg.context,
manifestPullBackoff: dg.manifestPullBackoff,
Expand Down Expand Up @@ -371,6 +373,10 @@ func (dg *dockerGoClient) PullImageManifest(
})

if err != nil {
if errors.Is(err, context.DeadlineExceeded) {
timeoutErr := &DockerTimeoutError{time.Since(startTime), "MANIFEST_PULLED"}
return registry.DistributionInspect{}, timeoutErr
}
return registry.DistributionInspect{}, wrapManifestPullErrorAsNamedError(imageRef, err)
}
if ctxErr := ctx.Err(); ctxErr != nil {
Expand Down
9 changes: 9 additions & 0 deletions agent/engine/common_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ import (
"github.com/aws/aws-sdk-go/aws"
"github.com/cihub/seelog"
dockercontainer "github.com/docker/docker/api/types/container"
"github.com/docker/docker/api/types/registry"
"github.com/golang/mock/gomock"
"github.com/pborman/uuid"
"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -146,6 +147,7 @@ func waitForTaskStoppedByCheckStatus(task *apitask.Task) {
// removed matches with the generated container name during cleanup operation in the
// test.
func validateContainerRunWorkflow(t *testing.T,
ctrl *gomock.Controller,
container *apicontainer.Container,
task *apitask.Task,
imageManager *mock_engine.MockImageManager,
Expand All @@ -156,6 +158,13 @@ func validateContainerRunWorkflow(t *testing.T,
createdContainerName chan<- string,
assertions func(),
) {
// Prepare mock image manifest digest for test
manifestPullClient := mock_dockerapi.NewMockDockerClient(ctrl)
client.EXPECT().WithVersion(dockerclient.Version_1_35).Return(manifestPullClient, nil)
manifestPullClient.EXPECT().
PullImageManifest(gomock.Any(), container.Image, container.RegistryAuthentication).
Return(registry.DistributionInspect{}, nil)

imageManager.EXPECT().AddAllImageStates(gomock.Any()).AnyTimes()
client.EXPECT().PullImage(gomock.Any(), container.Image, nil, gomock.Any()).Return(dockerapi.DockerContainerMetadata{})
imageManager.EXPECT().RecordContainerReference(container).Return(nil)
Expand Down
8 changes: 6 additions & 2 deletions agent/engine/docker_image_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,12 @@ func (imageManager *dockerImageManager) RecordContainerReference(container *apic
return err
}
container.ImageID = imageInspected.ID
imageDigest := imageManager.fetchRepoDigest(imageInspected, container)
container.SetImageDigest(imageDigest)
// For older Docker versions imageDigest is not populated during transition to
// MANIFEST_PULLED state. Populate it here if that's the case.
if container.GetImageDigest() == "" {
imageDigest := imageManager.fetchRepoDigest(imageInspected, container)
container.SetImageDigest(imageDigest)
}
added := imageManager.addContainerReferenceToExistingImageState(container)
if !added {
imageManager.addContainerReferenceToNewImageState(container, imageInspected.Size)
Expand Down
Loading
Loading