From b41718a2506a63ef97fa84a6f04b9d79ce02ce22 Mon Sep 17 00:00:00 2001 From: Alex Collins Date: Thu, 11 Mar 2021 08:11:52 -0800 Subject: [PATCH 1/3] fix(executor): Make docker executor more robust Signed-off-by: Alex Collins --- workflow/executor/docker/docker.go | 36 ++++++++++++++++++------------ 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/workflow/executor/docker/docker.go b/workflow/executor/docker/docker.go index ca92e438148f..cd4930e2da5c 100644 --- a/workflow/executor/docker/docker.go +++ b/workflow/executor/docker/docker.go @@ -25,6 +25,8 @@ import ( var errContainerNotExist = fmt.Errorf("container does not exist") // sentinel error +var started = time.Now() + type DockerExecutor struct { namespace string podName string @@ -206,7 +208,7 @@ func (d *DockerExecutor) syncContainerIDs(ctx context.Context, containerNames [] "ps", "--all", // container could have already exited, but there could also have been two containers for the same pod (old container not yet cleaned-up) "--no-trunc", // display long container IDs - "--format={{.Status}}|{{.Label \"io.kubernetes.container.name\"}}|{{.ID}}", // similar to `Up 3 hours,main,035a98c4e72e` + "--format={{.Status}}|{{.Label \"io.kubernetes.container.name\"}}|{{.ID}}|{{.CreatedAt}}", // similar to `Up 3 hours,main,035a98c4e72e,2021-03-08 17:25:15 -0800 PST` // https://github.com/kubernetes/kubernetes/blob/ca6bdba014f0a98efe0e0dd4e15f57d1c121d6c9/pkg/kubelet/dockertools/labels.go#L37 "--filter=label=io.kubernetes.pod.namespace="+d.namespace, "--filter=label=io.kubernetes.pod.name="+d.podName, @@ -214,27 +216,33 @@ func (d *DockerExecutor) syncContainerIDs(ctx context.Context, containerNames [] if err != nil { return err } + containerStatus := make(map[string]string) for _, l := range strings.Split(string(output), "\n") { parts := strings.Split(strings.TrimSpace(l), "|") - if len(parts) != 3 { + if len(parts) != 4 { continue } status := strings.SplitN(parts[0], " ", 2)[0] // Created,Exited,Up, containerName := parts[1] containerID := parts[2] - if d.containers[containerName] == "" && containerID != "" { - if status == "Created" { // for "Created" we must check to see if it was an early (non-zero) exit - output, err := common.RunCommand("docker", "inspect", containerID, "--format={{.State.ExitCode}}") - if err != nil { - return err - } - if strings.TrimSpace(string(output)) == "0" { // this remain "0" until it is not "Created" anymore - continue - } - } - d.containers[containerName] = containerID - log.Infof("mapped container name %q to container ID %q", containerName, containerID) + createdAt, err := time.Parse("2006-01-02 15:04:05 -0700 MST", parts[3]) + if err != nil { + return err + } + if containerID == "" || d.containers[containerName] == containerID { + continue + } + if createdAt.Before(started.Add(-15 * time.Second)) { + log.Infof("ignoring container %q created at %s, too long before process started", containerName, createdAt) + continue + } + if status == "Created" && containerStatus[containerName] != "" { + log.Infof("ignoring created container %q that would %s -> %s", containerName, containerStatus[containerName], status) + continue } + d.containers[containerName] = containerID + containerStatus[containerName] = status + log.Infof("mapped container name %q to container ID %q (created at %v, status %s)", containerName, containerID, createdAt, status) } if d.haveContainers(containerNames) { return nil From dbf87a213110d673d829d4ed91aed7fb0b44fe82 Mon Sep 17 00:00:00 2001 From: Alex Collins Date: Thu, 11 Mar 2021 08:16:35 -0800 Subject: [PATCH 2/3] fix(executor): Make docker executor more robust Signed-off-by: Alex Collins --- workflow/executor/docker/docker.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/executor/docker/docker.go b/workflow/executor/docker/docker.go index cd4930e2da5c..78a634a9a432 100644 --- a/workflow/executor/docker/docker.go +++ b/workflow/executor/docker/docker.go @@ -233,7 +233,7 @@ func (d *DockerExecutor) syncContainerIDs(ctx context.Context, containerNames [] continue } if createdAt.Before(started.Add(-15 * time.Second)) { - log.Infof("ignoring container %q created at %s, too long before process started", containerName, createdAt) + log.Infof("ignoring container %q created at %v, too long before process started", containerName, createdAt) continue } if status == "Created" && containerStatus[containerName] != "" { From d271fbd721a1701c0f847099e6d7192ff70c86ac Mon Sep 17 00:00:00 2001 From: Alex Collins Date: Thu, 11 Mar 2021 08:47:06 -0800 Subject: [PATCH 3/3] fix(executor): Make docker executor more robust Signed-off-by: Alex Collins