From 91c08cdd83386bfcf48fcb237dd05216bc61b7a0 Mon Sep 17 00:00:00 2001 From: Alex Collins Date: Fri, 16 Apr 2021 16:05:56 -0700 Subject: [PATCH] fix(executor): More logs for PNS sidecar termination. #5627 (#5683) --- .github/workflows/ci-build.yaml | 3 ++ Makefile | 19 +++++---- .../e2e/cron/cron-and-malformed-template.yaml | 1 - test/e2e/cron/param.yaml | 1 - .../mixins/workflow-controller-configmap.yaml | 2 +- test/e2e/smoke/basic-2.yaml | 3 +- test/e2e/testdata/wf-default-ns.yaml | 1 - test/stress/massive-workflow.yaml | 1 - workflow/executor/executor.go | 2 +- workflow/executor/pns/pns.go | 39 ++++--------------- 10 files changed, 23 insertions(+), 49 deletions(-) diff --git a/.github/workflows/ci-build.yaml b/.github/workflows/ci-build.yaml index 01d9f94270da..651ca8a72513 100644 --- a/.github/workflows/ci-build.yaml +++ b/.github/workflows/ci-build.yaml @@ -120,6 +120,9 @@ jobs: - run: make executor-image wait STATIC_FILES=false DEV_IMAGE=true timeout-minutes: 3 - run: make ${{matrix.test}} E2E_TIMEOUT=1m STATIC_FILES=false + - name: Get wait container logs + if: ${{ failure() }} + run: kubectl get pod -o name | xargs -L 1 kubectl logs -c wait - name: Upload logs if: ${{ failure() }} uses: actions/upload-artifact@v1 diff --git a/Makefile b/Makefile index 0cea188825da..508f748ba9f3 100644 --- a/Makefile +++ b/Makefile @@ -129,12 +129,12 @@ define protoc perl -i -pe 's|argoproj/argo-workflows/|argoproj/argo-workflows/v3/|g' `echo "$(1)" | sed 's/proto/pb.go/g'` endef -# docker_build,image_name +# docker_build,target,image_name define docker_build - docker buildx build -t $(IMAGE_NAMESPACE)/$(1):$(VERSION) --target $(1) --progress plain . - docker run --rm -t $(IMAGE_NAMESPACE)/$(1):$(VERSION) version - if [ $(K3D) = true ]; then k3d image import $(IMAGE_NAMESPACE)/$(1):$(VERSION); fi - if [ $(DOCKER_PUSH) = true ] && [ $(IMAGE_NAMESPACE) != argoproj ] ; then docker push $(IMAGE_NAMESPACE)/$(1):$(VERSION) ; fi + docker buildx build -t $(IMAGE_NAMESPACE)/$(2):$(VERSION) --target $(1) --progress plain . + docker run --rm -t $(IMAGE_NAMESPACE)/$(2):$(VERSION) version + if [ $(K3D) = true ]; then k3d image import $(IMAGE_NAMESPACE)/$(2):$(VERSION); fi + if [ $(DOCKER_PUSH) = true ] && [ $(IMAGE_NAMESPACE) != argoproj ] ; then docker push $(IMAGE_NAMESPACE)/$(2):$(VERSION) ; fi endef ifndef $(GOPATH) @@ -202,7 +202,7 @@ argo-server.key: cli-image: dist/argocli.image dist/argocli.image: $(CLI_PKGS) go.sum argo-server.crt argo-server.key - $(call docker_build,argocli) + $(call docker_build,argocli,argocli) touch dist/argocli.image .PHONY: clis @@ -225,7 +225,7 @@ endif controller-image: dist/controller.image dist/controller.image: $(CONTROLLER_PKGS) go.sum Dockerfile - $(call docker_build,workflow-controller) + $(call docker_build,workflow-controller,workflow-controller) touch dist/controller.image # argoexec @@ -243,12 +243,11 @@ executor-image: dist/argoexec.image ifeq ($(DEV_IMAGE),true) dist/argoexec.image: dist/argoexec [ -e argoexec ] || mv dist/argoexec . - $(call docker_build,argoexec-dev) + $(call docker_build,argoexec-dev,argoexec) mv argoexec dist/ - docker tag argoproj/argoexec-dev:latest argoproj/argoexec:latest else dist/argoexec.image: $(ARGOEXEC_PKGS) go.sum - $(call docker_build,argoexec) + $(call docker_build,argoexec,argoexec) endif touch dist/argoexec.image diff --git a/test/e2e/cron/cron-and-malformed-template.yaml b/test/e2e/cron/cron-and-malformed-template.yaml index 0deddaecbd3c..936a5a3b4c6d 100644 --- a/test/e2e/cron/cron-and-malformed-template.yaml +++ b/test/e2e/cron/cron-and-malformed-template.yaml @@ -19,7 +19,6 @@ spec: - name: whalesay container: image: python:alpine3.6 - imagePullPolicy: IfNotPresent command: ["sh", -c] args: ["echo hello"] diff --git a/test/e2e/cron/param.yaml b/test/e2e/cron/param.yaml index 42e7d8a58f28..16093c9ab11f 100644 --- a/test/e2e/cron/param.yaml +++ b/test/e2e/cron/param.yaml @@ -26,6 +26,5 @@ spec: - name: message container: image: python:alpine3.6 - imagePullPolicy: IfNotPresent command: ["sh", -c] args: ["echo {{inputs.parameters.message}}"] diff --git a/test/e2e/manifests/mixins/workflow-controller-configmap.yaml b/test/e2e/manifests/mixins/workflow-controller-configmap.yaml index 1376bd451036..3e172fe98f56 100644 --- a/test/e2e/manifests/mixins/workflow-controller-configmap.yaml +++ b/test/e2e/manifests/mixins/workflow-controller-configmap.yaml @@ -11,7 +11,7 @@ data: podSpecPatch: | terminationGracePeriodSeconds: 3 executor: | - imagePullPolicy: IfNotPresent + imagePullPolicy: Never resources: requests: cpu: 0.1 diff --git a/test/e2e/smoke/basic-2.yaml b/test/e2e/smoke/basic-2.yaml index 26012a21b088..42ec7b03273c 100644 --- a/test/e2e/smoke/basic-2.yaml +++ b/test/e2e/smoke/basic-2.yaml @@ -7,5 +7,4 @@ spec: templates: - name: run-workflow container: - image: argoproj/argosay:v2 - imagePullPolicy: IfNotPresent \ No newline at end of file + image: argoproj/argosay:v2 \ No newline at end of file diff --git a/test/e2e/testdata/wf-default-ns.yaml b/test/e2e/testdata/wf-default-ns.yaml index 72345cb3ead1..613336779cc6 100644 --- a/test/e2e/testdata/wf-default-ns.yaml +++ b/test/e2e/testdata/wf-default-ns.yaml @@ -20,6 +20,5 @@ spec: - name: whalesay container: image: python:alpine3.6 - imagePullPolicy: IfNotPresent command: [ "sh", -c ] args: [ "echo hello" ] \ No newline at end of file diff --git a/test/stress/massive-workflow.yaml b/test/stress/massive-workflow.yaml index 3fb0bcd78adf..a2c17ecd9380 100644 --- a/test/stress/massive-workflow.yaml +++ b/test/stress/massive-workflow.yaml @@ -31,7 +31,6 @@ spec: stress: "true" container: image: argoproj/argosay:v2 - imagePullPolicy: IfNotPresent resources: requests: memory: 2Mi diff --git a/workflow/executor/executor.go b/workflow/executor/executor.go index e95a225988bb..7cbb0516321d 100644 --- a/workflow/executor/executor.go +++ b/workflow/executor/executor.go @@ -1081,10 +1081,10 @@ func (we *WorkflowExecutor) KillSidecars(ctx context.Context) error { sidecarNames = append(sidecarNames, n) } } + log.Infof("Killing sidecars %q", sidecarNames) if len(sidecarNames) == 0 { return nil // exit early as GetTerminationGracePeriodDuration performs `get pod` } - log.Infof("Killing sidecars %s", strings.Join(sidecarNames, ",")) terminationGracePeriodDuration, _ := we.GetTerminationGracePeriodDuration(ctx) return we.RuntimeExecutor.Kill(ctx, sidecarNames, terminationGracePeriodDuration) } diff --git a/workflow/executor/pns/pns.go b/workflow/executor/pns/pns.go index 1f25ef7645c0..205a42ee0dad 100644 --- a/workflow/executor/pns/pns.go +++ b/workflow/executor/pns/pns.go @@ -157,32 +157,6 @@ func (p *PNSExecutor) Wait(ctx context.Context, containerNames []string) error { time.Sleep(1 * time.Second) } -OUTER: - for _, containerName := range containerNames { - pid := p.getContainerPID(containerName) - if pid == 0 { - log.Infof("container %q pid unknown - maybe short running, or late starting container", containerName) - continue - } - log.Infof("Waiting for %q pid %d to complete", containerName, pid) - for { - select { - case <-ctx.Done(): - return ctx.Err() - default: - p, err := gops.FindProcess(pid) - if err != nil { - return fmt.Errorf("failed to find %q process: %w", containerName, err) - } - if p == nil { - log.Infof("%q pid %d completed", containerName, pid) - continue OUTER - } - time.Sleep(time.Second) - } - } - } - return p.K8sAPIExecutor.Wait(ctx, containerNames) } @@ -331,12 +305,15 @@ func (p *PNSExecutor) secureRootFiles() error { return err } - // the main container may have switched (e.g. gone from busybox to the user's container) - if prevInfo, ok := p.pidFileHandles[pid]; ok { - _ = prevInfo.Close() + if p.pidFileHandles[pid] != fs { + // the main container may have switched (e.g. gone from busybox to the user's container) + if prevInfo, ok := p.pidFileHandles[pid]; ok { + _ = prevInfo.Close() + } + p.pidFileHandles[pid] = fs + log.Infof("secured root for pid %d root: %s (%q)", pid, proc.Executable(), fs.Name()) } - p.pidFileHandles[pid] = fs - log.Infof("secured root for pid %d root: %s", pid, proc.Executable()) + containerName, err := containerNameForPID(pid) if err != nil { return err