diff --git a/pkg/apis/workflow/v1alpha1/workflow_types.go b/pkg/apis/workflow/v1alpha1/workflow_types.go index f24021ffd72f..4fdaa6b37781 100644 --- a/pkg/apis/workflow/v1alpha1/workflow_types.go +++ b/pkg/apis/workflow/v1alpha1/workflow_types.go @@ -5,8 +5,11 @@ import ( "fmt" "hash/fnv" "net/url" + "os" "path" + "path/filepath" "reflect" + "regexp" "sort" "strings" "time" @@ -19,6 +22,7 @@ import ( "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/util/wait" + argoerrs "github.com/argoproj/argo-workflows/v3/errors" "github.com/argoproj/argo-workflows/v3/util/slice" ) @@ -904,6 +908,37 @@ type Artifact struct { FromExpression string `json:"fromExpression,omitempty" protobuf:"bytes,11,opt,name=fromExpression"` } +// CleanPath validates and cleans the artifact path. +func (a *Artifact) CleanPath() error { + if a.Path == "" { + return argoerrs.InternalErrorf("Artifact '%s' did not specify a path", a.Name) + } + + // Ensure that the artifact path does not use directory traversal to escape a + // "safe" sub-directory, assuming malicious user input is present. For example: + // inputs: + // artifacts: + // - name: a1 + // path: /tmp/safe/{{ inputs.parameters.user-input }} + // + // Any resolved path should always be within the /tmp/safe/ directory. + safeDir := "" + slashDotDotRe := regexp.MustCompile(fmt.Sprintf(`%c..$`, os.PathSeparator)) + slashDotDotSlash := fmt.Sprintf(`%c..%c`, os.PathSeparator, os.PathSeparator) + if strings.Contains(a.Path, slashDotDotSlash) { + safeDir = a.Path[:strings.Index(a.Path, slashDotDotSlash)] + } else if slashDotDotRe.FindStringIndex(a.Path) != nil { + safeDir = a.Path[:len(a.Path)-3] + } + cleaned := filepath.Clean(a.Path) + safeDirWithSlash := fmt.Sprintf(`%s%c`, safeDir, os.PathSeparator) + if len(safeDir) > 0 && (!strings.HasPrefix(cleaned, safeDirWithSlash) || len(cleaned) <= len(safeDirWithSlash)) { + return argoerrs.InternalErrorf("Artifact '%s' attempted to use a path containing '..'. Directory traversal is not permitted", a.Name) + } + a.Path = cleaned + return nil +} + // PodGC describes how to delete completed pods as they complete type PodGC struct { // Strategy is the strategy to use. One of "OnPodCompletion", "OnPodSuccess", "OnWorkflowCompletion", "OnWorkflowSuccess" diff --git a/pkg/apis/workflow/v1alpha1/workflow_types_test.go b/pkg/apis/workflow/v1alpha1/workflow_types_test.go index 64b87f440e45..43d68c45101b 100644 --- a/pkg/apis/workflow/v1alpha1/workflow_types_test.go +++ b/pkg/apis/workflow/v1alpha1/workflow_types_test.go @@ -116,6 +116,114 @@ func TestWorkflowHappenedBetween(t *testing.T) { })) } +func TestArtifact_ValidatePath(t *testing.T) { + t.Run("empty path fails", func(t *testing.T) { + a1 := Artifact{Name: "a1", Path: ""} + err := a1.CleanPath() + assert.EqualError(t, err, "Artifact 'a1' did not specify a path") + assert.Equal(t, "", a1.Path) + }) + + t.Run("directory traversal above safe base dir fails", func(t *testing.T) { + var assertPathError = func(err error) { + if assert.Error(t, err) { + assert.Contains(t, err.Error(), "Directory traversal is not permitted") + } + } + + a1 := Artifact{Name: "a1", Path: "/tmp/.."} + assertPathError(a1.CleanPath()) + assert.Equal(t, "/tmp/..", a1.Path) + + a2 := Artifact{Name: "a2", Path: "/tmp/../"} + assertPathError(a2.CleanPath()) + assert.Equal(t, "/tmp/../", a2.Path) + + a3 := Artifact{Name: "a3", Path: "/tmp/../../etc/passwd"} + assertPathError(a3.CleanPath()) + assert.Equal(t, "/tmp/../../etc/passwd", a3.Path) + + a4 := Artifact{Name: "a4", Path: "/tmp/../tmp"} + assertPathError(a4.CleanPath()) + assert.Equal(t, "/tmp/../tmp", a4.Path) + + a5 := Artifact{Name: "a5", Path: "/tmp/../tmp/"} + assertPathError(a5.CleanPath()) + assert.Equal(t, "/tmp/../tmp/", a5.Path) + + a6 := Artifact{Name: "a6", Path: "/tmp/subdir/../../tmp/subdir/"} + assertPathError(a6.CleanPath()) + assert.Equal(t, "/tmp/subdir/../../tmp/subdir/", a6.Path) + + a7 := Artifact{Name: "a7", Path: "/tmp/../tmp-imposter"} + assertPathError(a7.CleanPath()) + assert.Equal(t, "/tmp/../tmp-imposter", a7.Path) + }) + + t.Run("directory traversal with no safe base dir succeeds", func(t *testing.T) { + a1 := Artifact{Name: "a1", Path: ".."} + err := a1.CleanPath() + assert.NoError(t, err) + assert.Equal(t, "..", a1.Path) + + a2 := Artifact{Name: "a2", Path: "../"} + err = a2.CleanPath() + assert.NoError(t, err) + assert.Equal(t, "..", a2.Path) + + a3 := Artifact{Name: "a3", Path: "../.."} + err = a3.CleanPath() + assert.NoError(t, err) + assert.Equal(t, "../..", a3.Path) + + a4 := Artifact{Name: "a4", Path: "../etc/passwd"} + err = a4.CleanPath() + assert.NoError(t, err) + assert.Equal(t, "../etc/passwd", a4.Path) + }) + + t.Run("directory traversal ending within safe base dir succeeds", func(t *testing.T) { + a1 := Artifact{Name: "a1", Path: "/tmp/../tmp/abcd"} + err := a1.CleanPath() + assert.NoError(t, err) + assert.Equal(t, "/tmp/abcd", a1.Path) + + a2 := Artifact{Name: "a2", Path: "/tmp/subdir/../../tmp/subdir/abcd"} + err = a2.CleanPath() + assert.NoError(t, err) + assert.Equal(t, "/tmp/subdir/abcd", a2.Path) + }) + + t.Run("artifact path filenames are allowed to contain double dots", func(t *testing.T) { + a1 := Artifact{Name: "a1", Path: "/tmp/..artifact.txt"} + err := a1.CleanPath() + assert.NoError(t, err) + assert.Equal(t, "/tmp/..artifact.txt", a1.Path) + + a2 := Artifact{Name: "a2", Path: "/tmp/artif..t.txt"} + err = a2.CleanPath() + assert.NoError(t, err) + assert.Equal(t, "/tmp/artif..t.txt", a2.Path) + }) + + t.Run("normal artifact path succeeds", func(t *testing.T) { + a1 := Artifact{Name: "a1", Path: "/tmp"} + err := a1.CleanPath() + assert.NoError(t, err) + assert.Equal(t, "/tmp", a1.Path) + + a2 := Artifact{Name: "a2", Path: "/tmp/"} + err = a2.CleanPath() + assert.NoError(t, err) + assert.Equal(t, "/tmp", a2.Path) + + a3 := Artifact{Name: "a3", Path: "/tmp/abcd/some-artifact.txt"} + err = a3.CleanPath() + assert.NoError(t, err) + assert.Equal(t, "/tmp/abcd/some-artifact.txt", a3.Path) + }) +} + func TestArtifactLocation_IsArchiveLogs(t *testing.T) { var l *ArtifactLocation assert.False(t, l.IsArchiveLogs()) diff --git a/workflow/controller/workflowpod.go b/workflow/controller/workflowpod.go index 03cbab01ab93..4929ab071d13 100644 --- a/workflow/controller/workflowpod.go +++ b/workflow/controller/workflowpod.go @@ -960,8 +960,9 @@ func (woc *wfOperationCtx) addInputArtifactsVolumes(pod *apiv1.Pod, tmpl *wfv1.T continue } for _, art := range tmpl.Inputs.Artifacts { - if art.Path == "" { - return errors.Errorf(errors.CodeBadRequest, "inputs.artifacts.%s did not specify a path", art.Name) + err := art.CleanPath() + if err != nil { + return errors.Errorf(errors.CodeBadRequest, "error in inputs.artifacts.%s: %s", art.Name, err.Error()) } if !art.HasLocationOrKey() && art.Optional { woc.log.Infof("skip volume mount of %s (%s): optional artifact was not provided", diff --git a/workflow/executor/executor.go b/workflow/executor/executor.go index 6c3b47cdcc3f..d96cb8ed33a7 100644 --- a/workflow/executor/executor.go +++ b/workflow/executor/executor.go @@ -155,6 +155,10 @@ func (we *WorkflowExecutor) LoadArtifacts(ctx context.Context) error { return argoerrs.Errorf(argoerrs.CodeNotFound, "required artifact '%s' not supplied", art.Name) } } + err := art.CleanPath() + if err != nil { + return err + } driverArt, err := we.newDriverArt(&art) if err != nil { return fmt.Errorf("failed to load artifact '%s': %w", art.Name, err) @@ -164,15 +168,12 @@ func (we *WorkflowExecutor) LoadArtifacts(ctx context.Context) error { return err } // Determine the file path of where to load the artifact - if art.Path == "" { - return argoerrs.InternalErrorf("Artifact %s did not specify a path", art.Name) - } var artPath string mnt := common.FindOverlappingVolume(&we.Template, art.Path) if mnt == nil { artPath = path.Join(common.ExecutorArtifactBaseDir, art.Name) } else { - // If we get here, it means the input artifact path overlaps with an user specified + // If we get here, it means the input artifact path overlaps with a user-specified // volumeMount in the container. Because we also implement input artifacts as volume // mounts, we need to load the artifact into the user specified volume mount, // as opposed to the `input-artifacts` volume that is an implementation detail @@ -286,8 +287,9 @@ func (we *WorkflowExecutor) SaveArtifacts(ctx context.Context) error { func (we *WorkflowExecutor) saveArtifact(ctx context.Context, containerName string, art *wfv1.Artifact) error { // Determine the file path of where to find the artifact - if art.Path == "" { - return argoerrs.InternalErrorf("Artifact %s did not specify a path", art.Name) + err := art.CleanPath() + if err != nil { + return err } fileName, localArtPath, err := we.stageArchiveFile(containerName, art) if err != nil { diff --git a/workflow/executor/executor_test.go b/workflow/executor/executor_test.go index b754aaf40828..27151b10c8f3 100644 --- a/workflow/executor/executor_test.go +++ b/workflow/executor/executor_test.go @@ -34,6 +34,7 @@ func TestWorkflowExecutor_LoadArtifacts(t *testing.T) { {"ErrNotSupplied", wfv1.Artifact{Name: "foo"}, "required artifact 'foo' not supplied"}, {"ErrFailedToLoad", wfv1.Artifact{ Name: "foo", + Path: "/tmp/foo.txt", ArtifactLocation: wfv1.ArtifactLocation{ S3: &wfv1.S3Artifact{ Key: "my-key", @@ -48,7 +49,17 @@ func TestWorkflowExecutor_LoadArtifacts(t *testing.T) { Key: "my-key", }, }, - }, "Artifact foo did not specify a path"}, + }, "Artifact 'foo' did not specify a path"}, + {"ErrDirTraversal", wfv1.Artifact{ + Name: "foo", + Path: "/tmp/../etc/passwd", + ArtifactLocation: wfv1.ArtifactLocation{ + S3: &wfv1.S3Artifact{ + S3Bucket: wfv1.S3Bucket{Endpoint: "my-endpoint", Bucket: "my-bucket"}, + Key: "my-key", + }, + }, + }, "Artifact 'foo' attempted to use a path containing '..'. Directory traversal is not permitted"}, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { diff --git a/workflow/validate/validate.go b/workflow/validate/validate.go index 27894653dc6b..ceffc7799f86 100644 --- a/workflow/validate/validate.go +++ b/workflow/validate/validate.go @@ -526,8 +526,9 @@ func validateInputs(tmpl *wfv1.Template) (map[string]interface{}, error) { artRef := fmt.Sprintf("inputs.artifacts.%s", art.Name) scope[artRef] = true if tmpl.IsLeaf() { - if art.Path == "" { - return nil, errors.Errorf(errors.CodeBadRequest, "templates.%s.%s.path not specified", tmpl.Name, artRef) + err = art.CleanPath() + if err != nil { + return nil, errors.Errorf(errors.CodeBadRequest, "error in templates.%s.%s: %s", tmpl.Name, artRef, err.Error()) } scope[fmt.Sprintf("inputs.artifacts.%s.path", art.Name)] = true } else { @@ -954,8 +955,9 @@ func validateOutputs(scope map[string]interface{}, globalParams map[string]strin for _, art := range tmpl.Outputs.Artifacts { artRef := fmt.Sprintf("outputs.artifacts.%s", art.Name) if tmpl.IsLeaf() { - if art.Path == "" { - return errors.Errorf(errors.CodeBadRequest, "templates.%s.%s.path not specified", tmpl.Name, artRef) + err = art.CleanPath() + if err != nil { + return errors.Errorf(errors.CodeBadRequest, "error in templates.%s.%s: %s", tmpl.Name, artRef, err.Error()) } } else { if art.Path != "" {