From d1073d0dc0fba5140101b5fcf2ddc5abcdf623f7 Mon Sep 17 00:00:00 2001 From: francisco souza <108725+fsouza@users.noreply.github.com> Date: Sat, 27 May 2023 12:12:45 -0400 Subject: [PATCH] upload: use a regexp for handling gsutil special case Also add many tests to try to avoid edge cases. --- fakestorage/upload.go | 33 +++++++----- fakestorage/upload_test.go | 102 +++++++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+), 12 deletions(-) diff --git a/fakestorage/upload.go b/fakestorage/upload.go index d0399a550b..3aaeb0f027 100644 --- a/fakestorage/upload.go +++ b/fakestorage/upload.go @@ -15,6 +15,7 @@ import ( "mime/multipart" "net/http" "net/url" + "regexp" "strconv" "strings" "time" @@ -33,6 +34,19 @@ const ( uploadTypeResumable = "resumable" ) +// per RFC 2045, double quotes should be used whenever parameters have a value +// that includes some special character - anything in the set: ()<>@,;:\"/[]?= +// (including space). gsutil likes to use `=` in the boundary, but incorrectly +// quotes it using single quotes. +// +// We do exclude \ and " from the regexp because those are not supported by the +// mime package. +// +// This has been reported to gsutil +// (https://github.com/GoogleCloudPlatform/gsutil/issues/1466). If that issue +// ever gets closed, we should be able to get rid of this hack. +var gsutilBoundary = regexp.MustCompile(`boundary='([^']*[()<>@,;:"/\[\]?= ]+[^']*)'`) + type multipartMetadata struct { ContentType string `json:"contentType"` ContentEncoding string `json:"contentEncoding"` @@ -306,18 +320,7 @@ func getObjectACL(predefinedACL string) []storage.ACLRule { func (s *Server) multipartUpload(bucketName string, r *http.Request) jsonResponse { defer r.Body.Close() - requestContentType := r.Header.Get(contentTypeHeader) - // gsutil is observed to submit incorrectly-quoted bounary strings - // like: boundary='===============5900997287163282353==' - // See https://github.com/GoogleCloudPlatform/gsutil/issues/1466 - // Having an "=" character in the boundary param requires the value be quoted, - // but ' is not a quote char, " is. - // If we see a string like "boundary='=", which is always invalid anyway, - // attempt to rescue the situation by converting all ' to ". - if strings.Contains(requestContentType, "boundary='=") { - requestContentType = strings.ReplaceAll(requestContentType, "'", `"`) - } - _, params, err := mime.ParseMediaType(requestContentType) + params, err := parseContentTypeParams(r.Header.Get(contentTypeHeader)) if err != nil { return jsonResponse{ status: http.StatusBadRequest, @@ -386,6 +389,12 @@ func (s *Server) multipartUpload(bucketName string, r *http.Request) jsonRespons return jsonResponse{data: newObjectResponse(obj.ObjectAttrs)} } +func parseContentTypeParams(requestContentType string) (map[string]string, error) { + requestContentType = gsutilBoundary.ReplaceAllString(requestContentType, `boundary="$1"`) + _, params, err := mime.ParseMediaType(requestContentType) + return params, err +} + func (s *Server) resumableUpload(bucketName string, r *http.Request) jsonResponse { if r.URL.Query().Has("upload_id") { return s.uploadFileContent(r) diff --git a/fakestorage/upload_test.go b/fakestorage/upload_test.go index 2efbae34e6..828f4fb80a 100644 --- a/fakestorage/upload_test.go +++ b/fakestorage/upload_test.go @@ -23,6 +23,7 @@ import ( "cloud.google.com/go/storage" "github.com/fsouza/fake-gcs-server/internal/checksum" + "github.com/google/go-cmp/cmp" "google.golang.org/api/googleapi" ) @@ -1014,3 +1015,104 @@ func isACLPublic(acl []storage.ACLRule) bool { } return false } + +func TestParseContentTypeParams(t *testing.T) { + t.Parallel() + tests := []struct { + name string + input string + expectedParams map[string]string + }{ + { + name: "no boundary", + input: "multipart/related", + expectedParams: map[string]string{}, + }, + { + name: "with boundary", + input: "multipart/related; boundary=something", + expectedParams: map[string]string{"boundary": "something"}, + }, + { + name: "with quoted boundary", + input: `multipart/related; boundary="something"`, + expectedParams: map[string]string{"boundary": "something"}, + }, + { + name: "boundaries that have a single quote, but don't use special chars", + input: `multipart/related; boundary='something'`, + expectedParams: map[string]string{"boundary": "'something'"}, + }, + { + name: "special characters within single quotes", + input: `text/plain; boundary='===============1523364337061494617=='`, + expectedParams: map[string]string{"boundary": "===============1523364337061494617=="}, + }, + { + name: "special characters within single quotes + other parameters", + input: `text/plain; boundary='===============1523364337061494617=='; some=thing`, + expectedParams: map[string]string{"boundary": "===============1523364337061494617==", "some": "thing"}, + }, + } + + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + t.Parallel() + params, err := parseContentTypeParams(test.input) + if err != nil { + t.Fatal(err) + } + + if diff := cmp.Diff(params, test.expectedParams); diff != "" { + t.Errorf("unexpected params: %v", diff) + } + }) + } +} + +func TestParseContentTypeParamsGsutilEdgeCases(t *testing.T) { + t.Parallel() + + // "hardening" of the regex, to make sure we don't miss anything. As we + // run into bugs, this slice will grow. + testCases := []string{ + "===============5900997287163282353==", + "===============5900997287163282353", + "590099728(7163282353", + "something with spaces", + " ", + "===============5900997287163282353==590099728===", + "(", + ")", + "<", + ">", + "@", + ",", + ";", + ":", + "/", + "[", + "]", + "?", + "=", + } + + for _, testCase := range testCases { + testCase := testCase + t.Run(testCase, func(t *testing.T) { + t.Parallel() + input := fmt.Sprintf("text/plain; a=b; boundary='%s'; c=d", testCase) + expectedParams := map[string]string{"boundary": testCase, "a": "b", "c": "d"} + + params, err := parseContentTypeParams(input) + if err != nil { + t.Fatal(err) + } + + if diff := cmp.Diff(params, expectedParams); diff != "" { + t.Errorf("unexpected params: %v", diff) + } + }) + } +}