From 84df1b0dbe066caed884f9e9558b672f6eb19068 Mon Sep 17 00:00:00 2001 From: James Benton Date: Fri, 27 Sep 2019 15:25:01 +0100 Subject: [PATCH] http: Add support for using SHA1, MD5 or SHA256 hashes. --- cache/cache.go | 13 +++++++++++ cache/disk/disk.go | 24 ++++++-------------- cache/disk/disk_test.go | 50 ++++++++++++++++++++++++++++++++++++----- cache/s3/s3.go | 39 ++++++++++++++++++++++++++++---- server/http.go | 17 ++++++++++---- 5 files changed, 112 insertions(+), 31 deletions(-) diff --git a/cache/cache.go b/cache/cache.go index 061f65424..3488bb4b6 100644 --- a/cache/cache.go +++ b/cache/cache.go @@ -1,6 +1,7 @@ package cache import ( + "crypto" "io" ) @@ -45,6 +46,18 @@ func (e *Error) Error() string { return e.Text } +// Get hash type from length of hash string +func GetHashType(hash string) crypto.Hash { + if len(hash) == 32 { + return crypto.MD5 + } else if len(hash) == 40 { + return crypto.SHA1 + } else if len(hash) == 64 { + return crypto.SHA256 + } + return 0 +} + // Cache backends implement this interface, and are optionally used // by DiskCache. CacheProxy implementations are expected to be safe // for concurrent use. diff --git a/cache/disk/disk.go b/cache/disk/disk.go index 5d1bf511d..fafc0c841 100644 --- a/cache/disk/disk.go +++ b/cache/disk/disk.go @@ -1,7 +1,6 @@ package disk import ( - "crypto/sha256" "encoding/hex" "fmt" "io" @@ -58,8 +57,6 @@ type nameAndInfo struct { info os.FileInfo } -const sha256HashStrSize = sha256.Size * 2 // Two hex characters per byte. - // New returns a new instance of a filesystem-based cache rooted at `dir`, // with a maximum size of `maxSizeBytes` bytes and an optional backend `proxy`. // DiskCache is safe for concurrent use. @@ -195,11 +192,9 @@ func (c *DiskCache) loadExistingFiles() error { // a non-nil error is returned. func (c *DiskCache) Put(kind cache.EntryKind, hash string, expectedSize int64, r io.Reader) error { - // The hash format is checked properly in the http/grpc code. - // Just perform a simple/fast check here, to catch bad tests. - if len(hash) != sha256HashStrSize { - return fmt.Errorf("Invalid hash size: %d, expected: %d", - len(hash), sha256.Size) + hashType := cache.GetHashType(hash) + if hashType == 0 { + return fmt.Errorf("Invalid hash: %s", hash) } key := cacheKey(kind, hash) @@ -272,7 +267,7 @@ func (c *DiskCache) Put(kind cache.EntryKind, hash string, expectedSize int64, r var bytesCopied int64 = 0 if kind == cache.CAS { - hasher := sha256.New() + hasher := hashType.New() if bytesCopied, err = io.Copy(io.MultiWriter(f, hasher), r); err != nil { return err } @@ -347,11 +342,8 @@ func (c *DiskCache) availableOrTryProxy(key string) (bool, *lruItem) { // it is returned. func (c *DiskCache) Get(kind cache.EntryKind, hash string) (io.ReadCloser, int64, error) { - // The hash format is checked properly in the http/grpc code. - // Just perform a simple/fast check here, to catch bad tests. - if len(hash) != sha256HashStrSize { - return nil, -1, fmt.Errorf("Invalid hash size: %d, expected: %d", - len(hash), sha256.Size) + if cache.GetHashType(hash) == 0 { + return nil, -1, fmt.Errorf("Invalid hash: %s", hash) } var err error @@ -461,9 +453,7 @@ func (c *DiskCache) Get(kind cache.EntryKind, hash string) (io.ReadCloser, int64 // one) will be checked. func (c *DiskCache) Contains(kind cache.EntryKind, hash string) bool { - // The hash format is checked properly in the http/grpc code. - // Just perform a simple/fast check here, to catch bad tests. - if len(hash) != sha256HashStrSize { + if cache.GetHashType(hash) == 0 { return false } diff --git a/cache/disk/disk_test.go b/cache/disk/disk_test.go index 86934aafb..1970415cf 100644 --- a/cache/disk/disk_test.go +++ b/cache/disk/disk_test.go @@ -68,7 +68,9 @@ func checkItems(cache *DiskCache, expSize int64, expNum int) error { const KEY = "a-key" const CONTENTS = "hello" -const CONTENTS_HASH = "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824" +const CONTENTS_MD5 = "5d41402abc4b2a76b9719d911017c592" +const CONTENTS_SHA1 = "aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d" +const CONTENTS_SHA256 = "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824" func TestCacheBasics(t *testing.T) { cacheDir := tempDir(t) @@ -81,7 +83,7 @@ func TestCacheBasics(t *testing.T) { } // Non-existing item - rdr, sizeBytes, err := testCache.Get(cache.CAS, CONTENTS_HASH) + rdr, sizeBytes, err := testCache.Get(cache.CAS, CONTENTS_SHA256) if err != nil { t.Fatal(err) } @@ -90,7 +92,7 @@ func TestCacheBasics(t *testing.T) { } // Add an item - err = testCache.Put(cache.CAS, CONTENTS_HASH, int64(len(CONTENTS)), strings.NewReader(CONTENTS)) + err = testCache.Put(cache.CAS, CONTENTS_SHA256, int64(len(CONTENTS)), strings.NewReader(CONTENTS)) if err != nil { t.Fatal(err) } @@ -103,7 +105,7 @@ func TestCacheBasics(t *testing.T) { } // Get the item back - rdr, sizeBytes, err = testCache.Get(cache.CAS, CONTENTS_HASH) + rdr, sizeBytes, err = testCache.Get(cache.CAS, CONTENTS_SHA256) if err != nil { t.Fatal(err) } @@ -137,7 +139,7 @@ func TestCacheEviction(t *testing.T) { strReader := strings.NewReader(strings.Repeat("a", i)) // Suitably-sized, unique key for these testcases: - key := fmt.Sprintf("%0*d", sha256HashStrSize, i) + key := fmt.Sprintf("%0*d", sha256.Size*2, i) if len(key) != sha256.Size*2 { t.Fatalf("invalid testcase- key length should be %d, not %d: %s", sha256.Size*2, len(key), key) @@ -281,7 +283,7 @@ func TestCacheExistingFiles(t *testing.T) { } // Adding a new file should evict items[0] (the oldest) - err = testCache.Put(cache.CAS, CONTENTS_HASH, int64(len(CONTENTS)), strings.NewReader(CONTENTS)) + err = testCache.Put(cache.CAS, CONTENTS_SHA256, int64(len(CONTENTS)), strings.NewReader(CONTENTS)) if err != nil { t.Fatal(err) } @@ -449,6 +451,42 @@ func TestDistinctKeyspaces(t *testing.T) { } } +func TestCacheHashTypes(t *testing.T) { + // Test that we can use CAS and AC with a MD5 and a SHA1 hash + cacheDir := testutils.TempDir(t) + defer os.RemoveAll(cacheDir) + + cacheSize := int64(1024) + testCache := New(cacheDir, cacheSize, nil) + + var err error + err = putGetCompareBytes(cache.CAS, CONTENTS_MD5, []byte(CONTENTS), testCache) + if err != nil { + t.Fatal(err) + } + + err = putGetCompareBytes(cache.AC, CONTENTS_MD5, []byte(CONTENTS), testCache) + if err != nil { + t.Fatal(err) + } + + err = putGetCompareBytes(cache.CAS, CONTENTS_SHA1, []byte(CONTENTS), testCache) + if err != nil { + t.Fatal(err) + } + + err = putGetCompareBytes(cache.AC, CONTENTS_SHA1, []byte(CONTENTS), testCache) + if err != nil { + t.Fatal(err) + } + + _, numItems := testCache.Stats() + if numItems != 4 { + t.Fatalf("Expected test cache size 4 but was %d", + numItems) + } +} + // Code copied from cache/http/http_test.go type testServer struct { srv *httptest.Server diff --git a/cache/s3/s3.go b/cache/s3/s3.go index 068e4efa9..71a15babe 100644 --- a/cache/s3/s3.go +++ b/cache/s3/s3.go @@ -1,9 +1,14 @@ package s3 import ( + "bytes" "errors" + "crypto" + "encoding/base64" + "encoding/hex" "fmt" "io" + "io/ioutil" "log" "github.com/buchgr/bazel-remote/cache" @@ -99,9 +104,35 @@ func logResponse(log cache.Logger, method, bucket, key string, err error) { } func (c *s3Cache) uploadFile(item uploadReq) { - uploadDigest := "" + uploadDigestSha256 := "" + uploadDigestMd5Base64 := "" if item.kind == cache.CAS { - uploadDigest = item.hash + hashType := cache.GetHashType(item.hash) + if hashType == crypto.SHA256 { + // Use SHA256 hash as-is + uploadDigestSha256 = item.hash + } else if hashType == crypto.MD5 { + // Convert MD5 hex string to base64 encoding + md5Bytes, err := hex.DecodeString(item.hash) + if err != nil { + return + } + uploadDigestMd5Base64 = base64.StdEncoding.EncodeToString(md5Bytes) + } else { + // Hash the data using SHA256 + data, err := ioutil.ReadAll(item.rdr) + if err != nil { + return + } + hasher := crypto.SHA256.New() + if _, err := io.Copy(io.Writer(hasher), bytes.NewBuffer(data)); err != nil { + return + } + uploadDigestSha256 = hex.EncodeToString(hasher.Sum(nil)) + + // Create a new reader as we read item.rdr for hashing + item.rdr = bytes.NewReader(data) + } } _, err := c.mcore.PutObject( @@ -109,8 +140,8 @@ func (c *s3Cache) uploadFile(item uploadReq) { c.objectKey(item.hash, item.kind), // objectName item.rdr, // reader item.size, // objectSize - "", // md5base64 - uploadDigest, // sha256 + uploadDigestMd5Base64, // md5base64 + uploadDigestSha256, // sha256 map[string]string{ "Content-Type": "application/octet-stream", }, // metadata diff --git a/server/http.go b/server/http.go index d0263715f..de601236c 100644 --- a/server/http.go +++ b/server/http.go @@ -2,6 +2,9 @@ package server import ( "bytes" + _ "crypto/md5" + _ "crypto/sha1" + _ "crypto/sha256" "encoding/json" "fmt" "html" @@ -19,7 +22,7 @@ import ( "github.com/golang/protobuf/proto" ) -var blobNameSHA256 = regexp.MustCompile("^/?(.*/)?(ac/|cas/)([a-f0-9]{64})$") +var blobName = regexp.MustCompile("^/?(.*/)?(ac/|cas/)([a-f0-9]{32,64})$") // HTTPCache ... type HTTPCache interface { @@ -69,22 +72,28 @@ func NewHTTPCache(cache *disk.DiskCache, accessLogger cache.Logger, errorLogger // Parse cache artifact information from the request URL func parseRequestURL(url string, validateAC bool) (cache.EntryKind, string, error) { - m := blobNameSHA256.FindStringSubmatch(url) + m := blobName.FindStringSubmatch(url) if m == nil { - err := fmt.Errorf("resource name must be a SHA256 hash in hex. "+ + err := fmt.Errorf("resource name must be a hash in hex. "+ "got '%s'", html.EscapeString(url)) return 0, "", err } parts := m[2:] if len(parts) != 2 { - err := fmt.Errorf("the path '%s' is invalid. expected (ac/|cas/)sha256", + err := fmt.Errorf("the path '%s' is invalid. expected (ac/|cas/)hash", html.EscapeString(url)) return 0, "", err } // The regex ensures that parts[0] can only be "ac/" or "cas/" hash := parts[1] + if cache.GetHashType(hash) == 0 { + err := fmt.Errorf("hash must be md5, sha1 or sha256. "+ + "got '%s'", html.EscapeString(url)) + return 0, "", err + } + if parts[0] == "cas/" { return cache.CAS, hash, nil }