http: Add support for using SHA1, MD5 or SHA256 hashes. #175

Closed
wants to merge 1 commit
13 changes: 13 additions & 0 deletions cache/cache.go
@@ -1,6 +1,7 @@
package cache

import (
"crypto"
"io"
)

@@ -45,6 +46,18 @@ func (e *Error) Error() string {
return e.Text
}

// GetHashType infers the hash type from the length of the hex-encoded hash string, returning 0 for unrecognized lengths
func GetHashType(hash string) crypto.Hash {
if len(hash) == 32 {
return crypto.MD5
} else if len(hash) == 40 {
return crypto.SHA1
} else if len(hash) == 64 {
return crypto.SHA256
}
return 0
}

// Cache backends implement this interface, and are optionally used
// by DiskCache. CacheProxy implementations are expected to be safe
// for concurrent use.
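For context, GetHashType infers the algorithm purely from the hex digest length: 32 characters means MD5, 40 means SHA1 and 64 means SHA256. A minimal standalone sketch of how a caller might use it (not part of this PR; note that the matching crypto/* packages must be blank-imported, as server/http.go does below, or hashType.New() will panic):

package main

import (
	_ "crypto/md5"    // register MD5 so crypto.MD5.New() works
	_ "crypto/sha1"   // register SHA1
	_ "crypto/sha256" // register SHA256
	"encoding/hex"
	"fmt"

	"github.com/buchgr/bazel-remote/cache"
)

// verifyKey reports whether data hashes to the given hex key, using whichever
// algorithm matches the key's length.
func verifyKey(key string, data []byte) (bool, error) {
	hashType := cache.GetHashType(key)
	if hashType == 0 || !hashType.Available() {
		return false, fmt.Errorf("unsupported hash length: %d", len(key))
	}
	hasher := hashType.New()
	hasher.Write(data)
	return hex.EncodeToString(hasher.Sum(nil)) == key, nil
}

func main() {
	ok, err := verifyKey("5d41402abc4b2a76b9719d911017c592", []byte("hello")) // md5("hello")
	fmt.Println(ok, err)                                                      // true <nil>
}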
24 changes: 7 additions & 17 deletions cache/disk/disk.go
@@ -1,7 +1,6 @@
package disk

import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
@@ -58,8 +57,6 @@ type nameAndInfo struct {
info os.FileInfo
}

const sha256HashStrSize = sha256.Size * 2 // Two hex characters per byte.

// New returns a new instance of a filesystem-based cache rooted at `dir`,
// with a maximum size of `maxSizeBytes` bytes and an optional backend `proxy`.
// DiskCache is safe for concurrent use.
@@ -195,11 +192,9 @@ func (c *DiskCache) loadExistingFiles() error {
// a non-nil error is returned.
func (c *DiskCache) Put(kind cache.EntryKind, hash string, expectedSize int64, r io.Reader) error {

// The hash format is checked properly in the http/grpc code.
// Just perform a simple/fast check here, to catch bad tests.
if len(hash) != sha256HashStrSize {
return fmt.Errorf("Invalid hash size: %d, expected: %d",
len(hash), sha256.Size)
hashType := cache.GetHashType(hash)
if hashType == 0 {
return fmt.Errorf("Invalid hash: %s", hash)
}

key := cacheKey(kind, hash)
@@ -272,7 +267,7 @@ func (c *DiskCache) Put(kind cache.EntryKind, hash string, expectedSize int64, r

var bytesCopied int64 = 0
if kind == cache.CAS {
hasher := sha256.New()
hasher := hashType.New()
if bytesCopied, err = io.Copy(io.MultiWriter(f, hasher), r); err != nil {
return err
}
@@ -347,11 +342,8 @@ func (c *DiskCache) availableOrTryProxy(key string) (bool, *lruItem) {
// it is returned.
func (c *DiskCache) Get(kind cache.EntryKind, hash string) (io.ReadCloser, int64, error) {

// The hash format is checked properly in the http/grpc code.
// Just perform a simple/fast check here, to catch bad tests.
if len(hash) != sha256HashStrSize {
return nil, -1, fmt.Errorf("Invalid hash size: %d, expected: %d",
len(hash), sha256.Size)
if cache.GetHashType(hash) == 0 {
return nil, -1, fmt.Errorf("Invalid hash: %s", hash)
}

var err error
@@ -461,9 +453,7 @@ func (c *DiskCache) Get(kind cache.EntryKind, hash string) (io.ReadCloser, int64
// one) will be checked.
func (c *DiskCache) Contains(kind cache.EntryKind, hash string) bool {

// The hash format is checked properly in the http/grpc code.
// Just perform a simple/fast check here, to catch bad tests.
if len(hash) != sha256HashStrSize {
if cache.GetHashType(hash) == 0 {
return false
}

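Taken together, the disk.go changes replace the fixed sha256HashStrSize length check with cache.GetHashType, and Put now verifies CAS content with whichever algorithm the key's length implies. A rough sketch of that verification step under assumed names (this helper is illustrative, not the actual DiskCache code):

package disksketch

import (
	_ "crypto/md5"
	_ "crypto/sha1"
	_ "crypto/sha256"
	"encoding/hex"
	"fmt"
	"io"

	"github.com/buchgr/bazel-remote/cache"
)

// verifyCASWrite copies r into w while hashing it with the algorithm implied
// by the key's length, then checks the computed digest against the key.
func verifyCASWrite(key string, w io.Writer, r io.Reader) (int64, error) {
	hashType := cache.GetHashType(key)
	if hashType == 0 {
		return 0, fmt.Errorf("invalid hash: %s", key)
	}
	hasher := hashType.New()
	n, err := io.Copy(io.MultiWriter(w, hasher), r)
	if err != nil {
		return n, err
	}
	if actual := hex.EncodeToString(hasher.Sum(nil)); actual != key {
		return n, fmt.Errorf("checksums don't match. Expected %s, found %s", key, actual)
	}
	return n, nil
}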
50 changes: 44 additions & 6 deletions cache/disk/disk_test.go
@@ -68,7 +68,9 @@ func checkItems(cache *DiskCache, expSize int64, expNum int) error {

const KEY = "a-key"
const CONTENTS = "hello"
const CONTENTS_HASH = "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824"
const CONTENTS_MD5 = "5d41402abc4b2a76b9719d911017c592"
const CONTENTS_SHA1 = "aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d"
const CONTENTS_SHA256 = "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824"

func TestCacheBasics(t *testing.T) {
cacheDir := tempDir(t)
@@ -81,7 +83,7 @@ func TestCacheBasics(t *testing.T) {
}

// Non-existing item
rdr, sizeBytes, err := testCache.Get(cache.CAS, CONTENTS_HASH)
rdr, sizeBytes, err := testCache.Get(cache.CAS, CONTENTS_SHA256)
if err != nil {
t.Fatal(err)
}
@@ -90,7 +92,7 @@ func TestCacheBasics(t *testing.T) {
}

// Add an item
err = testCache.Put(cache.CAS, CONTENTS_HASH, int64(len(CONTENTS)), strings.NewReader(CONTENTS))
err = testCache.Put(cache.CAS, CONTENTS_SHA256, int64(len(CONTENTS)), strings.NewReader(CONTENTS))
if err != nil {
t.Fatal(err)
}
@@ -103,7 +105,7 @@ func TestCacheBasics(t *testing.T) {
}

// Get the item back
rdr, sizeBytes, err = testCache.Get(cache.CAS, CONTENTS_HASH)
rdr, sizeBytes, err = testCache.Get(cache.CAS, CONTENTS_SHA256)
if err != nil {
t.Fatal(err)
}
@@ -137,7 +139,7 @@ func TestCacheEviction(t *testing.T) {
strReader := strings.NewReader(strings.Repeat("a", i))

// Suitably-sized, unique key for these testcases:
key := fmt.Sprintf("%0*d", sha256HashStrSize, i)
key := fmt.Sprintf("%0*d", sha256.Size*2, i)
if len(key) != sha256.Size*2 {
t.Fatalf("invalid testcase- key length should be %d, not %d: %s",
sha256.Size*2, len(key), key)
@@ -281,7 +283,7 @@ func TestCacheExistingFiles(t *testing.T) {
}

// Adding a new file should evict items[0] (the oldest)
err = testCache.Put(cache.CAS, CONTENTS_HASH, int64(len(CONTENTS)), strings.NewReader(CONTENTS))
err = testCache.Put(cache.CAS, CONTENTS_SHA256, int64(len(CONTENTS)), strings.NewReader(CONTENTS))
if err != nil {
t.Fatal(err)
}
@@ -449,6 +451,42 @@ func TestDistinctKeyspaces(t *testing.T) {
}
}

func TestCacheHashTypes(t *testing.T) {
// Test that we can use the CAS and AC caches with MD5 and SHA1 hashes
cacheDir := testutils.TempDir(t)
defer os.RemoveAll(cacheDir)

cacheSize := int64(1024)
testCache := New(cacheDir, cacheSize, nil)

var err error
err = putGetCompareBytes(cache.CAS, CONTENTS_MD5, []byte(CONTENTS), testCache)
if err != nil {
t.Fatal(err)
}

err = putGetCompareBytes(cache.AC, CONTENTS_MD5, []byte(CONTENTS), testCache)
if err != nil {
t.Fatal(err)
}

err = putGetCompareBytes(cache.CAS, CONTENTS_SHA1, []byte(CONTENTS), testCache)
if err != nil {
t.Fatal(err)
}

err = putGetCompareBytes(cache.AC, CONTENTS_SHA1, []byte(CONTENTS), testCache)
if err != nil {
t.Fatal(err)
}

_, numItems := testCache.Stats()
if numItems != 4 {
t.Fatalf("Expected test cache size 4 but was %d",
numItems)
}
}

// Code copied from cache/http/http_test.go
type testServer struct {
srv *httptest.Server
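The new CONTENTS_MD5, CONTENTS_SHA1 and CONTENTS_SHA256 constants are simply the three digests of the test payload "hello". A quick standalone check (not part of the PR):

package main

import (
	"crypto/md5"
	"crypto/sha1"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
)

func main() {
	data := []byte("hello")
	md5Sum := md5.Sum(data)
	sha1Sum := sha1.Sum(data)
	sha256Sum := sha256.Sum256(data)
	fmt.Println(hex.EncodeToString(md5Sum[:]))    // 5d41402abc4b2a76b9719d911017c592
	fmt.Println(hex.EncodeToString(sha1Sum[:]))   // aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d
	fmt.Println(hex.EncodeToString(sha256Sum[:])) // 2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824
}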
39 changes: 35 additions & 4 deletions cache/s3/s3.go
@@ -1,9 +1,14 @@
package s3

import (
"bytes"
"errors"
"crypto"
"encoding/base64"
"encoding/hex"
"fmt"
"io"
"io/ioutil"
"log"

"github.com/buchgr/bazel-remote/cache"
@@ -99,18 +104,44 @@ func logResponse(log cache.Logger, method, bucket, key string, err error) {
}

func (c *s3Cache) uploadFile(item uploadReq) {
uploadDigest := ""
uploadDigestSha256 := ""
uploadDigestMd5Base64 := ""
if item.kind == cache.CAS {
uploadDigest = item.hash
hashType := cache.GetHashType(item.hash)
if hashType == crypto.SHA256 {
// Use SHA256 hash as-is
uploadDigestSha256 = item.hash
} else if hashType == crypto.MD5 {
// Convert MD5 hex string to base64 encoding
md5Bytes, err := hex.DecodeString(item.hash)
if err != nil {
return
}
uploadDigestMd5Base64 = base64.StdEncoding.EncodeToString(md5Bytes)
} else {
// Hash the data using SHA256
data, err := ioutil.ReadAll(item.rdr)
if err != nil {
return
}
hasher := crypto.SHA256.New()
if _, err := io.Copy(io.Writer(hasher), bytes.NewBuffer(data)); err != nil {
return
}
uploadDigestSha256 = hex.EncodeToString(hasher.Sum(nil))

// Create a new reader, because item.rdr was consumed while hashing
item.rdr = bytes.NewReader(data)
}
}

_, err := c.mcore.PutObject(
c.bucket, // bucketName
c.objectKey(item.hash, item.kind), // objectName
item.rdr, // reader
item.size, // objectSize
"", // md5base64
uploadDigest, // sha256
uploadDigestMd5Base64, // md5base64
uploadDigestSha256, // sha256
map[string]string{
"Content-Type": "application/octet-stream",
}, // metadata
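As the inline parameter comments above indicate, the PutObject call used here takes the MD5 checksum base64-encoded and the SHA256 checksum hex-encoded. A SHA256 CAS key can be passed through unchanged, an MD5 key (stored as hex in the cache) has to be re-encoded to base64, and for anything else (i.e. SHA1 keys) the body is buffered so a fresh SHA256 digest can be computed from it. A standalone sketch of the hex-to-base64 conversion (helper name is illustrative):

package main

import (
	"encoding/base64"
	"encoding/hex"
	"fmt"
)

// md5HexToBase64 converts a hex-encoded MD5 digest into the base64 form
// expected by the md5base64 parameter.
func md5HexToBase64(md5Hex string) (string, error) {
	raw, err := hex.DecodeString(md5Hex)
	if err != nil {
		return "", err
	}
	return base64.StdEncoding.EncodeToString(raw), nil
}

func main() {
	b64, err := md5HexToBase64("5d41402abc4b2a76b9719d911017c592") // md5("hello")
	if err != nil {
		panic(err)
	}
	fmt.Println(b64) // XUFAKrxLKna5cZ2REBfFkg==
}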
17 changes: 13 additions & 4 deletions server/http.go
@@ -2,6 +2,9 @@ package server

import (
"bytes"
_ "crypto/md5"
_ "crypto/sha1"
_ "crypto/sha256"
"encoding/json"
"fmt"
"html"
@@ -19,7 +22,7 @@ import (
"github.com/golang/protobuf/proto"
)

var blobNameSHA256 = regexp.MustCompile("^/?(.*/)?(ac/|cas/)([a-f0-9]{64})$")
var blobName = regexp.MustCompile("^/?(.*/)?(ac/|cas/)([a-f0-9]{32,64})$")

// HTTPCache ...
type HTTPCache interface {
@@ -69,22 +72,28 @@ func NewHTTPCache(cache *disk.DiskCache, accessLogger cache.Logger, errorLogger

// Parse cache artifact information from the request URL
func parseRequestURL(url string, validateAC bool) (cache.EntryKind, string, error) {
m := blobNameSHA256.FindStringSubmatch(url)
m := blobName.FindStringSubmatch(url)
if m == nil {
err := fmt.Errorf("resource name must be a SHA256 hash in hex. "+
err := fmt.Errorf("resource name must be a hash in hex. "+
"got '%s'", html.EscapeString(url))
return 0, "", err
}

parts := m[2:]
if len(parts) != 2 {
err := fmt.Errorf("the path '%s' is invalid. expected (ac/|cas/)sha256",
err := fmt.Errorf("the path '%s' is invalid. expected (ac/|cas/)hash",
html.EscapeString(url))
return 0, "", err
}

// The regex ensures that parts[0] can only be "ac/" or "cas/"
hash := parts[1]
if cache.GetHashType(hash) == 0 {
err := fmt.Errorf("hash must be md5, sha1 or sha256. "+
"got '%s'", html.EscapeString(url))
return 0, "", err
}

if parts[0] == "cas/" {
return cache.CAS, hash, nil
}
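Note that the widened pattern accepts any hex run of 32 to 64 characters, so the extra cache.GetHashType check above is what narrows requests down to digests of exactly 32 (MD5), 40 (SHA1) or 64 (SHA256) characters. A small standalone illustration (not part of the PR):

package main

import (
	"fmt"
	"regexp"
)

var blobName = regexp.MustCompile("^/?(.*/)?(ac/|cas/)([a-f0-9]{32,64})$")

func main() {
	urls := []string{
		"/cas/5d41402abc4b2a76b9719d911017c592",                                 // MD5 (32): matches, valid length
		"/ac/aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d",                          // SHA1 (40): matches, valid length
		"/cas/2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824", // SHA256 (64): matches, valid length
		"/cas/abcdef0123456789abcdef0123456789abc",                              // 35 hex chars: matches the regexp, rejected by GetHashType
	}
	for _, u := range urls {
		fmt.Println(u, "->", blobName.MatchString(u))
	}
}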