Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update objects timestamps on cache hit #542

Merged
merged 4 commits into from
Apr 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .bazelci/system-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ rm -rf $test_cache_dir
--s3.access_key_id minioadmin \
--s3.secret_access_key minioadmin \
--s3.disable_ssl \
--s3.update_timestamps \
> log.stdout 2> log.stderr &
test_cache_pid=$!
echo "Test cache pid: $test_cache_pid"
Expand Down Expand Up @@ -141,6 +142,7 @@ rm -rf $test_cache_dir
--s3.access_key_id minioadmin \
--s3.secret_access_key minioadmin \
--s3.disable_ssl \
--s3.update_timestamps \
> log.stdout 2> log.stderr &
test_cache_pid=$!
echo "Test cache pid: $test_cache_pid"
Expand Down Expand Up @@ -219,6 +221,7 @@ rm -rf $test_cache_dir
--s3.access_key_id minioadmin \
--s3.secret_access_key minioadmin \
--s3.disable_ssl \
--s3.update_timestamps \
> log.stdout 2> log.stderr &
test_cache_pid=$!
echo "Test cache pid: $test_cache_pid"
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,10 @@ OPTIONS:
backend. (default: false, ie enable TLS/SSL)
[$BAZEL_REMOTE_S3_DISABLE_SSL]

--s3.update_timestamps Whether to update timestamps of object on cache
hit. (default: false)
[$BAZEL_REMOTE_S3_UPDATE_TIMESTAMPS]

--s3.iam_role_endpoint value Endpoint for using IAM security credentials.
By default it will look for credentials in the standard locations for the
AWS platform. Applies to s3 auth method(s): iam_role.
Expand Down
4 changes: 3 additions & 1 deletion cache/s3proxy/minio-test-setup.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ production use.
--s3.auth_method access_key \
--s3.access_key_id minioadmin \
--s3.secret_access_key minioadmin \
--s3.disable_ssl
--s3.disable_ssl \
--s3.update_timestamps


## Build something with the bazel-remote cache

Expand Down
52 changes: 38 additions & 14 deletions cache/s3proxy/s3proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,15 @@ type uploadReq struct {
}

type s3Cache struct {
mcore *minio.Core
prefix string
bucket string
uploadQueue chan<- uploadReq
accessLogger cache.Logger
errorLogger cache.Logger
v2mode bool
objectKey func(hash string, kind cache.EntryKind) string
mcore *minio.Core
prefix string
bucket string
uploadQueue chan<- uploadReq
accessLogger cache.Logger
errorLogger cache.Logger
v2mode bool
updateTimestamps bool
objectKey func(hash string, kind cache.EntryKind) string
}

var (
Expand All @@ -56,6 +57,7 @@ func New(
Prefix string,
Credentials *credentials.Credentials,
DisableSSL bool,
UpdateTimestamps bool,
Region string,

storageMode string, accessLogger cache.Logger,
Expand Down Expand Up @@ -88,12 +90,13 @@ func New(
}

c := &s3Cache{
mcore: minioCore,
prefix: Prefix,
bucket: Bucket,
accessLogger: accessLogger,
errorLogger: errorLogger,
v2mode: storageMode == "zstd",
mcore: minioCore,
prefix: Prefix,
bucket: Bucket,
accessLogger: accessLogger,
errorLogger: errorLogger,
v2mode: storageMode == "zstd",
updateTimestamps: UpdateTimestamps,
}

if c.v2mode {
Expand Down Expand Up @@ -196,6 +199,23 @@ func (c *s3Cache) Put(ctx context.Context, kind cache.EntryKind, hash string, si
}
}

func (c *s3Cache) UpdateModificationTimestamp(ctx context.Context, bucket string, object string) {
src := minio.CopySrcOptions{
Bucket: bucket,
Object: object,
}

dst := minio.CopyDestOptions{
Bucket: bucket,
Object: object,
ReplaceMetadata: true,
}

_, err := c.mcore.ComposeObject(context.Background(), dst, src)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see that the copying is done server side- nice. Are there any downsides to having this always on?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question - I don't see any downsides in terms of performance or storage usage but I'm running self hosted s3 storage. As I understand AWS s3 pricing documentation there is some cost for coping object - you pay the S3 charges for storage in the selected destination S3 storage classes, for the primary copy, for replication PUT request. So making this turn on by default could potentially generate costs for people using AWS so maybe it's better to leave it as opt-in?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, let's make it opt-in to be on the safe side.


logResponse(c.accessLogger, "COMPOSE", bucket, object, err)
}

func (c *s3Cache) Get(ctx context.Context, kind cache.EntryKind, hash string) (io.ReadCloser, int64, error) {

rc, info, _, err := c.mcore.GetObject(
Expand All @@ -216,6 +236,10 @@ func (c *s3Cache) Get(ctx context.Context, kind cache.EntryKind, hash string) (i
}
cacheHits.Inc()

if c.updateTimestamps {
c.UpdateModificationTimestamp(ctx, c.bucket, c.objectKey(hash, kind))
}

logResponse(c.accessLogger, "DOWNLOAD", c.bucket, c.objectKey(hash, kind), nil)

if kind == cache.CAS && c.v2mode {
Expand Down
1 change: 1 addition & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,7 @@ func get(ctx *cli.Context) (*Config, error) {
AccessKeyID: ctx.String("s3.access_key_id"),
SecretAccessKey: ctx.String("s3.secret_access_key"),
DisableSSL: ctx.Bool("s3.disable_ssl"),
UpdateTimestamps: ctx.Bool("s3.update_timestamps"),
mostynb marked this conversation as resolved.
Show resolved Hide resolved
IAMRoleEndpoint: ctx.String("s3.iam_role_endpoint"),
Region: ctx.String("s3.region"),
AWSProfile: ctx.String("s3.aws_profile"),
Expand Down
1 change: 1 addition & 0 deletions config/proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ func (c *Config) setProxy() error {
c.S3CloudStorage.Prefix,
creds,
c.S3CloudStorage.DisableSSL,
c.S3CloudStorage.UpdateTimestamps,
c.S3CloudStorage.Region,
c.StorageMode, c.AccessLogger, c.ErrorLogger, c.NumUploaders, c.MaxQueuedUploads)
return nil
Expand Down
1 change: 1 addition & 0 deletions config/s3.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ type S3CloudStorageConfig struct {
AccessKeyID string `yaml:"access_key_id"`
SecretAccessKey string `yaml:"secret_access_key"`
DisableSSL bool `yaml:"disable_ssl"`
UpdateTimestamps bool `yaml:"update_timestamps"`
IAMRoleEndpoint string `yaml:"iam_role_endpoint"`
Region string `yaml:"region"`
KeyVersion *int `yaml:"key_version"`
Expand Down
6 changes: 6 additions & 0 deletions utils/flags/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,12 @@ func GetCliFlags() []cli.Flag {
DefaultText: "false, ie enable TLS/SSL",
EnvVars: []string{"BAZEL_REMOTE_S3_DISABLE_SSL"},
},
&cli.BoolFlag{
Name: "s3.update_timestamps",
Usage: "Whether to update timestamps of object on cache hit.",
DefaultText: "false",
EnvVars: []string{"BAZEL_REMOTE_S3_UPDATE_TIMESTAMPS"},
},
&cli.StringFlag{
Name: "s3.iam_role_endpoint",
Value: "",
Expand Down