Skip to content

Commit

Permalink
Cache doc and license storage buckets (#3232)
Browse files Browse the repository at this point in the history
For large repositories, buckets can be slow to iterate over. Use
memory-backed buckets to reduce file access for both doc and license files.
This can cut build times by up to 2/3 on large repositories by avoiding
iteration over OS storage buckets. For example, building a
single proto file in a large workspace of 96k files takes 1.60s with
`v1.36.0`, 1.06s with `v1.31.0`, and 0.56s with this change.
  • Loading branch information
emcfarlane committed Aug 14, 2024
1 parent b063948 commit fd8006c
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 10 deletions.
12 changes: 10 additions & 2 deletions private/buf/bufworkspace/workspace_targeting.go
Original file line number Diff line number Diff line change
Expand Up @@ -557,10 +557,18 @@ func getMappedModuleBucketAndModuleTargeting(
),
)
}
docStorageReadBucket, err := bufmodule.GetDocStorageReadBucket(ctx, moduleBucket)
if err != nil {
return nil, nil, err
}
licenseStorageReadBucket, err := bufmodule.GetLicenseStorageReadBucket(ctx, moduleBucket)
if err != nil {
return nil, nil, err
}
rootBuckets = append(
rootBuckets,
bufmodule.GetDocStorageReadBucket(ctx, moduleBucket),
bufmodule.GetLicenseStorageReadBucket(moduleBucket),
docStorageReadBucket,
licenseStorageReadBucket,
)
mappedModuleBucket := storage.MultiReadBucket(rootBuckets...)
moduleTargeting, err := newModuleTargeting(
Expand Down
36 changes: 28 additions & 8 deletions private/bufpkg/bufmodule/module_read_bucket.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/bufbuild/buf/private/pkg/normalpath"
"github.com/bufbuild/buf/private/pkg/slicesext"
"github.com/bufbuild/buf/private/pkg/storage"
"github.com/bufbuild/buf/private/pkg/storage/storagemem"
"github.com/bufbuild/buf/private/pkg/syserror"
"github.com/bufbuild/protocompile/parser/fastscan"
"go.uber.org/multierr"
Expand Down Expand Up @@ -228,20 +229,39 @@ func GetLicenseFile(ctx context.Context, moduleReadBucket ModuleReadBucket) (Fil
// GetDocStorageReadBucket gets a storage.ReadBucket that just contains the documentation file(s).
//
// This is needed for e.g. using RootToExcludes in NewWorkspaceForBucket.
func GetDocStorageReadBucket(ctx context.Context, bucket storage.ReadBucket) storage.ReadBucket {
return storage.MapReadBucket(
bucket,
storage.MatchPathEqual(getDocFilePathForStorageReadBucket(ctx, bucket)),
func GetDocStorageReadBucket(ctx context.Context, bucket storage.ReadBucket) (storage.ReadBucket, error) {
	// Ask the helper which documentation file path applies to this bucket.
	// An empty path means there is nothing to copy.
	path := getDocFilePathForStorageReadBucket(ctx, bucket)
	if path == "" {
		// A MultiReadBucket over zero buckets serves as the nop bucket.
		return storage.MultiReadBucket(), nil
	}
	// Read the documentation file once up front so it can be served from a
	// memory bucket; this is done for performance reasons.
	data, err := storage.ReadPath(ctx, bucket, path)
	if err != nil {
		return nil, err
	}
	pathToData := map[string][]byte{path: data}
	return storagemem.NewReadBucket(pathToData)
}

// GetLicenseStorageReadBucket gets a storage.ReadBucket that just contains the license file(s).
//
// This is needed for e.g. using RootToExcludes in NewWorkspaceForBucket.
func GetLicenseStorageReadBucket(bucket storage.ReadBucket) storage.ReadBucket {
return storage.MapReadBucket(
bucket,
storage.MatchPathEqual(licenseFilePath),
func GetLicenseStorageReadBucket(ctx context.Context, bucket storage.ReadBucket) (storage.ReadBucket, error) {
	// Read the license file once up front so it can be served from a memory
	// bucket; this is done for performance reasons.
	data, err := storage.ReadPath(ctx, bucket, licenseFilePath)
	switch {
	case err == nil:
		pathToData := map[string][]byte{licenseFilePath: data}
		return storagemem.NewReadBucket(pathToData)
	case errors.Is(err, fs.ErrNotExist):
		// No license file present: a MultiReadBucket over zero buckets
		// serves as the nop bucket.
		return storage.MultiReadBucket(), nil
	default:
		// Any other read failure is returned to the caller unchanged.
		return nil, err
	}
}

Expand Down

0 comments on commit fd8006c

Please sign in to comment.