From fd8006c2a4fbf62b4a88796116f1fca2f0eaaac4 Mon Sep 17 00:00:00 2001 From: Edward McFarlane <3036610+emcfarlane@users.noreply.github.com> Date: Wed, 14 Aug 2024 13:16:19 -0400 Subject: [PATCH] Cache doc and license storage buckets (#3232) For large repositories, buckets can be slow to iterate over. Use memory-backed buckets to reduce file access for both doc and license files. This can cut build times by up to 2/3 on large repositories by avoiding iterating over OS storage buckets. As an example, running a build of a single proto file in a large workspace of 96k files takes 1.60s with `v1.36.0`, 1.06s with `v1.31.0` and 0.56s with this change. --- .../buf/bufworkspace/workspace_targeting.go | 12 +++++-- .../bufpkg/bufmodule/module_read_bucket.go | 36 ++++++++++++++----- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/private/buf/bufworkspace/workspace_targeting.go b/private/buf/bufworkspace/workspace_targeting.go index db2abf4d8b..351bca7eb7 100644 --- a/private/buf/bufworkspace/workspace_targeting.go +++ b/private/buf/bufworkspace/workspace_targeting.go @@ -557,10 +557,18 @@ func getMappedModuleBucketAndModuleTargeting( ), ) } + docStorageReadBucket, err := bufmodule.GetDocStorageReadBucket(ctx, moduleBucket) + if err != nil { + return nil, nil, err + } + licenseStorageReadBucket, err := bufmodule.GetLicenseStorageReadBucket(ctx, moduleBucket) + if err != nil { + return nil, nil, err + } rootBuckets = append( rootBuckets, - bufmodule.GetDocStorageReadBucket(ctx, moduleBucket), - bufmodule.GetLicenseStorageReadBucket(moduleBucket), + docStorageReadBucket, + licenseStorageReadBucket, ) mappedModuleBucket := storage.MultiReadBucket(rootBuckets...) 
moduleTargeting, err := newModuleTargeting( diff --git a/private/bufpkg/bufmodule/module_read_bucket.go b/private/bufpkg/bufmodule/module_read_bucket.go index 411ae506d9..4c346b4e32 100644 --- a/private/bufpkg/bufmodule/module_read_bucket.go +++ b/private/bufpkg/bufmodule/module_read_bucket.go @@ -26,6 +26,7 @@ import ( "github.com/bufbuild/buf/private/pkg/normalpath" "github.com/bufbuild/buf/private/pkg/slicesext" "github.com/bufbuild/buf/private/pkg/storage" + "github.com/bufbuild/buf/private/pkg/storage/storagemem" "github.com/bufbuild/buf/private/pkg/syserror" "github.com/bufbuild/protocompile/parser/fastscan" "go.uber.org/multierr" @@ -228,20 +229,39 @@ func GetLicenseFile(ctx context.Context, moduleReadBucket ModuleReadBucket) (Fil // GetDocStorageReadBucket gets a storage.ReadBucket that just contains the documentation file(s). // // This is needed for i.e. using RootToExcludes in NewWorkspaceForBucket. -func GetDocStorageReadBucket(ctx context.Context, bucket storage.ReadBucket) storage.ReadBucket { - return storage.MapReadBucket( - bucket, - storage.MatchPathEqual(getDocFilePathForStorageReadBucket(ctx, bucket)), +func GetDocStorageReadBucket(ctx context.Context, bucket storage.ReadBucket) (storage.ReadBucket, error) { + // Store the documentation file in a new memory bucket for performance reasons. + docFilePath := getDocFilePathForStorageReadBucket(ctx, bucket) + if docFilePath == "" { + return storage.MultiReadBucket(), nil // nop bucket + } + content, err := storage.ReadPath(ctx, bucket, docFilePath) + if err != nil { + return nil, err + } + return storagemem.NewReadBucket( + map[string][]byte{ + docFilePath: content, + }, ) } // GetLicenseStorageReadBucket gets a storage.ReadBucket that just contains the license file(s). // // This is needed for i.e. using RootToExcludes in NewWorkspaceForBucket. 
-func GetLicenseStorageReadBucket(bucket storage.ReadBucket) storage.ReadBucket { - return storage.MapReadBucket( - bucket, - storage.MatchPathEqual(licenseFilePath), +func GetLicenseStorageReadBucket(ctx context.Context, bucket storage.ReadBucket) (storage.ReadBucket, error) { + // Store the license file in a new memory bucket for performance reasons. + content, err := storage.ReadPath(ctx, bucket, licenseFilePath) + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + return storage.MultiReadBucket(), nil // nop bucket + } + return nil, err + } + return storagemem.NewReadBucket( + map[string][]byte{ + licenseFilePath: content, + }, ) }