diff --git a/go.mod b/go.mod index e51cab5..ace8bd5 100644 --- a/go.mod +++ b/go.mod @@ -4,11 +4,14 @@ go 1.20 require ( github.com/hashicorp/go-retryablehttp v0.7.4 + github.com/klauspost/compress v1.16.7 github.com/psanford/memfs v0.0.0-20230130182539-4dbf7e3e865e github.com/sirupsen/logrus v1.9.3 github.com/stretchr/testify v1.8.4 gitlab.alpinelinux.org/alpine/go v0.7.0 go.lsp.dev/uri v0.3.0 + go.opentelemetry.io/otel v1.16.0 + go.opentelemetry.io/otel/trace v1.16.0 golang.org/x/build v0.0.0-20220928220451-9294235e16f5 golang.org/x/sync v0.3.0 golang.org/x/sys v0.9.0 @@ -19,14 +22,9 @@ require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/go-logr/logr v1.2.4 // indirect github.com/go-logr/stdr v1.2.2 // indirect - github.com/google/go-cmp v0.5.9 // indirect github.com/hashicorp/go-cleanhttp v0.5.2 // indirect - github.com/klauspost/compress v1.16.7 // indirect - github.com/klauspost/readahead v1.4.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - go.opentelemetry.io/otel v1.16.0 // indirect go.opentelemetry.io/otel/metric v1.16.0 // indirect - go.opentelemetry.io/otel/trace v1.16.0 // indirect gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 188457a..69b7f3a 100644 --- a/go.sum +++ b/go.sum @@ -11,7 +11,6 @@ github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= github.com/hashicorp/go-hclog v0.9.2 h1:CG6TE5H9/JXsFWJCfoIVpKFIkFe6ysEuHirp4DxCsHI= @@ -20,8 +19,6 @@ github.com/hashicorp/go-retryablehttp v0.7.4 h1:ZQgVdpTdAL7WpMIwLzCfbalOcSUdkDZn github.com/hashicorp/go-retryablehttp v0.7.4/go.mod h1:Jy/gPYAdjqffZ/yFGCFV2doI5wjtH1ewM9u8iYVjtX8= github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= -github.com/klauspost/readahead v1.4.0 h1:w4hQ3BpdLjBnRQkZyNi+nwdHU7eGP9buTexWK9lU7gY= -github.com/klauspost/readahead v1.4.0/go.mod h1:7bolpMKhT5LKskLwYXGSDOyA2TYtMFgdgV0Y8gy7QhA= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/psanford/memfs v0.0.0-20230130182539-4dbf7e3e865e h1:51xcRlSMBU5rhM9KahnJGfEsBPVPz3182TgFRowA8yY= diff --git a/pkg/apk/expandapk.go b/pkg/apk/expandapk.go index 7bab7ee..9981da8 100644 --- a/pkg/apk/expandapk.go +++ b/pkg/apk/expandapk.go @@ -20,7 +20,6 @@ import ( "os" "path/filepath" "strings" - "sync" "github.com/klauspost/compress/gzip" @@ -46,47 +45,80 @@ type APKExpanded struct { // The control data filename (a.k.a. ".PKGINFO") in tar.gz format ControlFile string - // The package data filename in tar.gz format + // The package data filename in .tar.gz format PackageFile string + // The package data filename in .tar format. + tarFile string + ControlHash []byte PackageHash []byte - - sync.Mutex - packageData io.ReadCloser } -const meg = 1 << 20 - -func (a *APKExpanded) SetPackageData(rc io.ReadCloser) { - a.Lock() - defer a.Unlock() +// This exists so we can have a bufio.ReadCloser. +type readCloser struct { + io.Reader + CloseFunc func() error +} - a.packageData = rc +// Close implements io.ReadCloser +func (rc *readCloser) Close() error { + return rc.CloseFunc() } +const meg = 1 << 20 + func (a *APKExpanded) PackageData() (io.ReadCloser, error) { - a.Lock() - defer a.Unlock() + // Use min(1MB, a.Size) bufio to avoid GC pressure for small packages. + bufSize := meg + if total := int(a.Size); total != 0 && total < bufSize { + bufSize = total + } - if a.packageData != nil { - rc := a.packageData - a.packageData = nil - return rc, nil + uf, err := os.Open(a.tarFile) + if err == nil { + return &readCloser{ + Reader: bufio.NewReaderSize(uf, bufSize), + CloseFunc: uf.Close, + }, nil + } else if !os.IsNotExist(err) { + return nil, fmt.Errorf("opening package data file: %w", err) } + // Handle old caches without the uncompressed file. f, err := os.Open(a.PackageFile) if err != nil { - return nil, err + return nil, fmt.Errorf("opening %q: %w", a.PackageFile, err) } - // Use min(1MB, a.Size) bufio to avoid GC pressure for small packages. - bufSize := meg - if total := int(a.Size); total != 0 && total < bufSize { - bufSize = total + br := bufio.NewReaderSize(f, bufSize) + zr, err := gzip.NewReader(br) + if err != nil { + return nil, fmt.Errorf("parsing %q: %w", a.PackageFile, err) } - return gzip.NewReader(bufio.NewReaderSize(f, bufSize)) + uf, err = os.Create(a.tarFile) + if err != nil { + return nil, fmt.Errorf("opening tar file %q: %w", a.tarFile, err) + } + bw := bufio.NewWriterSize(uf, 1<<20) + if _, err := io.Copy(bw, zr); err != nil { + return nil, fmt.Errorf("expanding %q: %w", a.PackageFile, err) + } + if err := bw.Flush(); err != nil { + return nil, fmt.Errorf("flushing %q: %w", a.tarFile, err) + } + + if _, err := uf.Seek(0, io.SeekStart); err != nil { + return nil, fmt.Errorf("seeking %q: %w", a.tarFile, err) + } + + br.Reset(uf) + + return &readCloser{ + Reader: br, + CloseFunc: uf.Close, + }, nil } func (a *APKExpanded) APK() (io.ReadCloser, error) { @@ -128,14 +160,10 @@ func (m *multiReadCloser) Close() error { } func (a *APKExpanded) Close() error { - errs := []error{} - if a.packageData != nil { - errs = append(errs, a.packageData.Close()) - } - if a.tempDir != "" { - errs = append(errs, os.RemoveAll(a.tempDir)) + if a.tempDir == "" { + return nil } - return errors.Join(errs...) + return os.RemoveAll(a.tempDir) } // An implementation of io.Writer designed specifically for use in the expandApk() method. @@ -338,13 +366,29 @@ func ExpandApk(ctx context.Context, source io.Reader, cacheDir string) (*APKExpa hashes = append(hashes, h.Sum(nil)) gzipStreams = append(gzipStreams, sw.CurrentName()) } else { - if err := checkSums(ctx, gzi); err != nil { + // While we verify checksums, also tee the tar to a separate file. + tarfilename := strings.TrimSuffix(sw.CurrentName(), ".gz") + tarfile, err := os.Create(tarfilename) + if err != nil { + return nil, fmt.Errorf("opening tar file: %w", err) + } + bw := bufio.NewWriterSize(tarfile, 1<<20) + tr := io.TeeReader(gzi, bw) + + if err := checkSums(ctx, tr); err != nil { return nil, fmt.Errorf("checking sums: %w", err) } - if _, err := io.Copy(io.Discard, gzi); err != nil { + if _, err := io.Copy(io.Discard, tr); err != nil { return nil, fmt.Errorf("expandApk error 3: %w", err) } + if err := bw.Flush(); err != nil { + return nil, fmt.Errorf("flushing tarfile: %w", err) + } + + if err := tarfile.Close(); err != nil { + return nil, fmt.Errorf("closing tarfile: %w", err) + } gzipStreams = append(gzipStreams, sw.CurrentName()) hashes = append(hashes, h.Sum(nil)) break @@ -395,6 +439,8 @@ func ExpandApk(ctx context.Context, source io.Reader, cacheDir string) (*APKExpa expanded.SignatureFile = gzipStreams[0] } + expanded.tarFile = strings.TrimSuffix(expanded.PackageFile, ".gz") + return &expanded, nil } diff --git a/pkg/apk/implementation.go b/pkg/apk/implementation.go index 8f730a2..1e8aea3 100644 --- a/pkg/apk/implementation.go +++ b/pkg/apk/implementation.go @@ -43,7 +43,6 @@ import ( apkfs "github.com/chainguard-dev/go-apk/pkg/fs" logger "github.com/chainguard-dev/go-apk/pkg/logger" "github.com/hashicorp/go-retryablehttp" - "github.com/klauspost/readahead" ) type APK struct { @@ -543,15 +542,6 @@ func (a *APK) FixateWorld(ctx context.Context, sourceDateEpoch *time.Time) error return fmt.Errorf("expanding %s: %w", pkg.Name, err) } - pd, err := exp.PackageData() - if err != nil { - return fmt.Errorf("package data: %w", err) - } - - // Start gunzipping this ahead of time so we can install it faster. - // We may want to tune these numbers a bit based on package size or count. - exp.SetPackageData(readahead.NewReadCloser(pd)) - expanded[i] = exp close(done[i]) @@ -680,6 +670,12 @@ func (a *APK) cachePackage(ctx context.Context, pkg *repository.RepositoryPackag exp.PackageFile = datDst + tarDst := strings.TrimSuffix(exp.PackageFile, ".gz") + if err := os.Rename(exp.tarFile, tarDst); err != nil { + return nil, fmt.Errorf("renaming control file: %w", err) + } + exp.tarFile = tarDst + return exp, nil } @@ -742,6 +738,8 @@ func (a *APK) cachedPackage(ctx context.Context, pkg *repository.RepositoryPacka return nil, err } + exp.tarFile = strings.TrimSuffix(exp.PackageFile, ".gz") + return &exp, nil }