Skip to content
This repository has been archived by the owner on Jun 5, 2024. It is now read-only.

Commit

Permalink
Cache uncompressed data section as well (#98)
Browse files Browse the repository at this point in the history
  • Loading branch information
jonjohnsonjr authored Aug 19, 2023
1 parent 6d91e7d commit bec5668
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 50 deletions.
8 changes: 3 additions & 5 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@ go 1.20

require (
github.com/hashicorp/go-retryablehttp v0.7.4
github.com/klauspost/compress v1.16.7
github.com/psanford/memfs v0.0.0-20230130182539-4dbf7e3e865e
github.com/sirupsen/logrus v1.9.3
github.com/stretchr/testify v1.8.4
gitlab.alpinelinux.org/alpine/go v0.7.0
go.lsp.dev/uri v0.3.0
go.opentelemetry.io/otel v1.16.0
go.opentelemetry.io/otel/trace v1.16.0
golang.org/x/build v0.0.0-20220928220451-9294235e16f5
golang.org/x/sync v0.3.0
golang.org/x/sys v0.9.0
Expand All @@ -19,14 +22,9 @@ require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/go-logr/logr v1.2.4 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/google/go-cmp v0.5.9 // indirect
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/klauspost/compress v1.16.7 // indirect
github.com/klauspost/readahead v1.4.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
go.opentelemetry.io/otel v1.16.0 // indirect
go.opentelemetry.io/otel/metric v1.16.0 // indirect
go.opentelemetry.io/otel/trace v1.16.0 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
3 changes: 0 additions & 3 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
github.com/hashicorp/go-hclog v0.9.2 h1:CG6TE5H9/JXsFWJCfoIVpKFIkFe6ysEuHirp4DxCsHI=
Expand All @@ -20,8 +19,6 @@ github.com/hashicorp/go-retryablehttp v0.7.4 h1:ZQgVdpTdAL7WpMIwLzCfbalOcSUdkDZn
github.com/hashicorp/go-retryablehttp v0.7.4/go.mod h1:Jy/gPYAdjqffZ/yFGCFV2doI5wjtH1ewM9u8iYVjtX8=
github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I=
github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/klauspost/readahead v1.4.0 h1:w4hQ3BpdLjBnRQkZyNi+nwdHU7eGP9buTexWK9lU7gY=
github.com/klauspost/readahead v1.4.0/go.mod h1:7bolpMKhT5LKskLwYXGSDOyA2TYtMFgdgV0Y8gy7QhA=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/psanford/memfs v0.0.0-20230130182539-4dbf7e3e865e h1:51xcRlSMBU5rhM9KahnJGfEsBPVPz3182TgFRowA8yY=
Expand Down
110 changes: 78 additions & 32 deletions pkg/apk/expandapk.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (
"os"
"path/filepath"
"strings"
"sync"

"github.com/klauspost/compress/gzip"

Expand All @@ -46,47 +45,80 @@ type APKExpanded struct {
// The control data filename (a.k.a. ".PKGINFO") in tar.gz format
ControlFile string

// The package data filename in tar.gz format
// The package data filename in .tar.gz format
PackageFile string

// The package data filename in .tar format.
tarFile string

ControlHash []byte
PackageHash []byte

sync.Mutex
packageData io.ReadCloser
}

const meg = 1 << 20

func (a *APKExpanded) SetPackageData(rc io.ReadCloser) {
a.Lock()
defer a.Unlock()
// readCloser pairs an io.Reader with a separately supplied close function.
//
// This exists so we can have a "bufio.ReadCloser": a *bufio.Reader has no Close
// method, so the buffered reader is stored as the Reader and the Close method of
// the file it wraps is stored as CloseFunc.
type readCloser struct {
// Embedded so that Read is promoted to readCloser.
io.Reader
// CloseFunc is what Close calls; it must be non-nil.
CloseFunc func() error
}

a.packageData = rc
// Close satisfies io.Closer by delegating to the stored CloseFunc and
// returning whatever error it reports.
func (rc *readCloser) Close() error {
	closeFn := rc.CloseFunc
	return closeFn()
}

const meg = 1 << 20

func (a *APKExpanded) PackageData() (io.ReadCloser, error) {
a.Lock()
defer a.Unlock()
// Use min(1MB, a.Size) bufio to avoid GC pressure for small packages.
bufSize := meg
if total := int(a.Size); total != 0 && total < bufSize {
bufSize = total
}

if a.packageData != nil {
rc := a.packageData
a.packageData = nil
return rc, nil
uf, err := os.Open(a.tarFile)
if err == nil {
return &readCloser{
Reader: bufio.NewReaderSize(uf, bufSize),
CloseFunc: uf.Close,
}, nil
} else if !os.IsNotExist(err) {
return nil, fmt.Errorf("opening package data file: %w", err)
}

// Handle old caches without the uncompressed file.
f, err := os.Open(a.PackageFile)
if err != nil {
return nil, err
return nil, fmt.Errorf("opening %q: %w", a.PackageFile, err)
}

// Use min(1MB, a.Size) bufio to avoid GC pressure for small packages.
bufSize := meg
if total := int(a.Size); total != 0 && total < bufSize {
bufSize = total
br := bufio.NewReaderSize(f, bufSize)
zr, err := gzip.NewReader(br)
if err != nil {
return nil, fmt.Errorf("parsing %q: %w", a.PackageFile, err)
}

return gzip.NewReader(bufio.NewReaderSize(f, bufSize))
uf, err = os.Create(a.tarFile)
if err != nil {
return nil, fmt.Errorf("opening tar file %q: %w", a.tarFile, err)
}
bw := bufio.NewWriterSize(uf, 1<<20)
if _, err := io.Copy(bw, zr); err != nil {
return nil, fmt.Errorf("expanding %q: %w", a.PackageFile, err)
}
if err := bw.Flush(); err != nil {
return nil, fmt.Errorf("flushing %q: %w", a.tarFile, err)
}

if _, err := uf.Seek(0, io.SeekStart); err != nil {
return nil, fmt.Errorf("seeking %q: %w", a.tarFile, err)
}

br.Reset(uf)

return &readCloser{
Reader: br,
CloseFunc: uf.Close,
}, nil
}

func (a *APKExpanded) APK() (io.ReadCloser, error) {
Expand Down Expand Up @@ -128,14 +160,10 @@ func (m *multiReadCloser) Close() error {
}

func (a *APKExpanded) Close() error {
errs := []error{}
if a.packageData != nil {
errs = append(errs, a.packageData.Close())
}
if a.tempDir != "" {
errs = append(errs, os.RemoveAll(a.tempDir))
if a.tempDir == "" {
return nil
}
return errors.Join(errs...)
return os.RemoveAll(a.tempDir)
}

// An implementation of io.Writer designed specifically for use in the expandApk() method.
Expand Down Expand Up @@ -338,13 +366,29 @@ func ExpandApk(ctx context.Context, source io.Reader, cacheDir string) (*APKExpa
hashes = append(hashes, h.Sum(nil))
gzipStreams = append(gzipStreams, sw.CurrentName())
} else {
if err := checkSums(ctx, gzi); err != nil {
// While we verify checksums, also tee the tar to a separate file.
tarfilename := strings.TrimSuffix(sw.CurrentName(), ".gz")
tarfile, err := os.Create(tarfilename)
if err != nil {
return nil, fmt.Errorf("opening tar file: %w", err)
}
bw := bufio.NewWriterSize(tarfile, 1<<20)
tr := io.TeeReader(gzi, bw)

if err := checkSums(ctx, tr); err != nil {
return nil, fmt.Errorf("checking sums: %w", err)
}
if _, err := io.Copy(io.Discard, gzi); err != nil {
if _, err := io.Copy(io.Discard, tr); err != nil {
return nil, fmt.Errorf("expandApk error 3: %w", err)
}

if err := bw.Flush(); err != nil {
return nil, fmt.Errorf("flushing tarfile: %w", err)
}

if err := tarfile.Close(); err != nil {
return nil, fmt.Errorf("closing tarfile: %w", err)
}
gzipStreams = append(gzipStreams, sw.CurrentName())
hashes = append(hashes, h.Sum(nil))
break
Expand Down Expand Up @@ -395,6 +439,8 @@ func ExpandApk(ctx context.Context, source io.Reader, cacheDir string) (*APKExpa
expanded.SignatureFile = gzipStreams[0]
}

expanded.tarFile = strings.TrimSuffix(expanded.PackageFile, ".gz")

return &expanded, nil
}

Expand Down
18 changes: 8 additions & 10 deletions pkg/apk/implementation.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ import (
apkfs "github.com/chainguard-dev/go-apk/pkg/fs"
logger "github.com/chainguard-dev/go-apk/pkg/logger"
"github.com/hashicorp/go-retryablehttp"
"github.com/klauspost/readahead"
)

type APK struct {
Expand Down Expand Up @@ -543,15 +542,6 @@ func (a *APK) FixateWorld(ctx context.Context, sourceDateEpoch *time.Time) error
return fmt.Errorf("expanding %s: %w", pkg.Name, err)
}

pd, err := exp.PackageData()
if err != nil {
return fmt.Errorf("package data: %w", err)
}

// Start gunzipping this ahead of time so we can install it faster.
// We may want to tune these numbers a bit based on package size or count.
exp.SetPackageData(readahead.NewReadCloser(pd))

expanded[i] = exp
close(done[i])

Expand Down Expand Up @@ -680,6 +670,12 @@ func (a *APK) cachePackage(ctx context.Context, pkg *repository.RepositoryPackag

exp.PackageFile = datDst

// Move the uncompressed tar next to the cached .tar.gz so that its path can
// be derived from PackageFile by trimming the ".gz" suffix.
tarDst := strings.TrimSuffix(exp.PackageFile, ".gz")
if err := os.Rename(exp.tarFile, tarDst); err != nil {
	return nil, fmt.Errorf("renaming tar file: %w", err)
}
exp.tarFile = tarDst

return exp, nil
}

Expand Down Expand Up @@ -742,6 +738,8 @@ func (a *APK) cachedPackage(ctx context.Context, pkg *repository.RepositoryPacka
return nil, err
}

exp.tarFile = strings.TrimSuffix(exp.PackageFile, ".gz")

return &exp, nil
}

Expand Down

0 comments on commit bec5668

Please sign in to comment.