Skip to content
This repository has been archived by the owner on Jun 5, 2024. It is now read-only.

Commit

Permalink
Rework how we cache APKs (#77)
Browse files Browse the repository at this point in the history
* Rework how we cache APKs

Rather than caching the entire APK, we will cache the APK sections
separately. It is trivial to recombine them using cat to produce the
exact original APK, so we don't lose any data.

Doing this gives us two useful things:

1. The process of splitting the APK is relatively expensive, as we have
   to parse the tar.gz stream to know exactly where to split. Caching the
   sections separately means we only pay that cost once.
2. The individual sections have content hashes in the APK data model,
   so we can use those hashes as keys in the cache (filenames).
   When we write to the cache, we compute the hashes ourselves, so we
   get cache invalidation for free.

Signed-off-by: Jon Johnson <jon.johnson@chainguard.dev>

* Add context around generated checksum to tests

This will make it possible to regenerate the Checksum byte slice if
things change for any reason.

Signed-off-by: Jon Johnson <jon.johnson@chainguard.dev>

---------

Signed-off-by: Jon Johnson <jon.johnson@chainguard.dev>
  • Loading branch information
jonjohnsonjr authored Jul 8, 2023
1 parent b216a5e commit eda0bb8
Show file tree
Hide file tree
Showing 6 changed files with 368 additions and 87 deletions.
66 changes: 49 additions & 17 deletions pkg/apk/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (
"os"
"path/filepath"
"strings"

"gitlab.alpinelinux.org/alpine/go/pkg/repository"
)

// cache
Expand Down Expand Up @@ -52,23 +54,21 @@ func (t *cacheTransport) RoundTrip(request *http.Request) (*http.Response, error
if request.URL == nil {
return nil, fmt.Errorf("no URL in request")
}
cacheFile, err := t.cachePathFromURL(*request.URL)
cacheFile, err := cachePathFromURL(t.root, *request.URL)
if err != nil {
return nil, fmt.Errorf("invalid cache path based on URL: %w", err)
}

// If an etag isn't required, then check the cache based on a simple
// filename-based naming scheme.
if !t.etagRequired {
// Try to open the file in the cache, and if we hit an error then
// try to populate the file in the cache.
// We don't cache the response for these because they get cached later in cachePackage.

// Try to open the file in the cache.
// If we hit an error, just send the request.
f, err := os.Open(cacheFile)
if err != nil {
return t.retrieveAndSaveFile(request, func(r *http.Response) (string, error) {
// On the non-etag path, we simply name files based on the URL.
return cacheFile, nil
})
return t.wrapped.Do(request)
}

return &http.Response{
StatusCode: http.StatusOK,
Body: f,
Expand All @@ -87,7 +87,7 @@ func (t *cacheTransport) RoundTrip(request *http.Request) (*http.Response, error
}
// We simulate content-based addressing with the etag values using an .etag
// file extension.
etagFile := filepath.Join(filepath.Dir(cacheFile), initialEtag+".etag")
etagFile := cacheFileFromEtag(cacheFile, initialEtag)
f, err := os.Open(etagFile)
if err != nil {
return t.retrieveAndSaveFile(request, func(r *http.Response) (string, error) {
Expand All @@ -97,7 +97,8 @@ func (t *cacheTransport) RoundTrip(request *http.Request) (*http.Response, error
if !ok {
return "", fmt.Errorf("GET response did not contain an etag, but HEAD returned %q", initialEtag)
}
return filepath.Join(filepath.Dir(cacheFile), finalEtag+".etag"), nil

return cacheFileFromEtag(cacheFile, finalEtag), nil
})
}
return &http.Response{
Expand All @@ -106,6 +107,19 @@ func (t *cacheTransport) RoundTrip(request *http.Request) (*http.Response, error
}, nil
}

// cacheFileFromEtag returns the path at which content identified by etag is
// stored, placed relative to the directory containing cacheFile.
//
// Ordinary files are stored as "<dir>/<etag>.etag". Index files (any cacheFile
// ending in "APKINDEX.tar.gz") are instead stored as
// "<dir>/APKINDEX/<etag>.tar.gz" so they keep their real file extension.
func cacheFileFromEtag(cacheFile, etag string) string {
	dir := filepath.Dir(cacheFile)

	if !strings.HasSuffix(cacheFile, "APKINDEX.tar.gz") {
		return filepath.Join(dir, etag+".etag")
	}

	// Keep all the index files under APKINDEX/ with appropriate file extension.
	return filepath.Join(dir, "APKINDEX", etag+".tar.gz")
}

func etagFromResponse(resp *http.Response) (string, bool) {
remoteEtag, ok := resp.Header[http.CanonicalHeaderKey("etag")]
if !ok || len(remoteEtag) == 0 || remoteEtag[0] == "" {
Expand Down Expand Up @@ -168,8 +182,26 @@ func (t *cacheTransport) retrieveAndSaveFile(request *http.Request, cp cachePlac
return resp, nil
}

// cacheDirForPackage returns the cache directory for pkg beneath root.
//
// The package is converted to a URL with packageAsURL, mapped to a cache path
// with cachePathFromURL, and the trailing ".apk" is then removed from that
// path. An error is returned if either step fails or if the resulting path
// does not have the ".apk" extension.
func cacheDirForPackage(root string, pkg *repository.RepositoryPackage) (string, error) {
	pkgURL, err := packageAsURL(pkg)
	if err != nil {
		return "", err
	}

	apkPath, err := cachePathFromURL(root, *pkgURL)
	if err != nil {
		return "", err
	}

	// Only a path that names an .apk can be turned into a directory name.
	const apkExt = ".apk"
	ext := filepath.Ext(apkPath)
	if ext != apkExt {
		return "", fmt.Errorf("unexpected ext (%s) to cache dir: %q", ext, apkPath)
	}

	return strings.TrimSuffix(apkPath, apkExt), nil
}

// cachePathFromURL given a URL, figure out what the cache path would be
func (t *cacheTransport) cachePathFromURL(u url.URL) (string, error) {
func cachePathFromURL(root string, u url.URL) (string, error) {
// the last two levels are what we append. For example https://example.com/foo/bar/x86_64/baz.apk
// means we want to append x86_64/baz.apk to our cache root
u2 := u
Expand All @@ -185,12 +217,12 @@ func (t *cacheTransport) cachePathFromURL(u url.URL) (string, error) {

// url encode it so it can be a single directory
repoDir = url.QueryEscape(u2.String())
cacheFile := filepath.Join(t.root, repoDir, dir, filename)
// validate it is within t.root
cacheFile := filepath.Join(root, repoDir, dir, filename)
// validate it is within root
cacheFile = filepath.Clean(cacheFile)
root := filepath.Clean(t.root)
if !strings.HasPrefix(cacheFile, root) {
return "", fmt.Errorf("cache file %s is not within root %s", cacheFile, root)
cleanroot := filepath.Clean(root)
if !strings.HasPrefix(cacheFile, cleanroot) {
return "", fmt.Errorf("cache file %s is not within root %s", cacheFile, cleanroot)
}
return cacheFile, nil
}
78 changes: 51 additions & 27 deletions pkg/apk/expandapk.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,22 +36,59 @@ type APKExpanded struct {
// The temporary parent directory containing all exploded .tar/.tar.gz contents
tempDir string

// The package signature (a.k.a. ".SIGN...") in tar.gz format
Signature []byte
// The package signature filename (a.k.a. ".SIGN...") in tar.gz format
SignatureFile string

// the control data (a.k.a. ".PKGINFO") in tar.gz format
ControlData []byte
// The control data filename (a.k.a. ".PKGINFO") in tar.gz format
ControlFile string

// A stream reader to the contents in tar.gz format
PackageData io.ReadCloser
// The package data filename in tar.gz format
PackageFile string
}

func (a *APKExpanded) Close() error {
if a.PackageData != nil {
if err := a.PackageData.Close(); err != nil {
return fmt.Errorf("APKExpanded.Close error 1: %v", err)
// APK returns a reader that yields the contents of SignatureFile, ControlFile
// and PackageFile back to back, in that order. Fields that are empty strings
// are skipped.
//
// The caller must Close the returned reader to release the underlying files.
// If any file cannot be opened, the files opened so far are closed and the
// error from os.Open is returned unchanged.
func (a *APKExpanded) APK() (io.ReadCloser, error) {
	names := []string{a.SignatureFile, a.ControlFile, a.PackageFile}
	rs := make([]io.Reader, 0, len(names))
	cs := make([]io.Closer, 0, len(names))

	for _, fn := range names {
		if fn == "" {
			continue
		}

		f, err := os.Open(fn)
		if err != nil {
			// Don't leak the descriptors we already hold. The open failure is
			// the error worth reporting, so close errors are ignored here.
			for _, c := range cs {
				_ = c.Close()
			}
			return nil, err
		}

		rs = append(rs, f)
		cs = append(cs, f)
	}

	return &multiReadCloser{
		r:       io.MultiReader(rs...),
		closers: cs,
	}, nil
}

// multiReadCloser pairs a single reader with a set of closers so that the
// combination can be used as one io.ReadCloser.
type multiReadCloser struct {
	// r is the reader that all Read calls are forwarded to.
	r io.Reader
	// closers are closed, in order, when Close is called.
	closers []io.Closer
}

// Read forwards to the wrapped reader.
func (m *multiReadCloser) Read(p []byte) (int, error) {
	n, err := m.r.Read(p)
	return n, err
}

// Close closes every closer, even if an earlier one fails, and returns the
// failures combined with errors.Join (nil when all of them succeed).
func (m *multiReadCloser) Close() error {
	var failures []error
	for _, c := range m.closers {
		if err := c.Close(); err != nil {
			failures = append(failures, err)
		}
	}
	return errors.Join(failures...)
}

// Close removes the temporary directory recorded in tempDir together with
// everything beneath it. When tempDir is empty, Close does nothing and
// returns nil.
func (a *APKExpanded) Close() error {
	if dir := a.tempDir; dir != "" {
		return os.RemoveAll(dir)
	}
	return nil
}

Expand Down Expand Up @@ -207,6 +244,7 @@ func ExpandApk(ctx context.Context, source io.Reader) (*APKExpanded, error) {
if err != nil {
return nil, err
}

sw, err := newExpandApkWriter(dir, "stream", "tar.gz")
if err != nil {
return nil, fmt.Errorf("expandApk error 1: %w", err)
Expand Down Expand Up @@ -324,29 +362,15 @@ func ExpandApk(ctx context.Context, source io.Reader) (*APKExpanded, error) {
return nil, fmt.Errorf("invalid number of tar streams: %d", numGzipStreams)
}

controlData, err := os.ReadFile(gzipStreams[controlDataIndex])
if err != nil {
return nil, fmt.Errorf("unable to read control data: %w", err)
}

packageData, err := os.Open(gzipStreams[controlDataIndex+1])
if err != nil {
return nil, fmt.Errorf("could not open package data file %s for reading: %w", gzipStreams[controlDataIndex+1], err)
}

expanded := APKExpanded{
tempDir: dir,
Signed: signed,
Size: totalSize,
ControlData: controlData,
PackageData: packageData,
ControlFile: gzipStreams[controlDataIndex],
PackageFile: gzipStreams[controlDataIndex+1],
}
if signed {
b, err := os.ReadFile(gzipStreams[0])
if err != nil {
return nil, fmt.Errorf("could not read signature file %s: %w", gzipStreams[0], err)
}
expanded.Signature = b
expanded.SignatureFile = gzipStreams[0]
}

return &expanded, nil
Expand Down
Loading

0 comments on commit eda0bb8

Please sign in to comment.