Skip to content

Commit

Permalink
Refactor to support caching compression. (#867)
Browse files Browse the repository at this point in the history
* Refactor to support caching compression.

Today `tarball.LayerFromOpener` must either compress or decompress its input, depending on which form it receives.

Virtually all clients of this interface have an uncompressed tarball they want to turn into a layer, but the API we have presents two fun problems.
1. If we simply present the uncompressed tarball (as most clients do) then we will often end up compressing the layer (expensive!) twice.  The first time is to compute the layer's digest, and the second is typically to publish the layer as part of `remote.Write`.
2. If we present a precompressed tarball (as ko does), then we will have an extra decompression (less expensive, but still redundant) to compute the diff id.

This refactors the layer to store an opener for each form of the layer.  The constructor sniffs whether the layer is compressed and based on this makes the "other" opener (that requires conversion) a lazy thunk that performs the conversion each time (this is effectively what we were doing before based on the stored `compressed` bit).  The advantage of this approach is that by setting them up before option evaluation we can make them memoizing on their first invocation.  Given that most consumers of this library only pass the uncompressed form (or should), I have only provided an option for memoizing the compression.

* Fix style stuff

* Add a detailed comment for LayerFromOpener with practical learnings

* Update comments

* Try renaming variables

* Fix the Gunzip utility's close bug
  • Loading branch information
mattmoor committed Dec 13, 2020
1 parent a85f8fd commit 8b5370a
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 49 deletions.
119 changes: 75 additions & 44 deletions pkg/v1/tarball/layer.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,20 @@ import (
"io"
"io/ioutil"
"os"
"sync"

v1 "github.com/google/go-containerregistry/pkg/v1"
"github.com/google/go-containerregistry/pkg/v1/types"
"github.com/google/go-containerregistry/pkg/v1/v1util"
)

// layer implements v1.Layer backed by a pair of Openers: one yielding
// the compressed (gzipped) form and one yielding the uncompressed
// tarball. Storing both lets options (e.g. WithCompressedCaching)
// wrap either path without re-sniffing the input's compression.
//
// Note: the diff view interleaved the pre-refactor fields here; this
// is the post-refactor struct the rest of the hunk compiles against.
type layer struct {
	digest             v1.Hash // SHA256 of the compressed bytes
	diffID             v1.Hash // SHA256 of the uncompressed bytes
	size               int64   // size of the compressed form
	compressedopener   Opener  // yields the gzipped tarball
	uncompressedopener Opener  // yields the raw tarball
	compression        int     // gzip level used when compressing
}

func (l *layer) Digest() (v1.Hash, error) {
Expand All @@ -44,21 +45,11 @@ func (l *layer) DiffID() (v1.Hash, error) {
}

// Compressed implements v1.Layer, returning a reader over the gzipped
// form of the layer. All conversion logic now lives in the stored
// opener, so this is a pure delegation (the old inline
// GzipReadCloserLevel path was removed by the refactor).
func (l *layer) Compressed() (io.ReadCloser, error) {
	return l.compressedopener()
}

// Uncompressed implements v1.Layer, returning a reader over the raw
// (un-gzipped) tarball. As with Compressed, any needed conversion is
// baked into the stored opener at construction time.
func (l *layer) Uncompressed() (io.ReadCloser, error) {
	return l.uncompressedopener()
}

func (l *layer) Size() (int64, error) {
Expand All @@ -72,13 +63,41 @@ func (l *layer) MediaType() (types.MediaType, error) {
// LayerOption applies options to layer
type LayerOption func(*layer)

// WithCompressionLevel is a functional option for overriding the default
// compression level used for compressing uncompressed tarballs.
// See `gzip.NewWriterLevel` for the range of valid values.
func WithCompressionLevel(level int) LayerOption {
	return func(l *layer) {
		l.compression = level
	}
}

// WithCompressedCaching is a functional option that overrides the
// logic for accessing the compressed bytes to memoize the result
// and avoid expensive repeated gzips.
func WithCompressedCaching(l *layer) {
	var once sync.Once
	var err error

	// buf accumulates the compressed bytes on the first invocation;
	// every subsequent call replays it without re-running gzip.
	buf := bytes.NewBuffer(nil)
	og := l.compressedopener

	l.compressedopener = func() (io.ReadCloser, error) {
		// Run the (possibly expensive) original opener exactly once,
		// even across concurrent callers.
		once.Do(func() {
			var rc io.ReadCloser
			rc, err = og()
			if err == nil {
				defer rc.Close()
				_, err = io.Copy(buf, rc)
			}
		})
		if err != nil {
			// NOTE(review): an error from the first attempt is memoized
			// and returned to all future callers, even if it was
			// transient — confirm that is the intended trade-off.
			return nil, err
		}

		// bytes.NewBuffer over buf.Bytes() gives each caller an
		// independent read cursor over the shared cached bytes.
		return ioutil.NopCloser(bytes.NewBuffer(buf.Bytes())), nil
	}
}

// LayerFromFile returns a v1.Layer given a tarball
func LayerFromFile(path string, opts ...LayerOption) (v1.Layer, error) {
opener := func() (io.ReadCloser, error) {
Expand All @@ -87,7 +106,16 @@ func LayerFromFile(path string, opts ...LayerOption) (v1.Layer, error) {
return LayerFromOpener(opener, opts...)
}

// LayerFromOpener returns a v1.Layer given an Opener function
// LayerFromOpener returns a v1.Layer given an Opener function.
// The Opener may return either an uncompressed tarball (common),
// or a compressed tarball (uncommon).
//
// When using this in conjunction with something like remote.Write
// the uncompressed path may end up gzipping things multiple times:
// 1. Compute the layer SHA256
// 2. Upload the compressed layer.
// Since gzip can be expensive, we support an option to memoize the
// compression that can be passed here: tarball.WithCompressedCaching
func LayerFromOpener(opener Opener, opts ...LayerOption) (v1.Layer, error) {
rc, err := opener()
if err != nil {
Expand All @@ -101,20 +129,38 @@ func LayerFromOpener(opener Opener, opts ...LayerOption) (v1.Layer, error) {
}

layer := &layer{
compressed: compressed,
compression: gzip.BestSpeed,
opener: opener,
}

if compressed {
layer.compressedopener = opener
layer.uncompressedopener = func() (io.ReadCloser, error) {
urc, err := opener()
if err != nil {
return nil, err
}
return v1util.GunzipReadCloser(urc)
}
} else {
layer.uncompressedopener = opener
layer.compressedopener = func() (io.ReadCloser, error) {
crc, err := opener()
if err != nil {
return nil, err
}
return v1util.GzipReadCloserLevel(crc, layer.compression), nil
}
}

for _, opt := range opts {
opt(layer)
}

if layer.digest, layer.size, err = computeDigest(opener, compressed, layer.compression); err != nil {
if layer.digest, layer.size, err = computeDigest(layer.compressedopener); err != nil {
return nil, err
}

if layer.diffID, err = computeDiffID(opener, compressed); err != nil {
if layer.diffID, err = computeDiffID(layer.uncompressedopener); err != nil {
return nil, err
}

Expand All @@ -133,38 +179,23 @@ func LayerFromReader(reader io.Reader, opts ...LayerOption) (v1.Layer, error) {
}, opts...)
}

func computeDigest(opener Opener, compressed bool, compression int) (v1.Hash, int64, error) {
func computeDigest(opener Opener) (v1.Hash, int64, error) {
rc, err := opener()
if err != nil {
return v1.Hash{}, 0, err
}
defer rc.Close()

if compressed {
return v1.SHA256(rc)
}

return v1.SHA256(v1util.GzipReadCloserLevel(ioutil.NopCloser(rc), compression))
return v1.SHA256(rc)
}

func computeDiffID(opener Opener, compressed bool) (v1.Hash, error) {
func computeDiffID(opener Opener) (v1.Hash, error) {
rc, err := opener()
if err != nil {
return v1.Hash{}, err
}
defer rc.Close()

if !compressed {
digest, _, err := v1.SHA256(rc)
return digest, err
}

reader, err := gzip.NewReader(rc)
if err != nil {
return v1.Hash{}, err
}
defer reader.Close()

diffID, _, err := v1.SHA256(reader)
return diffID, err
digest, _, err := v1.SHA256(rc)
return digest, err
}
29 changes: 27 additions & 2 deletions pkg/v1/tarball/layer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,37 @@ func TestLayerFromOpenerReader(t *testing.T) {
if err != nil {
t.Fatalf("Unable to read tar file: %v", err)
}
count := 0
ucOpener := func() (io.ReadCloser, error) {
count++
return ioutil.NopCloser(bytes.NewReader(ucBytes)), nil
}
tarLayer, err := LayerFromOpener(ucOpener)
tarLayer, err := LayerFromOpener(ucOpener, WithCompressedCaching)
if err != nil {
t.Fatalf("Unable to create layer from tar file: %v", err)
t.Fatal("Unable to create layer from tar file:", err)
}
for i := 0; i < 10; i++ {
tarLayer.Compressed()
}

// Store the count and reset the counter.
cachedCount := count
count = 0

tarLayer, err = LayerFromOpener(ucOpener)
if err != nil {
t.Fatal("Unable to create layer from tar file:", err)
}
for i := 0; i < 10; i++ {
tarLayer.Compressed()
}

// We expect three calls: gzip sniff, diffid computation, cached compression
if cachedCount != 3 {
t.Errorf("cached count = %d, wanted %d", cachedCount, 3)
}
if cachedCount+10 != count {
t.Errorf("count = %d, wanted %d", count, cachedCount+10)
}

gzBytes, err := ioutil.ReadFile("gzip_content.tgz")
Expand Down
8 changes: 5 additions & 3 deletions pkg/v1/v1util/zip.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,11 @@ func GunzipReadCloser(r io.ReadCloser) (io.ReadCloser, error) {
return &readAndCloser{
Reader: gr,
CloseFunc: func() error {
if err := gr.Close(); err != nil {
return err
}
// If the unzip fails, then this seems to return the same
// error as the read. We don't want this to interfere with
// us closing the main ReadCloser, since this could leave
// an open file descriptor (fails on Windows).
gr.Close()
return r.Close()
},
}, nil
Expand Down

0 comments on commit 8b5370a

Please sign in to comment.