From c95d7bdd6359dab9cbf575fae14e3389e3efa84e Mon Sep 17 00:00:00 2001 From: Matt Moore Date: Mon, 21 Dec 2020 16:14:22 -0800 Subject: [PATCH] Start to flesh out crane optimize. (#879) * Start to flesh out crane optimize. This is a hidden command, which roundtrips a remote image to a target image through `tarball.LayerFromOpener(layer.Uncompressed)`. Right now this does nothing to force estargz (still need `GGCR_EXPERIMENT_ESTARGZ=1`) or prioritize files (need `estargz.WithPrioritizedFiles(foo)`), but want to start the convo. Fixes: https://github.com/google/go-containerregistry/issues/878 * Add --prioritize flag to prioritize files * Fix headers, drop history * Drop unused variable * Add explicit option for estargz * Add a warning comment to crane.Optimize --- cmd/crane/cmd/optimize.go | 46 +++++++++ cmd/crane/cmd/root.go | 1 + pkg/crane/optimize.go | 179 +++++++++++++++++++++++++++++++++++ pkg/v1/tarball/layer.go | 75 ++++++++------- pkg/v1/tarball/layer_test.go | 9 +- 5 files changed, 273 insertions(+), 37 deletions(-) create mode 100644 cmd/crane/cmd/optimize.go create mode 100644 pkg/crane/optimize.go diff --git a/cmd/crane/cmd/optimize.go b/cmd/crane/cmd/optimize.go new file mode 100644 index 000000000..214a0b4ef --- /dev/null +++ b/cmd/crane/cmd/optimize.go @@ -0,0 +1,46 @@ +// Copyright 2020 Google LLC All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "log" + + "github.com/google/go-containerregistry/pkg/crane" + "github.com/spf13/cobra" +) + +// NewCmdOptimize creates a new cobra.Command for the optimize subcommand. +func NewCmdOptimize(options *[]crane.Option) *cobra.Command { + var files []string + + cmd := &cobra.Command{ + Use: "optimize SRC DST", + Hidden: true, + Aliases: []string{"opt"}, + Short: "Optimize a remote container image from src to dst", + Args: cobra.ExactArgs(2), + Run: func(_ *cobra.Command, args []string) { + src, dst := args[0], args[1] + if err := crane.Optimize(src, dst, files, *options...); err != nil { + log.Fatal(err) + } + }, + } + + cmd.Flags().StringSliceVar(&files, "prioritize", nil, + "The list of files to prioritize in the optimized image.") + + return cmd +} diff --git a/cmd/crane/cmd/root.go b/cmd/crane/cmd/root.go index 905671481..759972819 100644 --- a/cmd/crane/cmd/root.go +++ b/cmd/crane/cmd/root.go @@ -86,6 +86,7 @@ func New(use, short string, options []crane.Option) *cobra.Command { NewCmdExport(&options), NewCmdList(&options), NewCmdManifest(&options), + NewCmdOptimize(&options), NewCmdPull(&options), NewCmdPush(&options), NewCmdRebase(&options), diff --git a/pkg/crane/optimize.go b/pkg/crane/optimize.go new file mode 100644 index 000000000..e7603d610 --- /dev/null +++ b/pkg/crane/optimize.go @@ -0,0 +1,179 @@ +// Copyright 2020 Google LLC All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package crane + +import ( + "errors" + "fmt" + + "github.com/containerd/stargz-snapshotter/estargz" + "github.com/google/go-containerregistry/pkg/logs" + "github.com/google/go-containerregistry/pkg/name" + v1 "github.com/google/go-containerregistry/pkg/v1" + "github.com/google/go-containerregistry/pkg/v1/empty" + "github.com/google/go-containerregistry/pkg/v1/mutate" + "github.com/google/go-containerregistry/pkg/v1/remote" + "github.com/google/go-containerregistry/pkg/v1/tarball" + "github.com/google/go-containerregistry/pkg/v1/types" +) + +// Optimize optimizes a remote image or index from src to dst. +// THIS API IS EXPERIMENTAL AND SUBJECT TO CHANGE WITHOUT WARNING. +func Optimize(src, dst string, prioritize []string, opt ...Option) error { + o := makeOptions(opt...) + srcRef, err := name.ParseReference(src, o.name...) + if err != nil { + return fmt.Errorf("parsing reference %q: %v", src, err) + } + + dstRef, err := name.ParseReference(dst, o.name...) + if err != nil { + return fmt.Errorf("parsing reference for %q: %v", dst, err) + } + + logs.Progress.Printf("Optimizing from %v to %v", srcRef, dstRef) + desc, err := remote.Get(srcRef, o.remote...) + if err != nil { + return fmt.Errorf("fetching %q: %v", src, err) + } + + switch desc.MediaType { + case types.OCIImageIndex, types.DockerManifestList: + // Handle indexes separately. + if o.platform != nil { + // If platform is explicitly set, don't optimize the whole index, just the appropriate image. + if err := optimizeAndPushImage(desc, dstRef, prioritize, o); err != nil { + return fmt.Errorf("failed to optimize image: %v", err) + } + } else { + if err := optimizeAndPushIndex(desc, dstRef, prioritize, o); err != nil { + return fmt.Errorf("failed to optimize index: %v", err) + } + } + + case types.DockerManifestSchema1, types.DockerManifestSchema1Signed: + return errors.New("docker schema 1 images are not supported") + + default: + // Assume anything else is an image, since some registries don't set mediaTypes properly. + if err := optimizeAndPushImage(desc, dstRef, prioritize, o); err != nil { + return fmt.Errorf("failed to optimize image: %v", err) + } + } + + return nil +} + +func optimizeAndPushImage(desc *remote.Descriptor, dstRef name.Reference, prioritize []string, o options) error { + img, err := desc.Image() + if err != nil { + return err + } + + oimg, err := optimizeImage(img, prioritize) + if err != nil { + return err + } + + return remote.Write(dstRef, oimg, o.remote...) +} + +func optimizeImage(img v1.Image, prioritize []string) (v1.Image, error) { + cfg, err := img.ConfigFile() + if err != nil { + return nil, err + } + ocfg := cfg.DeepCopy() + ocfg.History = nil + ocfg.RootFS.DiffIDs = nil + + oimg, err := mutate.ConfigFile(empty.Image, ocfg) + if err != nil { + return nil, err + } + + layers, err := img.Layers() + if err != nil { + return nil, err + } + + olayers := make([]mutate.Addendum, 0, len(layers)) + for _, layer := range layers { + olayer, err := tarball.LayerFromOpener(layer.Uncompressed, + tarball.WithEstargz, + tarball.WithEstargzOptions(estargz.WithPrioritizedFiles(prioritize))) + if err != nil { + return nil, err + } + + olayers = append(olayers, mutate.Addendum{ + Layer: olayer, + MediaType: types.DockerLayer, + }) + } + + return mutate.Append(oimg, olayers...) +} + +func optimizeAndPushIndex(desc *remote.Descriptor, dstRef name.Reference, prioritize []string, o options) error { + idx, err := desc.ImageIndex() + if err != nil { + return err + } + + oidx, err := optimizeIndex(idx, prioritize) + if err != nil { + return err + } + + return remote.WriteIndex(dstRef, oidx, o.remote...) +} + +func optimizeIndex(idx v1.ImageIndex, prioritize []string) (v1.ImageIndex, error) { + im, err := idx.IndexManifest() + if err != nil { + return nil, err + } + + // Build an image for each child from the base and append it to a new index to produce the result. + adds := make([]mutate.IndexAddendum, 0, len(im.Manifests)) + for _, desc := range im.Manifests { + img, err := idx.Image(desc.Digest) + if err != nil { + return nil, err + } + + oimg, err := optimizeImage(img, prioritize) + if err != nil { + return nil, err + } + adds = append(adds, mutate.IndexAddendum{ + Add: oimg, + Descriptor: v1.Descriptor{ + URLs: desc.URLs, + MediaType: desc.MediaType, + Annotations: desc.Annotations, + Platform: desc.Platform, + }, + }) + } + + idxType, err := idx.MediaType() + if err != nil { + return nil, err + } + + return mutate.IndexMediaType(mutate.AppendManifests(empty.Index, adds...), idxType), nil +} diff --git a/pkg/v1/tarball/layer.go b/pkg/v1/tarball/layer.go index e6be337a5..9e16680bf 100644 --- a/pkg/v1/tarball/layer.go +++ b/pkg/v1/tarball/layer.go @@ -133,6 +133,45 @@ func WithEstargzOptions(opts ...estargz.Option) LayerOption { } } +// WithEstargz is a functional option that explicitly enables estargz support. +func WithEstargz(l *layer) { + oguncompressed := l.uncompressedopener + estargz := func() (io.ReadCloser, error) { + crc, err := oguncompressed() + if err != nil { + return nil, err + } + eopts := append(l.estgzopts, estargz.WithCompressionLevel(l.compression)) + rc, h, err := gestargz.ReadCloser(crc, eopts...) + if err != nil { + return nil, err + } + l.annotations[estargz.TOCJSONDigestAnnotation] = h.String() + return &and.ReadCloser{ + Reader: rc, + CloseFunc: func() error { + err := rc.Close() + if err != nil { + return err + } + // As an optimization, leverage the DiffID exposed by the estargz ReadCloser + l.diffID, err = v1.NewHash(rc.DiffID().String()) + return err + }, + }, nil + } + uncompressed := func() (io.ReadCloser, error) { + urc, err := estargz() + if err != nil { + return nil, err + } + return v1util.GunzipReadCloser(urc) + } + + l.compressedopener = estargz + l.uncompressedopener = uncompressed +} + // LayerFromFile returns a v1.Layer given a tarball func LayerFromFile(path string, opts ...LayerOption) (v1.Layer, error) { opener := func() (io.ReadCloser, error) { @@ -168,6 +207,10 @@ func LayerFromOpener(opener Opener, opts ...LayerOption) (v1.Layer, error) { annotations: make(map[string]string, 1), } + if estgz := os.Getenv("GGCR_EXPERIMENT_ESTARGZ"); estgz == "1" { + opts = append([]LayerOption{WithEstargz}, opts...) + } + if compressed { layer.compressedopener = opener layer.uncompressedopener = func() (io.ReadCloser, error) { @@ -177,38 +220,6 @@ func LayerFromOpener(opener Opener, opts ...LayerOption) (v1.Layer, error) { } return ggzip.UnzipReadCloser(urc) } - } else if estgz := os.Getenv("GGCR_EXPERIMENT_ESTARGZ"); estgz == "1" { - layer.compressedopener = func() (io.ReadCloser, error) { - crc, err := opener() - if err != nil { - return nil, err - } - eopts := append(layer.estgzopts, estargz.WithCompressionLevel(layer.compression)) - rc, h, err := gestargz.ReadCloser(crc, eopts...) - if err != nil { - return nil, err - } - layer.annotations[estargz.TOCJSONDigestAnnotation] = h.String() - return &and.ReadCloser{ - Reader: rc, - CloseFunc: func() error { - err := rc.Close() - if err != nil { - return err - } - // As an optimization, leverage the DiffID exposed by the estargz ReadCloser - layer.diffID, err = v1.NewHash(rc.DiffID().String()) - return err - }, - }, nil - } - layer.uncompressedopener = func() (io.ReadCloser, error) { - urc, err := layer.compressedopener() - if err != nil { - return nil, err - } - return v1util.GunzipReadCloser(urc) - } } else { layer.uncompressedopener = opener layer.compressedopener = func() (io.ReadCloser, error) { diff --git a/pkg/v1/tarball/layer_test.go b/pkg/v1/tarball/layer_test.go index 114eb30d1..5954c14cd 100644 --- a/pkg/v1/tarball/layer_test.go +++ b/pkg/v1/tarball/layer_test.go @@ -79,12 +79,10 @@ func TestLayerFromFile(t *testing.T) { } func TestLayerFromFileEstargz(t *testing.T) { - os.Setenv("GGCR_EXPERIMENT_ESTARGZ", "1") - defer os.Unsetenv("GGCR_EXPERIMENT_ESTARGZ") setupFixtures(t) defer teardownFixtures(t) - tarLayer, err := LayerFromFile("testdata/content.tar") + tarLayer, err := LayerFromFile("testdata/content.tar", WithEstargz) if err != nil { t.Fatalf("Unable to create layer from tar file: %v", err) } @@ -93,7 +91,7 @@ func TestLayerFromFileEstargz(t *testing.T) { t.Errorf("validate.Layer(tarLayer): %v", err) } - tarLayerDefaultCompression, err := LayerFromFile("testdata/content.tar", WithCompressionLevel(gzip.DefaultCompression)) + tarLayerDefaultCompression, err := LayerFromFile("testdata/content.tar", WithEstargz, WithCompressionLevel(gzip.DefaultCompression)) if err != nil { t.Fatalf("Unable to create layer with 'Default' compression from tar file: %v", err) } @@ -109,7 +107,7 @@ func TestLayerFromFileEstargz(t *testing.T) { t.Fatal("Unable to generate digest with 'Default' compression", err) } - tarLayerSpeedCompression, err := LayerFromFile("testdata/content.tar", WithCompressionLevel(gzip.BestSpeed)) + tarLayerSpeedCompression, err := LayerFromFile("testdata/content.tar", WithEstargz, WithCompressionLevel(gzip.BestSpeed)) if err != nil { t.Fatalf("Unable to create layer with 'BestSpeed' compression from tar file: %v", err) } @@ -136,6 +134,7 @@ func TestLayerFromFileEstargz(t *testing.T) { } tarLayerPrioritizedFiles, err := LayerFromFile("testdata/content.tar", + WithEstargz, // We compare with default, so pass for apples-to-apples comparison. WithCompressionLevel(gzip.DefaultCompression), // By passing a list of priority files, we expect the layer to be different.