From 54cddccd1cef3c4e188d080f00b8b839ec065fd9 Mon Sep 17 00:00:00 2001 From: jonjohnsonjr Date: Wed, 8 Dec 2021 05:47:26 -0800 Subject: [PATCH] Introduce KOCACHE (#269) * Introduce KOCACHE Cache binaries under $KOCACHE/<import path>/<platform> Cache metadata mapping buildid to diffid and diffid to descriptor. * Point TODO at issue * Split layerCache into separate file This makes things a little cleaner by having a single place that calls buildLayer and passing a thunk down into the cache logic to call that on a cache miss. Also, remove the debug logging to make the code easier to follow (if you need to recompile anyway, it's easy enough to add log lines). * Move cached output to $KOCACHE/bin --- pkg/build/cache.go | 210 +++++++++++++++++++++++++++++++++++++++++++ pkg/build/gobuild.go | 75 +++++++++++----- pkg/build/layer.go | 75 ++++++++++++++++ 3 files changed, 337 insertions(+), 23 deletions(-) create mode 100644 pkg/build/cache.go create mode 100644 pkg/build/layer.go diff --git a/pkg/build/cache.go b/pkg/build/cache.go new file mode 100644 index 0000000000..af0e13d7f5 --- /dev/null +++ b/pkg/build/cache.go @@ -0,0 +1,210 @@ +// Copyright 2021 Google LLC All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package build + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "log" + "os" + "os/exec" + "path/filepath" + "strings" + "sync" + + v1 "github.com/google/go-containerregistry/pkg/v1" + "github.com/google/go-containerregistry/pkg/v1/partial" +) + +type diffIDToDescriptor map[v1.Hash]v1.Descriptor +type buildIDToDiffID map[string]v1.Hash + +type layerCache struct { + buildToDiff map[string]buildIDToDiffID + diffToDesc map[string]diffIDToDescriptor + sync.Mutex +} + +type layerFactory func() (v1.Layer, error) + +func (c *layerCache) get(ctx context.Context, file string, miss layerFactory) (v1.Layer, error) { + if os.Getenv("KOCACHE") == "" { + return miss() + } + + // Cache hit. + if diffid, desc, err := c.getMeta(ctx, file); err == nil { + return &lazyLayer{ + diffid: *diffid, + desc: *desc, + buildLayer: miss, + }, nil + } + + // Cache miss. + layer, err := miss() + if err != nil { + return nil, err + } + if err := c.put(ctx, file, layer); err != nil { + log.Printf("failed to cache metadata %s: %v", file, err) + } + return layer, nil +} + +func (c *layerCache) getMeta(ctx context.Context, file string) (*v1.Hash, *v1.Descriptor, error) { + buildid, err := getBuildID(ctx, file) + if err != nil { + return nil, nil, err + } + + if buildid == "" { + return nil, nil, fmt.Errorf("no buildid for %s", file) + } + + btod, err := c.readBuildToDiff(file) + if err != nil { + return nil, nil, err + } + dtod, err := c.readDiffToDesc(file) + if err != nil { + return nil, nil, err + } + + diffid, ok := btod[buildid] + if !ok { + return nil, nil, fmt.Errorf("no diffid for %q", buildid) + } + + desc, ok := dtod[diffid] + if !ok { + return nil, nil, fmt.Errorf("no desc for %q", diffid) + } + + return &diffid, &desc, nil +} + +// Compute new layer metadata and cache it in-mem and on-disk. 
+func (c *layerCache) put(ctx context.Context, file string, layer v1.Layer) error { + buildid, err := getBuildID(ctx, file) + if err != nil { + return err + } + + desc, err := partial.Descriptor(layer) + if err != nil { + return err + } + + diffid, err := layer.DiffID() + if err != nil { + return err + } + + btod, ok := c.buildToDiff[file] + if !ok { + btod = buildIDToDiffID{} + } + btod[buildid] = diffid + + dtod, ok := c.diffToDesc[file] + if !ok { + dtod = diffIDToDescriptor{} + } + dtod[diffid] = *desc + + // TODO: Implement better per-file locking. + c.Lock() + defer c.Unlock() + + btodf, err := os.OpenFile(filepath.Join(filepath.Dir(file), "buildid-to-diffid"), os.O_RDWR|os.O_CREATE, 0755) + if err != nil { + return err + } + defer btodf.Close() + + dtodf, err := os.OpenFile(filepath.Join(filepath.Dir(file), "diffid-to-descriptor"), os.O_RDWR|os.O_CREATE, 0755) + if err != nil { + return err + } + defer dtodf.Close() + + enc := json.NewEncoder(btodf) + enc.SetIndent("", " ") + if err := enc.Encode(&btod); err != nil { + return err + } + + enc = json.NewEncoder(dtodf) + enc.SetIndent("", " ") + if err := enc.Encode(&dtod); err != nil { + return err + } + + return nil +} + +func (c *layerCache) readDiffToDesc(file string) (diffIDToDescriptor, error) { + if dtod, ok := c.diffToDesc[file]; ok { + return dtod, nil + } + + dtodf, err := os.Open(filepath.Join(filepath.Dir(file), "diffid-to-descriptor")) + if err != nil { + return nil, err + } + defer dtodf.Close() + + var dtod diffIDToDescriptor + if err := json.NewDecoder(dtodf).Decode(&dtod); err != nil { + return nil, err + } + c.diffToDesc[file] = dtod + return dtod, nil +} + +func (c *layerCache) readBuildToDiff(file string) (buildIDToDiffID, error) { + if btod, ok := c.buildToDiff[file]; ok { + return btod, nil + } + + btodf, err := os.Open(filepath.Join(filepath.Dir(file), "buildid-to-diffid")) + if err != nil { + return nil, err + } + defer btodf.Close() + + var btod buildIDToDiffID + if err := 
json.NewDecoder(btodf).Decode(&btod); err != nil { + return nil, err + } + c.buildToDiff[file] = btod + return btod, nil +} + +func getBuildID(ctx context.Context, file string) (string, error) { + cmd := exec.CommandContext(ctx, "go", "tool", "buildid", file) + var output bytes.Buffer + cmd.Stderr = &output + cmd.Stdout = &output + + if err := cmd.Run(); err != nil { + log.Printf("Unexpected error running \"go tool buildid %s\": %v\n%v", file, err, output.String()) + return "", err + } + return strings.TrimSpace(output.String()), nil +} diff --git a/pkg/build/gobuild.go b/pkg/build/gobuild.go index 475a0101de..752eafd6ed 100644 --- a/pkg/build/gobuild.go +++ b/pkg/build/gobuild.go @@ -78,6 +78,8 @@ type gobuild struct { platformMatcher *platformMatcher dir string labels map[string]string + + cache *layerCache } // Option is a functional option for NewGo. @@ -117,6 +119,10 @@ func (gbo *gobuildOpener) Open() (Interface, error) { labels: gbo.labels, dir: gbo.dir, platformMatcher: matcher, + cache: &layerCache{ + buildToDiff: map[string]buildIDToDiffID{}, + diffToDesc: map[string]diffIDToDescriptor{}, + }, }, nil } @@ -222,12 +228,6 @@ func platformToString(p v1.Platform) string { } func build(ctx context.Context, ip string, dir string, platform v1.Platform, config Config) (string, error) { - tmpDir, err := ioutil.TempDir("", "ko") - if err != nil { - return "", err - } - file := filepath.Join(tmpDir, "out") - buildArgs, err := createBuildArgs(config) if err != nil { return "", err @@ -236,15 +236,31 @@ func build(ctx context.Context, ip string, dir string, platform v1.Platform, con args := make([]string, 0, 4+len(buildArgs)) args = append(args, "build") args = append(args, buildArgs...) - args = append(args, "-o", file) - args = append(args, ip) - cmd := exec.CommandContext(ctx, "go", args...) 
- cmd.Dir = dir env, err := buildEnv(platform, os.Environ(), config.Env) if err != nil { return "", fmt.Errorf("could not create env for %s: %w", ip, err) } + + tmpDir, err := ioutil.TempDir("", "ko") + if err != nil { + return "", err + } + + if dir := os.Getenv("KOCACHE"); dir != "" { + // TODO(#264): if KOCACHE is unset, default to filepath.Join(os.TempDir(), "ko"). + tmpDir = filepath.Join(dir, "bin", ip, platformToString(platform)) + if err := os.MkdirAll(tmpDir, os.ModePerm); err != nil { + return "", err + } + } + + file := filepath.Join(tmpDir, "out") + + args = append(args, "-o", file) + args = append(args, ip) + cmd := exec.CommandContext(ctx, "go", args...) + cmd.Dir = dir cmd.Env = env var output bytes.Buffer @@ -253,7 +269,9 @@ func build(ctx context.Context, ip string, dir string, platform v1.Platform, con log.Printf("Building %s for %s", ip, platformToString(platform)) if err := cmd.Run(); err != nil { - os.RemoveAll(tmpDir) + if os.Getenv("KOCACHE") == "" { + os.RemoveAll(tmpDir) + } log.Printf("Unexpected error running \"go build\": %v\n%v", err, output.String()) return "", err } @@ -643,7 +661,9 @@ func (g *gobuild) buildOne(ctx context.Context, refStr string, base v1.Image, pl if err != nil { return nil, err } - defer os.RemoveAll(filepath.Dir(file)) + if os.Getenv("KOCACHE") == "" { + defer os.RemoveAll(filepath.Dir(file)) + } var layers []mutate.Addendum @@ -671,21 +691,15 @@ func (g *gobuild) buildOne(ctx context.Context, refStr string, base v1.Image, pl appDir := "/ko-app" appPath := path.Join(appDir, appFilename(ref.Path())) - // Construct a tarball with the binary and produce a layer. 
- binaryLayerBuf, err := tarBinary(appPath, file, v1.Time{}, platform) - if err != nil { - return nil, err + miss := func() (v1.Layer, error) { + return buildLayer(appPath, file, platform) } - binaryLayerBytes := binaryLayerBuf.Bytes() - binaryLayer, err := tarball.LayerFromOpener(func() (io.ReadCloser, error) { - return ioutil.NopCloser(bytes.NewBuffer(binaryLayerBytes)), nil - }, tarball.WithCompressedCaching, tarball.WithEstargzOptions(estargz.WithPrioritizedFiles([]string{ - // When using estargz, prioritize downloading the binary entrypoint. - appPath, - }))) + + binaryLayer, err := g.cache.get(ctx, file, miss) if err != nil { return nil, err } + layers = append(layers, mutate.Addendum{ Layer: binaryLayer, History: v1.History{ @@ -759,6 +773,21 @@ func (g *gobuild) buildOne(ctx context.Context, refStr string, base v1.Image, pl return si, nil } +func buildLayer(appPath, file string, platform *v1.Platform) (v1.Layer, error) { + // Construct a tarball with the binary and produce a layer. + binaryLayerBuf, err := tarBinary(appPath, file, v1.Time{}, platform) + if err != nil { + return nil, err + } + binaryLayerBytes := binaryLayerBuf.Bytes() + return tarball.LayerFromOpener(func() (io.ReadCloser, error) { + return ioutil.NopCloser(bytes.NewBuffer(binaryLayerBytes)), nil + }, tarball.WithCompressedCaching, tarball.WithEstargzOptions(estargz.WithPrioritizedFiles([]string{ + // When using estargz, prioritize downloading the binary entrypoint. + appPath, + }))) +} + // Append appPath to the PATH environment variable, if it exists. Otherwise, // set the PATH environment variable to appPath. func updatePath(cf *v1.ConfigFile, appPath string) { diff --git a/pkg/build/layer.go b/pkg/build/layer.go new file mode 100644 index 0000000000..ad63ffbc8b --- /dev/null +++ b/pkg/build/layer.go @@ -0,0 +1,75 @@ +// Copyright 2020 Google LLC All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package build + +import ( + "io" + "sync" + + v1 "github.com/google/go-containerregistry/pkg/v1" + "github.com/google/go-containerregistry/pkg/v1/types" +) + +type lazyLayer struct { + diffid v1.Hash + desc v1.Descriptor + + sync.Once + buildLayer func() (v1.Layer, error) + layer v1.Layer + err error +} + +// All this info is cached by previous builds. +func (l *lazyLayer) Digest() (v1.Hash, error) { + return l.desc.Digest, nil +} + +func (l *lazyLayer) DiffID() (v1.Hash, error) { + return l.diffid, nil +} + +func (l *lazyLayer) Size() (int64, error) { + return l.desc.Size, nil +} + +func (l *lazyLayer) MediaType() (types.MediaType, error) { + return l.desc.MediaType, nil +} + +// This is only called if the registry doesn't have this blob already. +func (l *lazyLayer) Compressed() (io.ReadCloser, error) { + layer, err := l.compute() + if err != nil { + return nil, err + } + return layer.Compressed() +} + +// This should never actually be called but we need it to impl v1.Layer. +func (l *lazyLayer) Uncompressed() (io.ReadCloser, error) { + layer, err := l.compute() + if err != nil { + return nil, err + } + return layer.Uncompressed() +} + +func (l *lazyLayer) compute() (v1.Layer, error) { + l.Once.Do(func() { + l.layer, l.err = l.buildLayer() + }) + return l.layer, l.err +}