Skip to content

Commit

Permalink
Add Key() to LayeredMap and Snapshotter
Browse files Browse the repository at this point in the history
This will return a string representation of the current filesystem to be
used with caching.

Whenever a file is explicitly added (via ADD or COPY), it will be stored
in "added" in the LayeredMap. The file will map to a hash created by
CacheHasher (which doesn't take into account mtime, since that will be
different with every build, making the cache useless)

Key() will return a SHA of the added files, which will be used in
determining the overall cache key for a command.
  • Loading branch information
Priya Wadhwa committed Sep 4, 2018
1 parent 2e10d27 commit 13accba
Show file tree
Hide file tree
Showing 6 changed files with 155 additions and 9 deletions.
2 changes: 1 addition & 1 deletion pkg/executor/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ func DoBuild(opts *options.KanikoOptions) (v1.Image, error) {
if err := util.GetFSFromImage(constants.RootDir, sourceImage); err != nil {
return nil, err
}
l := snapshot.NewLayeredMap(hasher)
l := snapshot.NewLayeredMap(hasher, util.CacheHasher())
snapshotter := snapshot.NewSnapshotter(l, constants.RootDir)
// Take initial snapshot
if err := snapshotter.Init(); err != nil {
Expand Down
36 changes: 30 additions & 6 deletions pkg/snapshot/layered_map.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,27 @@ limitations under the License.
package snapshot

import (
"bytes"
"encoding/json"
"fmt"
"path/filepath"
"strings"

"github.com/GoogleContainerTools/kaniko/pkg/util"
)

// LayeredMap records per-snapshot file hashes. Each call to Snapshot pushes a
// new map onto layers, whiteouts and added; Add writes into the newest one.
type LayeredMap struct {
	// layers holds one map per snapshot, keyed by file path, whose values
	// are produced by hasher.
	layers []map[string]string
	// whiteouts holds one map per snapshot.
	// NOTE(review): presumably keyed by whited-out path; confirm against MaybeAddWhiteout.
	whiteouts []map[string]string
	// added holds one map per snapshot, keyed by file path, whose values
	// are produced by cacheHasher. Key hashes this slice.
	added []map[string]string
	// hasher computes the value stored in layers for a path.
	hasher func(string) (string, error)
	// cacheHasher computes the value stored in added for a path.
	cacheHasher func(string) (string, error)
}

func NewLayeredMap(h func(string) (string, error)) *LayeredMap {
func NewLayeredMap(h func(string) (string, error), c func(string) (string, error)) *LayeredMap {
l := LayeredMap{
hasher: h,
hasher: h,
cacheHasher: c,
}
l.layers = []map[string]string{}
return &l
Expand All @@ -39,8 +46,18 @@ func NewLayeredMap(h func(string) (string, error)) *LayeredMap {
// Snapshot opens a new layer by pushing a fresh, empty map onto each of the
// added, layers and whiteouts stacks. Subsequent Add calls write into these
// newest maps.
func (l *LayeredMap) Snapshot() {
	l.added = append(l.added, make(map[string]string))
	l.layers = append(l.layers, make(map[string]string))
	l.whiteouts = append(l.whiteouts, make(map[string]string))
}

// Key returns a hash for added files: the result of util.SHA256 applied to
// the JSON encoding of l.added (as written by json.Encoder, so it includes
// the trailing newline). encoding/json writes map keys in sorted order, so
// two maps with identical contents produce the same key.
//
// An error is returned if the JSON encoding or the hashing fails.
func (l *LayeredMap) Key() (string, error) {
	var buf bytes.Buffer
	// Surface encoding failures instead of hashing a partial buffer.
	if err := json.NewEncoder(&buf).Encode(l.added); err != nil {
		return "", err
	}
	return util.SHA256(&buf)
}

// GetFlattenedPathsForWhiteOut returns all paths in the current FS
func (l *LayeredMap) GetFlattenedPathsForWhiteOut() map[string]struct{} {
paths := map[string]struct{}{}
for _, l := range l.layers {
Expand Down Expand Up @@ -85,11 +102,18 @@ func (l *LayeredMap) MaybeAddWhiteout(s string) (bool, error) {

// Add will add the specified file s to the layered map.
//
// The file is hashed twice: once with hasher (stored in the newest layers
// map) and once with cacheHasher (stored in the newest added map). Both
// hashes are computed before either map is written, so a failure in either
// hash function leaves the LayeredMap unchanged.
//
// Snapshot must have been called at least once beforehand; otherwise there
// is no newest map to write into and indexing it panics.
func (l *LayeredMap) Add(s string) error {
	newV, err := l.hasher(s)
	if err != nil {
		return fmt.Errorf("Error creating hash for %s: %v", s, err)
	}
	cacheV, err := l.cacheHasher(s)
	if err != nil {
		return fmt.Errorf("Error creating cache hash for %s: %v", s, err)
	}
	l.layers[len(l.layers)-1][s] = newV
	l.added[len(l.added)-1][s] = cacheV
	return nil
}

Expand Down
78 changes: 78 additions & 0 deletions pkg/snapshot/layered_map_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
Copyright 2018 Google LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package snapshot

import (
"testing"
)

// Test_CacheKey checks that LayeredMap.Key yields equal keys for added maps
// with the same contents (regardless of the order the entries were written
// in) and different keys when any value differs.
func Test_CacheKey(t *testing.T) {
	type testCase struct {
		name      string
		first     map[string]string
		second    map[string]string
		wantEqual bool
	}

	cases := []testCase{
		{
			name:      "maps are the same",
			first:     map[string]string{"a": "apple", "b": "bat", "c": "cat"},
			second:    map[string]string{"c": "cat", "b": "bat", "a": "apple"},
			wantEqual: true,
		},
		{
			name:      "maps are different",
			first:     map[string]string{"a": "apple", "b": "bat", "c": "cat"},
			second:    map[string]string{"c": "", "b": "bat", "a": "apple"},
			wantEqual: false,
		},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			left := LayeredMap{added: []map[string]string{tc.first}}
			right := LayeredMap{added: []map[string]string{tc.second}}

			leftKey, err := left.Key()
			if err != nil {
				t.Fatalf("error getting key for map 1: %v", err)
			}
			rightKey, err := right.Key()
			if err != nil {
				t.Fatalf("error getting key for map 2: %v", err)
			}

			same := leftKey == rightKey
			switch {
			case tc.wantEqual && !same:
				t.Fatalf("keys differ.\nExpected\n%+v\nActual\n%+v", leftKey, rightKey)
			case !tc.wantEqual && same:
				t.Fatal("keys are the same, expected different keys")
			}
		})
	}
}
8 changes: 7 additions & 1 deletion pkg/snapshot/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ func (s *Snapshotter) Init() error {
return nil
}

// Key returns a string based on the current state of the file system.
// It delegates directly to s.l.Key() and passes through both the key and
// any error unchanged.
func (s *Snapshotter) Key() (string, error) {
return s.l.Key()
}

// TakeSnapshot takes a snapshot of the specified files, avoiding directories in the whitelist, and creates
// a tarball of the changed files. Return contents of the tarball, and whether or not any files were changed
func (s *Snapshotter) TakeSnapshot(files []string) ([]byte, error) {
Expand Down Expand Up @@ -102,7 +107,8 @@ func (s *Snapshotter) snapshotFiles(f io.Writer, files []string) (bool, error) {
logrus.Info("No files changed in this command, skipping snapshotting.")
return false, nil
}
logrus.Infof("Taking snapshot of files %v...", files)
logrus.Info("Taking snapshot of files...")
logrus.Debugf("Taking snapshot of files %v", files)
snapshottedFiles := make(map[string]bool)
filesAdded := false

Expand Down
2 changes: 1 addition & 1 deletion pkg/snapshot/snapshot_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ func setUpTestDir() (string, *Snapshotter, error) {
}

// Take the initial snapshot
l := NewLayeredMap(util.Hasher())
l := NewLayeredMap(util.Hasher(), util.CacheHasher())
snapshotter := NewSnapshotter(l, testDir)
if err := snapshotter.Init(); err != nil {
return testDir, nil, errors.Wrap(err, "initializing snapshotter")
Expand Down
38 changes: 38 additions & 0 deletions pkg/util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package util

import (
"crypto/md5"
"crypto/sha256"
"encoding/hex"
"io"
"os"
Expand Down Expand Up @@ -72,6 +73,36 @@ func Hasher() func(string) (string, error) {
return hasher
}

// CacheHasher returns a hash function intended for cache keys. For a path p
// it produces the hex-encoded MD5 of, in order: the file mode string, the
// owner uid and gid (base 36, separated by a comma) and, for regular files
// only, the file contents. The modification time is deliberately left out.
//
// The returned function reports an error if p cannot be stat'ed, if the
// platform-specific stat data is not available as *syscall.Stat_t, or if a
// regular file cannot be opened or read.
func CacheHasher() func(string) (string, error) {
	hasher := func(p string) (string, error) {
		fi, err := os.Lstat(p)
		if err != nil {
			return "", err
		}
		// Check the assertion once instead of panicking when the
		// underlying stat data has a different type.
		st, ok := fi.Sys().(*syscall.Stat_t)
		if !ok {
			return "", &os.PathError{Op: "lstat", Path: p, Err: syscall.ENOTSUP}
		}

		// hash.Hash.Write never returns an error, so results are not checked.
		h := md5.New()
		h.Write([]byte(fi.Mode().String()))

		h.Write([]byte(strconv.FormatUint(uint64(st.Uid), 36)))
		h.Write([]byte(","))
		h.Write([]byte(strconv.FormatUint(uint64(st.Gid), 36)))

		if fi.Mode().IsRegular() {
			f, err := os.Open(p)
			if err != nil {
				return "", err
			}
			defer f.Close()
			if _, err := io.Copy(h, f); err != nil {
				return "", err
			}
		}

		return hex.EncodeToString(h.Sum(nil)), nil
	}
	return hasher
}

// MtimeHasher returns a hash function, which only looks at mtime to determine if a file has changed.
// Note that the mtime can lag, so it's possible that a file will have changed but the mtime may look the same.
func MtimeHasher() func(string) (string, error) {
Expand All @@ -86,3 +117,10 @@ func MtimeHasher() func(string) (string, error) {
}
return hasher
}

// SHA256 returns the shasum of the contents of r: the hex-encoded SHA-256
// digest of every byte read from r until EOF.
//
// If reading from r fails, the error is returned together with an empty
// string, so that a digest of partially-read data is never mistaken for a
// valid result.
func SHA256(r io.Reader) (string, error) {
	hasher := sha256.New()
	if _, err := io.Copy(hasher, r); err != nil {
		return "", err
	}
	return hex.EncodeToString(hasher.Sum(nil)), nil
}

0 comments on commit 13accba

Please sign in to comment.