From 8ed7dada2a3b3e20f370089501fcf5b66543acc2 Mon Sep 17 00:00:00 2001 From: Stefano Arlandini Date: Sat, 5 Feb 2022 03:00:14 +0100 Subject: [PATCH] Add the `hashFiles` template func to calculate the md5 hash of multiple files --- CHANGELOG.md | 2 +- docs/cache_key_templates.md | 5 +++ internal/plugin/plugin.go | 3 +- key/generator/hash.go | 4 +- key/generator/metadata.go | 73 +++++++++++++++++++++++++--------- key/generator/metadata_test.go | 66 ++++++++---------------------- key/generator/util.go | 6 +-- 7 files changed, 85 insertions(+), 74 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a8f0b2b9..7773b7bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- Nothing. +- [#198](https://github.com/meltwater/drone-cache/pull/198) Add `hashFiles` template function to generate the MD5 hash of multiple files ### Changed diff --git a/docs/cache_key_templates.md b/docs/cache_key_templates.md index 91fa03b7..eb7549a0 100644 --- a/docs/cache_key_templates.md +++ b/docs/cache_key_templates.md @@ -5,6 +5,7 @@ Cache key template syntax is very basic. You just need to provide a string. In t Also following helper functions provided for your use: * `checksum`: Provides md5 hash of a file for given path +* `hashFiles`: Provides a single md5 hash computed over the md5 hashes of all files matching the given paths or glob patterns * `epoch`: Provides Unix epoch * `arch`: Provides Architecture of running system * `os`: Provides Operation system of running system @@ -17,6 +18,10 @@ For further information about this syntax please see [official docs](https://gol `"{{ .Repo.Name }}_{{ checksum "go.mod" }}_{{ checksum "go.sum" }}_{{ arch }}_{{ os }}"` +`"{{ .Repo.Name }}_{{ hashFiles "go.mod" "go.sum" }}_{{ arch }}_{{ os }}"` + +`"{{ .Repo.Name }}_{{ hashFiles "go.*" }}_{{ arch }}_{{ os }}"` + ## Metadata Following metadata object is available and pre-populated with current build information for you to use in cache key templates. 
diff --git a/internal/plugin/plugin.go b/internal/plugin/plugin.go index 7879d752..2a19dcca 100644 --- a/internal/plugin/plugin.go +++ b/internal/plugin/plugin.go @@ -6,6 +6,7 @@ import ( "fmt" "os" "path/filepath" + "time" "github.com/go-kit/log" "github.com/go-kit/log/level" @@ -83,7 +84,7 @@ func (p *Plugin) Exec() error { // nolint: funlen,cyclop var generator key.Generator if cfg.CacheKeyTemplate != "" { - generator = keygen.NewMetadata(p.logger, cfg.CacheKeyTemplate, p.Metadata) + generator = keygen.NewMetadata(p.logger, cfg.CacheKeyTemplate, p.Metadata, time.Now) if err := generator.Check(); err != nil { return fmt.Errorf("parse failed, falling back to default, %w", err) } diff --git a/key/generator/hash.go b/key/generator/hash.go index e3079e55..a35b9ef3 100644 --- a/key/generator/hash.go +++ b/key/generator/hash.go @@ -23,14 +23,14 @@ func (h *Hash) Generate(parts ...string) (string, error) { return "", fmt.Errorf("generate hash key for mounted, %w", err) } - return key, nil + return fmt.Sprintf("%x", key), nil } // Check checks if generator functional. func (h *Hash) Check() error { return nil } // hash generates a key based on given strings (ie. filename paths and branch). -func hash(parts ...string) (string, error) { +func hash(parts ...string) ([]byte, error) { readers := make([]io.Reader, len(parts)) for i, p := range parts { readers[i] = strings.NewReader(p) diff --git a/key/generator/metadata.go b/key/generator/metadata.go index 478d5bfa..e1f8bb11 100644 --- a/key/generator/metadata.go +++ b/key/generator/metadata.go @@ -1,8 +1,10 @@ package generator import ( + "bytes" "errors" "fmt" + "io" "os" "path/filepath" "runtime" @@ -31,16 +33,17 @@ type Metadata struct { } // NewMetadata creates a new Key Generator. 
-func NewMetadata(logger log.Logger, tmpl string, data metadata.Metadata) *Metadata { +func NewMetadata(logger log.Logger, tmpl string, data metadata.Metadata, nowFunc func() time.Time) *Metadata { return &Metadata{ logger: logger, tmpl: tmpl, data: data, funcMap: template.FuncMap{ - "checksum": checksumFunc(logger), - "epoch": func() string { return strconv.FormatInt(time.Now().Unix(), EpochNumBase) }, - "arch": func() string { return runtime.GOARCH }, - "os": func() string { return runtime.GOOS }, + "checksum": checksumFunc(logger), + "hashFiles": hashFilesFunc(logger), + "epoch": func() string { return strconv.FormatInt(nowFunc().Unix(), EpochNumBase) }, + "arch": func() string { return runtime.GOARCH }, + "os": func() string { return runtime.GOOS }, }, } } @@ -89,29 +92,63 @@ func (g *Metadata) parseTemplate() (*template.Template, error) { func checksumFunc(logger log.Logger) func(string) string { return func(p string) string { - path, err := filepath.Abs(filepath.Clean(p)) - if err != nil { - level.Error(logger).Log("cache key template/checksum could not find file") + return fmt.Sprintf("%x", getFileHash(p, logger)) + } +} - return "" - } +func hashFilesFunc(logger log.Logger) func(...string) string { + return func(patterns ...string) string { + var readers []io.Reader - f, err := os.Open(path) - if err != nil { - level.Error(logger).Log("cache key template/checksum could not open file") + for _, pattern := range patterns { + paths, err := filepath.Glob(pattern) + if err != nil { + level.Error(logger).Log("could not parse file path as a glob pattern") + continue + } + for _, p := range paths { + readers = append(readers, bytes.NewReader(getFileHash(p, logger))) + } + } + + if len(readers) == 0 { + level.Debug(logger).Log("no matches found for glob") return "" } - defer internal.CloseWithErrLogf(logger, f, "checksum close defer") + level.Debug(logger).Log("found %d files to hash", len(readers)) - str, err := readerHasher(f) + h, err := readerHasher(readers...) 
if err != nil { - level.Error(logger).Log("cache key template/checksum could not generate hash") - + level.Error(logger).Log("could not generate the hash of the input files: %s", err.Error()) return "" } - return str + return fmt.Sprintf("%x", h) } } + +func getFileHash(path string, logger log.Logger) []byte { + path, err := filepath.Abs(filepath.Clean(path)) + if err != nil { + level.Error(logger).Log("cache key template/checksum could not find file") + return []byte{} + } + + f, err := os.Open(path) + if err != nil { + level.Error(logger).Log("cache key template/checksum could not open file") + return []byte{} + } + + defer internal.CloseWithErrLogf(logger, f, "checksum close defer") + + str, err := readerHasher(f) + if err != nil { + level.Error(logger).Log("cache key template/checksum could not generate hash") + return []byte{} + } + + return str +} diff --git a/key/generator/metadata_test.go b/key/generator/metadata_test.go index 9662bc39..4c964a53 100644 --- a/key/generator/metadata_test.go +++ b/key/generator/metadata_test.go @@ -1,8 +1,9 @@ package generator import ( + "runtime" "testing" - "text/template" + "time" "github.com/go-kit/log" "github.com/meltwater/drone-cache/internal/metadata" @@ -12,7 +13,7 @@ import ( func TestGenerate(t *testing.T) { t.Parallel() - l := log.NewNopLogger() + logger := log.NewNopLogger() for _, tt := range []struct { given string @@ -21,61 +22,28 @@ func TestGenerate(t *testing.T) { {`{{ .Repo.Name }}`, "RepoName"}, {`{{ checksum "checksum_file_test.txt"}}`, "04a29c732ecbce101c1be44c948a50c6"}, {`{{ checksum "../../docs/drone_env_vars.md"}}`, "f8b5b7f96f3ffaa828e4890aab290e59"}, + {`{{ hashFiles "" }}`, ""}, + {`{{ hashFiles "checksum_file_test.txt" }}`, "5c3544faf206777a2827f5db8fca3a9a"}, + {`{{ hashFiles "checksum_file_test.txt" "checksum_file_test.txt" }}`, "1ce4114d3f702eecca6de4fed10250f3"}, + {`{{ hashFiles "checksum_file_tes*.txt" }}`, "5c3544faf206777a2827f5db8fca3a9a"}, {`{{ epoch }}`, "1550563151"}, - {`{{ arch 
}}`, "amd64"}, - {`{{ os }}`, "darwin"}, + {`{{ arch }}`, runtime.GOARCH}, + {`{{ os }}`, runtime.GOOS}, } { tt := tt t.Run(tt.given, func(t *testing.T) { - g := Metadata{ - logger: l, - tmpl: tt.given, - data: metadata.Metadata{Repo: metadata.Repo{Name: "RepoName"}}, - funcMap: template.FuncMap{ - "checksum": checksumFunc(l), - "epoch": func() string { return "1550563151" }, - "arch": func() string { return "amd64" }, - "os": func() string { return "darwin" }, + g := NewMetadata( + logger, + tt.given, + metadata.Metadata{Repo: metadata.Repo{Name: "RepoName"}}, + func() time.Time { + return time.Unix(1550563151, 0) }, - } + ) actual, err := g.Generate(tt.given) test.Ok(t, err) - test.Equals(t, actual, tt.expected) - }) - } -} - -func TestParseTemplate(t *testing.T) { - t.Parallel() - - l := log.NewNopLogger() - - for _, tt := range []struct { - given string - }{ - {`{{ .Repo.Name }}`}, - {`{{ checksum "checksum_file_test.txt"}}`}, - {`{{ epoch }}`}, - {`{{ arch }}`}, - {`{{ os }}`}, - } { - tt := tt - t.Run(tt.given, func(t *testing.T) { - g := Metadata{ - logger: l, - tmpl: tt.given, - data: metadata.Metadata{Repo: metadata.Repo{Name: "RepoName"}}, - funcMap: template.FuncMap{ - "checksum": checksumFunc(l), - "epoch": func() string { return "1550563151" }, - "arch": func() string { return "amd64" }, - "os": func() string { return "darwin" }, - }, - } - - _, err := g.parseTemplate() - test.Ok(t, err) + test.Equals(t, tt.expected, actual) }) } } diff --git a/key/generator/util.go b/key/generator/util.go index 3b248872..ca5654dc 100644 --- a/key/generator/util.go +++ b/key/generator/util.go @@ -7,15 +7,15 @@ import ( ) // readerHasher generic md5 hash generater from io.Reader. -func readerHasher(readers ...io.Reader) (string, error) { +func readerHasher(readers ...io.Reader) ([]byte, error) { // Use go1.14 new hashmap functions. 
h := md5.New() // #nosec for _, r := range readers { if _, err := io.Copy(h, r); err != nil { - return "", fmt.Errorf("write reader as hash, %w", err) + return nil, fmt.Errorf("write reader as hash, %w", err) } } - return fmt.Sprintf("%x", h.Sum(nil)), nil + return h.Sum(nil), nil }