From 5ab393ab001b3fbd909d478e98a6709334af3132 Mon Sep 17 00:00:00 2001 From: Lucas Rodriguez Date: Tue, 9 Apr 2024 03:20:45 -0500 Subject: [PATCH 1/2] fix: remove duplicate logic for writing image layers to disk concurrently (#2409) ## Description remove duplicate logic for writing image layers to disk concurrently crane does this already in `WriteImage()`: https://github.com/defenseunicorns/zarf/blob/08c92e12a4a2b05d0ea5abe055c7c01ba9964051/src/internal/packager/images/pull.go#L342 https://github.com/google/go-containerregistry/blob/8dadbe76ff8c20d0e509406f04b7eade43baa6c1/pkg/v1/layout/write.go#L333-L340 ## Type of change - [x] Other (security config, docs update, etc) ## Checklist before merging - [x] Test, docs, adr added or updated as needed - [x] [Contributor Guide Steps](https://github.com/defenseunicorns/zarf/blob/main/CONTRIBUTING.md#developer-workflow) followed Co-authored-by: Austin Abro <37223396+AustinAbro321@users.noreply.github.com> --- src/internal/packager/images/pull.go | 164 +-------------------------- 1 file changed, 2 insertions(+), 162 deletions(-) diff --git a/src/internal/packager/images/pull.go b/src/internal/packager/images/pull.go index de5ab03ff2..df839e4f07 100644 --- a/src/internal/packager/images/pull.go +++ b/src/internal/packager/images/pull.go @@ -6,10 +6,7 @@ package images import ( "context" - "errors" "fmt" - "io" - "os" "path/filepath" "strings" @@ -28,7 +25,6 @@ import ( "github.com/google/go-containerregistry/pkg/v1/empty" clayout "github.com/google/go-containerregistry/pkg/v1/layout" "github.com/google/go-containerregistry/pkg/v1/partial" - "github.com/google/go-containerregistry/pkg/v1/stream" "github.com/moby/moby/client" ) @@ -182,170 +178,16 @@ func (i *ImageConfig) PullAll() ([]ImgInfo, error) { updateText := fmt.Sprintf("Pulling %d images", imageCount) go utils.RenderProgressBarForLocalDirWrite(i.ImagesPath, totalBytes, doneSaving, updateText, updateText) - // Spawn a goroutine for each layer to write it to disk using crane - - layerWritingConcurrency := helpers.NewConcurrencyTools[bool, error](len(processedLayers)) - - defer layerWritingConcurrency.Cancel() - - for _, layer := range processedLayers { - layer := layer - // Function is a combination of https://github.com/google/go-containerregistry/blob/v0.15.2/pkg/v1/layout/write.go#L270-L305 - // and https://github.com/google/go-containerregistry/blob/v0.15.2/pkg/v1/layout/write.go#L198-L262 - // with modifications. This allows us to dedupe layers for all images and write them concurrently. - go func() { - digest, err := layer.Digest() - if errors.Is(err, stream.ErrNotComputed) { - // Allow digest errors, since streams may not have calculated the hash - // yet. Instead, use an empty value, which will be transformed into a - // random file name with `os.CreateTemp` and the final digest will be - // calculated after writing to a temp file and before renaming to the - // final path. - digest = v1.Hash{Algorithm: "sha256", Hex: ""} - } else if err != nil { - layerWritingConcurrency.ErrorChan <- err - return - } - - size, err := layer.Size() - if errors.Is(err, stream.ErrNotComputed) { - // Allow size errors, since streams may not have calculated the size - // yet. Instead, use -1 as a sentinel value meaning that no size - // comparison can be done and any sized blob file should be considered - // valid and not overwritten. - // - // TODO: Provide an option to always overwrite blobs. - size = -1 - } else if err != nil { - layerWritingConcurrency.ErrorChan <- err - return - } - - if layerWritingConcurrency.IsDone() { - return - } - - readCloser, err := layer.Compressed() - if err != nil { - layerWritingConcurrency.ErrorChan <- err - return - } - - // Create the directory for the blob if it doesn't exist - dir := filepath.Join(string(cranePath), "blobs", digest.Algorithm) - if err := helpers.CreateDirectory(dir, os.ModePerm); err != nil { - layerWritingConcurrency.ErrorChan <- err - return - } - - if layerWritingConcurrency.IsDone() { - return - } - - // Check if blob already exists and is the correct size - file := filepath.Join(dir, digest.Hex) - if s, err := os.Stat(file); err == nil && !s.IsDir() && (s.Size() == size || size == -1) { - layerWritingConcurrency.ProgressChan <- true - return - } - - if layerWritingConcurrency.IsDone() { - return - } - - // Write to a temporary file - w, err := os.CreateTemp(dir, digest.Hex) - if err != nil { - layerWritingConcurrency.ErrorChan <- err - return - } - // Delete temp file if an error is encountered before renaming - defer func() { - if err := os.Remove(w.Name()); err != nil && !errors.Is(err, os.ErrNotExist) { - message.Warnf("error removing temporary file after encountering an error while writing blob: %v", err) - } - }() - - defer w.Close() - - if layerWritingConcurrency.IsDone() { - return - } - - // Write to file rename - if n, err := io.Copy(w, readCloser); err != nil { - layerWritingConcurrency.ErrorChan <- err - return - } else if size != -1 && n != size { - layerWritingConcurrency.ErrorChan <- fmt.Errorf("expected blob size %d, but only wrote %d", size, n) - return - } - - if layerWritingConcurrency.IsDone() { - return - } - - // Always close reader before renaming, since Close computes the digest in - // the case of streaming layers. If Close is not called explicitly, it will - // occur in a goroutine that is not guaranteed to succeed before renamer is - // called. When renamer is the layer's Digest method, it can return - // ErrNotComputed. - if err := readCloser.Close(); err != nil { - layerWritingConcurrency.ErrorChan <- err - return - } - - // Always close file before renaming - if err := w.Close(); err != nil { - layerWritingConcurrency.ErrorChan <- err - return - } - - // Rename file based on the final hash - renamePath := filepath.Join(string(cranePath), "blobs", digest.Algorithm, digest.Hex) - os.Rename(w.Name(), renamePath) - - if layerWritingConcurrency.IsDone() { - return - } - - layerWritingConcurrency.ProgressChan <- true - }() - } - - onLayerWritingError := func(err error) error { - // Send a signal to the progress bar that we're done and wait for the thread to finish - doneSaving <- err - <-doneSaving - message.WarnErr(err, "Failed to write image layers, trying again up to 3 times...") - if strings.HasPrefix(err.Error(), "expected blob size") { - message.Warnf("Potential image cache corruption: %s - try clearing cache with \"zarf tools clear-cache\"", err.Error()) - } - return err - } - - if err := layerWritingConcurrency.WaitWithoutProgress(onLayerWritingError); err != nil { - return nil, err - } - imageSavingConcurrency := helpers.NewConcurrencyTools[digestInfo, error](len(refInfoToImage)) defer imageSavingConcurrency.Cancel() // Spawn a goroutine for each image to write it's config and manifest to disk using crane - // All layers should already be in place so this should be extremely fast for refInfo, img := range refInfoToImage { // Create a closure so that we can pass the refInfo and img into the goroutine refInfo, img := refInfo, img go func() { - // Save the image via crane - err := cranePath.WriteImage(img) - - if imageSavingConcurrency.IsDone() { - return - } - - if err != nil { + if err := cranePath.WriteImage(img); err != nil { // Check if the cache has been invalidated, and warn the user if so if strings.HasPrefix(err.Error(), "error writing layer: expected blob size") { message.Warnf("Potential image cache corruption: %s - try clearing cache with \"zarf tools clear-cache\"", err.Error()) @@ -390,15 +232,13 @@ func (i *ImageConfig) PullAll() ([]ImgInfo, error) { } // for every image sequentially append OCI descriptor - for refInfo, img := range refInfoToImage { desc, err := partial.Descriptor(img) if err != nil { return nil, err } - cranePath.AppendDescriptor(*desc) - if err != nil { + if err := cranePath.AppendDescriptor(*desc); err != nil { return nil, err } From 2982c00745b6461740dad1ed719f1cab539053b6 Mon Sep 17 00:00:00 2001 From: Wayne Starr Date: Tue, 9 Apr 2024 07:44:08 -0600 Subject: [PATCH 2/2] feat: add option to skip cosign lookup during find images (#2427) ## Description For larger packages it can take a long time to parse through the cosign lookups, having an option to not do this is nice for faster feedback cycles. ## Related Issue Fixes #N/A ## Type of change - [ ] Bug fix (non-breaking change which fixes an issue) - [X] New feature (non-breaking change which adds functionality) - [ ] Other (security config, docs update, etc) ## Checklist before merging - [X] Test, docs, adr added or updated as needed - [X] [Contributor Guide Steps](https://github.com/defenseunicorns/zarf/blob/main/CONTRIBUTING.md#developer-workflow) followed Co-authored-by: Austin Abro <37223396+AustinAbro321@users.noreply.github.com> --- .../100-cli-commands/zarf_dev_find-images.md | 1 + src/cmd/dev.go | 2 + src/config/lang/english.go | 17 ++++---- src/pkg/packager/prepare.go | 42 ++++++++++--------- src/types/runtime.go | 3 +- 5 files changed, 36 insertions(+), 29 deletions(-) diff --git a/docs/2-the-zarf-cli/100-cli-commands/zarf_dev_find-images.md b/docs/2-the-zarf-cli/100-cli-commands/zarf_dev_find-images.md index 221ee2020a..6f0c335aec 100644 --- a/docs/2-the-zarf-cli/100-cli-commands/zarf_dev_find-images.md +++ b/docs/2-the-zarf-cli/100-cli-commands/zarf_dev_find-images.md @@ -23,6 +23,7 @@ zarf dev find-images [ PACKAGE ] [flags] --kube-version string Override the default helm template KubeVersion when performing a package chart template --registry-url string Override the ###ZARF_REGISTRY### value (default "127.0.0.1:31999") -p, --repo-chart-path string If git repos hold helm charts, often found with gitops tools, specify the chart path, e.g. "/" or "/chart" + --skip-cosign Skip searching for cosign artifacts related to discovered images --why string Prints the source manifest for the specified image ``` diff --git a/src/cmd/dev.go b/src/cmd/dev.go index 92a46e3bcf..068565f7c7 100644 --- a/src/cmd/dev.go +++ b/src/cmd/dev.go @@ -301,6 +301,8 @@ func init() { devFindImagesCmd.Flags().StringVar(&pkgConfig.FindImagesOpts.KubeVersionOverride, "kube-version", "", lang.CmdDevFlagKubeVersion) // check which manifests are using this particular image devFindImagesCmd.Flags().StringVar(&pkgConfig.FindImagesOpts.Why, "why", "", lang.CmdDevFlagFindImagesWhy) + // skip searching cosign artifacts in find images + devFindImagesCmd.Flags().BoolVar(&pkgConfig.FindImagesOpts.SkipCosign, "skip-cosign", false, lang.CmdDevFlagFindImagesSkipCosign) defaultRegistry := fmt.Sprintf("%s:%d", helpers.IPV4Localhost, types.ZarfInClusterContainerRegistryNodePort) devFindImagesCmd.Flags().StringVar(&pkgConfig.FindImagesOpts.RegistryURL, "registry-url", defaultRegistry, lang.CmdDevFlagFindImagesRegistry) diff --git a/src/config/lang/english.go b/src/config/lang/english.go index 9c9219a231..658ebab539 100644 --- a/src/config/lang/english.go +++ b/src/config/lang/english.go @@ -385,13 +385,14 @@ $ zarf package pull oci://ghcr.io/defenseunicorns/packages/dos-games:1.0.0 -a sk "NOTE: This file must not already exist. If no filename is provided, the config will be written to the current working directory as zarf-config.toml." CmdDevGenerateConfigErr = "Unable to write the config file %s, make sure the file doesn't already exist" - CmdDevFlagExtractPath = `The path inside of an archive to use to calculate the sha256sum (i.e. for use with "files.extractPath")` - CmdDevFlagSet = "Specify package variables to set on the command line (KEY=value). Note, if using a config file, this will be set by [package.create.set]." - CmdDevFlagRepoChartPath = `If git repos hold helm charts, often found with gitops tools, specify the chart path, e.g. "/" or "/chart"` - CmdDevFlagGitAccount = "User or organization name for the git account that the repos are created under." - CmdDevFlagKubeVersion = "Override the default helm template KubeVersion when performing a package chart template" - CmdDevFlagFindImagesRegistry = "Override the ###ZARF_REGISTRY### value" - CmdDevFlagFindImagesWhy = "Prints the source manifest for the specified image" + CmdDevFlagExtractPath = `The path inside of an archive to use to calculate the sha256sum (i.e. for use with "files.extractPath")` + CmdDevFlagSet = "Specify package variables to set on the command line (KEY=value). Note, if using a config file, this will be set by [package.create.set]." + CmdDevFlagRepoChartPath = `If git repos hold helm charts, often found with gitops tools, specify the chart path, e.g. "/" or "/chart"` + CmdDevFlagGitAccount = "User or organization name for the git account that the repos are created under." + CmdDevFlagKubeVersion = "Override the default helm template KubeVersion when performing a package chart template" + CmdDevFlagFindImagesRegistry = "Override the ###ZARF_REGISTRY### value" + CmdDevFlagFindImagesWhy = "Prints the source manifest for the specified image" + CmdDevFlagFindImagesSkipCosign = "Skip searching for cosign artifacts related to discovered images" CmdDevLintShort = "Lints the given package for valid schema and recommended practices" CmdDevLintLong = "Verifies the package schema, checks if any variables won't be evaluated, and checks for unpinned images/repos/files" @@ -477,7 +478,7 @@ $ zarf tools registry digest reg.example.com/stefanprodan/podinfo:6.4.0 CmdToolsGetGitPasswdShort = "[Deprecated] Returns the push user's password for the Git server" CmdToolsGetGitPasswdLong = "[Deprecated] Reads the password for a user with push access to the configured Git server in Zarf State. Note that this command has been replaced by 'zarf tools get-creds git' and will be removed in Zarf v1.0.0." CmdToolsGetGitPasswdDeprecation = "Deprecated: This command has been replaced by 'zarf tools get-creds git' and will be removed in Zarf v1.0.0." - CmdToolsYqExample = ` + CmdToolsYqExample = ` # yq defaults to 'eval' command if no command is specified. See "zarf tools yq eval --help" for more examples. # read the "stuff" node from "myfile.yml" diff --git a/src/pkg/packager/prepare.go b/src/pkg/packager/prepare.go index 96bbe10ac8..407ddcac4c 100644 --- a/src/pkg/packager/prepare.go +++ b/src/pkg/packager/prepare.go @@ -313,29 +313,31 @@ func (p *Packager) findImages() (imgMap map[string][]string, err error) { spinner.Success() - // Handle cosign artifact lookups - if len(imagesMap[component.Name]) > 0 { - var cosignArtifactList []string - spinner := message.NewProgressSpinner("Looking up cosign artifacts for discovered images (0/%d)", len(imagesMap[component.Name])) - defer spinner.Stop() - - for idx, image := range imagesMap[component.Name] { - spinner.Updatef("Looking up cosign artifacts for discovered images (%d/%d)", idx+1, len(imagesMap[component.Name])) - cosignArtifacts, err := utils.GetCosignArtifacts(image) - if err != nil { - message.WarnErrf(err, "Problem looking up cosign artifacts for %s: %s", image, err.Error()) - erroredCosignLookups = append(erroredCosignLookups, image) + if !p.cfg.FindImagesOpts.SkipCosign { + // Handle cosign artifact lookups + if len(imagesMap[component.Name]) > 0 { + var cosignArtifactList []string + spinner := message.NewProgressSpinner("Looking up cosign artifacts for discovered images (0/%d)", len(imagesMap[component.Name])) + defer spinner.Stop() + + for idx, image := range imagesMap[component.Name] { + spinner.Updatef("Looking up cosign artifacts for discovered images (%d/%d)", idx+1, len(imagesMap[component.Name])) + cosignArtifacts, err := utils.GetCosignArtifacts(image) + if err != nil { + message.WarnErrf(err, "Problem looking up cosign artifacts for %s: %s", image, err.Error()) + erroredCosignLookups = append(erroredCosignLookups, image) + } + cosignArtifactList = append(cosignArtifactList, cosignArtifacts...) } - cosignArtifactList = append(cosignArtifactList, cosignArtifacts...) - } - spinner.Success() + spinner.Success() - if len(cosignArtifactList) > 0 { - imagesMap[component.Name] = append(imagesMap[component.Name], cosignArtifactList...) - componentDefinition += fmt.Sprintf(" # Cosign artifacts for images - %s - %s\n", p.cfg.Pkg.Metadata.Name, component.Name) - for _, cosignArtifact := range cosignArtifactList { - componentDefinition += fmt.Sprintf(" - %s\n", cosignArtifact) + if len(cosignArtifactList) > 0 { + imagesMap[component.Name] = append(imagesMap[component.Name], cosignArtifactList...) + componentDefinition += fmt.Sprintf(" # Cosign artifacts for images - %s - %s\n", p.cfg.Pkg.Metadata.Name, component.Name) + for _, cosignArtifact := range cosignArtifactList { + componentDefinition += fmt.Sprintf(" - %s\n", cosignArtifact) + } } } } diff --git a/src/types/runtime.go b/src/types/runtime.go index 1ee40f91a0..4df0299175 100644 --- a/src/types/runtime.go +++ b/src/types/runtime.go @@ -57,7 +57,8 @@ type ZarfFindImagesOptions struct { RepoHelmChartPath string `json:"repoHelmChartPath" jsonschema:"description=Path to the helm chart directory"` KubeVersionOverride string `json:"kubeVersionOverride" jsonschema:"description=Kubernetes version to use for the helm chart"` RegistryURL string `json:"registryURL" jsonschema:"description=Manual override for ###ZARF_REGISTRY###"` - Why string `json:"why" jsonschema:"description=Find the location of the image given as an argument and print it to the console."` + Why string `json:"why" jsonschema:"description=Find the location of the image given as an argument and print it to the console"` + SkipCosign bool `json:"skip-cosign" jsonschema:"description=Optionally skip lookup of cosign artifacts when finding images"` } // ZarfDeployOptions tracks the user-defined preferences during a package deploy.