Skip to content

Commit

Permalink
deduplicate layer IDs to handle cases where duplicate layers are in t…
Browse files Browse the repository at this point in the history
…he layer tree

Signed-off-by: Jose R. Gonzalez <komish@flutes.dev>
  • Loading branch information
komish committed Jun 5, 2024
1 parent c8f6f72 commit 3bdeab0
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 34 deletions.
94 changes: 60 additions & 34 deletions internal/policy/container/has_modified_files.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,36 +60,69 @@ type packageFilesRef struct {
// Validate runs the check of whether any Red Hat files were modified
func (p *HasModifiedFilesCheck) Validate(ctx context.Context, imgRef image.ImageReference) (bool, error) {
fs := afero.NewOsFs()
layerIDs, packageFiles, packageDist, err := p.gatherDataToValidate(ctx, imgRef, fs)
layerIDs, packageFiles, err := p.gatherDataToValidate(ctx, imgRef, fs)
if err != nil {
return false, fmt.Errorf("could not generate modified files list: %v", err)
}

packageDist, err := p.parsePackageDist(ctx, imgRef.ImageFSPath, fs)
if err != nil {
return false, fmt.Errorf("could not generate modified files list: %v", err)
}

return p.validate(ctx, layerIDs, packageFiles, packageDist)
}

// parsePackageDist returns the platform's distribution value from the
// os-release contents in the extracted image.
func (p *HasModifiedFilesCheck) parsePackageDist(_ context.Context, extractedImageFSPath string, fs afero.Fs) (string, error) {
osRelease, err := fs.Open(filepath.Join(extractedImageFSPath, "etc", "os-release"))
if err != nil {
return "", fmt.Errorf("could not open os-release: %v", err)
}
defer osRelease.Close()
scanner := bufio.NewScanner(osRelease)
packageDist := "unknown"
for scanner.Scan() {
line := scanner.Text()
r, _ := regexp.Compile(`PLATFORM_ID="platform:([[:alnum:]]+)"`)
m := r.FindStringSubmatch(line)
if m == nil {
continue
}
packageDist = m[1]
break
}

if err := scanner.Err(); err != nil {
return "", fmt.Errorf("error while scanning for package dist: %v", err)
}

return packageDist, nil
}

// gatherDataToValidate returns a map from layer digest to a struct containing the list of files
// (packageFilesRef.LayerPackageFiles) installed via packages (packageFilesRef.LayerPackages)
// from the container image, and the list of files (packageFilesRef.LayerFiles) modified/added
// via layers in the image.
func (p *HasModifiedFilesCheck) gatherDataToValidate(ctx context.Context, imgRef image.ImageReference, fs afero.Fs) ([]string, map[string]packageFilesRef, string, error) {
func (p *HasModifiedFilesCheck) gatherDataToValidate(ctx context.Context, imgRef image.ImageReference, fs afero.Fs) ([]string, map[string]packageFilesRef, error) {
logger := logr.FromContextOrDiscard(ctx)

layerDir, err := afero.TempDir(fs, "", "rpm-layers-")
if err != nil {
return nil, nil, "", err
return nil, nil, err
}
defer func() {
_ = fs.RemoveAll(layerDir)
}()

if imgRef.ImageInfo == nil {
return nil, nil, "", fmt.Errorf("image reference invalid")
return nil, nil, fmt.Errorf("image reference invalid")
}

layers, err := imgRef.ImageInfo.Layers()
if err != nil {
return nil, nil, "", err
return nil, nil, err
}

layerIDs := make([]string, 0, len(layers))
Expand All @@ -102,21 +135,36 @@ func (p *HasModifiedFilesCheck) gatherDataToValidate(ctx context.Context, imgRef
for idx, layer := range layers {
layerIDHash, err := layer.Digest()
if err != nil {
return nil, nil, "", fmt.Errorf("unable to retrieve diff id for layer: %w", err)
return nil, nil, fmt.Errorf("unable to retrieve diff id for layer: %w", err)
}

// Capture the diff ID to aid in debugging. We don't technically care if
// there's an error returned here because we don't use the layerDiffID
// value for anything meaningful.
layerDiffID := "unknown"
layerDiffHash, err := layer.DiffID()
if err == nil && layerDiffHash.String() != "" {
layerDiffID = layerDiffHash.String()
}
layerID := layerIDHash.String()

layerDir := filepath.Join(layerDir, fmt.Sprintf("%02d-%s", idx, layerID))
rawLayerID := layerIDHash.String()
// Map everything using a combination of the layer index and the layer
// ID to avoid problems when images have multiple scattered layers with
// the same ID.
layerID := fmt.Sprintf("%02d-%s", idx, rawLayerID)
logger.V(log.TRC).Info("generating unique layer ID", "uniqueLayerID", layerID, "layerID", rawLayerID, "layerDiffID", layerDiffID)

layerDir := filepath.Join(layerDir, layerID)
err = fs.Mkdir(layerDir, 0o755)
if err != nil {
return nil, nil, "", fmt.Errorf("could not create layer directory: %w", err)
return nil, nil, fmt.Errorf("could not create layer directory: %w", err)
}

layerIDs = append(layerIDs, layerID)

files, err := generateChangesFor(ctx, layer)
if err != nil {
return nil, nil, "", err
return nil, nil, err
}

found, pkgList := findRPMDB(ctx, layer)
Expand All @@ -142,7 +190,7 @@ func (p *HasModifiedFilesCheck) gatherDataToValidate(ctx context.Context, imgRef

packageFiles, err := installedFileMapWithExclusions(ctx, pkgList)
if err != nil {
return nil, nil, "", err
return nil, nil, err
}

layerRefs[layerID] = packageFilesRef{
Expand All @@ -153,29 +201,7 @@ func (p *HasModifiedFilesCheck) gatherDataToValidate(ctx context.Context, imgRef
}
}

osRelease, err := fs.Open(filepath.Join(imgRef.ImageFSPath, "etc", "os-release"))
if err != nil {
return nil, nil, "", fmt.Errorf("could not open os-release: %v", err)
}
defer osRelease.Close()
scanner := bufio.NewScanner(osRelease)
packageDist := "unknown"
for scanner.Scan() {
line := scanner.Text()
r, _ := regexp.Compile(`PLATFORM_ID="platform:([[:alnum:]]+)"`)
m := r.FindStringSubmatch(line)
if m == nil {
continue
}
packageDist = m[1]
break
}

if err := scanner.Err(); err != nil {
return nil, nil, "", fmt.Errorf("error while scanning for package dist: %v", err)
}

return layerIDs, layerRefs, packageDist, nil
return layerIDs, layerRefs, nil
}

// validate compares the list of LayerFiles and PackageFiles to see what PackageFiles
Expand Down
34 changes: 34 additions & 0 deletions internal/policy/container/has_modified_files_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ import (
"context"
"path"

"github.com/google/go-containerregistry/pkg/crane"
"github.com/spf13/afero"

"github.com/redhat-openshift-ecosystem/openshift-preflight/internal/image"

"github.com/bombsimon/logrusr/v4"
Expand Down Expand Up @@ -402,6 +405,37 @@ var _ = Describe("HasModifiedFiles", func() {
})
})

When("multiple no-op layers with the same IDs split layers containing RPMDB modifications", func() {
// Test case ensures that we properly deduplicate layer hashes in our file mapping to avoid cases where
// a later layer with the same ID as an earlier layer doesn't overwrite the earlier layer's file mapping.
var img image.ImageReference
var actualLayerCount int
BeforeEach(func() {
// TODO: The containerfile that generates this test fixture is stored in-repo tests/containerfiles.
// The external call here avoids having to store the image locally. A crane-built image runs into
// issues because we cannot run `microdnf` commands using Crane, and need to have multiple layers
// containing RPMDBs to test this issue correctly.
const dupeLayerTestFixture = "quay.io/opdev/preflight-test-fixture:duplicate-layers"
cImg, pullError := crane.Pull(dupeLayerTestFixture)
Expect(pullError).ToNot(HaveOccurred())
img = image.ImageReference{
ImageInfo: cImg,
}

layers, err := img.ImageInfo.Layers()
Expect(err).ToNot(HaveOccurred())
actualLayerCount = len(layers)
})

It("should validate and have matching layer counts", func() {
fs := afero.NewOsFs()
layerIDs, layerRefs, err := hasModifiedFiles.gatherDataToValidate(context.TODO(), img, fs)
Expect(err).ToNot(HaveOccurred())
Expect(len(layerIDs)).To(Equal(actualLayerCount))
Expect(len(layerRefs)).To(Equal(actualLayerCount))
})
})

When("calling the top level Validate", func() {
It("should fail with an invalid ImageReference", func() {
passed, err := hasModifiedFiles.Validate(context.TODO(), image.ImageReference{})
Expand Down
15 changes: 15 additions & 0 deletions test/containerfiles/container-duplicate-noop-layers.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
FROM registry.access.redhat.com/ubi8-minimal@sha256:9e458f41ff8868ceae00608a6fff35b45fd8bbe967bf8655e5ab08da5964f4d0

# This container file backs
# quay.io/opdev/preflight-test-fixture:duplicate-layers, and is intended to test
# an edge case with HasModifiedFiles where multiple duplicate layers were geting
# squashed into a single entry in our layer-to-file map, causing invalid
# modification flags.
#
# The produced artifact is about 100mb, so this fixture exists just to avoid
# having that blob stored in-repo.

COPY example-license.txt /LICENSE
RUN microdnf install gzip -y
COPY example-license.txt /LICENSE
RUN microdnf install gzip -y

0 comments on commit 3bdeab0

Please sign in to comment.