Skip to content

Commit

Permalink
feat: reporting all base images in _COLLECTED_ARTIFACTS (#457)
Browse files Browse the repository at this point in the history
In addition to reporting the base image of the target section,
all other base images are also collected and reported in
_COLLECTED_ARTIFACTS.
  • Loading branch information
miki725 authored Dec 10, 2024
1 parent 6d04642 commit c5ada18
Show file tree
Hide file tree
Showing 13 changed files with 238 additions and 92 deletions.
1 change: 1 addition & 0 deletions src/chalk_common.nim
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ type
cmd*: CmdInfo
shell*: ShellInfo
lastUser*: UserInfo
chalk*: ChalkObj

DockerEntrypoint* = tuple
entrypoint: EntryPointInfo
Expand Down
65 changes: 35 additions & 30 deletions src/docker/build.nim
Original file line number Diff line number Diff line change
Expand Up @@ -333,16 +333,16 @@ proc launchDockerSubscan(ctx: DockerInvocation,
result = runChalkSubScan(usableContexts, "insert").report
trace("docker: subscan complete.")

proc collectBaseImage(chalk: ChalkObj, baseSection: DockerFileSection) =
proc collectBaseImage(chalk: ChalkObj, section: DockerFileSection) =
trace("docker: collecting chalkmark from base image " &
$baseSection.image & " for " & $chalk.platform)
$section.image & " for " & $chalk.platform)
try:
let baseChalkOpt = scanImage(baseSection.image, platform = chalk.platform)
let baseChalkOpt = scanImage(section.image, platform = chalk.platform)
if baseChalkOpt.isNone():
trace("docker: base image could not be scanned")
return
let
baseChalk = baseChalkOpt.get(ChalkObj())
baseChalk = baseChalkOpt.get()
dict = chalk.collectedData
collected =
if baseChalk.collectedData != nil:
Expand All @@ -352,44 +352,49 @@ proc collectBaseImage(chalk: ChalkObj, baseSection: DockerFileSection) =
baseChalk.addToAllArtifacts()
baseChalk.collectedData["_OP_ARTIFACT_CONTEXT"] = pack("base")
chalk.baseChalk = baseChalk
if baseChalk.isMarked():
dict.setIfNeeded("DOCKER_BASE_IMAGE_METADATA_ID", baseChalk.extract["METADATA_ID"])
else:
trace("docker: base image is not chalked " & $baseSection.image)
if "_IMAGE_ID" in baseChalk.collectedData:
dict.setIfNeeded("DOCKER_BASE_IMAGE_ID", baseChalk.collectedData["_IMAGE_ID"])
section.chalk = baseChalk
if not baseChalk.isMarked():
trace("docker: base image is not chalked " & $section.image)
except:
trace("docker: unable to scan base image due to: " & getCurrentExceptionMsg())

proc collectBaseImages(chalk: ChalkObj, ctx: DockerInvocation) =
  ## Calls collectBaseImage on `chalk` once for every section
  ## yielded by ctx.getBasesDockerSections().
  for baseSection in ctx.getBasesDockerSections():
    collectBaseImage(chalk, baseSection)

proc collectBeforeChalkTime(chalk: ChalkObj, ctx: DockerInvocation) =
let
baseSection = ctx.getBaseDockerSection()
dict = chalk.collectedData
git = getPluginByName("vctl_git")
projectRootPath = git.gitFirstDir().parentDir()
dockerfileRelPath = getRelativePathBetween(projectRootPath, ctx.dockerFileLoc)
chalk.collectBaseImage(baseSection)
dict.setIfNeeded("DOCKERFILE_PATH", ctx.dockerFileLoc)
dict.setIfNeeded("DOCKERFILE_PATH_WITHIN_VCTL", dockerfileRelPath)
dict.setIfNeeded("DOCKER_ADDITIONAL_CONTEXTS", ctx.foundExtraContexts)
dict.setIfNeeded("DOCKER_CONTEXT", ctx.foundContext)
dict.setIfNeeded("DOCKER_FILE", ctx.inDockerFile)
dict.setIfNeeded("DOCKER_PLATFORM", $(chalk.platform.normalize()))
dict.setIfNeeded("DOCKER_PLATFORMS", $(ctx.platforms.normalize()))
dict.setIfNeeded("DOCKER_LABELS", ctx.foundLabels)
dict.setIfNeeded("DOCKER_ANNOTATIONS", ctx.foundAnnotations)
dict.setIfNeeded("DOCKER_TAGS", ctx.foundTags.asRepoTag())
dict.setIfNeeded("DOCKER_BASE_IMAGE", $(baseSection.image))
dict.setIfNeeded("DOCKER_BASE_IMAGE_REPO", baseSection.image.repo)
dict.setIfNeeded("DOCKER_BASE_IMAGE_REGISTRY", baseSection.image.registry)
dict.setIfNeeded("DOCKER_BASE_IMAGE_NAME", baseSection.image.name)
dict.setIfNeeded("DOCKER_BASE_IMAGE_TAG", baseSection.image.tag)
dict.setIfNeeded("DOCKER_BASE_IMAGE_DIGEST", baseSection.image.digest)
dict.setIfNeeded("DOCKER_BASE_IMAGES", ctx.formatBaseImages())
dict.setIfNeeded("DOCKER_COPY_IMAGES", ctx.formatCopyImages())
chalk.collectBaseImages(ctx)
if chalk.baseChalk != nil:
if chalk.baseChalk.isMarked():
dict.setIfNeeded("DOCKER_BASE_IMAGE_METADATA_ID", chalk.baseChalk.extract["METADATA_ID"])
dict.setIfNeeded("DOCKER_BASE_IMAGE_ID", chalk.baseChalk.collectedData.getOrDefault("_IMAGE_ID"))
dict.setIfNeeded("DOCKERFILE_PATH", ctx.dockerFileLoc)
dict.setIfNeeded("DOCKERFILE_PATH_WITHIN_VCTL", dockerfileRelPath)
dict.setIfNeeded("DOCKER_ADDITIONAL_CONTEXTS", ctx.foundExtraContexts)
dict.setIfNeeded("DOCKER_CONTEXT", ctx.foundContext)
dict.setIfNeeded("DOCKER_FILE", ctx.inDockerFile)
dict.setIfNeeded("DOCKER_PLATFORM", $(chalk.platform.normalize()))
dict.setIfNeeded("DOCKER_PLATFORMS", $(ctx.platforms.normalize()))
dict.setIfNeeded("DOCKER_LABELS", ctx.foundLabels)
dict.setIfNeeded("DOCKER_ANNOTATIONS", ctx.foundAnnotations)
dict.setIfNeeded("DOCKER_TAGS", ctx.foundTags.asRepoTag())
dict.setIfNeeded("DOCKER_BASE_IMAGE", $(baseSection.image))
dict.setIfNeeded("DOCKER_BASE_IMAGE_REPO", baseSection.image.repo)
dict.setIfNeeded("DOCKER_BASE_IMAGE_REGISTRY", baseSection.image.registry)
dict.setIfNeeded("DOCKER_BASE_IMAGE_NAME", baseSection.image.name)
dict.setIfNeeded("DOCKER_BASE_IMAGE_TAG", baseSection.image.tag)
dict.setIfNeeded("DOCKER_BASE_IMAGE_DIGEST", baseSection.image.digest)
dict.setIfNeeded("DOCKER_BASE_IMAGES", ctx.formatBaseImages())
dict.setIfNeeded("DOCKER_COPY_IMAGES", ctx.formatCopyImages())
# note: this key is expected to be an empty string for alias-less targets,
# hence setIfSubscribed rather than setIfNeeded, which does not allow setting empty strings
dict.setIfSubscribed("DOCKER_TARGET", ctx.getTargetDockerSection().alias)
dict.setIfSubscribed("DOCKER_TARGET", ctx.getTargetDockerSection().alias)

proc collectBeforeBuild*(chalk: ChalkObj, ctx: DockerInvocation) =
let dict = chalk.collectedData
Expand Down
5 changes: 3 additions & 2 deletions src/docker/collect.nim
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,9 @@ proc collectImageFrom(chalk: ChalkObj,
imageRepo = manifest.asImageRepo(tag = repo.tag)
annotations.update(manifest.annotations)
chalk.repos[repo.repo] = imageRepo + chalk.repos.getOrDefault(repo.repo)
layers = @[]
for layer in manifest.layers:
layers.add(layer.digest)
layers.add(layer.digest.extractDockerHash())
except:
trace("docker: " & getCurrentExceptionMsg())
continue
Expand Down Expand Up @@ -278,7 +279,7 @@ proc collectImageFrom(chalk: ChalkObj,
chalk.setIfNeeded("_IMAGE_ANNOTATIONS", annotations.nimJsonToBox())
chalk.setIfNeeded("COMMIT_ID", annotations{"org.opencontainers.image.revision"}.getStr())
let source = annotations{"org.opencontainers.image.source"}.getStr()
if isGitContext(source):
if isGitContext(source, requireExtension = false):
let (remote, head, subdir) = splitContext(source)
chalk.setIfNeeded("ORIGIN_URI", remote)

Expand Down
17 changes: 17 additions & 0 deletions src/docker/dockerfile.nim
Original file line number Diff line number Diff line change
Expand Up @@ -1015,6 +1015,15 @@ proc getBaseDockerSection*(ctx: DockerInvocation): DockerFileSection =
for s in ctx.getBaseDockerSections():
return s

iterator getBasesDockerSections*(ctx: DockerInvocation): DockerFileSection =
  ## Iterates over ctx.dfSections and yields, for each one, the section
  ## returned by ctx.getBaseDockerSection(). A section that has already
  ## been yielded is skipped, so every base is produced at most once.
  var emitted = newSeq[DockerFileSection]()
  for section in ctx.dfSections:
    let baseSection = ctx.getBaseDockerSection(section)
    # skip bases which were already produced for an earlier section
    if baseSection in emitted:
      continue
    emitted.add(baseSection)
    yield baseSection

proc formatBaseImage(ctx: DockerInvocation, section: DockerFileSection): TableRef[string, string] =
let base = ctx.getBaseDockerSection(section)
result = newTable[string, string]()
Expand All @@ -1028,6 +1037,14 @@ proc formatBaseImage(ctx: DockerInvocation, section: DockerFileSection): TableRe
result["tag"] = base.image.tag
if base.image.digest != "":
result["digest"] = base.image.digest
if base.chalk != nil:
let
config = unpack[string](base.chalk.collectedData.getOrDefault("_IMAGE_ID", pack("")))
metadata = unpack[string](base.chalk.collectedData.getOrDefault("_METADATA_ID", pack("")))
if config != "":
result["config_digest"] = config
if metadata != "":
result["metadata_id"] = metadata

proc formatBaseImages*(ctx: DockerInvocation): ChalkDict =
result = ChalkDict()
Expand Down
11 changes: 7 additions & 4 deletions src/docker/git.nim
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,13 @@ proc createTempKnownHosts(data: string): string =
let path = writeNewTempFile(data)
return path

proc isHttpGitContext(context: string): bool =
proc isHttpGitContext(context: string, requireExtension = true): bool =
if context.startsWith("http://") or context.startsWith("https://"):
let uri = parseUri(context)
return uri.path.endsWith(".git")
if requireExtension:
return uri.path.endsWith(".git")
else:
return true
return false

proc isSSHGitContext(context: string): bool =
Expand All @@ -64,8 +67,8 @@ proc isSSHGitContext(context: string): bool =
return uri.path.endsWith(".git")
return false

proc isGitContext*(context: string): bool =
return isHttpGitContext(context) or isSSHGitContext(context)
proc isGitContext*(context: string, requireExtension = true): bool =
return isHttpGitContext(context, requireExtension) or isSSHGitContext(context)

proc splitContext*(context: string): (string, string, string) =
let
Expand Down
8 changes: 7 additions & 1 deletion src/docker/scan.nim
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
##
## scan - create new chalk object and collect docker info into it

import ".."/[config, plugin_api]
import ".."/[config, plugin_api, util]
import "."/[collect, ids, inspect, extract]

proc scanLocalImage*(item: string): Option[ChalkObj] =
Expand All @@ -36,6 +36,12 @@ proc scanImage*(item: DockerImage, platform: DockerPlatform): Option[ChalkObj] =
chalk.collectImage(item)
except:
return none(ChalkObj)
# if we already collected the same image before, return the same pointer
# so that we do not duplicate collected artifacts
for artifact in getAllChalks() & getAllArtifacts():
if artifact.collectedData.getOrDefault("_IMAGE_ID") == chalk.collectedData["_IMAGE_ID"]:
artifact.collectedData.merge(chalk.collectedData)
chalk = artifact
try:
chalk.extractImage()
except:
Expand Down
6 changes: 5 additions & 1 deletion src/plugins/system.nim
Original file line number Diff line number Diff line change
Expand Up @@ -215,12 +215,16 @@ proc sysGetRunTimeHostInfo*(self: Plugin, objs: seq[ChalkObj]):
if len(cachedSearchPath) != 0:
result.setIfNeeded("_OP_SEARCH_PATH", cachedSearchPath)

var chalks = 0
for i in objs:
if i.isMarked():
chalks += 1
result.setIfNeeded("_OPERATION", getBaseCommandName())
result.setIfNeeded("_EXEC_ID", execId)
result.setIfNeeded("_OP_CHALKER_VERSION", getChalkExeVersion())
result.setIfNeeded("_OP_PLATFORM", getChalkPlatform())
result.setIfNeeded("_OP_CHALKER_COMMIT_ID", getChalkCommitId())
result.setIfNeeded("_OP_CHALK_COUNT", len(getAllChalks()) - len(getUnmarked()))
result.setIfNeeded("_OP_CHALK_COUNT", chalks)
result.setIfNeeded("_OP_EXE_NAME", getMyAppPath().splitPath().tail)
result.setIfNeeded("_OP_EXE_PATH", getAppDir())
result.setIfNeeded("_OP_ARGV", @[getMyAppPath()] & commandLineParams())
Expand Down
32 changes: 18 additions & 14 deletions tests/functional/chalk/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from ..conf import MAGIC
from ..utils.bin import sha256
from ..utils.dict import ContainsMixin, MISSING, ANY, IfExists
from ..utils.dict import ContainsDict, MISSING, ANY, IfExists, ContainsList
from ..utils.docker import Docker
from ..utils.log import get_logger
from ..utils.os import CalledProcessError, Program, run
Expand Down Expand Up @@ -46,7 +46,7 @@ def artifact_type(path: Path) -> str:
return "ELF"


class ChalkReport(ContainsMixin, dict):
class ChalkReport(ContainsDict):
name = "report"

def __init__(self, report: dict[str, Any]):
Expand Down Expand Up @@ -76,16 +76,18 @@ def deterministic(self, ignore: Optional[set[str]] = None):
@property
def marks(self):
assert len(self["_CHALKS"]) > 0
return [ChalkMark(i, report=self) for i in self["_CHALKS"]]
return ContainsList([ChalkMark(i, report=self) for i in self["_CHALKS"]])

@property
def artifacts(self):
assert len(self["_COLLECTED_ARTIFACTS"]) > 0
return [ChalkMark(i, report=self) for i in self["_COLLECTED_ARTIFACTS"]]
return ContainsList(
[ChalkMark(i, report=self) for i in self["_COLLECTED_ARTIFACTS"]]
)

@property
def marks_by_path(self):
return ContainsMixin(
return ChalkMark(
{
i.get("PATH_WHEN_CHALKED", i.get("_OP_ARTIFACT_PATH")): i
for i in self.marks
Expand Down Expand Up @@ -122,7 +124,7 @@ def from_json(cls, data: str):
return cls(info if isinstance(info, dict) else info[0])


class ChalkMark(ContainsMixin, dict):
class ChalkMark(ContainsDict):
name = "mark"

@classmethod
Expand Down Expand Up @@ -224,7 +226,7 @@ def reports(self):
break
else:
break
return [ChalkReport(i) for i in reports]
return ContainsList([ChalkReport(i) for i in reports])

@property
def report(self):
Expand Down Expand Up @@ -263,9 +265,9 @@ def virtual_path(self):
@property
def vmarks(self):
assert self.virtual_path.exists()
return [
ChalkMark.from_json(i) for i in self.virtual_path.read_text().splitlines()
]
return ContainsList(
[ChalkMark.from_json(i) for i in self.virtual_path.read_text().splitlines()]
)

@property
def vmark(self):
Expand All @@ -279,10 +281,12 @@ def logged_reports_path(self):

@property
def logged_reports(self):
return [
ChalkReport.from_json(json.loads(i)["$message"])
for i in self.logged_reports_path.read_text().splitlines()
]
return ContainsList(
[
ChalkReport.from_json(json.loads(i)["$message"])
for i in self.logged_reports_path.read_text().splitlines()
]
)

@property
def logged_report(self):
Expand Down
2 changes: 2 additions & 0 deletions tests/functional/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ ignore =
E203
# black formats indents
E131
# black formats all statements
E701

[mypy]
ignore_missing_imports = true
Loading

0 comments on commit c5ada18

Please sign in to comment.