cmd/prune-containers: add a GC script for container images #3826

Merged 2 commits on Jul 17, 2024
2 changes: 1 addition & 1 deletion cmd/coreos-assembler.go
@@ -16,7 +16,7 @@ var buildCommands = []string{"init", "fetch", "build", "run", "prune", "clean",
var advancedBuildCommands = []string{"buildfetch", "buildupload", "oc-adm-release", "push-container"}
var buildextendCommands = []string{"aliyun", "applehv", "aws", "azure", "digitalocean", "exoscale", "extensions-container", "gcp", "hashlist-experimental", "hyperv", "ibmcloud", "kubevirt", "live", "metal", "metal4k", "nutanix", "openstack", "qemu", "secex", "virtualbox", "vmware", "vultr"}

var utilityCommands = []string{"aws-replicate", "compress", "copy-container", "koji-upload", "kola", "push-container-manifest", "remote-build-container", "cloud-prune", "remote-session", "sign", "tag", "update-variant"}
var utilityCommands = []string{"aws-replicate", "cloud-prune", "compress", "container-prune", "copy-container", "koji-upload", "kola", "push-container-manifest", "remote-build-container", "remote-session", "sign", "tag", "update-variant"}
var otherCommands = []string{"shell", "meta"}

func init() {
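With "container-prune" registered as a utility command, the new script is reachable through the assembler wrapper. A hedged sketch of an invocation, using the flags defined by the argparse parser in src/cmd-container-prune below; the policy path is illustrative and the repository URL is the sample one from that script's comments:

```python
# Sketch only: drive the new subcommand through the coreos-assembler wrapper.
# Flags mirror src/cmd-container-prune's parser; paths/URLs are illustrative.
import subprocess

subprocess.check_call([
    "coreos-assembler", "container-prune",
    "--policy", "policy.yaml",
    "--stream", "stable",
    "--dry-run",                                  # report, don't delete
    "quay.io/jbtrystramtestimages/fcos",
])
```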
32 changes: 12 additions & 20 deletions src/cmd-cloud-prune
@@ -6,11 +6,12 @@
# Example of policy.yaml
# rawhide:
# # all cloud images
# cloud-uploads: 2 years
# cloud-uploads: 2y
# # artifacts in meta.json's `images` key
# images: 2 years
# images: 2y
# images-keep: [qemu, live-iso]
# build: 3 years
# build: 3y
# containers: 2w
# The script also updates the builds.json for the respective stream by
# adding the policy-cleanup key when we set the upload_builds_json flag.
# It adds the relevant actions completed to that key
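A minimal sketch of how a policy in the new compact duration format is consumed, assuming it runs where cosalib is importable (i.e. inside a coreos-assembler environment); the stream name and values are illustrative, mirroring the comment above:

```python
# Load a policy snippet in the compact duration format and normalize to days.
import yaml

from cosalib.cmdlib import convert_duration_to_days

POLICY_YAML = """
rawhide:
  cloud-uploads: 2y
  images: 2y
  images-keep: [qemu, live-iso]
  build: 3y
  containers: 2w
"""

policy = yaml.safe_load(POLICY_YAML)
for action, duration in policy["rawhide"].items():
    if action == "images-keep":          # a keep-list, not a duration
        continue
    print(f"{action}: {convert_duration_to_days(duration)} days")
# cloud-uploads: 730 days, images: 730 days, build: 1095 days, containers: 14 days
```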
@@ -44,7 +45,8 @@ from cosalib.gcp import remove_gcp_image
from cosalib.aws import deregister_aws_resource
from cosalib.builds import BUILDFILES
from cosalib.s3 import s3_copy
from cosalib.cmdlib import parse_fcos_version_to_timestamp
from cosalib.cmdlib import parse_fcos_version_to_timestamp_and_stream
from cosalib.cmdlib import convert_duration_to_days

Build = collections.namedtuple("Build", ["id", "images", "arch", "meta_json"])
# set metadata caching to 5m
@@ -105,17 +107,17 @@ def main():
for action in ['cloud-uploads', 'images', 'build']:
if action not in policy[stream]:
continue
duration = get_period_in_months(policy[stream][action])
ref_date = today_date - relativedelta(months=int(duration))
duration = convert_duration_to_days(policy[stream][action])
ref_date = today_date - relativedelta(days=int(duration))

print(f"Pruning resources of type {action} older than {duration} months ({ref_date.date()}) on stream {stream}")
print(f"Pruning resources of type {action} older than {policy[stream][action]} ({ref_date.date()}) on stream {stream}")
# Enumerating in reverse to go from the oldest build to the newest one
for index, build in enumerate(reversed(builds_json_data["builds"])):
build_id = build["id"]
if action in build.get("policy-cleanup", []):
print(f"Build {build_id} has already had {action} pruning completed")
continue
build_date = parse_fcos_version_to_timestamp(build_id)
(build_date, _) = parse_fcos_version_to_timestamp_and_stream(build_id)

if build_date >= ref_date:
break
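A hedged sketch of the cutoff logic in this hunk: the reference date is today minus the policy duration (now expressed in days), and a build is only eligible for pruning if its ID parses to a date older than that cutoff; since builds are walked oldest-first, the loop stops at the first build that is newer. The build ID and duration below are illustrative:

```python
import datetime

from dateutil.relativedelta import relativedelta

from cosalib.cmdlib import convert_duration_to_days, parse_fcos_version_to_timestamp_and_stream

today_date = datetime.datetime.now()
# e.g. an "images: 2y" policy entry -> a cutoff 730 days in the past
ref_date = today_date - relativedelta(days=convert_duration_to_days("2y"))

build_date, _stream_id = parse_fcos_version_to_timestamp_and_stream("38.20230322.1.0")
if build_date >= ref_date:
    print("build is newer than the cutoff; stop walking the oldest-first build list")
else:
    print("build is older than the cutoff; eligible for pruning")
```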
@@ -172,8 +174,8 @@ def validate_policy(stream, policy):
actions = policy[stream]
if 'cloud-uploads' not in actions:
raise Exception("Pruning for cloud-uploads must be set before we prune the builds")
cloud_uploads_duration = get_period_in_months(actions["cloud-uploads"])
build_duration = get_period_in_months(actions["build"])
cloud_uploads_duration = convert_duration_to_days(actions["cloud-uploads"])
build_duration = convert_duration_to_days(actions["build"])
if cloud_uploads_duration > build_duration:
raise Exception("Duration of pruning cloud-uploads must be less than or equal to pruning a build")

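For illustration, a minimal sketch of the ordering check above with made-up policy values, assuming cosalib is importable; a cloud-uploads retention longer than the build retention is rejected:

```python
from cosalib.cmdlib import convert_duration_to_days

actions = {"cloud-uploads": "3y", "build": "2y"}   # illustrative policy values
cloud_uploads_duration = convert_duration_to_days(actions["cloud-uploads"])   # 1095
build_duration = convert_duration_to_days(actions["build"])                   # 730
if cloud_uploads_duration > build_duration:
    raise Exception("Duration of pruning cloud-uploads must be less than or equal to pruning a build")
```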
@@ -286,15 +288,5 @@ def delete_gcp_image(build, cloud_config, dry_run):
return errors


def get_period_in_months(duration):
val, unit = duration.split(maxsplit=1)
if unit in ["years", "year", "y"]:
return int(val) * 12
elif unit in ["months", "month", "m"]:
return int(val)
else:
raise Exception(f"Duration unit provided is {unit}. Pruning duration is only supported in years and months")


if __name__ == "__main__":
main()
125 changes: 125 additions & 0 deletions src/cmd-container-prune
@@ -0,0 +1,125 @@
#!/usr/bin/python3 -u

"""
Prune containers from a remote registry
according to their age
See cmd-cloud-prune for a policy file example
"""

import argparse
import datetime
import json
import os
import subprocess
from dateutil.relativedelta import relativedelta
import requests
import yaml
from cosalib.cmdlib import parse_fcos_version_to_timestamp_and_stream
from cosalib.cmdlib import convert_duration_to_days

# Dict of known streams
STREAMS = {"next": 1, "testing": 2, "stable": 3,
"next-devel": 10, "testing-devel": 20,
"rawhide": 91, "branched": 92}


def parse_args():
parser = argparse.ArgumentParser(prog="coreos-assembler container-prune")
parser.add_argument("--policy", required=True, type=str, help="Path to policy YAML file")
parser.add_argument("--dry-run", help="Don't actually delete anything", action='store_true')
parser.add_argument("-v", help="Increase verbosity", action='store_true')
parser.add_argument("--registry-auth-file", default=os.environ.get("REGISTRY_AUTH_FILE"),
help="Path to docker registry auth file. Directly passed to skopeo.")
parser.add_argument("--stream", type=str, help="CoreOS stream", required=True, choices=STREAMS.keys())
parser.add_argument("repository_url", help="container images URL")
return parser.parse_args()


def skopeo_delete(repo, image, auth):

skopeo_args = ["skopeo", "delete", f"docker://{repo}:{image}"]
if auth is not None:
skopeo_args.append(f"--authfile {auth}")

subprocess.check_output(skopeo_args)


def get_update_graph(stream):

url = f"https://builds.coreos.fedoraproject.org/updates/{stream}.json"
r = requests.get(url, timeout=5)
if r.status_code != 200:
raise Exception(f"Could not download update graph for {stream}. HTTP {r.status_code}")
return r.json()


def main():

args = parse_args()

# Load the policy file
with open(args.policy, "r") as f:
policy = yaml.safe_load(f)
if args.stream not in policy:
print(f"Stream {args.stream} is not defined in policy file; exiting...")
return
if 'containers' not in policy[args.stream]:
print(f"No containers section for {args.stream} stream in policy; exiting...")
return
policy = policy[args.stream]["containers"]

print(f"Pulling tags from {args.repository_url}")
# This is a JSON object:
# {"Repository": "quay.io/jbtrystramtestimages/fcos",
# "Tags": [
# "40.20"40.20240301.1.0",.....]}
tags_data = subprocess.check_output(["skopeo", "list-tags",
f"docker://{args.repository_url}"])

tags_json = json.loads(tags_data)
tags = tags_json['Tags']
# Compute the date before we should prune images
# today - prune-policy
today = datetime.datetime.now()
date_limit = today - relativedelta(days=convert_duration_to_days(policy))
print(f"This will delete any images older than {date_limit} from the stream {args.stream}")

stream_id = STREAMS[args.stream]
barrier_releases = set()
# Get the update graph for stable streams
if args.stream in ['stable', 'testing', 'next']:
update_graph = get_update_graph(args.stream)['releases']
# Keep only the barrier releases
barrier_releases = set([release["version"] for release in update_graph if "barrier" in release])

for tag in tags:
# silently skip known moving tags (next, stable...)
if tag in STREAMS:
continue

try:
(build_date, tag_stream) = parse_fcos_version_to_timestamp_and_stream(tag)
except Exception:
print(f"WARNING: Ignoring unexpected tag: {tag}")
continue
if stream_id != tag_stream:
if args.v:
print(f"Skipping tag {tag} not in {args.stream} stream")
continue
# Make sure this is not a barrier release (for stable streams)
# For non-production streams barrier_releases will be empty so
# this will be a no-op
if tag in barrier_releases:
print(f"Release {tag} is a barrier release, keeping.")
continue

if build_date < date_limit:
if args.dry_run:
print(f"Dry-run: would prune image {args.repository_url}:{tag}")
else:
print(f"Production tag {tag} is older than {date_limit.strftime("%Y%m%d")}, pruning.")
skopeo_delete(args.repository_url, tag, args.registry_auth_file)


if __name__ == "__main__":
main()
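Condensing the pruning rules of cmd-container-prune into a short, hedged sketch: moving tags are skipped, tags from other streams are skipped, barrier releases are kept, and only tags older than the cutoff are pruned. Tags, dates, and the barrier version below are illustrative, and cosalib must be importable:

```python
import datetime

from dateutil.relativedelta import relativedelta

from cosalib.cmdlib import convert_duration_to_days, parse_fcos_version_to_timestamp_and_stream

STREAMS = {"next": 1, "testing": 2, "stable": 3,
           "next-devel": 10, "testing-devel": 20,
           "rawhide": 91, "branched": 92}

stream = "stable"
date_limit = datetime.datetime.now() - relativedelta(days=convert_duration_to_days("2w"))
barrier_releases = {"36.20220505.3.2"}                     # illustrative barrier version
tags = ["stable", "36.20220505.3.2", "35.20211029.3.0", "latest-build"]

for tag in tags:
    if tag in STREAMS:                                     # moving tag: never pruned
        continue
    try:
        build_date, tag_stream = parse_fcos_version_to_timestamp_and_stream(tag)
    except Exception:
        continue                                           # unexpected tag: ignored
    if tag_stream != STREAMS[stream] or tag in barrier_releases:
        continue                                           # other stream or barrier: kept
    if build_date < date_limit:
        print(f"would prune {tag}")                        # here: 35.20211029.3.0
```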
31 changes: 29 additions & 2 deletions src/cosalib/cmdlib.py
@@ -339,7 +339,7 @@ def get_basearch():
return get_basearch.saved


def parse_fcos_version_to_timestamp(version):
def parse_fcos_version_to_timestamp_and_stream(version):
'''
Parses an FCOS build ID and verifies the versioning is accurate. Then
it verifies that the parsed timestamp has %Y%m%d format and returns that.
@@ -351,7 +351,34 @@ def parse_fcos_version_to_timestamp(version):
timestamp = datetime.datetime.strptime(m.group(2), '%Y%m%d')
except ValueError:
raise Exception(f"FCOS build {version} has incorrect date format. It should be in (%Y%m%d)")
return timestamp
return (timestamp, int(m.group(3)))


def convert_duration_to_days(duration_arg):
"""
Parses a duration string and converts it into days.
The expected format is Nd/D, Nw/W, Nm/M, Ny/Y where N is a positive integer.
The return value is the number of days represented, as an integer.
"""
match = re.match(r'^([0-9]+)([dDmMyYwW])$', duration_arg)

if match is None:
raise ValueError(f"Incorrect duration '{duration_arg}'. Valid values are in the form of 1d, 2w, 3m, 4y")

unit = match.group(2)
value = int(match.group(1))
match unit.lower():
case "y":
days = value * 365
case "m":
days = value * 30
case "w":
days = value * 7
case "d":
days = value
case _:
raise ValueError(f"Invalid unit '{match.group(2)}'. Please use y (years), m (months), w (weeks), or d (days).")
return days


def parse_date_string(date_string):
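Finally, a hedged, doctest-style sketch of what the two cmdlib helpers added above are expected to return; the build ID is the illustrative one from the comment in cmd-container-prune:

```python
from cosalib.cmdlib import convert_duration_to_days, parse_fcos_version_to_timestamp_and_stream

assert convert_duration_to_days("4y") == 1460    # 4 * 365
assert convert_duration_to_days("3m") == 90      # 3 * 30
assert convert_duration_to_days("2w") == 14
assert convert_duration_to_days("5d") == 5

timestamp, stream_id = parse_fcos_version_to_timestamp_and_stream("40.20240301.1.0")
print(timestamp.date(), stream_id)               # 2024-03-01 1

try:
    convert_duration_to_days("2 years")          # the old long-form format is now rejected
except ValueError as err:
    print(err)
```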