From 26ff25152dc8b71d9b43885de09d0cf543765a25 Mon Sep 17 00:00:00 2001
From: jbtrystram
Date: Tue, 16 Jul 2024 11:40:05 +0200
Subject: [PATCH 1/2] src/cloud-prune: rework duration conversion, move to cosalib

Rework the duration parsing code to support days, weeks, months and
years. Also use a regexp so we don't need the space.
In preparation for the container garbage collection code, which will
consume the same policy files, move the code to the shared cosalib.

Also update parse_fcos_version to return the stream id in a tuple
along with the build timestamp.
---
 src/cmd-cloud-prune   | 32 ++++++++++++--------------------
 src/cosalib/cmdlib.py | 31 +++++++++++++++++++++++++++++--
 2 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/src/cmd-cloud-prune b/src/cmd-cloud-prune
index b03440ca56..2db5efba7a 100755
--- a/src/cmd-cloud-prune
+++ b/src/cmd-cloud-prune
@@ -6,11 +6,12 @@
 # Example of policy.yaml
 # rawhide:
 #     # all cloud images
-#     cloud-uploads: 2 years
+#     cloud-uploads: 2y
 #     # artifacts in meta.json's `images` key
-#     images: 2 years
+#     images: 2y
 #     images-keep: [qemu, live-iso]
-#     build: 3 years
+#     build: 3y
+#     containers: 2w
 # The script also updates the builds.json for the respective stream by
 # adding the policy-cleanup key when we set the upload_builds_json flag.
 # It adds the relevant actions completed to that key
@@ -44,7 +45,8 @@ from cosalib.gcp import remove_gcp_image
 from cosalib.aws import deregister_aws_resource
 from cosalib.builds import BUILDFILES
 from cosalib.s3 import s3_copy
-from cosalib.cmdlib import parse_fcos_version_to_timestamp
+from cosalib.cmdlib import parse_fcos_version_to_timestamp_and_stream
+from cosalib.cmdlib import convert_duration_to_days
 
 Build = collections.namedtuple("Build", ["id", "images", "arch", "meta_json"])
 # set metadata caching to 5m
@@ -105,17 +107,17 @@ def main():
     for action in ['cloud-uploads', 'images', 'build']:
         if action not in policy[stream]:
             continue
-        duration = get_period_in_months(policy[stream][action])
-        ref_date = today_date - relativedelta(months=int(duration))
+        duration = convert_duration_to_days(policy[stream][action])
+        ref_date = today_date - relativedelta(days=int(duration))
 
-        print(f"Pruning resources of type {action} older than {duration} months ({ref_date.date()}) on stream {stream}")
+        print(f"Pruning resources of type {action} older than {policy[stream][action]} ({ref_date.date()}) on stream {stream}")
         # Enumerating in reverse to go from the oldest build to the newest one
         for index, build in enumerate(reversed(builds_json_data["builds"])):
             build_id = build["id"]
             if action in build.get("policy-cleanup", []):
                 print(f"Build {build_id} has already had {action} pruning completed")
                 continue
-            build_date = parse_fcos_version_to_timestamp(build_id)
+            (build_date, _) = parse_fcos_version_to_timestamp_and_stream(build_id)
 
             if build_date >= ref_date:
                 break
@@ -172,8 +174,8 @@ def validate_policy(stream, policy):
         actions = policy[stream]
         if 'cloud-uploads' not in actions:
             raise Exception("Pruning for cloud-uploads must be set before we prune the builds")
-        cloud_uploads_duration = get_period_in_months(actions["cloud-uploads"])
-        build_duration = get_period_in_months(actions["build"])
+        cloud_uploads_duration = convert_duration_to_days(actions["cloud-uploads"])
+        build_duration = convert_duration_to_days(actions["build"])
         if cloud_uploads_duration > build_duration:
             raise Exception("Duration of pruning cloud-uploads must be less than or equal to pruning a build")
 
@@ -286,15 +288,5 @@ def delete_gcp_image(build, cloud_config, dry_run):
     return errors
 
 
-def get_period_in_months(duration):
-    val, unit = duration.split(maxsplit=1)
-    if unit in ["years", "year", "y"]:
-        return int(val) * 12
-    elif unit in ["months", "month", "m"]:
-        return int(val)
-    else:
-        raise Exception(f"Duration unit provided is {unit}. Pruning duration is only supported in years and months")
-
-
 if __name__ == "__main__":
     main()
diff --git a/src/cosalib/cmdlib.py b/src/cosalib/cmdlib.py
index 613a80a927..12e9056cd4 100644
--- a/src/cosalib/cmdlib.py
+++ b/src/cosalib/cmdlib.py
@@ -339,7 +339,7 @@ def get_basearch():
         return get_basearch.saved
 
 
-def parse_fcos_version_to_timestamp(version):
+def parse_fcos_version_to_timestamp_and_stream(version):
     '''
     Parses an FCOS build ID and verifies the versioning is accurate. Then
     it verifies that the parsed timestamp has %Y%m%d format and returns that.
@@ -351,7 +351,34 @@ def parse_fcos_version_to_timestamp(version):
         timestamp = datetime.datetime.strptime(m.group(2), '%Y%m%d')
     except ValueError:
         raise Exception(f"FCOS build {version} has incorrect date format. It should be in (%Y%m%d)")
-    return timestamp
+    return (timestamp, int(m.group(3)))
+
+
+def convert_duration_to_days(duration_arg):
+    """
+    Parses a duration string and converts it into days.
+    The expected format is Nd/D, Nw/W, Nm/M, Ny/Y where N is a non-negative integer.
+    The return value is the number of days as an integer (a month counts as 30 days, a year as 365)
+    """
+    match = re.match(r'^([0-9]+)([dDmMyYwW])$', duration_arg)
+
+    if match is None:
+        raise ValueError(f"Incorrect duration '{duration_arg}'. Valid values are in the form of 1d, 2w, 3m, 4y")
+
+    unit = match.group(2)
+    value = int(match.group(1))
+    match unit.lower():
+        case "y":
+            days = value * 365
+        case "m":
+            days = value * 30
+        case "w":
+            days = value * 7
+        case "d":
+            days = value
+        case _:
+            raise ValueError(f"Invalid unit '{match.group(2)}'. Please use y (years), m (months), w (weeks), or d (days).")
+    return days
 
 
 def parse_date_string(date_string):

From 3a7e5bcf76df29946c9a10bb1bd19351b8ac8663 Mon Sep 17 00:00:00 2001
From: jbtrystram
Date: Tue, 2 Jul 2024 11:37:42 +0200
Subject: [PATCH 2/2] cmd/container-prune: add a GC script for container images

This script calls skopeo delete to prune images from a remote
registry. Currently only supports the FCOS tag structure.

This consumes the same policy.yaml defined in
https://github.com/coreos/coreos-assembler/pull/3798

See https://github.com/coreos/fedora-coreos-tracker/issues/1367
See https://github.com/coreos/fedora-coreos-pipeline/pull/995
---
 cmd/coreos-assembler.go |   2 +-
 src/cmd-container-prune | 125 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 126 insertions(+), 1 deletion(-)
 create mode 100755 src/cmd-container-prune

diff --git a/cmd/coreos-assembler.go b/cmd/coreos-assembler.go
index 696ac0ba02..29f7b99ea2 100644
--- a/cmd/coreos-assembler.go
+++ b/cmd/coreos-assembler.go
@@ -16,7 +16,7 @@ var buildCommands = []string{"init", "fetch", "build", "run", "prune", "clean",
 var advancedBuildCommands = []string{"buildfetch", "buildupload", "oc-adm-release", "push-container"}
 var buildextendCommands = []string{"aliyun", "applehv", "aws", "azure", "digitalocean", "exoscale", "extensions-container", "gcp", "hashlist-experimental", "hyperv", "ibmcloud", "kubevirt", "live", "metal", "metal4k", "nutanix", "openstack", "qemu", "secex", "virtualbox", "vmware", "vultr"}
 
-var utilityCommands = []string{"aws-replicate", "compress", "copy-container", "koji-upload", "kola", "push-container-manifest", "remote-build-container", "cloud-prune", "remote-session", "sign", "tag", "update-variant"}
+var utilityCommands = []string{"aws-replicate", "cloud-prune", "compress", "container-prune", "copy-container", "koji-upload", "kola", "push-container-manifest", "remote-build-container", "remote-session", "sign", "tag", "update-variant"}
 var otherCommands = []string{"shell", "meta"}
 
 func init() {
diff --git a/src/cmd-container-prune b/src/cmd-container-prune
new file mode 100755
index 0000000000..f89deabaac
--- /dev/null
+++ b/src/cmd-container-prune
@@ -0,0 +1,125 @@
+#!/usr/bin/python3 -u
+
+"""
+Prune containers from a remote registry
+according to the images age
+See cmd-cloud-prune for a policy file example
+"""
+
+import argparse
+import datetime
+import json
+import os
+import subprocess
+from dateutil.relativedelta import relativedelta
+import requests
+import yaml
+from cosalib.cmdlib import parse_fcos_version_to_timestamp_and_stream
+from cosalib.cmdlib import convert_duration_to_days
+
+# Dict of known streams
+STREAMS = {"next": 1, "testing": 2, "stable": 3,
+           "next-devel": 10, "testing-devel": 20,
+           "rawhide": 91, "branched": 92}
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(prog="coreos-assembler container-prune")
+    parser.add_argument("--policy", required=True, type=str, help="Path to policy YAML file")
+    parser.add_argument("--dry-run", help="Don't actually delete anything", action='store_true')
+    parser.add_argument("-v", help="Increase verbosity", action='store_true')
+    parser.add_argument("--registry-auth-file", default=os.environ.get("REGISTRY_AUTH_FILE"),
+                        help="Path to docker registry auth file. Directly passed to skopeo.")
+    parser.add_argument("--stream", type=str, help="CoreOS stream", required=True, choices=STREAMS.keys())
+    parser.add_argument("repository_url", help="container images URL")
+    return parser.parse_args()
+
+
+def skopeo_delete(repo, image, auth):
+    """Delete the tag `image` from the remote repository `repo` using `skopeo delete`."""
+    skopeo_args = ["skopeo", "delete", f"docker://{repo}:{image}"]
+    if auth is not None:
+        skopeo_args.extend(["--authfile", auth])
+
+    subprocess.check_output(skopeo_args)
+
+
+def get_update_graph(stream):
+    """Download the update graph of `stream` and return it as parsed JSON."""
+    url = f"https://builds.coreos.fedoraproject.org/updates/{stream}.json"
+    r = requests.get(url, timeout=5)
+    if r.status_code != 200:
+        raise Exception(f"Could not download update graph for {stream}. HTTP {r.status_code}")
+    return r.json()
+
+
+def main():
+    """Prune the tags of the selected stream that are older than the policy allows."""
+    args = parse_args()
+
+    # Load the policy file
+    with open(args.policy, "r") as f:
+        policy = yaml.safe_load(f)
+    if args.stream not in policy:
+        print(f"Stream {args.stream} is not defined in policy file; exiting...")
+        return
+    if 'containers' not in policy[args.stream]:
+        print(f"No containers section for {args.stream} stream in policy; exiting...")
+        return
+    policy = policy[args.stream]["containers"]
+
+    print(f"Pulling tags from {args.repository_url}")
+    # This is a JSON object:
+    # {"Repository": "quay.io/jbtrystramtestimages/fcos",
+    #  "Tags": [
+    #    "40.20240301.1.0",.....]}
+    tags_data = subprocess.check_output(["skopeo", "list-tags",
+                                         f"docker://{args.repository_url}"])
+
+    tags_json = json.loads(tags_data)
+    tags = tags_json['Tags']
+    # Compute the date before we should prune images
+    # today - prune-policy
+    today = datetime.datetime.now()
+    date_limit = today - relativedelta(days=convert_duration_to_days(policy))
+    print(f"This will delete any images older than {date_limit} from the stream {args.stream}")
+
+    stream_id = STREAMS[args.stream]
+    barrier_releases = set()
+    # Get the update graph for stable streams
+    if args.stream in ['stable', 'testing', 'next']:
+        update_graph = get_update_graph(args.stream)['releases']
+        # Keep only the barrier releases (flag accepted at top level or under "metadata"; TODO confirm graph schema)
+        barrier_releases = {release["version"] for release in update_graph if "barrier" in release or "barrier" in (release.get("metadata") or {})}
+
+    for tag in tags:
+        # silently skip known moving tags (next, stable...)
+        if tag in STREAMS:
+            continue
+
+        try:
+            (build_date, tag_stream) = parse_fcos_version_to_timestamp_and_stream(tag)
+        except Exception:
+            print(f"WARNING: Ignoring unexpected tag: {tag}")
+            continue
+        if stream_id != tag_stream:
+            if args.v:
+                print(f"Skipping tag {tag} not in {args.stream} stream")
+            continue
+        # Make sure this is not a barrier release (for stable streams)
+        # For non-production streams barrier_releases will be empty so
+        # this will be no-op
+        if tag in barrier_releases:
+            print(f"Release {tag} is a barrier release, keeping.")
+            continue
+
+        if build_date < date_limit:
+            if args.dry_run:
+                print(f"Dry-run: would prune image {args.repository_url}:{tag}")
+            else:
+                print(f"Production tag {tag} is older than {date_limit.strftime('%Y%m%d')}, pruning.")
+                skopeo_delete(args.repository_url, tag, args.registry_auth_file)
+
+
+if __name__ == "__main__":
+    main()