diff --git a/cmd/coreos-assembler.go b/cmd/coreos-assembler.go
index 696ac0ba02..29f7b99ea2 100644
--- a/cmd/coreos-assembler.go
+++ b/cmd/coreos-assembler.go
@@ -16,7 +16,7 @@ var buildCommands = []string{"init", "fetch", "build", "run", "prune", "clean",
 var advancedBuildCommands = []string{"buildfetch", "buildupload", "oc-adm-release", "push-container"}
 var buildextendCommands = []string{"aliyun", "applehv", "aws", "azure", "digitalocean", "exoscale", "extensions-container", "gcp", "hashlist-experimental", "hyperv", "ibmcloud", "kubevirt", "live", "metal", "metal4k", "nutanix", "openstack", "qemu", "secex", "virtualbox", "vmware", "vultr"}
 
-var utilityCommands = []string{"aws-replicate", "compress", "copy-container", "koji-upload", "kola", "push-container-manifest", "remote-build-container", "cloud-prune", "remote-session", "sign", "tag", "update-variant"}
+var utilityCommands = []string{"aws-replicate", "cloud-prune", "compress", "container-prune", "copy-container", "koji-upload", "kola", "push-container-manifest", "remote-build-container", "remote-session", "sign", "tag", "update-variant"}
 var otherCommands = []string{"shell", "meta"}
 
 func init() {
diff --git a/src/cmd-cloud-prune b/src/cmd-cloud-prune
index b03440ca56..2db5efba7a 100755
--- a/src/cmd-cloud-prune
+++ b/src/cmd-cloud-prune
@@ -6,11 +6,12 @@
 # Example of policy.yaml
 # rawhide:
 #     # all cloud images
-#     cloud-uploads: 2 years
+#     cloud-uploads: 2y
 #     # artifacts in meta.json's `images` key
-#     images: 2 years
+#     images: 2y
 #     images-keep: [qemu, live-iso]
-#     build: 3 years
+#     build: 3y
+#     containers: 2w
 # The script also updates the builds.json for the respective stream by
 # adding the policy-cleanup key when we set the upload_builds_json flag.
 # It adds the relevant actions completed to that key
@@ -44,7 +45,8 @@ from cosalib.gcp import remove_gcp_image
 from cosalib.aws import deregister_aws_resource
 from cosalib.builds import BUILDFILES
 from cosalib.s3 import s3_copy
-from cosalib.cmdlib import parse_fcos_version_to_timestamp
+from cosalib.cmdlib import parse_fcos_version_to_timestamp_and_stream
+from cosalib.cmdlib import convert_duration_to_days
 
 Build = collections.namedtuple("Build", ["id", "images", "arch", "meta_json"])
 # set metadata caching to 5m
@@ -105,17 +107,17 @@ def main():
     for action in ['cloud-uploads', 'images', 'build']:
         if action not in policy[stream]:
             continue
-        duration = get_period_in_months(policy[stream][action])
-        ref_date = today_date - relativedelta(months=int(duration))
+        duration = convert_duration_to_days(policy[stream][action])
+        ref_date = today_date - relativedelta(days=int(duration))
 
-        print(f"Pruning resources of type {action} older than {duration} months ({ref_date.date()}) on stream {stream}")
+        print(f"Pruning resources of type {action} older than {policy[stream][action]} ({ref_date.date()}) on stream {stream}")
         # Enumerating in reverse to go from the oldest build to the newest one
         for index, build in enumerate(reversed(builds_json_data["builds"])):
             build_id = build["id"]
             if action in build.get("policy-cleanup", []):
                 print(f"Build {build_id} has already had {action} pruning completed")
                 continue
-            build_date = parse_fcos_version_to_timestamp(build_id)
+            (build_date, _) = parse_fcos_version_to_timestamp_and_stream(build_id)
 
             if build_date >= ref_date:
                 break
@@ -172,8 +174,8 @@ def validate_policy(stream, policy):
         actions = policy[stream]
         if 'cloud-uploads' not in actions:
             raise Exception("Pruning for cloud-uploads must be set before we prune the builds")
-        cloud_uploads_duration = get_period_in_months(actions["cloud-uploads"])
-        build_duration = get_period_in_months(actions["build"])
+        cloud_uploads_duration = convert_duration_to_days(actions["cloud-uploads"])
+        build_duration = convert_duration_to_days(actions["build"])
         if cloud_uploads_duration > build_duration:
             raise Exception("Duration of pruning cloud-uploads must be less than or equal to pruning a build")
 
@@ -286,15 +288,5 @@ def delete_gcp_image(build, cloud_config, dry_run):
     return errors
 
 
-def get_period_in_months(duration):
-    val, unit = duration.split(maxsplit=1)
-    if unit in ["years", "year", "y"]:
-        return int(val) * 12
-    elif unit in ["months", "month", "m"]:
-        return int(val)
-    else:
-        raise Exception(f"Duration unit provided is {unit}. Pruning duration is only supported in years and months")
-
-
 if __name__ == "__main__":
     main()
diff --git a/src/cmd-container-prune b/src/cmd-container-prune
new file mode 100755
index 0000000000..965551b1e2
--- /dev/null
+++ b/src/cmd-container-prune
@@ -0,0 +1,138 @@
+#!/usr/bin/python3 -u
+
+"""
+Prune containers from a remote registry
+according to the images age
+See cmd-cloud-prune for a policy file example
+"""
+
+import argparse
+import datetime
+import json
+import re as regexp
+import os
+import subprocess
+from dateutil.relativedelta import relativedelta
+import requests
+import yaml
+from cosalib.cmdlib import parse_fcos_version_to_timestamp_and_stream
+from cosalib.cmdlib import convert_duration_to_days
+
+# Dict of known streams
+STREAMS = {"next": 1, "testing": 2, "stable": 3,
+           "next-devel": 10, "testing-devel": 20,
+           "rawhide": 91, "branched": 92}
+
+
+def parse_args():
+    """Parse the command-line arguments of `coreos-assembler container-prune`."""
+    parser = argparse.ArgumentParser(prog="coreos-assembler container-prune")
+    parser.add_argument("--policy", required=True, type=str, help="Path to policy YAML file")
+    parser.add_argument("--dry-run", help="Don't actually delete anything", action='store_true')
+    parser.add_argument("-v", help="Increase verbosity", action='store_true')
+    parser.add_argument("--registry-auth-file", default=os.environ.get("REGISTRY_AUTH_FILE"),
+                        help="Path to docker registry auth file. Directly passed to skopeo.")
+    parser.add_argument("--stream", type=str, help="CoreOS stream", required=True, choices=STREAMS.keys())
+    parser.add_argument("repository_url", help="container images URL")
+    return parser.parse_args()
+
+
+def skopeo_authfile_args(auth):
+    """Return the skopeo arguments selecting `auth` as auth file, or [] if unset."""
+    # The flag and its value must be two separate argv entries: a single
+    # "--authfile <path>" string reaches skopeo as one unknown flag.
+    return ["--authfile", auth] if auth else []
+
+
+def skopeo_delete(repo, image, auth):
+    """Delete the tag `image` of repository `repo` using `skopeo delete`."""
+    subprocess.check_output(["skopeo", "delete", *skopeo_authfile_args(auth),
+                             f"docker://{repo}:{image}"])
+
+
+def get_update_graph(stream):
+    """Download the update graph of `stream` and return the decoded JSON."""
+    url = f"https://builds.coreos.fedoraproject.org/updates/{stream}.json"
+    r = requests.get(url, timeout=5)
+    if r.status_code != 200:
+        raise Exception(f"Could not download update graph for {stream}. HTTP {r.status_code}")
+    return r.json()
+
+
+def main():
+    """Delete the tags of the selected stream that are older than the policy allows."""
+    args = parse_args()
+
+    # Load the policy file
+    with open(args.policy, "r") as f:
+        policy = yaml.safe_load(f)
+    if args.stream not in policy:
+        print(f"Stream {args.stream} is not defined in policy file; exiting...")
+        return
+    if 'containers' not in policy[args.stream]:
+        print(f"No containers section for {args.stream} stream in policy; exiting...")
+        return
+    policy = policy[args.stream]["containers"]
+
+    print(f"Pulling tags from {args.repository_url}")
+    # This is a JSON object:
+    # {"Repository": "quay.io/jbtrystramtestimages/fcos",
+    #  "Tags": [
+    #     "40.20240301.1.0", ...]}
+    tags_data = subprocess.check_output(["skopeo", "list-tags",
+                                         *skopeo_authfile_args(args.registry_auth_file),
+                                         f"docker://{args.repository_url}"])
+
+    tags_json = json.loads(tags_data)
+    tags = tags_json['Tags']
+    # Compute the date before we should prune images
+    # today - prune-policy
+    today = datetime.datetime.now()
+    date_limit = today - relativedelta(days=convert_duration_to_days(policy))
+    print(f"This will delete any images older than {date_limit} from the stream {args.stream}")
+
+    stream_id = STREAMS[args.stream]
+    barrier_releases = set()
+    # Get the update graph for stable streams
+    if args.stream in ['stable', 'testing', 'next']:
+        update_graph = get_update_graph(args.stream)['releases']
+        # Keep only the barrier releases. The marker is looked up both at the
+        # top level of a release and under its "metadata" object.
+        # NOTE(review): the published graphs seem to use "metadata" - confirm.
+        barrier_releases = {release["version"] for release in update_graph
+                            if "barrier" in release or "barrier" in release.get("metadata", {})}
+
+    for tag in tags:
+        # silently skip known moving tags (next, stable...)
+        if tag in STREAMS:
+            continue
+
+        try:
+            (build_date, tag_stream) = parse_fcos_version_to_timestamp_and_stream(tag)
+        except Exception:
+            print(f"WARNING: Ignoring unexpected tag: {tag}")
+            continue
+        if stream_id != tag_stream:
+            if args.v:
+                print(f"Skipping tag {tag} not in {args.stream} stream")
+            continue
+        # Make sure this is not a barrier release (for stable streams)
+        # For non-production streams barrier_releases will be empty so
+        # this will be no-op
+        if tag in barrier_releases:
+            print(f"Release {tag} is a barrier release, keeping.")
+            continue
+
+        if build_date < date_limit:
+            if args.dry_run:
+                print(f"Dry-run: would prune image {args.repository_url}:{tag}")
+            else:
+                # Format the date outside the f-string: reusing the enclosing
+                # quote type inside a replacement field needs Python >= 3.12.
+                limit_str = date_limit.strftime("%Y%m%d")
+                print(f"Production tag {tag} is older than {limit_str}, pruning.")
+                skopeo_delete(args.repository_url, tag, args.registry_auth_file)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/cosalib/cmdlib.py b/src/cosalib/cmdlib.py
index 613a80a927..12e9056cd4 100644
--- a/src/cosalib/cmdlib.py
+++ b/src/cosalib/cmdlib.py
@@ -339,7 +339,7 @@ def get_basearch():
         return get_basearch.saved
 
 
-def parse_fcos_version_to_timestamp(version):
+def parse_fcos_version_to_timestamp_and_stream(version):
     '''
     Parses an FCOS build ID and verifies the versioning is accurate. Then
     it verifies that the parsed timestamp has %Y%m%d format and returns that.
@@ -351,7 +351,35 @@ def parse_fcos_version_to_timestamp(version):
         timestamp = datetime.datetime.strptime(m.group(2), '%Y%m%d')
     except ValueError:
         raise Exception(f"FCOS build {version} has incorrect date format. It should be in (%Y%m%d)")
-    return timestamp
+    return (timestamp, int(m.group(3)))
+
+
+def convert_duration_to_days(duration_arg):
+    """
+    Parses duration strings and converts them into days.
+    The expected format is Nd/D, Nw/W, Nm/M, Ny/Y where N is a positive integer.
+    Months count as 30 days and years as 365 days.
+    The return value is the number of days represented, in integer format
+    """
+    parsed = re.match(r'^([0-9]+)([dDmMyYwW])$', str(duration_arg))
+
+    if parsed is None:
+        raise ValueError(f"Incorrect duration '{duration_arg}'. Valid values are in the form of 1d, 2w, 3m, 4y")
+
+    unit = parsed.group(2)
+    value = int(parsed.group(1))
+    match unit.lower():
+        case "y":
+            days = value * 365
+        case "m":
+            days = value * 30
+        case "w":
+            days = value * 7
+        case "d":
+            days = value
+        case _:
+            raise ValueError(f"Invalid unit '{unit}'. Please use y (years), m (months), w (weeks), or d (days).")
+    return days
 
 
 def parse_date_string(date_string):