Adding garbage removal for cloud uploads
gursewak1997 committed Jun 26, 2024
1 parent 596af28 commit 0ebe9d2
Showing 6 changed files with 323 additions and 64 deletions.
1 change: 1 addition & 0 deletions Makefile
@@ -91,6 +91,7 @@ schema-check:
# Is the generated Go code synced with the schema?
grep -q "$(DIGEST)" pkg/builds/cosa_v1.go
grep -q "$(DIGEST)" pkg/builds/schema_doc.go
grep -q "$(DIGEST)" src/cmd-cloud-prune

install:
install -d $(DESTDIR)$(PREFIX)/lib/coreos-assembler
2 changes: 1 addition & 1 deletion cmd/coreos-assembler.go
@@ -16,7 +16,7 @@ var buildCommands = []string{"init", "fetch", "build", "run", "prune", "clean",
var advancedBuildCommands = []string{"buildfetch", "buildupload", "oc-adm-release", "push-container"}
var buildextendCommands = []string{"aliyun", "applehv", "aws", "azure", "digitalocean", "exoscale", "extensions-container", "gcp", "hashlist-experimental", "hyperv", "ibmcloud", "kubevirt", "live", "metal", "metal4k", "nutanix", "openstack", "qemu", "secex", "virtualbox", "vmware", "vultr"}

var utilityCommands = []string{"aws-replicate", "compress", "copy-container", "koji-upload", "kola", "push-container-manifest", "remote-build-container", "remote-prune", "remote-session", "sign", "tag", "update-variant"}
var utilityCommands = []string{"aws-replicate", "compress", "copy-container", "koji-upload", "kola", "push-container-manifest", "remote-build-container", "cloud-prune", "remote-session", "sign", "tag", "update-variant"}
var otherCommands = []string{"shell", "meta"}

func init() {
64 changes: 2 additions & 62 deletions src/cmd-buildupload
@@ -10,8 +10,7 @@ import sys
import tempfile
import subprocess
import boto3
from botocore.exceptions import ClientError, NoCredentialsError
from tenacity import retry
from cosalib.s3 import s3_copy, s3_check_exists

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

@@ -22,14 +21,7 @@ CACHE_MAX_AGE_ARTIFACT = 60 * 60 * 24 * 365
# set metadata caching to 5m
CACHE_MAX_AGE_METADATA = 60 * 5
from cosalib.builds import Builds, BUILDFILES
from cosalib.cmdlib import (
load_json,
retry_stop_long,
retry_wait_long,
retry_boto_exception,
retry_callback
)

from cosalib.cmdlib import load_json

def main():
args = parse_args()
@@ -194,57 +186,5 @@ def s3_upload_build(s3_client, args, builddir, bucket, prefix):
dry_run=args.dry_run)


@retry(stop=retry_stop_long, wait=retry_wait_long,
retry=retry_boto_exception, before_sleep=retry_callback)
def s3_check_exists(s3_client, bucket, key, dry_run=False):
print(f"Checking if bucket '{bucket}' has key '{key}'")
try:
s3_client.head_object(Bucket=bucket, Key=key)
except ClientError as e:
if e.response['Error']['Code'] == '404':
return False
raise e
except NoCredentialsError as e:
# It's reasonable to run without creds if doing a dry-run
if dry_run:
return False
raise e
return True


@retry(stop=retry_stop_long, wait=retry_wait_long,
retry=retry_boto_exception, retry_error_callback=retry_callback)
def s3_copy(s3_client, src, bucket, key, max_age, acl, extra_args={}, dry_run=False):
extra_args = dict(extra_args)
if 'ContentType' not in extra_args:
if key.endswith('.json'):
extra_args['ContentType'] = 'application/json'
elif key.endswith('.tar'):
extra_args['ContentType'] = 'application/x-tar'
elif key.endswith('.xz'):
extra_args['ContentType'] = 'application/x-xz'
elif key.endswith('.gz'):
extra_args['ContentType'] = 'application/gzip'
elif key.endswith('.iso'):
extra_args['ContentType'] = 'application/x-iso9660-image'
else:
# use a standard MIME type for "binary blob" instead of the default
# 'binary/octet-stream' AWS slaps on
extra_args['ContentType'] = 'application/octet-stream'
upload_args = {
'CacheControl': f'max-age={max_age}',
'ACL': acl
}
upload_args.update(extra_args)

print((f"{'Would upload' if dry_run else 'Uploading'} {src} to "
f"s3://{bucket}/{key} {extra_args if len(extra_args) else ''}"))

if dry_run:
return

s3_client.upload_file(Filename=src, Bucket=bucket, Key=key, ExtraArgs=upload_args)


if __name__ == '__main__':
sys.exit(main())
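The s3_check_exists() and s3_copy() helpers removed above are now imported from cosalib.s3 at the top of this file. That module's diff is not shown here; a minimal sketch of its presumed interface, assuming the functions moved over with the signatures used at the call sites above:

from tenacity import retry
from cosalib.cmdlib import (retry_stop_long, retry_wait_long,
                            retry_boto_exception, retry_callback)


@retry(stop=retry_stop_long, wait=retry_wait_long,
       retry=retry_boto_exception, before_sleep=retry_callback)
def s3_check_exists(s3_client, bucket, key, dry_run=False):
    ...  # body as removed above: HEAD the object and return True/False


@retry(stop=retry_stop_long, wait=retry_wait_long,
       retry=retry_boto_exception, retry_error_callback=retry_callback)
def s3_copy(s3_client, src, bucket, key, max_age, acl, extra_args={}, dry_run=False):
    ...  # body as removed above: set ContentType/CacheControl/ACL and upload_file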
263 changes: 263 additions & 0 deletions src/cmd-cloud-prune
@@ -0,0 +1,263 @@
#!/usr/bin/python3 -u

# This script parses a policy.yaml file, which outlines the specific
# pruning actions required for each stream and the age threshold for
# deleting artifacts within them.
# Example of policy.yaml
# rawhide:
# # all cloud images
# cloud-uploads: 2 years
# # artifacts in meta.json's `images` key
# images: 2 years
# images-keep: [qemu, live-iso]
# build: 3 years
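#
# A hypothetical invocation consuming such a policy (the bucket, prefix and
# GCP values below are illustrative only):
#   coreos-assembler cloud-prune --policy ./policy.yaml --stream rawhide \
#       --gcp-json-key /path/to/key.json --gcp-project my-gcp-project \
#       --dry-run s3 my-bucket/prod/streams/rawhide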

import argparse
import json
import re
import time
import pytz
import yaml
import collections
import datetime
import os
import boto3
from dateutil.relativedelta import relativedelta
from cosalib.gcp import remove_gcp_image
from cosalib.aws import deregister_ami, delete_snapshot
from cosalib.builds import BUILDFILES
from cosalib.s3 import s3_copy

Build = collections.namedtuple("Build", ["id", "timestamp", "images", "arch"])
# set metadata caching to 5m
CACHE_MAX_AGE_METADATA = 60 * 5


def main():
args = parse_args()
bucket, prefix = args.url.split("/", 1)
stream = args.stream
# Boto3 loads credentials from ~/.aws/config by default and we can change
# this default location by setting the AWS_CONFIG_FILE environment variable.
# The Python bindings don't support passing a config file.
# The alternative is to manually pass ACCESS_KEY and SECRET_KEY which isn't favourable.
if args.aws_config_file:
os.environ["AWS_CONFIG_FILE"] = args.aws_config_file

gcp_cloud_config = {
"gcp": {
"json-key": args.gcp_json_key,
"project": args.gcp_project,
}
}
# These lists are up to date as of schema hash
# 4c19aed3b3d84af278780bff63728510bb3e70613e4c4eef8cabd7939eb31bd8. If changing
# this hash, ensure that the list of supported and unsupported artifacts below
# is up to date.
supported = ["amis", "gcp"]
unsupported = []

with open(args.policy, "r") as f:
policy = yaml.safe_load(f)

s3_client = boto3.client("s3")
    # If the build key is set in the policy file, then the cloud-uploads key must
    # also be present, and the cloud-uploads duration must be equal to or shorter
    # than the build duration
if "build" in policy[stream]:
cloud_uploads_check(policy[stream])

    # Fetch builds.json for this stream from the given bucket/prefix
    builds_json_data = get_json_from_s3(s3_client, bucket, prefix + "/builds/builds.json")

    # Each action names a type of resource to prune for the given stream
for action in policy[stream]:
duration = policy[stream][action]
duration_in_days = get_period_in_days(duration)
ref_date = datetime.datetime.now() - relativedelta(days=int(duration_in_days))

print(f"Pruning resources of type {action} older than {duration_in_days} days ({ref_date.date()}) on stream {stream}")
# Enumerating in reverse to go from the oldest build to the newest one
for index, build in enumerate(reversed(builds_json_data["builds"])):
build_id = build["id"]
if "policy-cleanup" in build.keys():
# If we have already pruned the specified resources for this
# build as per builds.json, we skip through it.
if action in build["policy-cleanup"]:
print(f"Build {build_id} has already had {action} pruning completed")
continue
# Assuming only FCOS versioning as of now
timestamp = parse_fcos_version(build_id)[1]
build_date = datetime.datetime(int(timestamp[0:4]), int(timestamp[4:6]), int(timestamp[-2:]))

if build_date >= ref_date:
break
for arch in build["arches"]:
meta_url = f"/builds/{build_id}/{arch}/meta.json"
meta_json = get_json_from_s3(s3_client, bucket, prefix + meta_url)
if meta_json is None:
print(f"Failed to get meta.json for {build_id} for {arch}")
return
images = {
"amis": meta_json.get("amis") or [],
"gcp": meta_json.get("gcp") or [],
}
currentBuild = Build(
id=build_id,
timestamp=timestamp,
images=images,
arch=arch,
)
match action:
case "cloud-uploads":
# Prunes only AWS and GCP at the moment
delete_cloud_resources(currentBuild, gcp_cloud_config, args.dry_run)
                        if not args.dry_run:
                            # `build` references the entry inside builds_json_data["builds"],
                            # so mutating it here updates builds.json in place
                            build.setdefault("policy-cleanup", []).append(action)
                            builds_json_data["timestamp"] = datetime.datetime.now(pytz.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
# Not implemented yet
case "build":
                        raise NotImplementedError
# print(f"Deleting key {prefix}{build.id} from bucket {bucket}")
# Delete the build's directory in S3
# S3().delete_object(args.bucket, f"{args.prefix}{str(currentBuild.id)}")

if not args.dry_run:
with open("builds/builds.json", "w") as json_file:
json.dump(builds_json_data, json_file, indent=2)
# This copies the local builds.json and updates the S3 bucket version.
if args.upload_builds_json:
s3_copy(s3_client, BUILDFILES['list'], bucket, f'{prefix}/builds/builds.json',
CACHE_MAX_AGE_METADATA, args.acl, extra_args={},
dry_run=args.dry_run)


def parse_fcos_version(version):
m = re.match(r'^([0-9]{2})\.([0-9]{8})\.([0-9]+)\.([0-9]+)$', version)
if m is None:
raise Exception(f"Incorrect versioning for FCOS build {version}")
# sanity-check date
try:
time.strptime(m.group(2), '%Y%m%d')
except ValueError:
raise Exception(f"FCOS build {version} has incorrect date format. It should be in (%Y%m%d)")
return tuple(map(str, m.groups()))
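# For example, with an illustrative (made-up) build id:
#   parse_fcos_version("40.20240301.3.0") -> ("40", "20240301", "3", "0")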


def get_json_from_s3(s3, bucket, key):
try:
# Fetch the JSON file from S3
response = s3.get_object(Bucket=bucket, Key=key)
# Read the content of the file
content = response["Body"].read().decode("utf-8")
# Parse the JSON content
json_content = json.loads(content)
return json_content

except Exception as e:
raise Exception(f"Error fetching the JSON file from S3 {bucket}/{key}: {e}")


def parse_args():
parser = argparse.ArgumentParser(prog="coreos-assembler cloud-prune")
parser.add_argument("--policy", required=True, type=str,
help="Path to policy YAML file")
parser.add_argument("--dry-run", help="Don't actually delete anything",
action='store_true')
parser.add_argument("--upload-builds-json", help="Push builds.json",
action='store_true')
parser.add_argument("--stream", type=str, help="CoreOS stream", required=True)

parser.add_argument("--gcp-json-key", help="GCP Service Account JSON Auth",
default=os.environ.get("GCP_JSON_AUTH"))
parser.add_argument("--gcp-project", help="GCP Project name",
default=os.environ.get("GCP_PROJECT_NAME"))

subparsers = parser.add_subparsers(dest="cmd", title="subcommands")
subparsers.required = True

s3 = subparsers.add_parser('s3', help='Prune s3 buckets')
s3.add_argument("url", metavar='<BUCKET>[/PREFIX]',
                    help="Bucket and path prefix to prune")
s3.add_argument("--acl", help="ACL for objects",
action='store', default='private')
s3.add_argument("--aws-config-file", default=os.environ.get("AWS_CONFIG_FILE"),
help="Path to AWS config file")
return parser.parse_args()


# Handling just AWS and GCP at the moment
def delete_cloud_resources(build, gcp_cloud_config, dry_run):
errors = []
if not build.images.get("amis", []):
print(f"No AMIs for {build.id} for {build.arch}")

# Deregister AMIs and snapshots
for ami in build.images.get("amis", []):
region_name = ami.get("name")
ami_id = ami.get("hvm")
snapshot_id = ami.get("snapshot")
if dry_run:
print(f"Would delete {ami_id} and {snapshot_id} for {build.id}")
break
if ami_id and region_name:
try:
deregister_ami(ami_id, region=region_name)
except Exception as e:
errors.append(e)
if snapshot_id and region_name:
try:
delete_snapshot(snapshot_id, region=region_name)
except Exception as e:
errors.append(e)

gcp = build.images.get("gcp")
if gcp:
gcp_image = gcp.get("image")
json_key = gcp_cloud_config.get("gcp", {}).get("json-key")
project = gcp_cloud_config.get("gcp", {}).get("project")
if dry_run:
print(f"Would delete {gcp_image} GCP image for {build.id}")
elif gcp_image and json_key and project:
try:
remove_gcp_image(gcp_image, json_key, project)
except Exception as e:
errors.append(e)
else:
            errors.append(f"Missing parameters to remove {gcp_image}")
else:
print(f"No GCP image for {build.id} for {build.arch}")

if len(errors) != 0:
print(f"Found errors when removing cloud-uploads for {build.id}:")
for e in errors:
print(e)
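# The dicts consumed above come from each build's meta.json; illustrative
# (made-up) values for the keys this function reads:
#   "amis": [{"name": "us-east-1", "hvm": "ami-0123456789abcdef0",
#             "snapshot": "snap-0123456789abcdef0"}]
#   "gcp": {"image": "fedora-coreos-40-20240301-3-0-gcp-x86-64", ...}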


def cloud_uploads_check(actions):
if 'cloud-uploads' not in actions:
raise Exception("Pruning for cloud-uploads must be set before we prune the builds")
cloud_uploads_duration = get_period_in_days(actions["cloud-uploads"])
build_duration = get_period_in_days(actions["build"])
if cloud_uploads_duration > build_duration:
raise Exception("Duration of pruning cloud-uploads must be less than or equal to pruning a build")


def get_period_in_days(duration):
    val, unit = duration.split(" ")
    if unit in ["years", "year", "y"]:
        days = int(val) * 365
    elif unit in ["months", "month", "m"]:
        days = int(val) * 30
    elif unit in ["weeks", "week", "w"]:
        days = int(val) * 7
    elif unit in ["days", "day", "d"]:
        days = int(val)
    else:
        raise Exception(f"Duration unit provided is {unit}. Pruning duration is only supported in years, months, weeks or days.")
    return days
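# Illustrative conversions under the rules above:
#   get_period_in_days("2 years")  -> 730
#   get_period_in_days("6 months") -> 180
#   get_period_in_days("3 w")      -> 21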


if __name__ == "__main__":
main()
1 change: 1 addition & 0 deletions src/cosalib/gcp.py
@@ -26,6 +26,7 @@ def remove_gcp_image(gcp_id, json_key, project):
'--json-key', json_key,
'--project', project
])
print(f"GCP: successfully removed image {gcp_id}")
except SystemExit:
raise Exception("Failed to remove image")
