Skip to content

Commit

Permalink
fetch images
Browse files Browse the repository at this point in the history
  • Loading branch information
lassemand committed Dec 13, 2024
1 parent bf39f3b commit 426ebcd
Show file tree
Hide file tree
Showing 3 changed files with 179 additions and 2 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/release-node-images.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ jobs:
run: |
set -e
TARGET_AWS_REGIONS=$(echo '${{ env.ENVIRONMENT_TO_AWS_REGION }}' | jq -r -c '[..|strings]|unique| join(" ")')
IMAGES=$(python tools/find_images.py --image_count_lower_limit ${{ env.IMAGE_COUNT_LOWER_LIMIT }} --image_count_upper_limit ${{ env.IMAGE_COUNT_UPPER_LIMIT }} --aws_regions $TARGET_AWS_REGIONS)
IMAGES=$(python scripts/find_images.py --image_count_lower_limit ${{ env.IMAGE_COUNT_LOWER_LIMIT }} --image_count_upper_limit ${{ env.IMAGE_COUNT_UPPER_LIMIT }} --aws_regions $TARGET_AWS_REGIONS)
echo "images=$(echo $IMAGES | jq '@json' | sed 's/^"\(.*\)"$/\1/')" >> $GITHUB_OUTPUT
notify-slack-on-image-deletions:
Expand All @@ -307,7 +307,7 @@ jobs:
with:
payload: >-
{
"text": "There are image which should be deleted: ```$DEVOPS_REPOSITORY_PATH/tools/delete_amis.sh '${{ needs.fetch-images.outputs.images }}'```"
"text": "There are image which should be deleted: ```$INFRA_IMAGES_REPOSITORY_PATH/scripts/delete_amis.sh '${{ needs.fetch-images.outputs.images }}'```"
}
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_URL }}
Expand Down
56 changes: 56 additions & 0 deletions scripts/delete_amis.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/env bash

# Tag AWS AMIs and their snapshots for deletion and delete GCP images,
# driven by the JSON report produced by find_images.py (usually copied
# from the events-mgmt Slack channel).

set -e

if [ "$#" -ne 1 ]; then
    echo "Usage: \$0 '<json_input>' where json_input is the content written by find_images.sh. The content is \
usually obtained by copying from the slack channel events-mgmt. \
Example format: '{\"Stagenet\":[{\"name\":\"stagenet-v6-3-0-0-concordium-node-00000-x86-64\",\"id\":\"ami-03631f7d6549f62bd\",\"provider\":\"aws\"}]}'"
    exit 1
fi

# bash associative arrays need the [key]=value form; the previous
# ksh/zsh-style flat pair list only parses on bash >= 5.1.
declare -A environment_to_project=(
    [Stagenet]=concordium-stagenet-0
    [Testnet]=concordium-testnet-0
    [Mainnet]=concordium-mainnet-0
    [BaseImage]=concordium-mgmt-0
)

json_input="$1"

# Feed the loop via process substitution instead of a pipe so it runs in the
# current shell: 'exit 2' then aborts the whole script, not just a subshell.
while read -r environment id provider aws_region; do
    if [[ "$provider" == "aws" ]]; then
        if [[ "$aws_region" == "null" ]]; then
            echo "Region is not provided for $id"
            exit 2
        fi
        # Tag the AMI plus every snapshot whose description references it.
        snapshot_ids=$(aws ec2 describe-snapshots --region="$aws_region" --filter "Name=tag:Environment,Values=$environment" --query "Snapshots[? contains(Description, '$id')].SnapshotId" --output text)
        for snapshot_id in $snapshot_ids; do
            echo "Marking AMI $id and snapshot $snapshot_id for deletion"
            aws ec2 create-tags --region="$aws_region" --resources "$snapshot_id" "$id" --tags Key=ToBeDeleted,Value=True
        done
    elif [[ "$provider" == "gcp" ]]; then
        project_name=${environment_to_project[$environment]}
        if [[ -n "$project_name" ]]; then
            echo "Delete gcp image $id"
            gcloud compute images delete "$id" --project="$project_name" --quiet
        else
            echo "No project mapping for environment: $environment"
        fi
    fi
done < <(echo "$json_input" | jq -r 'to_entries | .[] | .key as $key | .value[] | "\($key) \(.id) \(.provider) \(.region // "null")"')

# The ToBeDeleted tag scans are region-scoped, so sweep every AWS region
# mentioned in the input (previously only the default region was cleaned up).
for aws_region in $(echo "$json_input" | jq -r '[to_entries[].value[] | select(.provider == "aws") | .region // empty] | unique | .[]'); do
    for ami_id in $(aws ec2 describe-images --region="$aws_region" --filters "Name=tag:ToBeDeleted,Values=True" --query "Images[].ImageId" --output text); do
        echo "Deleting AMI $ami_id"
        aws ec2 deregister-image --region="$aws_region" --image-id "$ami_id"
    done

    for snapshot_id in $(aws ec2 describe-snapshots --region="$aws_region" --filters "Name=tag:ToBeDeleted,Values=True" --query "Snapshots[].SnapshotId" --output text); do
        echo "Deleting aws snapshot $snapshot_id"
        aws ec2 delete-snapshot --region="$aws_region" --snapshot-id "$snapshot_id"
    done
done
121 changes: 121 additions & 0 deletions scripts/find_images.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import argparse
import json
import re
import subprocess
from collections import defaultdict
import datetime


def fetch_aws_images_by_tag(tag_key, tag_value, region):
    """Describe AWS images in *region* whose tag ``tag_key`` equals ``tag_value``.

    Returns the parsed ``aws ec2 describe-images`` JSON document, or ``None``
    when the CLI invocation fails (the error is printed, not raised).
    """
    cmd = [
        "aws", "ec2", "describe-images",
        "--filters", f"Name=tag:{tag_key},Values={tag_value}",
        "--region", region,
        "--output", "json",
    ]
    try:
        completed = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error fetching images: {e}")
        return None
    return json.loads(completed.stdout)


def group_aws_images_by_tag(images, tag_key):
    """Group AWS image records by the value of the tag named *tag_key*.

    Images that lack the tag -- or carry no ``Tags`` list at all, which the
    describe-images response may omit for untagged images -- are skipped.

    :param images: list of describe-images records (dicts).
    :param tag_key: tag name whose value becomes the group key.
    :return: ``defaultdict`` mapping tag value -> list of image records.
    """
    groups = defaultdict(list)
    for image in images:
        # .get avoids the KeyError the old image['Tags'] raised for
        # records without a 'Tags' entry.
        tags = image.get('Tags') or []
        tag_value = next((tag['Value'] for tag in tags if tag['Key'] == tag_key), None)
        if tag_value:
            groups[tag_value].append(image)
    return groups


def sort_aws_images_by_creation_date(images):
    """Sort every group's image list in place, newest first, by CreationDate."""
    def creation_date(image):
        # CreationDate strings look like 2024-01-02T03:04:05.000Z.
        return datetime.datetime.strptime(image['CreationDate'], '%Y-%m-%dT%H:%M:%S.%fZ')

    for group in images.values():
        group.sort(key=creation_date, reverse=True)


def fetch_aws_images(args):
    """Return ``{environment: [image dicts]}`` for stale AWS images.

    For every requested region, images tagged ``Project=<args.project_name>``
    are grouped by their ``Environment`` tag and sorted newest-first.  When a
    group holds at least ``args.image_count_upper_limit`` images, everything
    beyond the newest ``args.image_count_lower_limit`` is reported.
    """
    json_output = {}
    for region in args.aws_regions:
        # The original wrapped this single call in a one-element list loop;
        # a guard clause expresses the same flow directly.
        images_info = fetch_aws_images_by_tag("Project", args.project_name, region)
        if not images_info or not images_info.get('Images'):
            continue
        grouped_images = group_aws_images_by_tag(images_info['Images'], "Environment")
        sort_aws_images_by_creation_date(grouped_images)
        for environment, imgs in grouped_images.items():
            if len(imgs) >= args.image_count_upper_limit:
                # NOTE(review): if the same Environment tag appears in more
                # than one region, later regions overwrite earlier ones --
                # preserved from the original; confirm environments are
                # region-unique.
                json_output[environment] = [
                    {'name': img['Name'], 'id': img['ImageId'],
                     'provider': 'aws', 'region': region}
                    for img in imgs[args.image_count_lower_limit:]
                ]
    return json_output


def fetch_gcp_images_by_project(project, label):
    """List GCP compute images in *project* carrying the label ``project=<label>``.

    :return: parsed JSON list from ``gcloud``, or ``{}`` when the call fails
        (the error is printed, not raised).
    """
    try:
        command = [
            "gcloud", "compute", "images", "list",
            "--project", project,
            "--filter", f"labels.project={label}",
            "--format", "json"
        ]
        result = subprocess.run(command, capture_output=True, text=True, check=True)
        # check=True raises CalledProcessError on any non-zero exit, so the
        # old `if result.returncode != 0` branch was unreachable dead code.
        return json.loads(result.stdout)
    except subprocess.CalledProcessError as e:
        print(f"Error fetching GCP images: {e}")
        return {}


def fetch_gcp_images_by_projects(project_to_environments, label):
    """Fetch GCP images for each project and bucket them by environment.

    An environment key is only created when its project actually yields
    images, so empty environments stay absent from the result.
    """
    images_by_environment = defaultdict(list)
    for project, environment in project_to_environments.items():
        found = fetch_gcp_images_by_project(project, label)
        if found:
            images_by_environment[environment].extend(found)
    return images_by_environment


def sort_gcp_images_by_creation_date(images):
    """Sort every group's image list in place, newest first, by creationTimestamp."""
    def creation_timestamp(image):
        # GCP timestamps carry a UTC offset, e.g. 2024-01-02T03:04:05.000-08:00.
        return datetime.datetime.strptime(image['creationTimestamp'], '%Y-%m-%dT%H:%M:%S.%f%z')

    for group in images.values():
        group.sort(key=creation_timestamp, reverse=True)


def fetch_gcp_images(args):
    """Return ``{environment: [image dicts]}`` for stale GCP images.

    Images are collected per project, sorted newest-first per environment,
    and reported once a group reaches ``args.image_count_upper_limit``;
    everything beyond the newest ``args.image_count_lower_limit`` is listed.
    """
    environment_by_project = {
        'concordium-mgmt-0': 'BaseImage',
        'concordium-stagenet-0': 'Stagenet',
        'concordium-testnet-0': 'Testnet',
        'concordium-mainnet-0': 'Mainnet'
    }
    # CamelCase -> lower_snake_case, e.g. ConcordiumNode -> concordium_node,
    # to match the GCP label value.
    label = re.sub(r'([A-Z])', r'_\1', args.project_name).lower().strip('_')
    gcp_images = fetch_gcp_images_by_projects(environment_by_project, label)
    if not gcp_images:
        return {}
    sort_gcp_images_by_creation_date(gcp_images)
    json_output = {}
    for environment, imgs in gcp_images.items():
        if len(imgs) >= args.image_count_upper_limit:
            stale = imgs[args.image_count_lower_limit:]
            json_output[environment] = [
                {'name': img['name'], 'id': img['id'], 'provider': 'gcp'}
                for img in stale
            ]
    return json_output


def main(args):
    """Merge the AWS and GCP stale-image reports and print them as one JSON object."""
    aws_images = fetch_aws_images(args)
    gcp_images = fetch_gcp_images(args)
    merged = {}
    for environment in set(aws_images) | set(gcp_images):
        merged[environment] = aws_images.get(environment, []) + gcp_images.get(environment, [])
    print(json.dumps(merged))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Report the oldest images to a file ignoring the newest images provided by the limit.")
    parser.add_argument('--image_count_lower_limit', type=int, default=10,
                        help='The limit indicating where to report images down to. Those images are meant for de-registering')
    parser.add_argument('--image_count_upper_limit', type=int, default=20,
                        help='The limit indicating when to start reporting that images has to be de-registered')
    # required=True: fetch_aws_images iterates args.aws_regions, so omitting
    # the flag previously crashed with a TypeError on None instead of a
    # usage error.
    parser.add_argument('--aws_regions', type=str, nargs='+', required=True,
                        help='AWS region to fetch images from.')
    parser.add_argument('--project_name', type=str, default='ConcordiumNode', help='Project name to query tags by')
    args = parser.parse_args()
    main(args)

0 comments on commit 426ebcd

Please sign in to comment.