
introduce new env variable to specify S3 tags
FxKu committed Sep 6, 2024
1 parent 0766812 commit 79c5369
Showing 3 changed files with 12 additions and 19 deletions.
1 change: 1 addition & 0 deletions ENVIRONMENT.rst
@@ -90,6 +90,7 @@ Environment Configuration Settings
 - **AZURE_TENANT_ID**: (optional) Tenant ID of the Service Principal
 - **CALLBACK_SCRIPT**: the callback script to run on various cluster actions (on start, on stop, on restart, on role change). The script will receive the cluster name, connection string and the current action. See `Patroni <http://patroni.readthedocs.io/en/latest/SETTINGS.html?highlight=callback#postgresql>`__ documentation for details.
 - **LOG_S3_BUCKET**: path to the S3 bucket used for PostgreSQL daily log files (i.e. foobar, without `s3://` prefix). Spilo will add `/spilo/{LOG_BUCKET_SCOPE_PREFIX}{SCOPE}{LOG_BUCKET_SCOPE_SUFFIX}/log/` to that path. Logs are shipped if this variable is set.
+- **LOG_S3_TAGS**: map of key-value pairs used for tagging files uploaded to S3. Values should reference existing environment variables.
 - **LOG_SHIP_SCHEDULE**: cron schedule for shipping compressed logs from ``pg_log`` (if this feature is enabled, '00 02 * * *' by default)
 - **LOG_ENV_DIR**: directory to store environment variables necessary for log shipping
 - **LOG_TMPDIR**: directory to store temporary compressed daily log files. PGROOT/../tmp by default.
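
For illustration only (not part of the commit): LOG_S3_TAGS holds a Python-style dict literal whose values are the names of other environment variables, which the upload script dereferences at runtime. A minimal sketch with hypothetical POD_NAMESPACE and SCOPE values; the sketch parses the map with ast.literal_eval, whereas the committed script uses eval:

import ast
import os

# Hypothetical example values; in a real cluster these come from the pod environment.
os.environ['POD_NAMESPACE'] = 'default'
os.environ['SCOPE'] = 'acid-minimal-cluster'
os.environ['LOG_S3_TAGS'] = '{"Namespace": "POD_NAMESPACE", "ClusterName": "SCOPE"}'

# Each value names another environment variable; it is dereferenced at upload time.
tag_map = ast.literal_eval(os.environ['LOG_S3_TAGS'])
resolved = {key: os.getenv(var_name) for key, var_name in tag_map.items()}
print(resolved)  # {'Namespace': 'default', 'ClusterName': 'acid-minimal-cluster'}
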
3 changes: 2 additions & 1 deletion postgres-appliance/scripts/configure_spilo.py
@@ -582,6 +582,7 @@ def get_placeholders(provider):
     placeholders.setdefault('LOG_SHIP_SCHEDULE', '1 0 * * *')
     placeholders.setdefault('LOG_S3_BUCKET', '')
     placeholders.setdefault('LOG_S3_ENDPOINT', '')
+    placeholders.setdefault('LOG_S3_TAGS', '{}')
     placeholders.setdefault('LOG_TMPDIR', os.path.abspath(os.path.join(placeholders['PGROOT'], '../tmp')))
     placeholders.setdefault('LOG_BUCKET_SCOPE_SUFFIX', '')

@@ -766,7 +767,7 @@ def write_log_environment(placeholders):
     if not os.path.exists(log_env['LOG_ENV_DIR']):
         os.makedirs(log_env['LOG_ENV_DIR'])

-    for var in ('LOG_TMPDIR', 'LOG_AWS_REGION', 'LOG_S3_ENDPOINT', 'LOG_S3_KEY', 'LOG_S3_BUCKET', 'PGLOG'):
+    for var in ('LOG_TMPDIR', 'LOG_AWS_REGION', 'LOG_S3_ENDPOINT', 'LOG_S3_KEY', 'LOG_S3_BUCKET', 'LOG_S3_TAGS', 'PGLOG'):
         write_file(log_env[var], os.path.join(log_env['LOG_ENV_DIR'], var), True)


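The extended tuple above persists LOG_S3_TAGS alongside the other log-shipping settings: write_log_environment writes one file per variable into LOG_ENV_DIR so the log-shipping job can read them later. A rough sketch of the effect, assuming write_file(content, path, overwrite) simply writes the value into the named file:

import os

def write_log_env_sketch(log_env):
    # Hypothetical stand-in for the loop in write_log_environment().
    os.makedirs(log_env['LOG_ENV_DIR'], exist_ok=True)
    for var in ('LOG_TMPDIR', 'LOG_AWS_REGION', 'LOG_S3_ENDPOINT',
                'LOG_S3_KEY', 'LOG_S3_BUCKET', 'LOG_S3_TAGS', 'PGLOG'):
        # One file per variable: file name == variable name, file content == value.
        with open(os.path.join(log_env['LOG_ENV_DIR'], var), 'w') as f:
            f.write(log_env[var])
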
27 changes: 9 additions & 18 deletions postgres-appliance/scripts/upload_pg_log_to_s3.py
@@ -7,7 +7,6 @@
 import subprocess
 import sys
 import time
-import croniter

 from datetime import datetime, timedelta

@@ -17,21 +16,12 @@
 logger = logging.getLogger(__name__)


-def generate_file_name():
-    schedule = os.getenv('LOG_SHIP_SCHEDULE')
-    itr = croniter(schedule, datetime.now() - timedelta(minutes=1))
-    prev_log = itr.get_prev(datetime.datetime)
+def compress_pg_log():
     yesterday = datetime.now() - timedelta(days=1)
     yesterday_day_number = yesterday.strftime('%u')

     log_file = os.path.join(os.getenv('PGLOG'), 'postgresql-' + yesterday_day_number + '.csv')
-    archived_log_file = os.path.join(os.getenv('LOG_TMPDIR'), prev_log.strftime('%F') + '.csv.gz')
-
-    return log_file, archived_log_file
-
-
-def compress_pg_log():
-    log_file, archived_log_file = generate_file_name()
+    archived_log_file = os.path.join(os.getenv('LOG_TMPDIR'), yesterday.strftime('%F') + '.csv.gz')

     if os.path.getsize(log_file) == 0:
         logger.warning("Postgres log from yesterday '%s' is empty.", log_file)
@@ -63,14 +53,15 @@ def upload_to_s3(local_file_path):

     chunk_size = 52428800  # 50 MiB
     config = TransferConfig(multipart_threshold=chunk_size, multipart_chunksize=chunk_size)
-    tags = {
-        'Namespace': os.getenv('POD_NAMESPACE'),
-        'ClusterName': os.getenv('SCOPE')
-    }
-    tags_str = "&".join(f"{key}={value}" for key, value in tags.items())
+    tags = eval(os.getenv('LOG_S3_TAGS'))
+    s3_tags = {}
+    for key, value in tags.items():
+        s3_tags[key] = os.getenv(value)
+
+    s3_tags_str = "&".join(f"{key}={value}" for key, value in s3_tags.items())

     try:
-        bucket.upload_file(local_file_path, key_name, Config=config, ExtraArgs={'Tagging': tags_str})
+        bucket.upload_file(local_file_path, key_name, Config=config, ExtraArgs={'Tagging': s3_tags_str})
     except S3UploadFailedError as e:
         logger.exception('Failed to upload the %s to the bucket %s under the key %s. Exception: %r',
                          local_file_path, bucket_name, key_name, e)
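
The replacement logic reads the tag map from LOG_S3_TAGS, resolves each value through os.getenv, and hands boto3 a query-string style Tagging value ("Key1=Value1&Key2=Value2"). A self-contained sketch of the same flow, not the committed code: it swaps eval() for ast.literal_eval, which parses the dict literal without executing arbitrary expressions, and uses a hypothetical bucket and key:

import ast
import os

import boto3
from boto3.s3.transfer import TransferConfig


def build_tagging_string():
    # Parse the map from LOG_S3_TAGS; ast.literal_eval accepts a plain dict
    # literal but, unlike eval(), will not execute arbitrary expressions.
    tags = ast.literal_eval(os.getenv('LOG_S3_TAGS', '{}'))
    # Dereference each value: it is the name of another environment variable.
    s3_tags = {key: os.getenv(var_name) for key, var_name in tags.items()}
    # S3 object tagging is passed as a URL query-string style value.
    return "&".join(f"{key}={value}" for key, value in s3_tags.items())


def upload_sketch(local_file_path, bucket_name='example-log-bucket',
                  key_name='spilo/example-cluster/log/2024-09-05.csv.gz'):
    # Hypothetical wrapper mirroring the upload performed in upload_to_s3().
    bucket = boto3.resource('s3').Bucket(bucket_name)
    chunk_size = 52428800  # 50 MiB, as in the script
    config = TransferConfig(multipart_threshold=chunk_size, multipart_chunksize=chunk_size)
    bucket.upload_file(local_file_path, key_name, Config=config,
                       ExtraArgs={'Tagging': build_tagging_string()})

Note that if a referenced environment variable is unset, os.getenv returns None and the tag is rendered literally as "None"; values containing characters such as "&" or "=" would also need URL encoding before being passed as Tagging.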
