
introduce new env variable to specify S3 tags
FxKu committed Sep 6, 2024
1 parent 0766812 commit 79c5369
Showing 3 changed files with 12 additions and 19 deletions.
1 change: 1 addition & 0 deletions ENVIRONMENT.rst
@@ -90,6 +90,7 @@ Environment Configuration Settings
 - **AZURE_TENANT_ID**: (optional) Tenant ID of the Service Principal
 - **CALLBACK_SCRIPT**: the callback script to run on various cluster actions (on start, on stop, on restart, on role change). The script will receive the cluster name, connection string and the current action. See `Patroni <http://patroni.readthedocs.io/en/latest/SETTINGS.html?highlight=callback#postgresql>`__ documentation for details.
 - **LOG_S3_BUCKET**: path to the S3 bucket used for PostgreSQL daily log files (i.e. foobar, without `s3://` prefix). Spilo will add `/spilo/{LOG_BUCKET_SCOPE_PREFIX}{SCOPE}{LOG_BUCKET_SCOPE_SUFFIX}/log/` to that path. Logs are shipped if this variable is set.
+- **LOG_S3_TAGS**: map of key-value pairs used for tagging files uploaded to S3. Values should reference existing environment variables.
 - **LOG_SHIP_SCHEDULE**: cron schedule for shipping compressed logs from ``pg_log`` (if this feature is enabled, '00 02 * * *' by default)
 - **LOG_ENV_DIR**: directory to store environment variables necessary for log shipping
 - **LOG_TMPDIR**: directory to store temporary compressed daily log files. PGROOT/../tmp by default.
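
For illustration only (not part of the commit): LOG_S3_TAGS holds a Python-style dict literal whose values are the names of other environment variables, which the upload script dereferences at runtime. A minimal sketch with hypothetical POD_NAMESPACE and SCOPE values; the sketch parses the map with ast.literal_eval, whereas the committed script uses eval:

import ast
import os

# Hypothetical example values; in a real cluster these come from the pod environment.
os.environ['POD_NAMESPACE'] = 'default'
os.environ['SCOPE'] = 'acid-minimal-cluster'
os.environ['LOG_S3_TAGS'] = '{"Namespace": "POD_NAMESPACE", "ClusterName": "SCOPE"}'

# Each value names another environment variable; it is dereferenced at upload time.
tag_map = ast.literal_eval(os.environ['LOG_S3_TAGS'])
resolved = {key: os.getenv(var_name) for key, var_name in tag_map.items()}
print(resolved)  # {'Namespace': 'default', 'ClusterName': 'acid-minimal-cluster'}
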
3 changes: 2 additions & 1 deletion postgres-appliance/scripts/configure_spilo.py
@@ -582,6 +582,7 @@ def get_placeholders(provider):
     placeholders.setdefault('LOG_SHIP_SCHEDULE', '1 0 * * *')
     placeholders.setdefault('LOG_S3_BUCKET', '')
     placeholders.setdefault('LOG_S3_ENDPOINT', '')
+    placeholders.setdefault('LOG_S3_TAGS', '{}')
     placeholders.setdefault('LOG_TMPDIR', os.path.abspath(os.path.join(placeholders['PGROOT'], '../tmp')))
     placeholders.setdefault('LOG_BUCKET_SCOPE_SUFFIX', '')

@@ -766,7 +767,7 @@ def write_log_environment(placeholders):
     if not os.path.exists(log_env['LOG_ENV_DIR']):
         os.makedirs(log_env['LOG_ENV_DIR'])

-    for var in ('LOG_TMPDIR', 'LOG_AWS_REGION', 'LOG_S3_ENDPOINT', 'LOG_S3_KEY', 'LOG_S3_BUCKET', 'PGLOG'):
+    for var in ('LOG_TMPDIR', 'LOG_AWS_REGION', 'LOG_S3_ENDPOINT', 'LOG_S3_KEY', 'LOG_S3_BUCKET', 'LOG_S3_TAGS', 'PGLOG'):
         write_file(log_env[var], os.path.join(log_env['LOG_ENV_DIR'], var), True)


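The extended tuple above persists LOG_S3_TAGS alongside the other log-shipping settings: write_log_environment writes one file per variable into LOG_ENV_DIR so the log-shipping job can read them later. A rough sketch of the effect, assuming write_file(content, path, overwrite) simply writes the value into the named file:

import os

def write_log_env_sketch(log_env):
    # Hypothetical stand-in for the loop in write_log_environment().
    os.makedirs(log_env['LOG_ENV_DIR'], exist_ok=True)
    for var in ('LOG_TMPDIR', 'LOG_AWS_REGION', 'LOG_S3_ENDPOINT',
                'LOG_S3_KEY', 'LOG_S3_BUCKET', 'LOG_S3_TAGS', 'PGLOG'):
        # One file per variable: file name == variable name, file content == value.
        with open(os.path.join(log_env['LOG_ENV_DIR'], var), 'w') as f:
            f.write(log_env[var])
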
27 changes: 9 additions & 18 deletions postgres-appliance/scripts/upload_pg_log_to_s3.py
@@ -7,7 +7,6 @@
 import subprocess
 import sys
 import time
-import croniter

 from datetime import datetime, timedelta

@@ -17,21 +16,12 @@
 logger = logging.getLogger(__name__)


-def generate_file_name():
-    schedule = os.getenv('LOG_SHIP_SCHEDULE')
-    itr = croniter(schedule, datetime.now() - timedelta(minutes=1))
-    prev_log = itr.get_prev(datetime.datetime)
+def compress_pg_log():
     yesterday = datetime.now() - timedelta(days=1)
     yesterday_day_number = yesterday.strftime('%u')

     log_file = os.path.join(os.getenv('PGLOG'), 'postgresql-' + yesterday_day_number + '.csv')
-    archived_log_file = os.path.join(os.getenv('LOG_TMPDIR'), prev_log.strftime('%F') + '.csv.gz')
-
-    return log_file, archived_log_file
-
-
-def compress_pg_log():
-    log_file, archived_log_file = generate_file_name()
+    archived_log_file = os.path.join(os.getenv('LOG_TMPDIR'), yesterday.strftime('%F') + '.csv.gz')

     if os.path.getsize(log_file) == 0:
         logger.warning("Postgres log from yesterday '%s' is empty.", log_file)
@@ -63,14 +53,15 @@ def upload_to_s3(local_file_path):

     chunk_size = 52428800  # 50 MiB
     config = TransferConfig(multipart_threshold=chunk_size, multipart_chunksize=chunk_size)
-    tags = {
-        'Namespace': os.getenv('POD_NAMESPACE'),
-        'ClusterName': os.getenv('SCOPE')
-    }
-    tags_str = "&".join(f"{key}={value}" for key, value in tags.items())
+    tags = eval(os.getenv('LOG_S3_TAGS'))
+    s3_tags = {}
+    for key, value in tags.items():
+        s3_tags[key] = os.getenv(value)
+
+    s3_tags_str = "&".join(f"{key}={value}" for key, value in s3_tags.items())

     try:
-        bucket.upload_file(local_file_path, key_name, Config=config, ExtraArgs={'Tagging': tags_str})
+        bucket.upload_file(local_file_path, key_name, Config=config, ExtraArgs={'Tagging': s3_tags_str})
     except S3UploadFailedError as e:
         logger.exception('Failed to upload the %s to the bucket %s under the key %s. Exception: %r',
                          local_file_path, bucket_name, key_name, e)
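
The replacement logic reads the tag map from LOG_S3_TAGS, resolves each value through os.getenv, and hands boto3 a query-string style Tagging value ("Key1=Value1&Key2=Value2"). A self-contained sketch of the same flow, not the committed code: it swaps eval() for ast.literal_eval, which parses the dict literal without executing arbitrary expressions, and uses a hypothetical bucket and key:

import ast
import os

import boto3
from boto3.s3.transfer import TransferConfig


def build_tagging_string():
    # Parse the map from LOG_S3_TAGS; ast.literal_eval accepts a plain dict
    # literal but, unlike eval(), will not execute arbitrary expressions.
    tags = ast.literal_eval(os.getenv('LOG_S3_TAGS', '{}'))
    # Dereference each value: it is the name of another environment variable.
    s3_tags = {key: os.getenv(var_name) for key, var_name in tags.items()}
    # S3 object tagging is passed as a URL query-string style value.
    return "&".join(f"{key}={value}" for key, value in s3_tags.items())


def upload_sketch(local_file_path, bucket_name='example-log-bucket',
                  key_name='spilo/example-cluster/log/2024-09-05.csv.gz'):
    # Hypothetical wrapper mirroring the upload performed in upload_to_s3().
    bucket = boto3.resource('s3').Bucket(bucket_name)
    chunk_size = 52428800  # 50 MiB, as in the script
    config = TransferConfig(multipart_threshold=chunk_size, multipart_chunksize=chunk_size)
    bucket.upload_file(local_file_path, key_name, Config=config,
                       ExtraArgs={'Tagging': build_tagging_string()})

Note that if a referenced environment variable is unset, os.getenv returns None and the tag is rendered literally as "None"; values containing characters such as "&" or "=" would also need URL encoding before being passed as Tagging.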
