From 04a43e4c2e05aa0a7bcfe905b22dbdad9c75fb53 Mon Sep 17 00:00:00 2001 From: Fredrik Averpil Date: Wed, 10 Apr 2019 17:07:01 +0200 Subject: [PATCH 1/5] Improve logs cleanup reliability Rather than querying the db for jobs to delete, query the db for jobs to _keep_ --- tractor-purge.py | 80 +++++++++++++++++++++++------------------------- 1 file changed, 38 insertions(+), 42 deletions(-) diff --git a/tractor-purge.py b/tractor-purge.py index 64a15d5..febc9ad 100755 --- a/tractor-purge.py +++ b/tractor-purge.py @@ -12,6 +12,7 @@ import logging from optparse import OptionParser import time +import glob #################################### @@ -82,16 +83,23 @@ #################################### # Functions -def jobs_list(days): - """Create list of all job ids matching query + +def jids_to_keep(days): + """Return list of jids to be kept. + + NOTE: this query returns all jids within the time span in order to + NOT delete them. """ + jids = [] command = [TQ, 'jobs', - 'not active and not ready and spooltime < -' + days + 'd', - '--noheader', '--archives', + 'spooltime > -' + days + 'd or active or ready or blocked', + '--noheader', + '--archives', '--cols', 'jid', '--sortby', 'jid', '--limit', '0'] + p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) @@ -100,7 +108,7 @@ def jobs_list(days): sys.stdout.flush() jid = line.rstrip() jids.append(int(jid)) - logger.info('Found job: ' + jid) + logger.info('Keep: ' + jid) except: logger.warning('Failed to read stdout.') @@ -108,34 +116,23 @@ def jobs_list(days): def get_all_job_folders(cmd_logs_dir): - """Create list of all job folders - """ - job_folders = [] - for root, directories, files in os.walk(cmd_logs_dir): - if len(directories) > 0: - for directory in directories: - match = re.search(r'J\d*', directory) - if match: - job_folder = root + '/' + directory - job_folders.append(job_folder) - return job_folders - - -def get_job_folders_for_deletion(job_folders, jids): - """Compare job folders list against jids list, create deletion list - """ - delete_list = [] + """Return list of job folders.""" + + return glob.glob("%s/*/J*" % (cmd_logs_dir)) + + +def get_job_folders_for_deletion(job_folders, keep_jids): + """Return list of job folders to NOT keep.""" + + folders_to_delete = [] + for job_folder in job_folders: - jid_match = False - for jid in jids: - if job_folder.endswith('J' + str(jid)): - jid_match = True + jid = int(os.path.basename(job_folder).replace("J", "")) - if jid_match: - delete_list.append(job_folder) - logger.info('Added log folder for deletion: ' + job_folder) + if jid not in keep_jids: + folders_to_delete.append(job_folder) - return delete_list + return folders_to_delete def delete_logs(delete_list): @@ -143,10 +140,10 @@ def delete_logs(delete_list): """ for job_folder in delete_list: if not DRY_RUN: - logger.info('Deleting ' + job_folder) + logger.info('Deleting %s' % job_folder) shutil.rmtree(job_folder) else: - logger.info('Dry run: (not) deleting ' + job_folder) + logger.info('Dry run: (not) deleting %s' % job_folder) def delete_tractor_jobs(days): @@ -157,12 +154,13 @@ def delete_tractor_jobs(days): logger.info('Executing tq command to delete jobs...') command = [TQ, '--force', '--yes', 'delete', 'not active and not ready and spooltime < -' + days + 'd', - '--limit', '0', - '--cols', 'jid'] + '--cols', 'jid', + '--limit', '0'] else: logger.info('Executing tq command to (not) delete jobs...') command = [TQ, 'jobs', '--archives', 'not active and not ready and spooltime < -' + days + 'd', + '--cols', 'jid', '--limit', '0'] p = subprocess.Popen(command, stdout=subprocess.PIPE, @@ -183,7 +181,7 @@ def delete_tractor_jobs(days): # Show warning SECONDS = 10 WARNING_MESSAGE = ('Welcome to tractor-purge.\n\n' + - 'This script will now execute the follow actions') + 'This script will now execute the follow actions') if DRY_RUN: WARNING_MESSAGE += ' in "dry run" mode:\n' else: @@ -203,20 +201,18 @@ def delete_tractor_jobs(days): logger.info('Tractor purge initiated.') # Queries - jids = jobs_list(days=DAYS) + jids = jids_to_keep(days=DAYS) if DELETE_CMD_LOGS: all_job_folders = get_all_job_folders(cmd_logs_dir=CMD_LOGS_DIR) job_folders_for_deletion = get_job_folders_for_deletion( - job_folders=all_job_folders, jids=jids) + job_folders=all_job_folders, keep_jids=jids) # Summary if DELETE_CMD_LOGS: - logger.info('Job log folders found: ' + - str(len(all_job_folders))) - logger.info('Job log folders to be emptied: ' + - str(len(job_folders_for_deletion))) + logger.info('Job log folders found: %s' % len(all_job_folders)) + logger.info('Job log folders to be emptied: %s' % len(job_folders_for_deletion)) if DELETE_JOBS: - logger.info('Jobs to be deleted: ' + str(len(jids))) + logger.info('Jobs to be deleted: %s' % len(jids)) # Delete logs if DELETE_CMD_LOGS: From 79113ee0c4a10c7fd484ad9ba8771de283a837c0 Mon Sep 17 00:00:00 2001 From: Fredrik Averpil Date: Wed, 10 Apr 2019 17:35:52 +0200 Subject: [PATCH 2/5] Refactor main --- tractor-purge.py | 81 ++++++++++++++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 31 deletions(-) diff --git a/tractor-purge.py b/tractor-purge.py index febc9ad..61e631d 100755 --- a/tractor-purge.py +++ b/tractor-purge.py @@ -83,9 +83,32 @@ #################################### # Functions +def jids_to_delete(days): + """Create list of all job ids matching query.""" + jids = [] + command = [TQ, 'jobs', + 'not active and not ready and spooltime < -' + days + 'd', + '--noheader', '--archives', + '--cols', 'jid', + '--sortby', 'jid', + '--limit', '0'] + p = subprocess.Popen(command, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + + try: + for line in iter(p.stdout.readline, b''): + sys.stdout.flush() + jid = line.rstrip() + jids.append(int(jid)) + logger.info('Found job: ' + jid) + except: + logger.warning('Failed to read stdout.') + + return jids + def jids_to_keep(days): - """Return list of jids to be kept. + """Create list of all job ids matching query. NOTE: this query returns all jids within the time span in order to NOT delete them. @@ -108,19 +131,13 @@ def jids_to_keep(days): sys.stdout.flush() jid = line.rstrip() jids.append(int(jid)) - logger.info('Keep: ' + jid) + logger.info('Keep logs for job: ' + jid) except: logger.warning('Failed to read stdout.') return jids -def get_all_job_folders(cmd_logs_dir): - """Return list of job folders.""" - - return glob.glob("%s/*/J*" % (cmd_logs_dir)) - - def get_job_folders_for_deletion(job_folders, keep_jids): """Return list of job folders to NOT keep.""" @@ -176,56 +193,58 @@ def delete_tractor_jobs(days): #################################### # Main -if __name__ == '__main__': +def main(): + """Main program.""" # Show warning - SECONDS = 10 - WARNING_MESSAGE = ('Welcome to tractor-purge.\n\n' + + seconds = 10 + warning_message = ('Welcome to tractor-purge.\n\n' + 'This script will now execute the follow actions') if DRY_RUN: - WARNING_MESSAGE += ' in "dry run" mode:\n' + warning_message += ' in "dry run" mode:\n' else: - WARNING_MESSAGE += ':\n' + warning_message += ':\n' if DELETE_CMD_LOGS: - WARNING_MESSAGE += ('- Delete cmd-logs older than ' + + warning_message += ('- Delete cmd-logs older than ' + str(DAYS) + ' days.\n') if DELETE_JOBS: - WARNING_MESSAGE += ('- Delete/archive jobs older than ' + + warning_message += ('- Delete/archive jobs older than ' + str(DAYS) + ' days.\n') - WARNING_MESSAGE += ('\nAbort now (ctrl+c) if this is does not look ' + - 'right to you. You have ' + str(SECONDS) + ' ' + + warning_message += ('\nAbort now (ctrl+c) if this is does not look ' + + 'right to you. You have ' + str(seconds) + ' ' + 'seconds and counting...') - logger.warning(WARNING_MESSAGE) - time.sleep(SECONDS) + logger.warning(warning_message) + time.sleep(seconds) logger.info('Tractor purge initiated.') # Queries - jids = jids_to_keep(days=DAYS) if DELETE_CMD_LOGS: - all_job_folders = get_all_job_folders(cmd_logs_dir=CMD_LOGS_DIR) - job_folders_for_deletion = get_job_folders_for_deletion( + jids = jids_to_keep(days=DAYS) + all_job_folders = glob.glob("%s/*/J*" % (CMD_LOGS_DIR)) + paths_to_delete = get_job_folders_for_deletion( job_folders=all_job_folders, keep_jids=jids) - # Summary - if DELETE_CMD_LOGS: logger.info('Job log folders found: %s' % len(all_job_folders)) - logger.info('Job log folders to be emptied: %s' % len(job_folders_for_deletion)) - if DELETE_JOBS: - logger.info('Jobs to be deleted: %s' % len(jids)) + logger.info('Job log folders to be emptied: %s' % len(paths_to_delete)) - # Delete logs - if DELETE_CMD_LOGS: if len(jids) > 0: - delete_logs(delete_list=job_folders_for_deletion) + delete_logs(delete_list=paths_to_delete) else: logger.info('No logs to delete.') # Delete jobs - if DELETE_JOBS: + elif DELETE_JOBS: + jids = jids_to_delete(days=DAYS) + logger.info('Jobs to be deleted: %s' % len(jids)) + if len(jids) > 0: delete_tractor_jobs(days=DAYS) else: logger.info('No jobs to delete.') logger.info('Tractor purge done.\n') + + +if __name__ == '__main__': + main() From 17559e1ca1817ca9c49455629c4721e0473532ab Mon Sep 17 00:00:00 2001 From: Fredrik Averpil Date: Wed, 10 Apr 2019 17:59:16 +0200 Subject: [PATCH 3/5] Remove unused imports --- tractor-purge.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tractor-purge.py b/tractor-purge.py index 61e631d..3cf4c54 100755 --- a/tractor-purge.py +++ b/tractor-purge.py @@ -4,11 +4,9 @@ import sys import os -import platform import subprocess import re import shutil -import datetime import logging from optparse import OptionParser import time From 1c2ae2a0e76ad3ed8c16acc90a72934faa8aff63 Mon Sep 17 00:00:00 2001 From: Fredrik Averpil Date: Wed, 10 Apr 2019 18:06:31 +0200 Subject: [PATCH 4/5] Do not search archive for deletion --- tractor-purge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tractor-purge.py b/tractor-purge.py index 3cf4c54..24a4299 100755 --- a/tractor-purge.py +++ b/tractor-purge.py @@ -86,7 +86,7 @@ def jids_to_delete(days): jids = [] command = [TQ, 'jobs', 'not active and not ready and spooltime < -' + days + 'd', - '--noheader', '--archives', + '--noheader', '--cols', 'jid', '--sortby', 'jid', '--limit', '0'] From 6f2c72f158fdbf8780f2ac78eb3e833a415bedb6 Mon Sep 17 00:00:00 2001 From: Fredrik Averpil Date: Wed, 10 Apr 2019 18:16:48 +0200 Subject: [PATCH 5/5] Bump version --- tractor-purge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tractor-purge.py b/tractor-purge.py index 24a4299..258e7c4 100755 --- a/tractor-purge.py +++ b/tractor-purge.py @@ -15,7 +15,7 @@ #################################### # Option parser and constants -TRACTOR_PURGE_VERSION = 'v2.0.0' +TRACTOR_PURGE_VERSION = 'v2.1.0' DEFAULT_DAYS = '30' parser = OptionParser(version='%prog ' + TRACTOR_PURGE_VERSION) parser.add_option('-t', '--tq', dest='tq',