Skip to content

Commit

Permalink
Merge pull request #1 from fredrikaverpil/better-logs-cleanup
Browse files Browse the repository at this point in the history
Improve logs cleanup reliability
  • Loading branch information
fredrikaverpil authored Apr 10, 2019
2 parents 3453b36 + 6f2c72f commit 64d8d74
Showing 1 changed file with 77 additions and 64 deletions.
141 changes: 77 additions & 64 deletions tractor-purge.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,18 @@

import sys
import os
import platform
import subprocess
import re
import shutil
import datetime
import logging
from optparse import OptionParser
import time
import glob


####################################
# Option parser and constants
TRACTOR_PURGE_VERSION = 'v2.0.0'
TRACTOR_PURGE_VERSION = 'v2.1.0'
DEFAULT_DAYS = '30'
parser = OptionParser(version='%prog ' + TRACTOR_PURGE_VERSION)
parser.add_option('-t', '--tq', dest='tq',
Expand Down Expand Up @@ -82,13 +81,12 @@
####################################
# Functions

def jobs_list(days):
"""Create list of all job ids matching query
"""
def jids_to_delete(days):
"""Create list of all job ids matching query."""
jids = []
command = [TQ, 'jobs',
'not active and not ready and spooltime < -' + days + 'd',
'--noheader', '--archives',
'--noheader',
'--cols', 'jid',
'--sortby', 'jid',
'--limit', '0']
Expand All @@ -107,46 +105,60 @@ def jobs_list(days):
return jids


def get_all_job_folders(cmd_logs_dir):
"""Create list of all job folders
"""
job_folders = []
for root, directories, files in os.walk(cmd_logs_dir):
if len(directories) > 0:
for directory in directories:
match = re.search(r'J\d*', directory)
if match:
job_folder = root + '/' + directory
job_folders.append(job_folder)
return job_folders


def get_job_folders_for_deletion(job_folders, jids):
"""Compare job folders list against jids list, create deletion list
def jids_to_keep(days):
"""Create list of all job ids matching query.
NOTE: this query returns all jids within the time span in order to
NOT delete them.
"""
delete_list = []

jids = []
command = [TQ, 'jobs',
'spooltime > -' + days + 'd or active or ready or blocked',
'--noheader',
'--archives',
'--cols', 'jid',
'--sortby', 'jid',
'--limit', '0']

p = subprocess.Popen(command, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)

try:
for line in iter(p.stdout.readline, b''):
sys.stdout.flush()
jid = line.rstrip()
jids.append(int(jid))
logger.info('Keep logs for job: ' + jid)
except:
logger.warning('Failed to read stdout.')

return jids


def get_job_folders_for_deletion(job_folders, keep_jids):
"""Return list of job folders to NOT keep."""

folders_to_delete = []

for job_folder in job_folders:
jid_match = False
for jid in jids:
if job_folder.endswith('J' + str(jid)):
jid_match = True
jid = int(os.path.basename(job_folder).replace("J", ""))

if jid_match:
delete_list.append(job_folder)
logger.info('Added log folder for deletion: ' + job_folder)
if jid not in keep_jids:
folders_to_delete.append(job_folder)

return delete_list
return folders_to_delete


def delete_logs(delete_list):
"""Delete the actual log folders
"""
for job_folder in delete_list:
if not DRY_RUN:
logger.info('Deleting ' + job_folder)
logger.info('Deleting %s' % job_folder)
shutil.rmtree(job_folder)
else:
logger.info('Dry run: (not) deleting ' + job_folder)
logger.info('Dry run: (not) deleting %s' % job_folder)


def delete_tractor_jobs(days):
Expand All @@ -157,12 +169,13 @@ def delete_tractor_jobs(days):
logger.info('Executing tq command to delete jobs...')
command = [TQ, '--force', '--yes', 'delete',
'not active and not ready and spooltime < -' + days + 'd',
'--limit', '0',
'--cols', 'jid']
'--cols', 'jid',
'--limit', '0']
else:
logger.info('Executing tq command to (not) delete jobs...')
command = [TQ, 'jobs', '--archives',
'not active and not ready and spooltime < -' + days + 'd',
'--cols', 'jid',
'--limit', '0']

p = subprocess.Popen(command, stdout=subprocess.PIPE,
Expand All @@ -178,58 +191,58 @@ def delete_tractor_jobs(days):
####################################
# Main

if __name__ == '__main__':
def main():
"""Main program."""

# Show warning
SECONDS = 10
WARNING_MESSAGE = ('Welcome to tractor-purge.\n\n' +
'This script will now execute the follow actions')
seconds = 10
warning_message = ('Welcome to tractor-purge.\n\n' +
'This script will now execute the follow actions')
if DRY_RUN:
WARNING_MESSAGE += ' in "dry run" mode:\n'
warning_message += ' in "dry run" mode:\n'
else:
WARNING_MESSAGE += ':\n'
warning_message += ':\n'
if DELETE_CMD_LOGS:
WARNING_MESSAGE += ('- Delete cmd-logs older than ' +
warning_message += ('- Delete cmd-logs older than ' +
str(DAYS) + ' days.\n')
if DELETE_JOBS:
WARNING_MESSAGE += ('- Delete/archive jobs older than ' +
warning_message += ('- Delete/archive jobs older than ' +
str(DAYS) + ' days.\n')
WARNING_MESSAGE += ('\nAbort now (ctrl+c) if this is does not look ' +
'right to you. You have ' + str(SECONDS) + ' ' +
warning_message += ('\nAbort now (ctrl+c) if this is does not look ' +
'right to you. You have ' + str(seconds) + ' ' +
'seconds and counting...')
logger.warning(WARNING_MESSAGE)
time.sleep(SECONDS)
logger.warning(warning_message)
time.sleep(seconds)

logger.info('Tractor purge initiated.')

# Queries
jids = jobs_list(days=DAYS)
if DELETE_CMD_LOGS:
all_job_folders = get_all_job_folders(cmd_logs_dir=CMD_LOGS_DIR)
job_folders_for_deletion = get_job_folders_for_deletion(
job_folders=all_job_folders, jids=jids)
jids = jids_to_keep(days=DAYS)
all_job_folders = glob.glob("%s/*/J*" % (CMD_LOGS_DIR))
paths_to_delete = get_job_folders_for_deletion(
job_folders=all_job_folders, keep_jids=jids)

# Summary
if DELETE_CMD_LOGS:
logger.info('Job log folders found: ' +
str(len(all_job_folders)))
logger.info('Job log folders to be emptied: ' +
str(len(job_folders_for_deletion)))
if DELETE_JOBS:
logger.info('Jobs to be deleted: ' + str(len(jids)))
logger.info('Job log folders found: %s' % len(all_job_folders))
logger.info('Job log folders to be emptied: %s' % len(paths_to_delete))

# Delete logs
if DELETE_CMD_LOGS:
if len(jids) > 0:
delete_logs(delete_list=job_folders_for_deletion)
delete_logs(delete_list=paths_to_delete)
else:
logger.info('No logs to delete.')

# Delete jobs
if DELETE_JOBS:
elif DELETE_JOBS:
jids = jids_to_delete(days=DAYS)
logger.info('Jobs to be deleted: %s' % len(jids))

if len(jids) > 0:
delete_tractor_jobs(days=DAYS)
else:
logger.info('No jobs to delete.')

logger.info('Tractor purge done.\n')


if __name__ == '__main__':
main()

0 comments on commit 64d8d74

Please sign in to comment.