From b8ecea2d7a96d7e84c796815526e8613144b361b Mon Sep 17 00:00:00 2001 From: Alexey Ermakov Date: Wed, 2 Jan 2019 11:03:36 +0100 Subject: [PATCH 1/2] Handle pods with failed initcontainers or weird statuses Signed-off-by: Alexey Ermakov --- cleaner.py | 45 +++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/cleaner.py b/cleaner.py index d46d1ba..7a81c8b 100755 --- a/cleaner.py +++ b/cleaner.py @@ -42,36 +42,37 @@ def job_expired(max_age, timeout_seconds, job): return 'timeout ({:.0f}s running)'.format(seconds_since_start) +def container_finish_time(status): + terminated_state = status.get('state', {}).get('terminated') or status.get('lastState') + if terminated_state: + finish_time = terminated_state.get('finishedAt') + if finish_time: + return parse_time(finish_time) + + +def termination_time(pod): + pod_status = pod.obj['status'] + container_statuses = pod_status.get('initContainerStatuses', []) + pod_status.get('containerStatuses', []) + finish_times = filter(None, (container_finish_time(status) for status in container_statuses)) + if not finish_times: + return None + return max(finish_times) + + def pod_expired(max_age, pod): now = time.time() pod_status = pod.obj['status'] if pod_status.get('phase') in ('Succeeded', 'Failed'): - container_statuses = pod_status.get('containerStatuses', []) - if pod_status.get('reason') == 'Preempting': # preempting pods don't have any container information, so let's remove them immediately return 'preempted' - elif not container_statuses: - print("Warning: Skipping pod without containers ({})".format(pod.obj['metadata'].get('name'))) - return - else: - seconds_since_completion = 0 - for container in pod_status.get('containerStatuses'): - if 'terminated' in container['state']: - state = container['state'] - elif 'terminated' in container.get('lastState', {}): - # current state might be "waiting", but lastState is good enough - state = container['lastState'] - else: - state 
= None - if state: - finish = now - parse_time(state['terminated']['finishedAt']) - if seconds_since_completion == 0 or finish < seconds_since_completion: - seconds_since_completion = finish - - if seconds_since_completion > max_age: - return '{:.0f}s old'.format(seconds_since_completion) + + # If we cannot determine the finish time, use start time instead + finish_time = termination_time(pod) or parse_time(pod.obj['metadata']['creationTimestamp']) + seconds_since_completion = now - finish_time + if seconds_since_completion > max_age: + return '{:.0f}s old'.format(seconds_since_completion) def delete_if_expired(dry_run, entity, reason): From bb1e151602b557ad5cbe8a109ccf71e7beea6613 Mon Sep 17 00:00:00 2001 From: Alexey Ermakov Date: Tue, 8 Jan 2019 14:24:14 +0100 Subject: [PATCH 2/2] Correctly handle empty statuses In Python 3, filter() returns a lazy iterator, not a list. An iterator object is always truthy, so the following `if not finish_times` check never fires and max() raises ValueError when no container reports a finish time. Signed-off-by: Alexey Ermakov --- cleaner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cleaner.py b/cleaner.py index 7a81c8b..dcf410c 100755 --- a/cleaner.py +++ b/cleaner.py @@ -53,7 +53,7 @@ def container_finish_time(status): def termination_time(pod): pod_status = pod.obj['status'] container_statuses = pod_status.get('initContainerStatuses', []) + pod_status.get('containerStatuses', []) - finish_times = filter(None, (container_finish_time(status) for status in container_statuses)) + finish_times = list(filter(None, (container_finish_time(status) for status in container_statuses))) if not finish_times: return None return max(finish_times)