Skip to content

Commit

Permalink
arbiter: Respect timeout when killing workers
Browse files Browse the repository at this point in the history
Previously, a worker could be sent SIGTERM twice, which could cause it
to exit due to the signal, causing an error to be emitted.

Now, we try to respect the timeout set for workers, and don't forcefully
kill them (SIGKILL) until that timeout has elapsed since the first signal.
  • Loading branch information
sylt committed Sep 4, 2024
1 parent ee8e7f9 commit 445c595
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 6 deletions.
16 changes: 11 additions & 5 deletions gunicorn/arbiter.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ def reload(self):

def murder_workers(self):
"""\
Kill unused/idle workers
Kill non-responsive workers
"""
if not self.timeout:
return
Expand All @@ -467,11 +467,11 @@ def murder_workers(self):
except (OSError, ValueError):
continue

if not worker.aborted:
if worker.killed['by'] is None:
self.log.critical("WORKER TIMEOUT (pid:%s)", pid)
worker.aborted = True
self.kill_worker(pid, signal.SIGABRT)
else:
elif (worker.killed['by'] == signal.SIGABRT
and time.monotonic() > worker.killed['when'] + worker.timeout):
self.kill_worker(pid, signal.SIGKILL)

def reap_workers(self):
Expand Down Expand Up @@ -535,7 +535,12 @@ def manage_workers(self):
workers = sorted(workers, key=lambda w: w[1].age)
while len(workers) > self.num_workers:
(pid, _) = workers.pop(0)
self.kill_worker(pid, signal.SIGTERM)
worker = self.WORKERS[pid]
if worker.killed['by'] is None:
self.kill_worker(pid, signal.SIGTERM)
elif (worker.killed['by'] == signal.SIGTERM
and time.monotonic() > worker.killed['when'] + worker.timeout):
self.kill_worker(pid, signal.SIGKILL)

active_worker_count = len(workers)
if self._last_logged_active_worker_count != active_worker_count:
Expand Down Expand Up @@ -621,6 +626,7 @@ def kill_worker(self, pid, sig):
"""
try:
os.kill(pid, sig)
self.WORKERS[pid].killed.update({'by': sig, 'when': time.monotonic()})
except OSError as e:
if e.errno == errno.ESRCH:
try:
Expand Down
2 changes: 1 addition & 1 deletion gunicorn/workers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ def __init__(self, age, ppid, sockets, app, timeout, cfg, log):
self.timeout = timeout
self.cfg = cfg
self.booted = False
self.aborted = False
self.reloader = None
self.killed = {'when': None, 'by': None}

self.nr = 0

Expand Down

0 comments on commit 445c595

Please sign in to comment.