Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH enable faulthandler traceback reporting on worker crash by SIGSEGV #419

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
### 3.5.0 - in development

- Automatically call `faulthandler.enable()` when starting loky worker
processes to report more detailed information (post-mortem Python
tracebacks in particular) on worker crashes (#419).

### 3.4.1 - 2023-06-29

Expand Down
31 changes: 29 additions & 2 deletions loky/process_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
__author__ = "Thomas Moreau (thomas.moreau.2010@gmail.com)"


import faulthandler
import os
import gc
import sys
Expand Down Expand Up @@ -375,6 +376,28 @@
result_queue.put(_ResultItem(work_id, exception=exc))


def _enable_faulthandler_if_needed():
if "PYTHONFAULTHANDLER" in os.environ:
# Respect the environment variable to configure faulthandler. This
# makes it possible to never enable faulthandler in the loky workers by
# setting PYTHONFAULTHANDLER=0 explicitly in the environment.
mp.util.debug(
f"faulthandler explicitly configured by environment variable: "
f"PYTHONFAULTHANDLER={os.environ['PYTHONFAULTHANDLER']}."
)
else:
if faulthandler.is_enabled():
# Fault handler is already enabled, possibly via a custom
# initializer to customize the behavior.
mp.util.debug("faulthandler already enabled.")

Check warning on line 392 in loky/process_executor.py

View check run for this annotation

Codecov / codecov/patch

loky/process_executor.py#L392

Added line #L392 was not covered by tests
Copy link
Collaborator Author

@ogrisel ogrisel Aug 1, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be covered by "Case 4" in the test, but this is not detected by the coverage module because we use subprocess.Popen in this test to get full control over the env and the ability to inspect stderr in isolation from pytest.

else:
# Enable faulthandler by default with default parameters otherwise.
mp.util.debug(
"Enabling faulthandler to report tracebacks on worker crashes."
)
faulthandler.enable()


def _process_worker(
call_queue,
result_queue,
Expand Down Expand Up @@ -421,6 +444,8 @@
pid = os.getpid()

mp.util.debug(f"Worker started with timeout={timeout}")
_enable_faulthandler_if_needed()

while True:
try:
call_item = call_queue.get(block=True, timeout=timeout)
Expand Down Expand Up @@ -710,7 +735,10 @@
"terminated. This could be caused by a segmentation fault "
"while calling the function or by an excessive memory usage "
"causing the Operating System to kill the worker.\n"
f"{exit_codes}"
f"{exit_codes}\n"
"Detailed tracebacks of the workers should have been printed "
"to stderr in the executor process if faulthandler was not "
"disabled."
)

self.thread_wakeup.clear()
Expand Down Expand Up @@ -1014,7 +1042,6 @@


class ShutdownExecutorError(RuntimeError):

"""
Raised when a ProcessPoolExecutor is shutdown while a future was in the
running or pending state.
Expand Down
83 changes: 83 additions & 0 deletions tests/test_reusable_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -949,6 +949,89 @@ def test_reusable_executor_reuse_true(self):
executor4 = get_reusable_executor()
assert executor4 is executor3

def test_faulthandler_enabled(self):
    """Check when a crashing loky worker emits a faulthandler traceback.

    Each scenario runs a small script in a fresh interpreter (via
    ``subprocess.Popen``) so that the environment and the content of stderr
    can be controlled and inspected in isolation from pytest. In the script,
    one task deliberately segfaults its worker with
    ``faulthandler._sigsegv()``; the script exits with status 0 only if the
    resulting ``TerminatedWorkerError`` is caught.

    The ``PYTHONFAULTHANDLER`` variable of the current process is modified
    during the test and restored to its original state afterwards.
    """
    # The "if 1:" prefix makes it possible to keep the script indented
    # inside the triple-quoted string.
    cmd = """if 1:
        import faulthandler
        import sys

        from loky import get_reusable_executor
        from loky.process_executor import TerminatedWorkerError

        def f(i):
            if {expect_enabled}:
                assert faulthandler.is_enabled()
            else:
                assert not faulthandler.is_enabled()
            if i == 5:
                faulthandler._sigsegv()

        if {enable_faulthandler_via_initializer}:
            executor = get_reusable_executor(
                max_workers=2, initializer=faulthandler.enable
            )
        else:
            executor = get_reusable_executor(max_workers=2)
        try:
            list(executor.map(f, range(10)))
        except TerminatedWorkerError:
            # expected
            sys.exit(0)

        raise RuntimeError("Should have raised a TerminatedWorkerError")
    """

    def check_faulthandler_output(
        expect_enabled=True, enable_faulthandler_via_initializer=False
    ):
        """Run the script and check for a faulthandler traceback on stderr."""
        p = subprocess.Popen(
            [
                sys.executable,
                "-c",
                cmd.format(
                    expect_enabled=expect_enabled,
                    enable_faulthandler_via_initializer=(
                        enable_faulthandler_via_initializer
                    ),
                ),
            ],
            stderr=subprocess.PIPE,
            stdout=subprocess.PIPE,
        )
        # communicate() drains both pipes while waiting for the child to
        # exit. Calling wait() first could deadlock if the child fills a
        # pipe buffer before terminating.
        out, err = p.communicate()
        # Exit status 0 means the script caught the TerminatedWorkerError.
        # Any other outcome (including a failed assertion in a worker)
        # terminates the script with an uncaught exception.
        assert p.returncode == 0, (out + err).decode()
        if expect_enabled:
            assert b"Current thread" in err, err.decode()
        else:
            assert b"Current thread" not in err, err.decode()

    original_pythonfaulthandler_env = os.environ.get(
        "PYTHONFAULTHANDLER", None
    )
    try:
        # Case 1: faulthandler should be automatically enabled by default.
        os.environ.pop("PYTHONFAULTHANDLER", None)
        check_faulthandler_output(expect_enabled=True)

        # Case 2: faulthandler should also be enabled when
        # PYTHONFAULTHANDLER=1 is set.
        os.environ["PYTHONFAULTHANDLER"] = "1"
        check_faulthandler_output(expect_enabled=True)

        # Case 3: faulthandler should not be enabled when
        # PYTHONFAULTHANDLER=0 is set explicitly.
        # NOTE(review): the CPython documentation states that any non-empty
        # value of PYTHONFAULTHANDLER enables faulthandler at startup --
        # confirm that "0" really leaves it disabled in the workers.
        os.environ["PYTHONFAULTHANDLER"] = "0"
        check_faulthandler_output(expect_enabled=False)

        # Case 4: faulthandler can also be enabled manually via the
        # initializer.
        os.environ.pop("PYTHONFAULTHANDLER", None)
        check_faulthandler_output(
            expect_enabled=True, enable_faulthandler_via_initializer=True
        )
    finally:
        # Restore the environment exactly as it was before the test.
        if original_pythonfaulthandler_env is None:
            os.environ.pop("PYTHONFAULTHANDLER", None)
        else:
            os.environ["PYTHONFAULTHANDLER"] = original_pythonfaulthandler_env


class TestExecutorInitializer(ReusableExecutorMixin):
def _initializer(self, x):
Expand Down
Loading