Skip to content

Commit

Permalink
pythongh-104372: Cleanup _posixsubprocess make_inheritable for asyn…
Browse files Browse the repository at this point in the history
…c signal safety and no GIL requirement (python#104518)

Move all of the Python C API calls into the parent process up front
instead of doing PyLong_AsLong and PyErr_Occurred and PyTuple_GET from
the post-fork/vfork child process.

Much of this was long overdue. We shouldn't have been using PyTuple and
PyLong APIs within all of these low level functions anyways.

(cherry picked from commit c649df6)
  • Loading branch information
gpshead committed May 23, 2023
1 parent be20e9c commit de47455
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 34 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Refactored the ``_posixsubprocess`` internals to avoid Python C API usage between fork and exec when marking ``pass_fds=`` file descriptors inheritable.
125 changes: 91 additions & 34 deletions Modules/_posixsubprocess.c
Original file line number Diff line number Diff line change
Expand Up @@ -138,16 +138,17 @@ _sanity_check_python_fd_sequence(PyObject *fd_sequence)

/* Is fd found in the sorted Python Sequence? */
static int
_is_fd_in_sorted_fd_sequence(int fd, PyObject *fd_sequence)
_is_fd_in_sorted_fd_sequence(int fd, int *fd_sequence,
Py_ssize_t fd_sequence_len)
{
/* Binary search. */
Py_ssize_t search_min = 0;
Py_ssize_t search_max = PyTuple_GET_SIZE(fd_sequence) - 1;
Py_ssize_t search_max = fd_sequence_len - 1;
if (search_max < 0)
return 0;
do {
long middle = (search_min + search_max) / 2;
long middle_fd = PyLong_AsLong(PyTuple_GET_ITEM(fd_sequence, middle));
long middle_fd = fd_sequence[middle];
if (fd == middle_fd)
return 1;
if (fd > middle_fd)
Expand All @@ -158,24 +159,56 @@ _is_fd_in_sorted_fd_sequence(int fd, PyObject *fd_sequence)
return 0;
}

/*
* Do all the Python C API calls in the parent process to turn the pass_fds
* "py_fds_to_keep" tuple into a C array. The caller owns allocation and
* freeing of the array.
*
* On error an unknown number of array elements may have been filled in.
* A Python exception has been set when an error is returned.
*
* Returns: -1 on error, 0 on success.
*/
static int
make_inheritable(PyObject *py_fds_to_keep, int errpipe_write)
convert_fds_to_keep_to_c(PyObject *py_fds_to_keep, int *c_fds_to_keep)
{
Py_ssize_t i, len;

len = PyTuple_GET_SIZE(py_fds_to_keep);
for (i = 0; i < len; ++i) {
PyObject* fdobj = PyTuple_GET_ITEM(py_fds_to_keep, i);
long fd = PyLong_AsLong(fdobj);
assert(!PyErr_Occurred());
assert(0 <= fd && fd <= INT_MAX);
if (PyErr_Occurred()) {
return -1;
}
if (fd < 0 || fd > INT_MAX) {
PyErr_SetString(PyExc_ValueError,
"fd out of range in fds_to_keep.");
return -1;
}
c_fds_to_keep[i] = (int)fd;
}
return 0;
}


/* This function must be async-signal-safe as it is called from child_exec()
* after fork() or vfork().
*/
static int
make_inheritable(int *c_fds_to_keep, Py_ssize_t len, int errpipe_write)
{
Py_ssize_t i;

for (i = 0; i < len; ++i) {
int fd = c_fds_to_keep[i];
if (fd == errpipe_write) {
/* errpipe_write is part of py_fds_to_keep. It must be closed at
/* errpipe_write is part of fds_to_keep. It must be closed at
exec(), but kept open in the child process until exec() is
called. */
continue;
}
if (_Py_set_inheritable_async_safe((int)fd, 1, NULL) < 0)
if (_Py_set_inheritable_async_safe(fd, 1, NULL) < 0)
return -1;
}
return 0;
Expand Down Expand Up @@ -211,7 +244,7 @@ safe_get_max_fd(void)


/* Close all file descriptors in the given range except for those in
* py_fds_to_keep by invoking closer on each subrange.
* fds_to_keep by invoking closer on each subrange.
*
* If end_fd == -1, it's guessed via safe_get_max_fd(), but it isn't
* possible to know for sure what the max fd to go up to is for
Expand All @@ -221,19 +254,18 @@ safe_get_max_fd(void)
static int
_close_range_except(int start_fd,
int end_fd,
PyObject *py_fds_to_keep,
int *fds_to_keep,
Py_ssize_t fds_to_keep_len,
int (*closer)(int, int))
{
if (end_fd == -1) {
end_fd = Py_MIN(safe_get_max_fd(), INT_MAX);
}
Py_ssize_t num_fds_to_keep = PyTuple_GET_SIZE(py_fds_to_keep);
Py_ssize_t keep_seq_idx;
/* As py_fds_to_keep is sorted we can loop through the list closing
/* As fds_to_keep is sorted we can loop through the list closing
* fds in between any in the keep list falling within our range. */
for (keep_seq_idx = 0; keep_seq_idx < num_fds_to_keep; ++keep_seq_idx) {
PyObject* py_keep_fd = PyTuple_GET_ITEM(py_fds_to_keep, keep_seq_idx);
int keep_fd = PyLong_AsLong(py_keep_fd);
for (keep_seq_idx = 0; keep_seq_idx < fds_to_keep_len; ++keep_seq_idx) {
int keep_fd = fds_to_keep[keep_seq_idx];
if (keep_fd < start_fd)
continue;
if (closer(start_fd, keep_fd - 1) != 0)
Expand Down Expand Up @@ -273,7 +305,7 @@ _brute_force_closer(int first, int last)
}

/* Close all open file descriptors in the range from start_fd and higher
* Do not close any in the sorted py_fds_to_keep list.
* Do not close any in the sorted fds_to_keep list.
*
* This version is async signal safe as it does not make any unsafe C library
* calls, malloc calls or handle any locks. It is _unfortunate_ to be forced
Expand All @@ -288,14 +320,16 @@ _brute_force_closer(int first, int last)
* it with some cpp #define magic to work on other OSes as well if you want.
*/
static void
_close_open_fds_safe(int start_fd, PyObject* py_fds_to_keep)
_close_open_fds_safe(int start_fd, int *fds_to_keep, Py_ssize_t fds_to_keep_len)
{
int fd_dir_fd;

fd_dir_fd = _Py_open_noraise(FD_DIR, O_RDONLY);
if (fd_dir_fd == -1) {
/* No way to get a list of open fds. */
_close_range_except(start_fd, -1, py_fds_to_keep, _brute_force_closer);
_close_range_except(start_fd, -1,
fds_to_keep, fds_to_keep_len,
_brute_force_closer);
return;
} else {
char buffer[sizeof(struct linux_dirent64)];
Expand All @@ -314,7 +348,8 @@ _close_open_fds_safe(int start_fd, PyObject* py_fds_to_keep)
if ((fd = _pos_int_from_ascii(entry->d_name)) < 0)
continue; /* Not a number. */
if (fd != fd_dir_fd && fd >= start_fd &&
!_is_fd_in_sorted_fd_sequence(fd, py_fds_to_keep)) {
!_is_fd_in_sorted_fd_sequence(fd, fds_to_keep,
fds_to_keep_len)) {
close(fd);
}
}
Expand All @@ -335,7 +370,7 @@ _unsafe_closer(int first, int last)
}

/* Close all open file descriptors from start_fd and higher.
* Do not close any in the sorted py_fds_to_keep tuple.
* Do not close any in the sorted fds_to_keep tuple.
*
* This function violates the strict use of async signal safe functions. :(
* It calls opendir(), readdir() and closedir(). Of these, the one most
Expand All @@ -348,11 +383,13 @@ _unsafe_closer(int first, int last)
* http://womble.decadent.org.uk/readdir_r-advisory.html
*/
static void
_close_open_fds_maybe_unsafe(int start_fd, PyObject* py_fds_to_keep)
_close_open_fds_maybe_unsafe(int start_fd, int *fds_to_keep,
Py_ssize_t fds_to_keep_len)
{
DIR *proc_fd_dir;
#ifndef HAVE_DIRFD
while (_is_fd_in_sorted_fd_sequence(start_fd, py_fds_to_keep)) {
while (_is_fd_in_sorted_fd_sequence(start_fd, fds_to_keep,
fds_to_keep_len)) {
++start_fd;
}
/* Close our lowest fd before we call opendir so that it is likely to
Expand All @@ -371,7 +408,8 @@ _close_open_fds_maybe_unsafe(int start_fd, PyObject* py_fds_to_keep)
proc_fd_dir = opendir(FD_DIR);
if (!proc_fd_dir) {
/* No way to get a list of open fds. */
_close_range_except(start_fd, -1, py_fds_to_keep, _unsafe_closer);
_close_range_except(start_fd, -1, fds_to_keep, fds_to_keep_len,
_unsafe_closer);
} else {
struct dirent *dir_entry;
#ifdef HAVE_DIRFD
Expand All @@ -385,14 +423,16 @@ _close_open_fds_maybe_unsafe(int start_fd, PyObject* py_fds_to_keep)
if ((fd = _pos_int_from_ascii(dir_entry->d_name)) < 0)
continue; /* Not a number. */
if (fd != fd_used_by_opendir && fd >= start_fd &&
!_is_fd_in_sorted_fd_sequence(fd, py_fds_to_keep)) {
!_is_fd_in_sorted_fd_sequence(fd, fds_to_keep,
fds_to_keep_len)) {
close(fd);
}
errno = 0;
}
if (errno) {
/* readdir error, revert behavior. Highly Unlikely. */
_close_range_except(start_fd, -1, py_fds_to_keep, _unsafe_closer);
_close_range_except(start_fd, -1, fds_to_keep, fds_to_keep_len,
_unsafe_closer);
}
closedir(proc_fd_dir);
}
Expand Down Expand Up @@ -420,16 +460,16 @@ _close_range_closer(int first, int last)
#endif

static void
_close_open_fds(int start_fd, PyObject* py_fds_to_keep)
_close_open_fds(int start_fd, int *fds_to_keep, Py_ssize_t fds_to_keep_len)
{
#ifdef HAVE_ASYNC_SAFE_CLOSE_RANGE
if (_close_range_except(
start_fd, INT_MAX, py_fds_to_keep,
start_fd, INT_MAX, fds_to_keep, fds_to_keep_len,
_close_range_closer) == 0) {
return;
}
#endif
_close_open_fds_fallback(start_fd, py_fds_to_keep);
_close_open_fds_fallback(start_fd, fds_to_keep, fds_to_keep_len);
}

#ifdef VFORK_USABLE
Expand Down Expand Up @@ -522,7 +562,7 @@ child_exec(char *const exec_array[],
int call_setgroups, size_t groups_size, const gid_t *groups,
int call_setuid, uid_t uid, int child_umask,
const void *child_sigmask,
PyObject *py_fds_to_keep,
int *fds_to_keep, Py_ssize_t fds_to_keep_len,
PyObject *preexec_fn,
PyObject *preexec_fn_args_tuple)
{
Expand All @@ -532,7 +572,7 @@ child_exec(char *const exec_array[],
/* Buffer large enough to hold a hex integer. We can't malloc. */
char hex_errno[sizeof(saved_errno)*2+1];

if (make_inheritable(py_fds_to_keep, errpipe_write) < 0)
if (make_inheritable(fds_to_keep, fds_to_keep_len, errpipe_write) < 0)
goto error;

/* Close parent's pipe ends. */
Expand Down Expand Up @@ -652,7 +692,7 @@ child_exec(char *const exec_array[],
/* close FDs after executing preexec_fn, which might open FDs */
if (close_fds) {
/* TODO HP-UX could use pstat_getproc() if anyone cares about it. */
_close_open_fds(3, py_fds_to_keep);
_close_open_fds(3, fds_to_keep, fds_to_keep_len);
}

/* This loop matches the Lib/os.py _execvpe()'s PATH search when */
Expand Down Expand Up @@ -726,7 +766,7 @@ do_fork_exec(char *const exec_array[],
int call_setgroups, size_t groups_size, const gid_t *groups,
int call_setuid, uid_t uid, int child_umask,
const void *child_sigmask,
PyObject *py_fds_to_keep,
int *fds_to_keep, Py_ssize_t fds_to_keep_len,
PyObject *preexec_fn,
PyObject *preexec_fn_args_tuple)
{
Expand Down Expand Up @@ -777,7 +817,8 @@ do_fork_exec(char *const exec_array[],
close_fds, restore_signals, call_setsid, pgid_to_set,
call_setgid, gid, call_setgroups, groups_size, groups,
call_setuid, uid, child_umask, child_sigmask,
py_fds_to_keep, preexec_fn, preexec_fn_args_tuple);
fds_to_keep, fds_to_keep_len,
preexec_fn, preexec_fn_args_tuple);
_exit(255);
return 0; /* Dead code to avoid a potential compiler warning. */
}
Expand Down Expand Up @@ -810,6 +851,8 @@ subprocess_fork_exec(PyObject *module, PyObject *args)
int need_after_fork = 0;
int saved_errno = 0;
int allow_vfork;
int *c_fds_to_keep = NULL;
Py_ssize_t fds_to_keep_len = PyTuple_GET_SIZE(py_fds_to_keep);

if (!PyArg_ParseTuple(
args, "OOpO!OOiiiiiiiiii" _Py_PARSE_PID "OOOiOp:fork_exec",
Expand Down Expand Up @@ -983,6 +1026,15 @@ subprocess_fork_exec(PyObject *module, PyObject *args)
#endif /* HAVE_SETREUID */
}

c_fds_to_keep = PyMem_RawMalloc(fds_to_keep_len * sizeof(int));
if (c_fds_to_keep == NULL) {
PyErr_SetString(PyExc_MemoryError, "failed to malloc c_fds_to_keep");
goto cleanup;
}
if (convert_fds_to_keep_to_c(py_fds_to_keep, c_fds_to_keep) < 0) {
goto cleanup;
}

/* This must be the last thing done before fork() because we do not
* want to call PyOS_BeforeFork() if there is any chance of another
* error leading to the cleanup: code without calling fork(). */
Expand Down Expand Up @@ -1025,7 +1077,8 @@ subprocess_fork_exec(PyObject *module, PyObject *args)
close_fds, restore_signals, call_setsid, pgid_to_set,
call_setgid, gid, call_setgroups, num_groups, groups,
call_setuid, uid, child_umask, old_sigmask,
py_fds_to_keep, preexec_fn, preexec_fn_args_tuple);
c_fds_to_keep, fds_to_keep_len,
preexec_fn, preexec_fn_args_tuple);

/* Parent (original) process */
if (pid == -1) {
Expand Down Expand Up @@ -1055,6 +1108,10 @@ subprocess_fork_exec(PyObject *module, PyObject *args)
PyOS_AfterFork_Parent();

cleanup:
if (c_fds_to_keep != NULL) {
PyMem_RawFree(c_fds_to_keep);
}

if (saved_errno != 0) {
errno = saved_errno;
/* We can't call this above as PyOS_AfterFork_Parent() calls back
Expand Down

0 comments on commit de47455

Please sign in to comment.