Skip to content

Commit

Permalink
Rollup merge of #117957 - the8472:pidfd-wait, r=Mark-Simulacrum
Browse files Browse the repository at this point in the history
if available use a Child's pidfd for kill/wait

This should get us closer to stabilization of pidfds since they now do something useful. And they're `CLOEXEC` now.

```
$ strace -ffe clone,sendmsg,recvmsg,execve,kill,pidfd_open,pidfd_send_signal,waitpid,waitid ./x test std --no-doc -- pidfd

[...]
running 1 tests
strace: Process 816007 attached
[pid 816007] pidfd_open(816006, 0)      = 3
[pid 816007] clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f0c6b787990) = 816008
strace: Process 816008 attached
[pid 816007] recvmsg(3,  <unfinished ...>
[pid 816008] pidfd_open(816008, 0)      = 3
[pid 816008] sendmsg(4, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="", iov_len=0}], msg_iovlen=1, msg_control=[{cmsg_len=20, cmsg_level=SOL_SOCKET, cmsg_type=SCM_RIGHTS, cmsg_data=[3]}], msg_controllen=24, msg_flags=0}, 0) = 0
[pid 816007] <... recvmsg resumed>{msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="", iov_len=0}], msg_iovlen=1, msg_control=[{cmsg_len=20, cmsg_level=SOL_SOCKET, cmsg_type=SCM_RIGHTS, cmsg_data=[4]}], msg_controllen=24, msg_flags=MSG_CMSG_CLOEXEC}, MSG_CMSG_CLOEXEC) = 0
[pid 816008] execve("/usr/bin/false", ["false"], 0x7ffcf2100048 /* 105 vars */) = 0
[pid 816007] waitid(P_PIDFD, 4,  <unfinished ...>
[pid 816008] +++ exited with 1 +++
[pid 816007] <... waitid resumed>{si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=816008, si_uid=1001, si_status=1, si_utime=0, si_stime=0}, WEXITED, NULL) = 0
[pid 816007] --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=816008, si_uid=1001, si_status=1, si_utime=0, si_stime=0} ---
[pid 816007] clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLDstrace: Process 816009 attached
, child_tidptr=0x7f0c6b787990) = 816009
[pid 816007] recvmsg(3,  <unfinished ...>
[pid 816009] pidfd_open(816009, 0)      = 3
[pid 816009] sendmsg(5, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="", iov_len=0}], msg_iovlen=1, msg_control=[{cmsg_len=20, cmsg_level=SOL_SOCKET, cmsg_type=SCM_RIGHTS, cmsg_data=[3]}], msg_controllen=24, msg_flags=0}, 0) = 0
[pid 816007] <... recvmsg resumed>{msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="", iov_len=0}], msg_iovlen=1, msg_control=[{cmsg_len=20, cmsg_level=SOL_SOCKET, cmsg_type=SCM_RIGHTS, cmsg_data=[5]}], msg_controllen=24, msg_flags=MSG_CMSG_CLOEXEC}, MSG_CMSG_CLOEXEC) = 0
[pid 816009] execve("/usr/bin/sleep", ["sleep", "1000"], 0x7ffcf2100048 /* 105 vars */) = 0
[pid 816007] waitid(P_PIDFD, 5, {}, WNOHANG|WEXITED, NULL) = 0
[pid 816007] pidfd_send_signal(5, SIGKILL, NULL, 0) = 0
[pid 816007] waitid(P_PIDFD, 5,  <unfinished ...>
[pid 816009] +++ killed by SIGKILL +++
[pid 816007] <... waitid resumed>{si_signo=SIGCHLD, si_code=CLD_KILLED, si_pid=816009, si_uid=1001, si_status=SIGKILL, si_utime=0, si_stime=0}, WEXITED, NULL) = 0
[pid 816007] --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_KILLED, si_pid=816009, si_uid=1001, si_status=SIGKILL, si_utime=0, si_stime=0} ---
[pid 816007] +++ exited with 0 +++
```
  • Loading branch information
compiler-errors authored Nov 20, 2023
2 parents a7f805d + f34e7f4 commit 6d33e90
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 11 deletions.
6 changes: 6 additions & 0 deletions library/std/src/os/linux/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,12 @@ pub trait CommandExt: Sealed {
/// in a guaranteed race-free manner (e.g. if the `clone3` system call
/// is supported). Otherwise, [`pidfd`] will return an error.
///
/// If a pidfd has been successfully created and not been taken from the `Child`
/// then calls to `kill()`, `wait()` and `try_wait()` will use the pidfd
/// instead of the pid. This can prevent pid recycling races, e.g.
/// those caused by rogue libraries in the same process prematurely reaping
/// zombie children via `waitpid(-1, ...)` calls.
///
/// [`Command`]: process::Command
/// [`Child`]: process::Child
/// [`pidfd`]: fn@ChildExt::pidfd
Expand Down
76 changes: 68 additions & 8 deletions library/std/src/sys/unix/process/process_unix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ use core::ffi::NonZero_c_int;

#[cfg(target_os = "linux")]
use crate::os::linux::process::PidFd;
#[cfg(target_os = "linux")]
use crate::os::unix::io::AsRawFd;

#[cfg(any(
target_os = "macos",
Expand Down Expand Up @@ -696,11 +698,12 @@ impl Command {

msg.msg_iov = &mut iov as *mut _ as *mut _;
msg.msg_iovlen = 1;
msg.msg_controllen = mem::size_of_val(&cmsg.buf) as _;
msg.msg_control = &mut cmsg.buf as *mut _ as *mut _;

// only attach cmsg if we successfully acquired the pidfd
if pidfd >= 0 {
msg.msg_controllen = mem::size_of_val(&cmsg.buf) as _;
msg.msg_control = &mut cmsg.buf as *mut _ as *mut _;

let hdr = CMSG_FIRSTHDR(&mut msg as *mut _ as *mut _);
(*hdr).cmsg_level = SOL_SOCKET;
(*hdr).cmsg_type = SCM_RIGHTS;
Expand All @@ -717,7 +720,7 @@ impl Command {
// so we get a consistent SEQPACKET order
match cvt_r(|| libc::sendmsg(sock.as_raw(), &msg, 0)) {
Ok(0) => {}
_ => rtabort!("failed to communicate with parent process"),
other => rtabort!("failed to communicate with parent process. {:?}", other),
}
}
}
Expand Down Expand Up @@ -748,7 +751,7 @@ impl Command {
msg.msg_controllen = mem::size_of::<Cmsg>() as _;
msg.msg_control = &mut cmsg as *mut _ as *mut _;

match cvt_r(|| libc::recvmsg(sock.as_raw(), &mut msg, 0)) {
match cvt_r(|| libc::recvmsg(sock.as_raw(), &mut msg, libc::MSG_CMSG_CLOEXEC)) {
Err(_) => return -1,
Ok(_) => {}
}
Expand Down Expand Up @@ -787,7 +790,7 @@ pub struct Process {
// On Linux, stores the pidfd created for this child.
// This is None if the user did not request pidfd creation,
// or if the pidfd could not be created for some reason
// (e.g. the `clone3` syscall was not available).
// (e.g. the `pidfd_open` syscall was not available).
#[cfg(target_os = "linux")]
pidfd: Option<PidFd>,
}
Expand Down Expand Up @@ -816,17 +819,41 @@ impl Process {
// and used for another process, and we probably shouldn't be killing
// random processes, so return Ok because the process has exited already.
if self.status.is_some() {
Ok(())
} else {
cvt(unsafe { libc::kill(self.pid, libc::SIGKILL) }).map(drop)
return Ok(());
}
#[cfg(target_os = "linux")]
if let Some(pid_fd) = self.pidfd.as_ref() {
// pidfd_send_signal predates pidfd_open. so if we were able to get an fd then sending signals will work too
return cvt(unsafe {
libc::syscall(
libc::SYS_pidfd_send_signal,
pid_fd.as_raw_fd(),
libc::SIGKILL,
crate::ptr::null::<()>(),
0,
)
})
.map(drop);
}
cvt(unsafe { libc::kill(self.pid, libc::SIGKILL) }).map(drop)
}

pub fn wait(&mut self) -> io::Result<ExitStatus> {
use crate::sys::cvt_r;
if let Some(status) = self.status {
return Ok(status);
}
#[cfg(target_os = "linux")]
if let Some(pid_fd) = self.pidfd.as_ref() {
let mut siginfo: libc::siginfo_t = unsafe { crate::mem::zeroed() };

cvt_r(|| unsafe {
libc::waitid(libc::P_PIDFD, pid_fd.as_raw_fd() as u32, &mut siginfo, libc::WEXITED)
})?;
let status = ExitStatus::from_waitid_siginfo(siginfo);
self.status = Some(status);
return Ok(status);
}
let mut status = 0 as c_int;
cvt_r(|| unsafe { libc::waitpid(self.pid, &mut status, 0) })?;
self.status = Some(ExitStatus::new(status));
Expand All @@ -837,6 +864,25 @@ impl Process {
if let Some(status) = self.status {
return Ok(Some(status));
}
#[cfg(target_os = "linux")]
if let Some(pid_fd) = self.pidfd.as_ref() {
let mut siginfo: libc::siginfo_t = unsafe { crate::mem::zeroed() };

cvt(unsafe {
libc::waitid(
libc::P_PIDFD,
pid_fd.as_raw_fd() as u32,
&mut siginfo,
libc::WEXITED | libc::WNOHANG,
)
})?;
if unsafe { siginfo.si_pid() } == 0 {
return Ok(None);
}
let status = ExitStatus::from_waitid_siginfo(siginfo);
self.status = Some(status);
return Ok(Some(status));
}
let mut status = 0 as c_int;
let pid = cvt(unsafe { libc::waitpid(self.pid, &mut status, libc::WNOHANG) })?;
if pid == 0 {
Expand Down Expand Up @@ -866,6 +912,20 @@ impl ExitStatus {
ExitStatus(status)
}

#[cfg(target_os = "linux")]
pub fn from_waitid_siginfo(siginfo: libc::siginfo_t) -> ExitStatus {
let status = unsafe { siginfo.si_status() };

match siginfo.si_code {
libc::CLD_EXITED => ExitStatus((status & 0xff) << 8),
libc::CLD_KILLED => ExitStatus(status),
libc::CLD_DUMPED => ExitStatus(status | 0x80),
libc::CLD_CONTINUED => ExitStatus(0xffff),
libc::CLD_STOPPED | libc::CLD_TRAPPED => ExitStatus(((status & 0xff) << 8) | 0x7f),
_ => unreachable!("waitid() should only return the above codes"),
}
}

fn exited(&self) -> bool {
libc::WIFEXITED(self.0)
}
Expand Down
19 changes: 16 additions & 3 deletions library/std/src/sys/unix/process/process_unix/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ fn test_command_fork_no_unwind() {
#[test]
#[cfg(target_os = "linux")]
fn test_command_pidfd() {
use crate::os::fd::RawFd;
use crate::assert_matches::assert_matches;
use crate::os::fd::{AsRawFd, RawFd};
use crate::os::linux::process::{ChildExt, CommandExt};
use crate::process::Command;

Expand All @@ -78,10 +79,22 @@ fn test_command_pidfd() {
};

// always exercise creation attempts
let child = Command::new("echo").create_pidfd(true).spawn().unwrap();
let mut child = Command::new("false").create_pidfd(true).spawn().unwrap();

// but only check if we know that the kernel supports pidfds
if pidfd_open_available {
assert!(child.pidfd().is_ok())
assert!(child.pidfd().is_ok());
}
if let Ok(pidfd) = child.pidfd() {
let flags = super::cvt(unsafe { libc::fcntl(pidfd.as_raw_fd(), libc::F_GETFD) }).unwrap();
assert!(flags & libc::FD_CLOEXEC != 0);
}
let status = child.wait().expect("error waiting on pidfd");
assert_eq!(status.code(), Some(1));

let mut child = Command::new("sleep").arg("1000").create_pidfd(true).spawn().unwrap();
assert_matches!(child.try_wait(), Ok(None));
child.kill().expect("failed to kill child");
let status = child.wait().expect("error waiting on pidfd");
assert_eq!(status.signal(), Some(libc::SIGKILL));
}

0 comments on commit 6d33e90

Please sign in to comment.