Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit f66bd5f

Browse files
authored Jul 12, 2024
Rollup merge of rust-lang#126827 - the8472:pidfd-spawn, r=workingjubilee
Use pidfd_spawn for faster process spawning when a PidFd is requested

glibc 2.39 added `pidfd_spawnp` and `pidfd_getpid`, which make it possible to get pidfds while staying on the CLONE_VFORK path.

Verified that vfork gets used with strace:

```
$ strace -ff -e pidfd_open,clone3,openat,execve,waitid,close ./x test std --no-doc -- pidfd
[...]
[pid 2820532] clone3({flags=CLONE_VM|CLONE_PIDFD|CLONE_VFORK|CLONE_CLEAR_SIGHAND, pidfd=0x7b7f885fec6c, exit_signal=SIGCHLD, stack=0x7b7f88aff000, stack_size=0x9000}strace: Process 2820533 attached <unfinished ...>
[pid 2820533] execve("/home/the8472/bin/sleep", ["sleep", "1000"], 0x7ffdd0e268d8 /* 107 vars */) = -1 ENOENT (No such file or directory)
[pid 2820533] execve("/home/the8472/.cargo/bin/sleep", ["sleep", "1000"], 0x7ffdd0e268d8 /* 107 vars */) = -1 ENOENT (No such file or directory)
[pid 2820533] execve("/usr/local/bin/sleep", ["sleep", "1000"], 0x7ffdd0e268d8 /* 107 vars */) = -1 ENOENT (No such file or directory)
[pid 2820533] execve("/usr/bin/sleep", ["sleep", "1000"], 0x7ffdd0e268d8 /* 107 vars */ <unfinished ...>
[pid 2820532] <... clone3 resumed> => {pidfd=[3]}, 88) = 2820533
[pid 2820533] <... execve resumed>) = 0
[pid 2820532] openat(AT_FDCWD, "/proc/self/fdinfo/3", O_RDONLY|O_CLOEXEC) = 4
[pid 2820532] close(4) = 0
```

Tracking issue: rust-lang#82971
2 parents 1e5cd21 + 17d03b9 commit f66bd5f

File tree

3 files changed

+131
-6
lines changed

3 files changed

+131
-6
lines changed
 

‎std/src/sys/pal/unix/linux/pidfd/tests.rs

+14-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use crate::assert_matches::assert_matches;
22
use crate::os::fd::{AsRawFd, RawFd};
3-
use crate::os::linux::process::{ChildExt, CommandExt};
4-
use crate::os::unix::process::ExitStatusExt;
3+
use crate::os::linux::process::{ChildExt, CommandExt as _};
4+
use crate::os::unix::process::{CommandExt as _, ExitStatusExt};
55
use crate::process::Command;
66

77
#[test]
@@ -21,6 +21,7 @@ fn test_command_pidfd() {
2121
let flags = super::cvt(unsafe { libc::fcntl(pidfd.as_raw_fd(), libc::F_GETFD) }).unwrap();
2222
assert!(flags & libc::FD_CLOEXEC != 0);
2323
}
24+
assert!(child.id() > 0 && child.id() < -1i32 as u32);
2425
let status = child.wait().expect("error waiting on pidfd");
2526
assert_eq!(status.code(), Some(1));
2627

@@ -42,6 +43,17 @@ fn test_command_pidfd() {
4243
.unwrap()
4344
.pidfd()
4445
.expect_err("pidfd should not have been created");
46+
47+
// exercise the fork/exec path since the earlier attempts may have used pidfd_spawnp()
48+
let mut child =
49+
unsafe { Command::new("false").pre_exec(|| Ok(())) }.create_pidfd(true).spawn().unwrap();
50+
51+
assert!(child.id() > 0 && child.id() < -1i32 as u32);
52+
53+
if pidfd_open_available {
54+
assert!(child.pidfd().is_ok())
55+
}
56+
child.wait().expect("error waiting on child");
4557
}
4658

4759
#[test]

‎std/src/sys/pal/unix/mod.rs

+4
Original file line numberDiff line numberDiff line change
@@ -305,10 +305,13 @@ macro_rules! impl_is_minus_one {
305305

306306
impl_is_minus_one! { i8 i16 i32 i64 isize }
307307

308+
/// Convert native return values to Result using the *-1 means error is in `errno`* convention.
309+
/// Non-error values are `Ok`-wrapped.
308310
pub fn cvt<T: IsMinusOne>(t: T) -> crate::io::Result<T> {
309311
if t.is_minus_one() { Err(crate::io::Error::last_os_error()) } else { Ok(t) }
310312
}
311313

314+
/// `-1` → look at `errno` → retry on `EINTR`. Otherwise `Ok()`-wrap the closure return value.
312315
pub fn cvt_r<T, F>(mut f: F) -> crate::io::Result<T>
313316
where
314317
T: IsMinusOne,
@@ -323,6 +326,7 @@ where
323326
}
324327

325328
#[allow(dead_code)] // Not used on all platforms.
329+
/// Zero means `Ok()`, all other values are treated as raw OS errors. Does not look at `errno`.
326330
pub fn cvt_nz(error: libc::c_int) -> crate::io::Result<()> {
327331
if error == 0 { Ok(()) } else { Err(crate::io::Error::from_raw_os_error(error)) }
328332
}

‎std/src/sys/pal/unix/process/process_unix.rs

+113-4
Original file line numberDiff line numberDiff line change
@@ -449,17 +449,82 @@ impl Command {
449449
use crate::mem::MaybeUninit;
450450
use crate::sys::weak::weak;
451451
use crate::sys::{self, cvt_nz, on_broken_pipe_flag_used};
452+
#[cfg(target_os = "linux")]
453+
use core::sync::atomic::{AtomicU8, Ordering};
452454

453455
if self.get_gid().is_some()
454456
|| self.get_uid().is_some()
455457
|| (self.env_saw_path() && !self.program_is_path())
456458
|| !self.get_closures().is_empty()
457459
|| self.get_groups().is_some()
458-
|| self.get_create_pidfd()
459460
{
460461
return Ok(None);
461462
}
462463

464+
cfg_if::cfg_if! {
465+
if #[cfg(target_os = "linux")] {
466+
weak! {
467+
fn pidfd_spawnp(
468+
*mut libc::c_int,
469+
*const libc::c_char,
470+
*const libc::posix_spawn_file_actions_t,
471+
*const libc::posix_spawnattr_t,
472+
*const *mut libc::c_char,
473+
*const *mut libc::c_char
474+
) -> libc::c_int
475+
}
476+
477+
weak! { fn pidfd_getpid(libc::c_int) -> libc::c_int }
478+
479+
static PIDFD_SUPPORTED: AtomicU8 = AtomicU8::new(0);
480+
const UNKNOWN: u8 = 0;
481+
const SPAWN: u8 = 1;
482+
// Obtaining a pidfd via the fork+exec path might work
483+
const FORK_EXEC: u8 = 2;
484+
// Neither pidfd_spawn nor fork/exec will get us a pidfd.
485+
// Instead we'll just posix_spawn if the other preconditions are met.
486+
const NO: u8 = 3;
487+
488+
if self.get_create_pidfd() {
489+
let mut support = PIDFD_SUPPORTED.load(Ordering::Relaxed);
490+
if support == FORK_EXEC {
491+
return Ok(None);
492+
}
493+
if support == UNKNOWN {
494+
support = NO;
495+
let our_pid = crate::process::id();
496+
let pidfd = cvt(unsafe { libc::syscall(libc::SYS_pidfd_open, our_pid, 0) } as c_int);
497+
match pidfd {
498+
Ok(pidfd) => {
499+
support = FORK_EXEC;
500+
if let Some(Ok(pid)) = pidfd_getpid.get().map(|f| cvt(unsafe { f(pidfd) } as i32)) {
501+
if pidfd_spawnp.get().is_some() && pid as u32 == our_pid {
502+
support = SPAWN
503+
}
504+
}
505+
unsafe { libc::close(pidfd) };
506+
}
507+
Err(e) if e.raw_os_error() == Some(libc::EMFILE) => {
508+
// We're temporarily(?) out of file descriptors. In this case obtaining a pidfd would also fail
509+
// Don't update the support flag so we can probe again later.
510+
return Err(e)
511+
}
512+
_ => {}
513+
}
514+
PIDFD_SUPPORTED.store(support, Ordering::Relaxed);
515+
if support == FORK_EXEC {
516+
return Ok(None);
517+
}
518+
}
519+
core::assert_matches::debug_assert_matches!(support, SPAWN | NO);
520+
}
521+
} else {
522+
if self.get_create_pidfd() {
523+
unreachable!("only implemented on linux")
524+
}
525+
}
526+
}
527+
463528
// Only glibc 2.24+ posix_spawn() supports returning ENOENT directly.
464529
#[cfg(all(target_os = "linux", target_env = "gnu"))]
465530
{
@@ -543,9 +608,6 @@ impl Command {
543608

544609
let pgroup = self.get_pgroup();
545610

546-
// Safety: -1 indicates we don't have a pidfd.
547-
let mut p = unsafe { Process::new(0, -1) };
548-
549611
struct PosixSpawnFileActions<'a>(&'a mut MaybeUninit<libc::posix_spawn_file_actions_t>);
550612

551613
impl Drop for PosixSpawnFileActions<'_> {
@@ -640,6 +702,47 @@ impl Command {
640702
#[cfg(target_os = "nto")]
641703
let spawn_fn = retrying_libc_posix_spawnp;
642704

705+
#[cfg(target_os = "linux")]
706+
if self.get_create_pidfd() && PIDFD_SUPPORTED.load(Ordering::Relaxed) == SPAWN {
707+
let mut pidfd: libc::c_int = -1;
708+
let spawn_res = pidfd_spawnp.get().unwrap()(
709+
&mut pidfd,
710+
self.get_program_cstr().as_ptr(),
711+
file_actions.0.as_ptr(),
712+
attrs.0.as_ptr(),
713+
self.get_argv().as_ptr() as *const _,
714+
envp as *const _,
715+
);
716+
717+
let spawn_res = cvt_nz(spawn_res);
718+
if let Err(ref e) = spawn_res
719+
&& e.raw_os_error() == Some(libc::ENOSYS)
720+
{
721+
PIDFD_SUPPORTED.store(FORK_EXEC, Ordering::Relaxed);
722+
return Ok(None);
723+
}
724+
spawn_res?;
725+
726+
let pid = match cvt(pidfd_getpid.get().unwrap()(pidfd)) {
727+
Ok(pid) => pid,
728+
Err(e) => {
729+
// The child has been spawned and we are holding its pidfd.
730+
// But we cannot obtain its pid even though pidfd_getpid support was verified earlier.
731+
// This might happen if libc can't open procfs because the file descriptor limit has been reached.
732+
libc::close(pidfd);
733+
return Err(Error::new(
734+
e.kind(),
735+
"pidfd_spawnp succeeded but the child's PID could not be obtained",
736+
));
737+
}
738+
};
739+
740+
return Ok(Some(Process::new(pid, pidfd)));
741+
}
742+
743+
// Safety: -1 indicates we don't have a pidfd.
744+
let mut p = Process::new(0, -1);
745+
643746
let spawn_res = spawn_fn(
644747
&mut p.pid,
645748
self.get_program_cstr().as_ptr(),
@@ -786,6 +889,12 @@ pub struct Process {
786889

787890
impl Process {
788891
#[cfg(target_os = "linux")]
892+
/// # Safety
893+
///
894+
/// `pidfd` must either be -1 (representing no file descriptor) or a valid, exclusively owned file
895+
/// descriptor (See [I/O Safety]).
896+
///
897+
/// [I/O Safety]: crate::io#io-safety
789898
unsafe fn new(pid: pid_t, pidfd: pid_t) -> Self {
790899
use crate::os::unix::io::FromRawFd;
791900
use crate::sys_common::FromInner;

0 commit comments

Comments
 (0)
Failed to load comments.