diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/3rdparty/forkfd/forkfd.c | 2 | ||||
-rw-r--r-- | src/3rdparty/forkfd/forkfd_linux.c | 251 |
2 files changed, 253 insertions, 0 deletions
diff --git a/src/3rdparty/forkfd/forkfd.c b/src/3rdparty/forkfd/forkfd.c index e4f3bd85de..2ae85d6f37 100644 --- a/src/3rdparty/forkfd/forkfd.c +++ b/src/3rdparty/forkfd/forkfd.c @@ -822,6 +822,8 @@ int forkfd_close(int ffd) #if defined(__FreeBSD__) && __FreeBSD__ >= 9 # include "forkfd_freebsd.c" +#elif defined(__linux__) +# include "forkfd_linux.c" #else int system_has_forkfd() { diff --git a/src/3rdparty/forkfd/forkfd_linux.c b/src/3rdparty/forkfd/forkfd_linux.c new file mode 100644 index 0000000000..ea587b7ed5 --- /dev/null +++ b/src/3rdparty/forkfd/forkfd_linux.c @@ -0,0 +1,251 @@ +/**************************************************************************** +** +** Copyright (C) 2019 Intel Corporation. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and associated documentation files (the "Software"), to deal +** in the Software without restriction, including without limitation the rights +** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +** copies of the Software, and to permit persons to whom the Software is +** furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Software. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +** THE SOFTWARE. +** +****************************************************************************/ + +#ifndef _GNU_SOURCE +# define _GNU_SOURCE +#endif + +#include "forkfd.h" + +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/resource.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "forkfd_atomic.h" + +#ifndef CLONE_PIDFD +# define CLONE_PIDFD 0x00001000 +#endif +#ifndef P_PIDFD +# define P_PIDFD 3 +#endif + +static ffd_atomic_int system_forkfd_state = FFD_ATOMIC_INIT(0); + +static int sys_waitid(int which, int pid_or_pidfd, siginfo_t *infop, int options, + struct rusage *ru) +{ + /* use the waitid raw system call, which has an extra parameter that glibc + * doesn't offer to us */ + return syscall(__NR_waitid, which, pid_or_pidfd, infop, options, ru); +} + +static int sys_clone(unsigned long cloneflags, int *ptid) +{ + void *child_stack = NULL; + int *ctid = NULL; + unsigned long newtls = 0; +#if defined(__NR_clone2) + size_t stack_size = 0; + return syscall(__NR_clone2, cloneflags, child_stack, stack_size, ptid, ctid, newtls); +#elif defined(__cris__) || defined(__s390__) + /* a.k.a., CONFIG_CLONE_BACKWARDS2 architectures */ + return syscall(__NR_clone, child_stack, cloneflags, ptid, newtls, ctid); +#elif defined(__microblaze__) + /* a.k.a., CONFIG_CLONE_BACKWARDS3 architectures */ + size_t stack_size = 0; + return syscall(__NR_clone, cloneflags, child_stack, stack_size, ptid, newtls, ctid); +#elif defined(__arc__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ + defined(__nds32__) || defined(__hppa__) || defined(__powerpc__) || defined(__i386__) || \ + defined(__x86_64__) || defined(__xtensa__) || defined(__alpha__) + /* ctid and newtls are inverted on CONFIG_CLONE_BACKWARDS architectures, + * but since both values are 0, there's no harm. */ + return syscall(__NR_clone, cloneflags, child_stack, ptid, ctid, newtls); +#else + (void) child_stack; + (void) ctid; + (void) newtls; + errno = ENOSYS; + return -1; +#endif +} + +static int detect_clone_pidfd_support() +{ + /* + * Detect support for CLONE_PIDFD and P_PIDFD. Support was added in steps: + * - Linux 5.2 added CLONE_PIDFD support in clone(2) system call + * - Linux 5.2 added pidfd_send_signal(2) + * - Linux 5.3 added support for poll(2) on pidfds + * - Linux 5.3 added clone3(2) + * - Linux 5.4 added P_PIDFD support in waitid(2) + * + * We need CLONE_PIDFD and the poll(2) support. We could emulate the + * P_PIDFD support by reading the PID from /proc/self/fdinfo/n, which works + * in Linux 5.2, but without poll(2), we can't guarantee the functionality + * anyway. + * + * So we detect by trying to waitid(2) on a positive file descriptor that + * is definitely closed (INT_MAX). If P_PIDFD is supported, waitid(2) will + * return EBADF. If it isn't supported, it returns EINVAL (as it would for + * a negative file descriptor). This will succeed on Linux 5.4. + * + * We could have instead detected by the existence of the clone3(2) system + * call, but for that we would have needed to wait for __NR_clone3 to show + * up on the libcs. We choose to go via the waitid(2) route, which requires + * platform-independent constants only. It would have simplified the + * sys_clone() mess above... + */ + + sys_waitid(P_PIDFD, INT_MAX, NULL, WEXITED|WNOHANG, NULL); + return errno == EBADF ? 1 : -1; + +#if 0 + /* Detection methods not used: */ +#ifdef __NR_pidfd_send_signal + /* + * pidfd_send_signal was added at the same time as CLONE_PIDFD, so if this + * system call exists, so does CLONE_PIDFD. We make a system call with a + * file descriptor of -1: if it's supported, we get EBADF; otherwise, the + * typical ENOSYS. + */ + syscall(__NR_pidfd_send_signal, -1, 0, NULL, 0); + return errno == EBADF ? 1 : -1; +#else + /* + * detect kernel CLONE_PIDFD support directly: CLONE_PIDFD | + * CLONE_PARENT_SETTID causes EINVAL on kernel >= 5.2, but on older + * kernels, that combination is ignored. Therefore, if we EINVAL, we know + * that CLONE_PIDFD is supported. + * + * To avoid creating a process unnecessarily, we add CLONE_NEWUTS, which is + * a privileged operation. Therefore, if we're not root, we'll get EPERM. + * If we're root, we need to exit the child process and wait for it on the + * parent. + */ + pid_t pid = sys_clone(CLONE_PIDFD | CLONE_PARENT_SETTID | CLONE_NEWUTS, NULL); + if (pid == -1 && errno == EINVAL) + return 1; + if (pid == 0) + _exit(0); /* Child */ + if (pid > 0) + sys_waitid(P_PID, pid, NULL, WEXITED | __WALL, NULL); /* Parent */ + + return -1; +#endif +#endif // 0 +} + +#if 0 +/* To be used if waitid's P_PIDFD support gets bumped to 5.4 */ +static pid_t pidfd_to_pid(int pidfd) +{ + static const char pidtext[] = "Pid:\t"; + int fdinfo; + ssize_t ret; + char buf[256]; + char *text, *endpid = NULL; + + snprintf(buf, sizeof(buf), "/proc/self/fdinfo/%d", pidfd); + fdinfo = open(buf, O_RDONLY | O_CLOEXEC); + if (fdinfo < 0) + return fdinfo; + + ret = read(fdinfo, buf, sizeof(buf) - 1); + close(fdinfo); + if (ret < 1) + return ret; + + buf[ret] = '\0'; + text = (char *) memmem(buf, ret, pidtext, sizeof(pidtext) - 1); + if (text == NULL) + return -1; + + text += sizeof(pidtext) - 1; + ret = strtol(text, &endpid, 10); + if (ret < 0 || (endpid && *endpid != '\n')) + return -1; + return ret; +} +#endif + +int system_has_forkfd() +{ + return ffd_atomic_load(&system_forkfd_state, FFD_ATOMIC_RELAXED) > 0; +} + +int system_forkfd(int flags, pid_t *ppid, int *system) +{ + pid_t pid; + int pidfd; + + int state = ffd_atomic_load(&system_forkfd_state, FFD_ATOMIC_RELAXED); + if (state == 0) { + state = detect_clone_pidfd_support(); + ffd_atomic_store(&system_forkfd_state, state, FFD_ATOMIC_RELAXED); + } + if (state < 0) { + *system = 0; + return state; + } + + *system = 1; + pid = sys_clone(CLONE_PIDFD, &pidfd); + if (ppid) + *ppid = pid; + + if (pid == 0) { + /* Child process */ + return FFD_CHILD_PROCESS; + } + + /* parent process */ + if ((flags & FFD_CLOEXEC) == 0) { + /* pidfd defaults to O_CLOEXEC */ + fcntl(pidfd, F_SETFD, 0); + } + if (flags & FFD_NONBLOCK) + fcntl(pidfd, F_SETFL, fcntl(pidfd, F_GETFL) | O_NONBLOCK); + return pidfd; +} + +int system_forkfd_wait(int ffd, struct forkfd_info *info, struct rusage *rusage) +{ + siginfo_t si; + int options = WEXITED | __WALL; + int ret = fcntl(ffd, F_GETFL); + if (ret == -1) + return ret; + if (ret & O_NONBLOCK) + options |= WNOHANG; + + ret = sys_waitid(P_PIDFD, ffd, &si, options, rusage); + if (ret == -1 && errno == ECHILD) { + errno = EWOULDBLOCK; + } else if (ret == 0 && info) { + info->code = si.si_code; + info->status = si.si_status; + } + return ret; +} |