summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/3rdparty/forkfd/forkfd.c2
-rw-r--r--src/3rdparty/forkfd/forkfd_linux.c251
2 files changed, 253 insertions, 0 deletions
diff --git a/src/3rdparty/forkfd/forkfd.c b/src/3rdparty/forkfd/forkfd.c
index e4f3bd85de..2ae85d6f37 100644
--- a/src/3rdparty/forkfd/forkfd.c
+++ b/src/3rdparty/forkfd/forkfd.c
@@ -822,6 +822,8 @@ int forkfd_close(int ffd)
#if defined(__FreeBSD__) && __FreeBSD__ >= 9
# include "forkfd_freebsd.c"
+#elif defined(__linux__)
+# include "forkfd_linux.c"
#else
int system_has_forkfd()
{
diff --git a/src/3rdparty/forkfd/forkfd_linux.c b/src/3rdparty/forkfd/forkfd_linux.c
new file mode 100644
index 0000000000..ea587b7ed5
--- /dev/null
+++ b/src/3rdparty/forkfd/forkfd_linux.c
@@ -0,0 +1,251 @@
+/****************************************************************************
+**
+** Copyright (C) 2019 Intel Corporation.
+**
+** Permission is hereby granted, free of charge, to any person obtaining a copy
+** of this software and associated documentation files (the "Software"), to deal
+** in the Software without restriction, including without limitation the rights
+** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+** copies of the Software, and to permit persons to whom the Software is
+** furnished to do so, subject to the following conditions:
+**
+** The above copyright notice and this permission notice shall be included in
+** all copies or substantial portions of the Software.
+**
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+** THE SOFTWARE.
+**
+****************************************************************************/
+
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif
+
+#include "forkfd.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "forkfd_atomic.h"
+
+/* Fallback definitions, used only when the included system headers do not
+ * already provide these constants. */
+#ifndef CLONE_PIDFD
+# define CLONE_PIDFD 0x00001000
+#endif
+#ifndef P_PIDFD
+# define P_PIDFD 3
+#endif
+
+/* Cached result of the kernel feature probe, as used in this file:
+ *   0  - initial value, probe not run yet
+ *  >0  - system_forkfd() may use the pidfd-based implementation
+ *  <0  - probe ran and reported no support
+ */
+static ffd_atomic_int system_forkfd_state = FFD_ATOMIC_INIT(0);
+
+/*
+ * Issues the raw waitid system call through syscall(2).
+ *
+ * The raw call is used instead of the C library wrapper because it accepts
+ * an extra, fifth argument (a struct rusage pointer) that glibc does not
+ * expose.
+ *
+ * which, pid_or_pidfd, infop and options are forwarded unchanged; ru is the
+ * extra argument and callers in this file pass NULL when they do not need it.
+ *
+ * Returns the value produced by syscall(2), converted to int.
+ */
+static int sys_waitid(int which, int pid_or_pidfd, siginfo_t *infop, int options,
+                      struct rusage *ru)
+{
+    long result = syscall(__NR_waitid, which, pid_or_pidfd, infop, options, ru);
+
+    return (int) result;
+}
+
+/*
+ * Issues the raw clone (or clone2) system call with no child stack, no
+ * child-TID pointer and no TLS value, so only the flags and the parent-TID
+ * pointer are meaningful.
+ *
+ * The kernel's argument order differs between architectures, hence the
+ * preprocessor ladder. On an architecture not listed below no system call is
+ * made: errno is set to ENOSYS and -1 is returned.
+ *
+ * cloneflags is passed through as the clone flags; ptid is passed as the
+ * parent-TID pointer (system_forkfd() passes the address of its pidfd
+ * variable here together with CLONE_PIDFD).
+ *
+ * Returns the value produced by syscall(2), converted to int.
+ */
+static int sys_clone(unsigned long cloneflags, int *ptid)
+{
+    void *stack = NULL;
+    int *child_tid = NULL;
+    unsigned long tls = 0;
+    long rc;
+
+#if defined(__NR_clone2)
+    size_t stack_len = 0;
+    rc = syscall(__NR_clone2, cloneflags, stack, stack_len, ptid, child_tid, tls);
+#elif defined(__cris__) || defined(__s390__)
+    /* CONFIG_CLONE_BACKWARDS2: the stack pointer comes before the flags */
+    rc = syscall(__NR_clone, stack, cloneflags, ptid, tls, child_tid);
+#elif defined(__microblaze__)
+    /* CONFIG_CLONE_BACKWARDS3: an explicit stack size follows the stack */
+    size_t stack_len = 0;
+    rc = syscall(__NR_clone, cloneflags, stack, stack_len, ptid, tls, child_tid);
+#elif defined(__arc__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    defined(__nds32__) || defined(__hppa__) || defined(__powerpc__) || defined(__i386__) || \
+    defined(__x86_64__) || defined(__xtensa__) || defined(__alpha__)
+    /* CONFIG_CLONE_BACKWARDS architectures swap the child-TID and TLS
+     * arguments; both are zero here, so one ordering serves them all. */
+    rc = syscall(__NR_clone, cloneflags, stack, ptid, child_tid, tls);
+#else
+    /* unknown argument order: refuse rather than guess */
+    (void) stack;
+    (void) child_tid;
+    (void) tls;
+    errno = ENOSYS;
+    rc = -1;
+#endif
+
+    return (int) rc;
+}
+
+/*
+ * Probes the running kernel for the features the pidfd-based forkfd needs.
+ *
+ * Returns 1 when the probe indicates support and -1 otherwise. It never
+ * returns 0, which system_forkfd() uses to mean "not probed yet".
+ */
+static int detect_clone_pidfd_support(void)
+{
+    /*
+     * Detect support for CLONE_PIDFD and P_PIDFD. Support was added in steps:
+     * - Linux 5.2 added CLONE_PIDFD support in clone(2) system call
+     * - Linux 5.2 added pidfd_send_signal(2)
+     * - Linux 5.3 added support for poll(2) on pidfds
+     * - Linux 5.3 added clone3(2)
+     * - Linux 5.4 added P_PIDFD support in waitid(2)
+     *
+     * We need CLONE_PIDFD and the poll(2) support. We could emulate the
+     * P_PIDFD support by reading the PID from /proc/self/fdinfo/n, which works
+     * in Linux 5.2, but without poll(2), we can't guarantee the functionality
+     * anyway.
+     *
+     * So we detect by trying to waitid(2) on a positive file descriptor that
+     * is definitely closed (INT_MAX). If P_PIDFD is supported, waitid(2) will
+     * return EBADF. If it isn't supported, it returns EINVAL (as it would for
+     * a negative file descriptor). This will succeed on Linux 5.4.
+     *
+     * We could have instead detected by the existence of the clone3(2) system
+     * call, but for that we would have needed to wait for __NR_clone3 to show
+     * up on the libcs. We choose to go via the waitid(2) route, which requires
+     * platform-independent constants only. It would have simplified the
+     * sys_clone() mess above...
+     */
+    const int ret = sys_waitid(P_PIDFD, INT_MAX, NULL, WEXITED | WNOHANG, NULL);
+
+    /* errno is only meaningful if the call actually failed; otherwise it may
+     * hold a stale value left by some earlier, unrelated call */
+    if (ret == -1 && errno == EBADF)
+        return 1;
+    return -1;
+
+#if 0
+    /* Detection methods not used: */
+#ifdef __NR_pidfd_send_signal
+    /*
+     * pidfd_send_signal was added at the same time as CLONE_PIDFD, so if this
+     * system call exists, so does CLONE_PIDFD. We make a system call with a
+     * file descriptor of -1: if it's supported, we get EBADF; otherwise, the
+     * typical ENOSYS.
+     */
+    syscall(__NR_pidfd_send_signal, -1, 0, NULL, 0);
+    return errno == EBADF ? 1 : -1;
+#else
+    /*
+     * detect kernel CLONE_PIDFD support directly: CLONE_PIDFD |
+     * CLONE_PARENT_SETTID causes EINVAL on kernel >= 5.2, but on older
+     * kernels, that combination is ignored. Therefore, if we EINVAL, we know
+     * that CLONE_PIDFD is supported.
+     *
+     * To avoid creating a process unnecessarily, we add CLONE_NEWUTS, which is
+     * a privileged operation. Therefore, if we're not root, we'll get EPERM.
+     * If we're root, we need to exit the child process and wait for it on the
+     * parent.
+     */
+    pid_t pid = sys_clone(CLONE_PIDFD | CLONE_PARENT_SETTID | CLONE_NEWUTS, NULL);
+    if (pid == -1 && errno == EINVAL)
+        return 1;
+    if (pid == 0)
+        _exit(0); /* Child */
+    if (pid > 0)
+        sys_waitid(P_PID, pid, NULL, WEXITED | __WALL, NULL); /* Parent */
+
+    return -1;
+#endif
+#endif // 0
+}
+
+#if 0
+/*
+ * Compiled out. To be used if waitid's P_PIDFD support gets bumped to 5.4.
+ *
+ * Looks up the process ID behind a pidfd by reading the "Pid:" field of
+ * /proc/self/fdinfo/<pidfd>.
+ *
+ * Returns the non-negative value of that field, or -1 if the file cannot be
+ * opened or read, the field is missing from the first 255 bytes, or the
+ * field does not hold a non-negative decimal number ending the line.
+ */
+static pid_t pidfd_to_pid(int pidfd)
+{
+    static const char pidtext[] = "Pid:\t";
+    int fdinfo;
+    ssize_t nread;
+    long pid;
+    char buf[256];
+    char *text, *endpid = NULL;
+
+    snprintf(buf, sizeof(buf), "/proc/self/fdinfo/%d", pidfd);
+    fdinfo = open(buf, O_RDONLY | O_CLOEXEC);
+    if (fdinfo < 0)
+        return -1;
+
+    nread = read(fdinfo, buf, sizeof(buf) - 1);
+    close(fdinfo);
+    if (nread < 1)
+        return -1;  /* an empty read must not be reported as PID 0 */
+
+    buf[nread] = '\0';
+    text = (char *) memmem(buf, nread, pidtext, sizeof(pidtext) - 1);
+    if (text == NULL)
+        return -1;
+
+    text += sizeof(pidtext) - 1;
+    pid = strtol(text, &endpid, 10);
+    /* reject a field with no digits, trailing garbage (including a line cut
+     * off by the buffer size), negative values and values pid_t can't hold */
+    if (endpid == text || *endpid != '\n' || pid < 0 || pid > INT_MAX)
+        return -1;
+    return (pid_t) pid;
+}
+#endif
+
+/*
+ * Reports whether the cached probe result is positive.
+ *
+ * This only reads system_forkfd_state; it does not run the probe itself. The
+ * state starts at 0 and is filled in by system_forkfd(), so until that has
+ * run detection this returns 0.
+ *
+ * Returns 1 if the cached state is greater than zero, 0 otherwise.
+ */
+int system_has_forkfd()
+{
+    const int state = ffd_atomic_load(&system_forkfd_state, FFD_ATOMIC_RELAXED);
+
+    return state > 0;
+}
+
+/*
+ * Creates a child process with clone(CLONE_PIDFD) and returns a file
+ * descriptor referring to it.
+ *
+ * flags:  FFD_CLOEXEC keeps the descriptor close-on-exec (the pidfd default);
+ *         FFD_NONBLOCK additionally sets O_NONBLOCK on it.
+ * ppid:   if not NULL, receives sys_clone()'s return value (the child's PID
+ *         in the parent, 0 in the child, -1 if the clone failed).
+ * system: set to 0 when the probe reports no kernel support, in which case
+ *         no process is created; set to 1 when the clone was attempted.
+ *
+ * Returns the negative probe state if unsupported, -1 if the clone failed,
+ * FFD_CHILD_PROCESS in the child, and the pidfd in the parent.
+ */
+int system_forkfd(int flags, pid_t *ppid, int *system)
+{
+    pid_t pid;
+    int pidfd = -1;
+
+    /* run the kernel probe on first use and cache its result */
+    int state = ffd_atomic_load(&system_forkfd_state, FFD_ATOMIC_RELAXED);
+    if (state == 0) {
+        state = detect_clone_pidfd_support();
+        ffd_atomic_store(&system_forkfd_state, state, FFD_ATOMIC_RELAXED);
+    }
+    if (state < 0) {
+        *system = 0;
+        return state;
+    }
+
+    *system = 1;
+    pid = sys_clone(CLONE_PIDFD, &pidfd);
+    if (ppid)
+        *ppid = pid;
+
+    if (pid < 0) {
+        /* the clone failed, so no pidfd was produced: report the error
+         * instead of calling fcntl() on, and returning, a bogus descriptor */
+        return -1;
+    }
+
+    if (pid == 0) {
+        /* Child process */
+        return FFD_CHILD_PROCESS;
+    }
+
+    /* parent process */
+    if ((flags & FFD_CLOEXEC) == 0) {
+        /* pidfd defaults to O_CLOEXEC */
+        fcntl(pidfd, F_SETFD, 0);
+    }
+    if (flags & FFD_NONBLOCK)
+        fcntl(pidfd, F_SETFL, fcntl(pidfd, F_GETFL) | O_NONBLOCK);
+    return pidfd;
+}
+
+/*
+ * Waits, via waitid(P_PIDFD), for the child behind a descriptor returned by
+ * system_forkfd().
+ *
+ * ffd:    the pidfd. If it has O_NONBLOCK set, the wait is done with WNOHANG.
+ * info:   if not NULL and the wait returns 0, receives si_code and si_status.
+ * rusage: forwarded to the raw waitid call as its resource-usage argument.
+ *
+ * Returns the waitid result, or -1 if the descriptor's flags cannot be read.
+ * An ECHILD failure from waitid is reported to the caller as EWOULDBLOCK.
+ */
+int system_forkfd_wait(int ffd, struct forkfd_info *info, struct rusage *rusage)
+{
+    siginfo_t si;
+    int waitflags = WEXITED | __WALL;
+    int rc;
+    const int fdflags = fcntl(ffd, F_GETFL);
+
+    if (fdflags == -1)
+        return fdflags;
+    if ((fdflags & O_NONBLOCK) != 0)
+        waitflags |= WNOHANG;
+
+    rc = sys_waitid(P_PIDFD, ffd, &si, waitflags, rusage);
+    if (rc == 0) {
+        if (info) {
+            info->code = si.si_code;
+            info->status = si.si_status;
+        }
+    } else if (rc == -1 && errno == ECHILD) {
+        errno = EWOULDBLOCK;
+    }
+    return rc;
+}