3
0
Fork 0
forked from mirrors/linux
kernel/include/uapi/linux/pidfd.h
Christian Brauner 68db272741
pidfs: ensure that PIDFS_INFO_EXIT is available
When we currently create a pidfd we check that the task hasn't been
reaped right before we create the pidfd. But it is of course possible
that by the time we return the pidfd to userspace the task has already
been reaped since we don't check again after having created a dentry for
it.

This was fine until now because that race was meaningless. But now that
we provide PIDFD_INFO_EXIT it is a problem because it is possible that
the kernel returns a reaped pidfd and it depends on the race whether
PIDFD_INFO_EXIT information is available. This depends on if the task
gets reaped before or after a dentry has been attached to struct pid.

Make this consistent and only returned pidfds for reaped tasks if
PIDFD_INFO_EXIT information is available. This is done by performing
another check whether the task has been reaped right after we attached a
dentry to struct pid.

Since pidfs_exit() is called before struct pid's task linkage is removed
the case where the task got reaped but a dentry was already attached to
struct pid and exit information was recorded and published can be
handled correctly. In that case we do return a pidfd for a reaped task
like we would've before.

Link: https://lore.kernel.org/r/20250316-kabel-fehden-66bdb6a83436@brauner
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-03-19 14:40:18 +01:00

111 lines
4.2 KiB
C

/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI_LINUX_PIDFD_H
#define _UAPI_LINUX_PIDFD_H
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/ioctl.h>
/* Flags for pidfd_open(). */
#define PIDFD_NONBLOCK O_NONBLOCK
#define PIDFD_THREAD O_EXCL
#ifdef __KERNEL__
#include <linux/sched.h>
#define PIDFD_CLONE CLONE_PIDFD
#endif
/* Flags for pidfd_send_signal(). */
#define PIDFD_SIGNAL_THREAD (1UL << 0)
#define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1)
#define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2)
/* Flags for pidfd_info. */
#define PIDFD_INFO_PID (1UL << 0) /* Always returned, even if not requested */
#define PIDFD_INFO_CREDS (1UL << 1) /* Always returned, even if not requested */
#define PIDFD_INFO_CGROUPID (1UL << 2) /* Always returned if available, even if not requested */
#define PIDFD_INFO_EXIT (1UL << 3) /* Only returned if requested. */
#define PIDFD_INFO_SIZE_VER0 64 /* sizeof first published struct */
/*
* The concept of process and threads in userland and the kernel is a confusing
* one - within the kernel every thread is a 'task' with its own individual PID,
* however from userland's point of view threads are grouped by a single PID,
* which is that of the 'thread group leader', typically the first thread
* spawned.
*
* To cut the Gideon knot, for internal kernel usage, we refer to
* PIDFD_SELF_THREAD to refer to the current thread (or task from a kernel
* perspective), and PIDFD_SELF_THREAD_GROUP to refer to the current thread
* group leader...
*/
#define PIDFD_SELF_THREAD -10000 /* Current thread. */
#define PIDFD_SELF_THREAD_GROUP -20000 /* Current thread group leader. */
/*
* ...and for userland we make life simpler - PIDFD_SELF refers to the current
* thread, PIDFD_SELF_PROCESS refers to the process thread group leader.
*
* For nearly all practical uses, a user will want to use PIDFD_SELF.
*/
#define PIDFD_SELF PIDFD_SELF_THREAD
#define PIDFD_SELF_PROCESS PIDFD_SELF_THREAD_GROUP
struct pidfd_info {
/*
* This mask is similar to the request_mask in statx(2).
*
* Userspace indicates what extensions or expensive-to-calculate fields
* they want by setting the corresponding bits in mask. The kernel
* will ignore bits that it does not know about.
*
* When filling the structure, the kernel will only set bits
* corresponding to the fields that were actually filled by the kernel.
* This also includes any future extensions that might be automatically
* filled. If the structure size is too small to contain a field
* (requested or not), to avoid confusion the mask will not
* contain a bit for that field.
*
* As such, userspace MUST verify that mask contains the
* corresponding flags after the ioctl(2) returns to ensure that it is
* using valid data.
*/
__u64 mask;
/*
* The information contained in the following fields might be stale at the
* time it is received, as the target process might have exited as soon as
* the IOCTL was processed, and there is no way to avoid that. However, it
* is guaranteed that if the call was successful, then the information was
* correct and referred to the intended process at the time the work was
* performed. */
__u64 cgroupid;
__u32 pid;
__u32 tgid;
__u32 ppid;
__u32 ruid;
__u32 rgid;
__u32 euid;
__u32 egid;
__u32 suid;
__u32 sgid;
__u32 fsuid;
__u32 fsgid;
__s32 exit_code;
};
#define PIDFS_IOCTL_MAGIC 0xFF
#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1)
#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2)
#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3)
#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4)
#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5)
#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6)
#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7)
#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9)
#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10)
#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info)
#endif /* _UAPI_LINUX_PIDFD_H */