mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	clone3: allow spawning processes into cgroups
This adds support for creating a process in a different cgroup than its parent. Callers can limit and account processes and threads right from the moment they are spawned: - A service manager can directly spawn new services into dedicated cgroups. - A process can be directly created in a frozen cgroup and will be frozen as well. - The initial accounting jitter experienced by process supervisors and daemons is eliminated with this. - Threaded applications or even thread implementations can choose to create a specific cgroup layout where each thread is spawned directly into a dedicated cgroup. This feature is limited to the unified hierarchy. Callers need to pass a directory file descriptor for the target cgroup. The caller can choose to pass an O_PATH file descriptor. All usual migration restrictions apply, i.e. there can be no processes in inner nodes. In general, creating a process directly in a target cgroup adheres to all migration restrictions. One of the biggest advantages of this feature is that CLONE_INTO_CGROUP does not need to grab the write side of cgroup_threadgroup_rwsem. This global lock makes moving tasks/threads around super expensive. With clone3() this lock is avoided. Cc: Tejun Heo <tj@kernel.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Li Zefan <lizefan@huawei.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: cgroups@vger.kernel.org Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> Signed-off-by: Tejun Heo <tj@kernel.org>
This commit is contained in:
		
							parent
							
								
									f3553220d4
								
							
						
					
					
						commit
						ef2c41cf38
					
				
					 7 changed files with 214 additions and 39 deletions
				
			
		| 
						 | 
				
			
			@ -628,8 +628,9 @@ struct cgroup_subsys {
 | 
			
		|||
	void (*cancel_attach)(struct cgroup_taskset *tset);
 | 
			
		||||
	void (*attach)(struct cgroup_taskset *tset);
 | 
			
		||||
	void (*post_attach)(void);
 | 
			
		||||
	int (*can_fork)(struct task_struct *task);
 | 
			
		||||
	void (*cancel_fork)(struct task_struct *task);
 | 
			
		||||
	int (*can_fork)(struct task_struct *task,
 | 
			
		||||
			struct css_set *cset);
 | 
			
		||||
	void (*cancel_fork)(struct task_struct *task, struct css_set *cset);
 | 
			
		||||
	void (*fork)(struct task_struct *task);
 | 
			
		||||
	void (*exit)(struct task_struct *task);
 | 
			
		||||
	void (*release)(struct task_struct *task);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -27,6 +27,8 @@
 | 
			
		|||
 | 
			
		||||
#include <linux/cgroup-defs.h>
 | 
			
		||||
 | 
			
		||||
struct kernel_clone_args;
 | 
			
		||||
 | 
			
		||||
#ifdef CONFIG_CGROUPS
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
| 
						 | 
				
			
			@ -119,9 +121,12 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
 | 
			
		|||
		     struct pid *pid, struct task_struct *tsk);
 | 
			
		||||
 | 
			
		||||
void cgroup_fork(struct task_struct *p);
 | 
			
		||||
extern int cgroup_can_fork(struct task_struct *p);
 | 
			
		||||
extern void cgroup_cancel_fork(struct task_struct *p);
 | 
			
		||||
extern void cgroup_post_fork(struct task_struct *p);
 | 
			
		||||
extern int cgroup_can_fork(struct task_struct *p,
 | 
			
		||||
			   struct kernel_clone_args *kargs);
 | 
			
		||||
extern void cgroup_cancel_fork(struct task_struct *p,
 | 
			
		||||
			       struct kernel_clone_args *kargs);
 | 
			
		||||
extern void cgroup_post_fork(struct task_struct *p,
 | 
			
		||||
			     struct kernel_clone_args *kargs);
 | 
			
		||||
void cgroup_exit(struct task_struct *p);
 | 
			
		||||
void cgroup_release(struct task_struct *p);
 | 
			
		||||
void cgroup_free(struct task_struct *p);
 | 
			
		||||
| 
						 | 
				
			
			@ -705,9 +710,12 @@ static inline int cgroupstats_build(struct cgroupstats *stats,
 | 
			
		|||
				    struct dentry *dentry) { return -EINVAL; }
 | 
			
		||||
 | 
			
		||||
static inline void cgroup_fork(struct task_struct *p) {}
 | 
			
		||||
static inline int cgroup_can_fork(struct task_struct *p) { return 0; }
 | 
			
		||||
static inline void cgroup_cancel_fork(struct task_struct *p) {}
 | 
			
		||||
static inline void cgroup_post_fork(struct task_struct *p) {}
 | 
			
		||||
static inline int cgroup_can_fork(struct task_struct *p,
 | 
			
		||||
				  struct kernel_clone_args *kargs) { return 0; }
 | 
			
		||||
static inline void cgroup_cancel_fork(struct task_struct *p,
 | 
			
		||||
				      struct kernel_clone_args *kargs) {}
 | 
			
		||||
static inline void cgroup_post_fork(struct task_struct *p,
 | 
			
		||||
				    struct kernel_clone_args *kargs) {}
 | 
			
		||||
static inline void cgroup_exit(struct task_struct *p) {}
 | 
			
		||||
static inline void cgroup_release(struct task_struct *p) {}
 | 
			
		||||
static inline void cgroup_free(struct task_struct *p) {}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -13,6 +13,7 @@
 | 
			
		|||
struct task_struct;
 | 
			
		||||
struct rusage;
 | 
			
		||||
union thread_union;
 | 
			
		||||
struct css_set;
 | 
			
		||||
 | 
			
		||||
/* All the bits taken by the old clone syscall. */
 | 
			
		||||
#define CLONE_LEGACY_FLAGS 0xffffffffULL
 | 
			
		||||
| 
						 | 
				
			
			@ -29,6 +30,9 @@ struct kernel_clone_args {
 | 
			
		|||
	pid_t *set_tid;
 | 
			
		||||
	/* Number of elements in *set_tid */
 | 
			
		||||
	size_t set_tid_size;
 | 
			
		||||
	int cgroup;
 | 
			
		||||
	struct cgroup *cgrp;
 | 
			
		||||
	struct css_set *cset;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -35,6 +35,7 @@
 | 
			
		|||
 | 
			
		||||
/* Flags for the clone3() syscall. */
 | 
			
		||||
#define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */
 | 
			
		||||
#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * cloning flags intersect with CSIGNAL so can be used with unshare and clone3
 | 
			
		||||
| 
						 | 
				
			
			@ -81,6 +82,8 @@
 | 
			
		|||
 * @set_tid_size: This defines the size of the array referenced
 | 
			
		||||
 *                in @set_tid. This cannot be larger than the
 | 
			
		||||
 *                kernel's limit of nested PID namespaces.
 | 
			
		||||
 * @cgroup:       If CLONE_INTO_CGROUP is specified set this to
 | 
			
		||||
 *                a file descriptor for the cgroup.
 | 
			
		||||
 *
 | 
			
		||||
 * The structure is versioned by size and thus extensible.
 | 
			
		||||
 * New struct members must go at the end of the struct and
 | 
			
		||||
| 
						 | 
				
			
			@ -97,11 +100,13 @@ struct clone_args {
 | 
			
		|||
	__aligned_u64 tls;
 | 
			
		||||
	__aligned_u64 set_tid;
 | 
			
		||||
	__aligned_u64 set_tid_size;
 | 
			
		||||
	__aligned_u64 cgroup;
 | 
			
		||||
};
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
 | 
			
		||||
#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */
 | 
			
		||||
#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Scheduling policies
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -5881,8 +5881,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
 | 
			
		|||
 * @child: pointer to task_struct of the child process being forked.
 | 
			
		||||
 *
 | 
			
		||||
 * A task is associated with the init_css_set until cgroup_post_fork()
 | 
			
		||||
 * attaches it to the parent's css_set.  Empty cg_list indicates that
 | 
			
		||||
 * @child isn't holding reference to its css_set.
 | 
			
		||||
 * attaches it to the target css_set.
 | 
			
		||||
 */
 | 
			
		||||
void cgroup_fork(struct task_struct *child)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -5908,24 +5907,154 @@ static struct cgroup *cgroup_get_from_file(struct file *f)
 | 
			
		|||
	return cgrp;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * cgroup_css_set_fork - find or create a css_set for a child process
 | 
			
		||||
 * @kargs: the arguments passed to create the child process
 | 
			
		||||
 *
 | 
			
		||||
 * This function finds or creates a new css_set which the child
 | 
			
		||||
 * process will be attached to in cgroup_post_fork(). By default,
 | 
			
		||||
 * the child process will be given the same css_set as its parent.
 | 
			
		||||
 *
 | 
			
		||||
 * If CLONE_INTO_CGROUP is specified this function will try to find an
 | 
			
		||||
 * existing css_set which includes the requested cgroup and if not create
 | 
			
		||||
 * a new css_set that the child will be attached to later. If this function
 | 
			
		||||
 * succeeds it will hold cgroup_threadgroup_rwsem on return. If
 | 
			
		||||
 * CLONE_INTO_CGROUP is requested this function will grab cgroup mutex
 | 
			
		||||
 * before grabbing cgroup_threadgroup_rwsem and will hold a reference
 | 
			
		||||
 * to the target cgroup.
 | 
			
		||||
 */
 | 
			
		||||
static int cgroup_css_set_fork(struct kernel_clone_args *kargs)
 | 
			
		||||
	__acquires(&cgroup_mutex) __acquires(&cgroup_threadgroup_rwsem)
 | 
			
		||||
{
 | 
			
		||||
	int ret;
 | 
			
		||||
	struct cgroup *dst_cgrp = NULL;
 | 
			
		||||
	struct css_set *cset;
 | 
			
		||||
	struct super_block *sb;
 | 
			
		||||
	struct file *f;
 | 
			
		||||
 | 
			
		||||
	if (kargs->flags & CLONE_INTO_CGROUP)
 | 
			
		||||
		mutex_lock(&cgroup_mutex);
 | 
			
		||||
 | 
			
		||||
	cgroup_threadgroup_change_begin(current);
 | 
			
		||||
 | 
			
		||||
	spin_lock_irq(&css_set_lock);
 | 
			
		||||
	cset = task_css_set(current);
 | 
			
		||||
	get_css_set(cset);
 | 
			
		||||
	spin_unlock_irq(&css_set_lock);
 | 
			
		||||
 | 
			
		||||
	if (!(kargs->flags & CLONE_INTO_CGROUP)) {
 | 
			
		||||
		kargs->cset = cset;
 | 
			
		||||
		return 0;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	f = fget_raw(kargs->cgroup);
 | 
			
		||||
	if (!f) {
 | 
			
		||||
		ret = -EBADF;
 | 
			
		||||
		goto err;
 | 
			
		||||
	}
 | 
			
		||||
	sb = f->f_path.dentry->d_sb;
 | 
			
		||||
 | 
			
		||||
	dst_cgrp = cgroup_get_from_file(f);
 | 
			
		||||
	if (IS_ERR(dst_cgrp)) {
 | 
			
		||||
		ret = PTR_ERR(dst_cgrp);
 | 
			
		||||
		dst_cgrp = NULL;
 | 
			
		||||
		goto err;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (cgroup_is_dead(dst_cgrp)) {
 | 
			
		||||
		ret = -ENODEV;
 | 
			
		||||
		goto err;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Verify that the target cgroup is writable for us. This is
 | 
			
		||||
	 * usually done by the vfs layer but since we're not going through
 | 
			
		||||
	 * the vfs layer here we need to do it "manually".
 | 
			
		||||
	 */
 | 
			
		||||
	ret = cgroup_may_write(dst_cgrp, sb);
 | 
			
		||||
	if (ret)
 | 
			
		||||
		goto err;
 | 
			
		||||
 | 
			
		||||
	ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb,
 | 
			
		||||
					!(kargs->flags & CLONE_THREAD));
 | 
			
		||||
	if (ret)
 | 
			
		||||
		goto err;
 | 
			
		||||
 | 
			
		||||
	kargs->cset = find_css_set(cset, dst_cgrp);
 | 
			
		||||
	if (!kargs->cset) {
 | 
			
		||||
		ret = -ENOMEM;
 | 
			
		||||
		goto err;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	put_css_set(cset);
 | 
			
		||||
	fput(f);
 | 
			
		||||
	kargs->cgrp = dst_cgrp;
 | 
			
		||||
	return ret;
 | 
			
		||||
 | 
			
		||||
err:
 | 
			
		||||
	cgroup_threadgroup_change_end(current);
 | 
			
		||||
	mutex_unlock(&cgroup_mutex);
 | 
			
		||||
	if (f)
 | 
			
		||||
		fput(f);
 | 
			
		||||
	if (dst_cgrp)
 | 
			
		||||
		cgroup_put(dst_cgrp);
 | 
			
		||||
	put_css_set(cset);
 | 
			
		||||
	if (kargs->cset)
 | 
			
		||||
		put_css_set(kargs->cset);
 | 
			
		||||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * cgroup_css_set_put_fork - drop references we took during fork
 | 
			
		||||
 * @kargs: the arguments passed to create the child process
 | 
			
		||||
 *
 | 
			
		||||
 * Drop references to the prepared css_set and target cgroup if
 | 
			
		||||
 * CLONE_INTO_CGROUP was requested.
 | 
			
		||||
 */
 | 
			
		||||
static void cgroup_css_set_put_fork(struct kernel_clone_args *kargs)
 | 
			
		||||
	__releases(&cgroup_threadgroup_rwsem) __releases(&cgroup_mutex)
 | 
			
		||||
{
 | 
			
		||||
	cgroup_threadgroup_change_end(current);
 | 
			
		||||
 | 
			
		||||
	if (kargs->flags & CLONE_INTO_CGROUP) {
 | 
			
		||||
		struct cgroup *cgrp = kargs->cgrp;
 | 
			
		||||
		struct css_set *cset = kargs->cset;
 | 
			
		||||
 | 
			
		||||
		mutex_unlock(&cgroup_mutex);
 | 
			
		||||
 | 
			
		||||
		if (cset) {
 | 
			
		||||
			put_css_set(cset);
 | 
			
		||||
			kargs->cset = NULL;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if (cgrp) {
 | 
			
		||||
			cgroup_put(cgrp);
 | 
			
		||||
			kargs->cgrp = NULL;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * cgroup_can_fork - called on a new task before the process is exposed
 | 
			
		||||
 * @child: the child process
 | 
			
		||||
 *
 | 
			
		||||
 * This prepares a new css_set for the child process which the child will
 | 
			
		||||
 * be attached to in cgroup_post_fork().
 | 
			
		||||
 * This calls the subsystem can_fork() callbacks. If a subsystem's can_fork()
 | 
			
		||||
 * callback returns an error, the fork aborts with that error code. This
 | 
			
		||||
 * allows for a cgroup subsystem to conditionally allow or deny new forks.
 | 
			
		||||
 */
 | 
			
		||||
int cgroup_can_fork(struct task_struct *child)
 | 
			
		||||
	__acquires(&cgroup_threadgroup_rwsem) __releases(&cgroup_threadgroup_rwsem)
 | 
			
		||||
int cgroup_can_fork(struct task_struct *child, struct kernel_clone_args *kargs)
 | 
			
		||||
{
 | 
			
		||||
	struct cgroup_subsys *ss;
 | 
			
		||||
	int i, j, ret;
 | 
			
		||||
 | 
			
		||||
	cgroup_threadgroup_change_begin(current);
 | 
			
		||||
	ret = cgroup_css_set_fork(kargs);
 | 
			
		||||
	if (ret)
 | 
			
		||||
		return ret;
 | 
			
		||||
 | 
			
		||||
	do_each_subsys_mask(ss, i, have_canfork_callback) {
 | 
			
		||||
		ret = ss->can_fork(child);
 | 
			
		||||
		ret = ss->can_fork(child, kargs->cset);
 | 
			
		||||
		if (ret)
 | 
			
		||||
			goto out_revert;
 | 
			
		||||
	} while_each_subsys_mask();
 | 
			
		||||
| 
						 | 
				
			
			@ -5937,32 +6066,34 @@ int cgroup_can_fork(struct task_struct *child)
 | 
			
		|||
		if (j >= i)
 | 
			
		||||
			break;
 | 
			
		||||
		if (ss->cancel_fork)
 | 
			
		||||
			ss->cancel_fork(child);
 | 
			
		||||
			ss->cancel_fork(child, kargs->cset);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	cgroup_threadgroup_change_end(current);
 | 
			
		||||
	cgroup_css_set_put_fork(kargs);
 | 
			
		||||
 | 
			
		||||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
  * cgroup_cancel_fork - called if a fork failed after cgroup_can_fork()
 | 
			
		||||
  * @child: the child process
 | 
			
		||||
  *
 | 
			
		||||
  * This calls the cancel_fork() callbacks if a fork failed *after*
 | 
			
		||||
  * cgroup_can_fork() succeded.
 | 
			
		||||
  */
 | 
			
		||||
void cgroup_cancel_fork(struct task_struct *child)
 | 
			
		||||
	__releases(&cgroup_threadgroup_rwsem)
 | 
			
		||||
 * cgroup_cancel_fork - called if a fork failed after cgroup_can_fork()
 | 
			
		||||
 * @child: the child process
 | 
			
		||||
 * @kargs: the arguments passed to create the child process
 | 
			
		||||
 *
 | 
			
		||||
 * This calls the cancel_fork() callbacks if a fork failed *after*
 | 
			
		||||
 * cgroup_can_fork() succeeded and cleans up references we took to
 | 
			
		||||
 * prepare a new css_set for the child process in cgroup_can_fork().
 | 
			
		||||
 */
 | 
			
		||||
void cgroup_cancel_fork(struct task_struct *child,
 | 
			
		||||
			struct kernel_clone_args *kargs)
 | 
			
		||||
{
 | 
			
		||||
	struct cgroup_subsys *ss;
 | 
			
		||||
	int i;
 | 
			
		||||
 | 
			
		||||
	for_each_subsys(ss, i)
 | 
			
		||||
		if (ss->cancel_fork)
 | 
			
		||||
			ss->cancel_fork(child);
 | 
			
		||||
			ss->cancel_fork(child, kargs->cset);
 | 
			
		||||
 | 
			
		||||
	cgroup_threadgroup_change_end(current);
 | 
			
		||||
	cgroup_css_set_put_fork(kargs);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
| 
						 | 
				
			
			@ -5972,22 +6103,27 @@ void cgroup_cancel_fork(struct task_struct *child)
 | 
			
		|||
 * Attach the child process to its css_set calling the subsystem fork()
 | 
			
		||||
 * callbacks.
 | 
			
		||||
 */
 | 
			
		||||
void cgroup_post_fork(struct task_struct *child)
 | 
			
		||||
	__releases(&cgroup_threadgroup_rwsem)
 | 
			
		||||
void cgroup_post_fork(struct task_struct *child,
 | 
			
		||||
		      struct kernel_clone_args *kargs)
 | 
			
		||||
	__releases(&cgroup_threadgroup_rwsem) __releases(&cgroup_mutex)
 | 
			
		||||
{
 | 
			
		||||
	struct cgroup_subsys *ss;
 | 
			
		||||
	struct css_set *cset;
 | 
			
		||||
	int i;
 | 
			
		||||
 | 
			
		||||
	cset = kargs->cset;
 | 
			
		||||
	kargs->cset = NULL;
 | 
			
		||||
 | 
			
		||||
	spin_lock_irq(&css_set_lock);
 | 
			
		||||
 | 
			
		||||
	/* init tasks are special, only link regular threads */
 | 
			
		||||
	if (likely(child->pid)) {
 | 
			
		||||
		WARN_ON_ONCE(!list_empty(&child->cg_list));
 | 
			
		||||
		cset = task_css_set(current); /* current is @child's parent */
 | 
			
		||||
		get_css_set(cset);
 | 
			
		||||
		cset->nr_tasks++;
 | 
			
		||||
		css_set_move_task(child, NULL, cset, false);
 | 
			
		||||
	} else {
 | 
			
		||||
		put_css_set(cset);
 | 
			
		||||
		cset = NULL;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
| 
						 | 
				
			
			@ -6020,7 +6156,16 @@ void cgroup_post_fork(struct task_struct *child)
 | 
			
		|||
		ss->fork(child);
 | 
			
		||||
	} while_each_subsys_mask();
 | 
			
		||||
 | 
			
		||||
	cgroup_threadgroup_change_end(current);
 | 
			
		||||
	/* Make the new cset the root_cset of the new cgroup namespace. */
 | 
			
		||||
	if (kargs->flags & CLONE_NEWCGROUP) {
 | 
			
		||||
		struct css_set *rcset = child->nsproxy->cgroup_ns->root_cset;
 | 
			
		||||
 | 
			
		||||
		get_css_set(cset);
 | 
			
		||||
		child->nsproxy->cgroup_ns->root_cset = cset;
 | 
			
		||||
		put_css_set(rcset);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	cgroup_css_set_put_fork(kargs);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -33,6 +33,7 @@
 | 
			
		|||
#include <linux/atomic.h>
 | 
			
		||||
#include <linux/cgroup.h>
 | 
			
		||||
#include <linux/slab.h>
 | 
			
		||||
#include <linux/sched/task.h>
 | 
			
		||||
 | 
			
		||||
#define PIDS_MAX (PID_MAX_LIMIT + 1ULL)
 | 
			
		||||
#define PIDS_MAX_STR "max"
 | 
			
		||||
| 
						 | 
				
			
			@ -214,13 +215,16 @@ static void pids_cancel_attach(struct cgroup_taskset *tset)
 | 
			
		|||
 * task_css_check(true) in pids_can_fork() and pids_cancel_fork() relies
 | 
			
		||||
 * on cgroup_threadgroup_change_begin() held by the copy_process().
 | 
			
		||||
 */
 | 
			
		||||
static int pids_can_fork(struct task_struct *task)
 | 
			
		||||
static int pids_can_fork(struct task_struct *task, struct css_set *cset)
 | 
			
		||||
{
 | 
			
		||||
	struct cgroup_subsys_state *css;
 | 
			
		||||
	struct pids_cgroup *pids;
 | 
			
		||||
	int err;
 | 
			
		||||
 | 
			
		||||
	css = task_css_check(current, pids_cgrp_id, true);
 | 
			
		||||
	if (cset)
 | 
			
		||||
		css = cset->subsys[pids_cgrp_id];
 | 
			
		||||
	else
 | 
			
		||||
		css = task_css_check(current, pids_cgrp_id, true);
 | 
			
		||||
	pids = css_pids(css);
 | 
			
		||||
	err = pids_try_charge(pids, 1);
 | 
			
		||||
	if (err) {
 | 
			
		||||
| 
						 | 
				
			
			@ -235,12 +239,15 @@ static int pids_can_fork(struct task_struct *task)
 | 
			
		|||
	return err;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void pids_cancel_fork(struct task_struct *task)
 | 
			
		||||
static void pids_cancel_fork(struct task_struct *task, struct css_set *cset)
 | 
			
		||||
{
 | 
			
		||||
	struct cgroup_subsys_state *css;
 | 
			
		||||
	struct pids_cgroup *pids;
 | 
			
		||||
 | 
			
		||||
	css = task_css_check(current, pids_cgrp_id, true);
 | 
			
		||||
	if (cset)
 | 
			
		||||
		css = cset->subsys[pids_cgrp_id];
 | 
			
		||||
	else
 | 
			
		||||
		css = task_css_check(current, pids_cgrp_id, true);
 | 
			
		||||
	pids = css_pids(css);
 | 
			
		||||
	pids_uncharge(pids, 1);
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2180,7 +2180,7 @@ static __latent_entropy struct task_struct *copy_process(
 | 
			
		|||
	 * between here and cgroup_post_fork() if an organisation operation is in
 | 
			
		||||
	 * progress.
 | 
			
		||||
	 */
 | 
			
		||||
	retval = cgroup_can_fork(p);
 | 
			
		||||
	retval = cgroup_can_fork(p, args);
 | 
			
		||||
	if (retval)
 | 
			
		||||
		goto bad_fork_put_pidfd;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -2287,7 +2287,7 @@ static __latent_entropy struct task_struct *copy_process(
 | 
			
		|||
	write_unlock_irq(&tasklist_lock);
 | 
			
		||||
 | 
			
		||||
	proc_fork_connector(p);
 | 
			
		||||
	cgroup_post_fork(p);
 | 
			
		||||
	cgroup_post_fork(p, args);
 | 
			
		||||
	perf_event_fork(p);
 | 
			
		||||
 | 
			
		||||
	trace_task_newtask(p, clone_flags);
 | 
			
		||||
| 
						 | 
				
			
			@ -2298,7 +2298,7 @@ static __latent_entropy struct task_struct *copy_process(
 | 
			
		|||
bad_fork_cancel_cgroup:
 | 
			
		||||
	spin_unlock(¤t->sighand->siglock);
 | 
			
		||||
	write_unlock_irq(&tasklist_lock);
 | 
			
		||||
	cgroup_cancel_fork(p);
 | 
			
		||||
	cgroup_cancel_fork(p, args);
 | 
			
		||||
bad_fork_put_pidfd:
 | 
			
		||||
	if (clone_flags & CLONE_PIDFD) {
 | 
			
		||||
		fput(pidfile);
 | 
			
		||||
| 
						 | 
				
			
			@ -2627,6 +2627,9 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
 | 
			
		|||
		     !valid_signal(args.exit_signal)))
 | 
			
		||||
		return -EINVAL;
 | 
			
		||||
 | 
			
		||||
	if ((args.flags & CLONE_INTO_CGROUP) && args.cgroup < 0)
 | 
			
		||||
		return -EINVAL;
 | 
			
		||||
 | 
			
		||||
	*kargs = (struct kernel_clone_args){
 | 
			
		||||
		.flags		= args.flags,
 | 
			
		||||
		.pidfd		= u64_to_user_ptr(args.pidfd),
 | 
			
		||||
| 
						 | 
				
			
			@ -2637,6 +2640,7 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
 | 
			
		|||
		.stack_size	= args.stack_size,
 | 
			
		||||
		.tls		= args.tls,
 | 
			
		||||
		.set_tid_size	= args.set_tid_size,
 | 
			
		||||
		.cgroup		= args.cgroup,
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
	if (args.set_tid &&
 | 
			
		||||
| 
						 | 
				
			
			@ -2680,7 +2684,8 @@ static inline bool clone3_stack_valid(struct kernel_clone_args *kargs)
 | 
			
		|||
static bool clone3_args_valid(struct kernel_clone_args *kargs)
 | 
			
		||||
{
 | 
			
		||||
	/* Verify that no unknown flags are passed along. */
 | 
			
		||||
	if (kargs->flags & ~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND))
 | 
			
		||||
	if (kargs->flags &
 | 
			
		||||
	    ~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP))
 | 
			
		||||
		return false;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue