mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	Merge branch 'ucount-fixes-for-v5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull ucounts fixes from Eric Biederman: "There has been one very hard to track down bug in the ucount code that we have been tracking since roughly v5.14 was released. Alex managed to find a reliable reproducer a few days ago and then I was able to instrument the code and figure out what the issue was. It turns out the sigqueue_alloc single atomic operation optimization did not play nicely with ucounts multiple level rlimits. It turned out that either sigqueue_alloc or sigqueue_free could be operating on multiple levels and trigger the conditions for the optimization on more than one level at the same time. To deal with that situation I have introduced inc_rlimit_get_ucounts and dec_rlimit_put_ucounts that just focus on the optimization and the rlimit and ucount changes. While looking into the big bug I found a couple of other little issues so I am including those fixes here as well. When I have time I would very much like to dig into process ownership of the shared signal queue and see if we could pick a single owner for the entire queue so that all of the rlimits can count to that owner. That should entirely remove the need to call get_ucounts and put_ucounts in sigqueue_alloc and sigqueue_free. It is difficult because Linux, unlike POSIX, supports setuid that works on a single thread." * 'ucount-fixes-for-v5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: ucounts: Move get_ucounts from cred_alloc_blank to key_change_session_keyring ucounts: Proper error handling in set_cred_ucounts ucounts: Pair inc_rlimit_ucounts with dec_rlimit_ucounts in commit_creds ucounts: Fix signal ucount refcounting
This commit is contained in:
		
						commit
						9d235ac01f
					
				
					 5 changed files with 69 additions and 24 deletions
				
			
		| 
						 | 
					@ -127,6 +127,8 @@ static inline long get_ucounts_value(struct ucounts *ucounts, enum ucount_type t
 | 
				
			||||||
 | 
					
 | 
				
			||||||
long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
 | 
					long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
 | 
				
			||||||
bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
 | 
					bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
 | 
				
			||||||
 | 
					long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type);
 | 
				
			||||||
 | 
					void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type);
 | 
				
			||||||
bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max);
 | 
					bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline void set_rlimit_ucount_max(struct user_namespace *ns,
 | 
					static inline void set_rlimit_ucount_max(struct user_namespace *ns,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -225,8 +225,6 @@ struct cred *cred_alloc_blank(void)
 | 
				
			||||||
#ifdef CONFIG_DEBUG_CREDENTIALS
 | 
					#ifdef CONFIG_DEBUG_CREDENTIALS
 | 
				
			||||||
	new->magic = CRED_MAGIC;
 | 
						new->magic = CRED_MAGIC;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
	new->ucounts = get_ucounts(&init_ucounts);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0)
 | 
						if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0)
 | 
				
			||||||
		goto error;
 | 
							goto error;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -501,7 +499,7 @@ int commit_creds(struct cred *new)
 | 
				
			||||||
		inc_rlimit_ucounts(new->ucounts, UCOUNT_RLIMIT_NPROC, 1);
 | 
							inc_rlimit_ucounts(new->ucounts, UCOUNT_RLIMIT_NPROC, 1);
 | 
				
			||||||
	rcu_assign_pointer(task->real_cred, new);
 | 
						rcu_assign_pointer(task->real_cred, new);
 | 
				
			||||||
	rcu_assign_pointer(task->cred, new);
 | 
						rcu_assign_pointer(task->cred, new);
 | 
				
			||||||
	if (new->user != old->user)
 | 
						if (new->user != old->user || new->user_ns != old->user_ns)
 | 
				
			||||||
		dec_rlimit_ucounts(old->ucounts, UCOUNT_RLIMIT_NPROC, 1);
 | 
							dec_rlimit_ucounts(old->ucounts, UCOUNT_RLIMIT_NPROC, 1);
 | 
				
			||||||
	alter_cred_subscribers(old, -2);
 | 
						alter_cred_subscribers(old, -2);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -669,7 +667,7 @@ int set_cred_ucounts(struct cred *new)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct task_struct *task = current;
 | 
						struct task_struct *task = current;
 | 
				
			||||||
	const struct cred *old = task->real_cred;
 | 
						const struct cred *old = task->real_cred;
 | 
				
			||||||
	struct ucounts *old_ucounts = new->ucounts;
 | 
						struct ucounts *new_ucounts, *old_ucounts = new->ucounts;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (new->user == old->user && new->user_ns == old->user_ns)
 | 
						if (new->user == old->user && new->user_ns == old->user_ns)
 | 
				
			||||||
		return 0;
 | 
							return 0;
 | 
				
			||||||
| 
						 | 
					@ -681,9 +679,10 @@ int set_cred_ucounts(struct cred *new)
 | 
				
			||||||
	if (old_ucounts && old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->euid))
 | 
						if (old_ucounts && old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->euid))
 | 
				
			||||||
		return 0;
 | 
							return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!(new->ucounts = alloc_ucounts(new->user_ns, new->euid)))
 | 
						if (!(new_ucounts = alloc_ucounts(new->user_ns, new->euid)))
 | 
				
			||||||
		return -EAGAIN;
 | 
							return -EAGAIN;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						new->ucounts = new_ucounts;
 | 
				
			||||||
	if (old_ucounts)
 | 
						if (old_ucounts)
 | 
				
			||||||
		put_ucounts(old_ucounts);
 | 
							put_ucounts(old_ucounts);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -426,22 +426,10 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags,
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	rcu_read_lock();
 | 
						rcu_read_lock();
 | 
				
			||||||
	ucounts = task_ucounts(t);
 | 
						ucounts = task_ucounts(t);
 | 
				
			||||||
	sigpending = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1);
 | 
						sigpending = inc_rlimit_get_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING);
 | 
				
			||||||
	switch (sigpending) {
 | 
					 | 
				
			||||||
	case 1:
 | 
					 | 
				
			||||||
		if (likely(get_ucounts(ucounts)))
 | 
					 | 
				
			||||||
			break;
 | 
					 | 
				
			||||||
		fallthrough;
 | 
					 | 
				
			||||||
	case LONG_MAX:
 | 
					 | 
				
			||||||
		/*
 | 
					 | 
				
			||||||
		 * we need to decrease the ucount in the userns tree on any
 | 
					 | 
				
			||||||
		 * failure to avoid counts leaking.
 | 
					 | 
				
			||||||
		 */
 | 
					 | 
				
			||||||
		dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1);
 | 
					 | 
				
			||||||
	rcu_read_unlock();
 | 
						rcu_read_unlock();
 | 
				
			||||||
 | 
						if (!sigpending)
 | 
				
			||||||
		return NULL;
 | 
							return NULL;
 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	rcu_read_unlock();
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
 | 
						if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
 | 
				
			||||||
		q = kmem_cache_alloc(sigqueue_cachep, gfp_flags);
 | 
							q = kmem_cache_alloc(sigqueue_cachep, gfp_flags);
 | 
				
			||||||
| 
						 | 
					@ -450,8 +438,7 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags,
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (unlikely(q == NULL)) {
 | 
						if (unlikely(q == NULL)) {
 | 
				
			||||||
		if (dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1))
 | 
							dec_rlimit_put_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING);
 | 
				
			||||||
			put_ucounts(ucounts);
 | 
					 | 
				
			||||||
	} else {
 | 
						} else {
 | 
				
			||||||
		INIT_LIST_HEAD(&q->list);
 | 
							INIT_LIST_HEAD(&q->list);
 | 
				
			||||||
		q->flags = sigqueue_flags;
 | 
							q->flags = sigqueue_flags;
 | 
				
			||||||
| 
						 | 
					@ -464,8 +451,8 @@ static void __sigqueue_free(struct sigqueue *q)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	if (q->flags & SIGQUEUE_PREALLOC)
 | 
						if (q->flags & SIGQUEUE_PREALLOC)
 | 
				
			||||||
		return;
 | 
							return;
 | 
				
			||||||
	if (q->ucounts && dec_rlimit_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING, 1)) {
 | 
						if (q->ucounts) {
 | 
				
			||||||
		put_ucounts(q->ucounts);
 | 
							dec_rlimit_put_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING);
 | 
				
			||||||
		q->ucounts = NULL;
 | 
							q->ucounts = NULL;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	kmem_cache_free(sigqueue_cachep, q);
 | 
						kmem_cache_free(sigqueue_cachep, q);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -284,6 +284,55 @@ bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
 | 
				
			||||||
	return (new == 0);
 | 
						return (new == 0);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void do_dec_rlimit_put_ucounts(struct ucounts *ucounts,
 | 
				
			||||||
 | 
									struct ucounts *last, enum ucount_type type)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct ucounts *iter, *next;
 | 
				
			||||||
 | 
						for (iter = ucounts; iter != last; iter = next) {
 | 
				
			||||||
 | 
							long dec = atomic_long_add_return(-1, &iter->ucount[type]);
 | 
				
			||||||
 | 
							WARN_ON_ONCE(dec < 0);
 | 
				
			||||||
 | 
							next = iter->ns->ucounts;
 | 
				
			||||||
 | 
							if (dec == 0)
 | 
				
			||||||
 | 
								put_ucounts(iter);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						do_dec_rlimit_put_ucounts(ucounts, NULL, type);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						/* Caller must hold a reference to ucounts */
 | 
				
			||||||
 | 
						struct ucounts *iter;
 | 
				
			||||||
 | 
						long dec, ret = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for (iter = ucounts; iter; iter = iter->ns->ucounts) {
 | 
				
			||||||
 | 
							long max = READ_ONCE(iter->ns->ucount_max[type]);
 | 
				
			||||||
 | 
							long new = atomic_long_add_return(1, &iter->ucount[type]);
 | 
				
			||||||
 | 
							if (new < 0 || new > max)
 | 
				
			||||||
 | 
								goto unwind;
 | 
				
			||||||
 | 
							if (iter == ucounts)
 | 
				
			||||||
 | 
								ret = new;
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * Grab an extra ucount reference for the caller when
 | 
				
			||||||
 | 
							 * the rlimit count was previously 0.
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							if (new != 1)
 | 
				
			||||||
 | 
								continue;
 | 
				
			||||||
 | 
							if (!get_ucounts(iter))
 | 
				
			||||||
 | 
								goto dec_unwind;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						return ret;
 | 
				
			||||||
 | 
					dec_unwind:
 | 
				
			||||||
 | 
						dec = atomic_long_add_return(-1, &iter->ucount[type]);
 | 
				
			||||||
 | 
						WARN_ON_ONCE(dec < 0);
 | 
				
			||||||
 | 
					unwind:
 | 
				
			||||||
 | 
						do_dec_rlimit_put_ucounts(ucounts, iter, type);
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max)
 | 
					bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct ucounts *iter;
 | 
						struct ucounts *iter;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -918,6 +918,13 @@ void key_change_session_keyring(struct callback_head *twork)
 | 
				
			||||||
		return;
 | 
							return;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* If get_ucounts fails more bits are needed in the refcount */
 | 
				
			||||||
 | 
						if (unlikely(!get_ucounts(old->ucounts))) {
 | 
				
			||||||
 | 
							WARN_ONCE(1, "In %s get_ucounts failed\n", __func__);
 | 
				
			||||||
 | 
							put_cred(new);
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	new->  uid	= old->  uid;
 | 
						new->  uid	= old->  uid;
 | 
				
			||||||
	new-> euid	= old-> euid;
 | 
						new-> euid	= old-> euid;
 | 
				
			||||||
	new-> suid	= old-> suid;
 | 
						new-> suid	= old-> suid;
 | 
				
			||||||
| 
						 | 
					@ -927,6 +934,7 @@ void key_change_session_keyring(struct callback_head *twork)
 | 
				
			||||||
	new-> sgid	= old-> sgid;
 | 
						new-> sgid	= old-> sgid;
 | 
				
			||||||
	new->fsgid	= old->fsgid;
 | 
						new->fsgid	= old->fsgid;
 | 
				
			||||||
	new->user	= get_uid(old->user);
 | 
						new->user	= get_uid(old->user);
 | 
				
			||||||
 | 
						new->ucounts	= old->ucounts;
 | 
				
			||||||
	new->user_ns	= get_user_ns(old->user_ns);
 | 
						new->user_ns	= get_user_ns(old->user_ns);
 | 
				
			||||||
	new->group_info	= get_group_info(old->group_info);
 | 
						new->group_info	= get_group_info(old->group_info);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue