mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	psi: Fix uaf issue when psi trigger is destroyed while being polled
With write operation on psi files replacing old trigger with a new one,
the lifetime of its waitqueue is totally arbitrary. Overwriting an
existing trigger causes its waitqueue to be freed and pending poll()
will stumble on trigger->event_wait which was destroyed.
Fix this by disallowing to redefine an existing psi trigger. If a write
operation is used on a file descriptor with an already existing psi
trigger, the operation will fail with EBUSY error.
Also bypass a check for psi_disabled in the psi_trigger_destroy as the
flag can be flipped after the trigger is created, leading to a memory
leak.
Fixes: 0e94682b73 ("psi: introduce psi monitor")
Reported-by: syzbot+cdb5dd11c97cc532efad@syzkaller.appspotmail.com
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Analyzed-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20220111232309.1786347-1-surenb@google.com
			
			
This commit is contained in:
		
							parent
							
								
									fb3b0673b7
								
							
						
					
					
						commit
						a06247c680
					
				
					 5 changed files with 40 additions and 45 deletions
				
			
		| 
						 | 
				
			
			@ -92,7 +92,8 @@ Triggers can be set on more than one psi metric and more than one trigger
 | 
			
		|||
for the same psi metric can be specified. However for each trigger a separate
 | 
			
		||||
file descriptor is required to be able to poll it separately from others,
 | 
			
		||||
therefore for each trigger a separate open() syscall should be made even
 | 
			
		||||
when opening the same psi interface file.
 | 
			
		||||
when opening the same psi interface file. Write operations to a file descriptor
 | 
			
		||||
with an already existing psi trigger will fail with EBUSY.
 | 
			
		||||
 | 
			
		||||
Monitors activate only when system enters stall state for the monitored
 | 
			
		||||
psi metric and deactivates upon exit from the stall state. While system is
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -33,7 +33,7 @@ void cgroup_move_task(struct task_struct *p, struct css_set *to);
 | 
			
		|||
 | 
			
		||||
struct psi_trigger *psi_trigger_create(struct psi_group *group,
 | 
			
		||||
			char *buf, size_t nbytes, enum psi_res res);
 | 
			
		||||
void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t);
 | 
			
		||||
void psi_trigger_destroy(struct psi_trigger *t);
 | 
			
		||||
 | 
			
		||||
__poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
 | 
			
		||||
			poll_table *wait);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -141,9 +141,6 @@ struct psi_trigger {
 | 
			
		|||
	 * events to one per window
 | 
			
		||||
	 */
 | 
			
		||||
	u64 last_event_time;
 | 
			
		||||
 | 
			
		||||
	/* Refcounting to prevent premature destruction */
 | 
			
		||||
	struct kref refcount;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct psi_group {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3643,6 +3643,12 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
 | 
			
		|||
	cgroup_get(cgrp);
 | 
			
		||||
	cgroup_kn_unlock(of->kn);
 | 
			
		||||
 | 
			
		||||
	/* Allow only one trigger per file descriptor */
 | 
			
		||||
	if (ctx->psi.trigger) {
 | 
			
		||||
		cgroup_put(cgrp);
 | 
			
		||||
		return -EBUSY;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
 | 
			
		||||
	new = psi_trigger_create(psi, buf, nbytes, res);
 | 
			
		||||
	if (IS_ERR(new)) {
 | 
			
		||||
| 
						 | 
				
			
			@ -3650,8 +3656,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
 | 
			
		|||
		return PTR_ERR(new);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	psi_trigger_replace(&ctx->psi.trigger, new);
 | 
			
		||||
 | 
			
		||||
	smp_store_release(&ctx->psi.trigger, new);
 | 
			
		||||
	cgroup_put(cgrp);
 | 
			
		||||
 | 
			
		||||
	return nbytes;
 | 
			
		||||
| 
						 | 
				
			
			@ -3690,7 +3695,7 @@ static void cgroup_pressure_release(struct kernfs_open_file *of)
 | 
			
		|||
{
 | 
			
		||||
	struct cgroup_file_ctx *ctx = of->priv;
 | 
			
		||||
 | 
			
		||||
	psi_trigger_replace(&ctx->psi.trigger, NULL);
 | 
			
		||||
	psi_trigger_destroy(ctx->psi.trigger);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool cgroup_psi_enabled(void)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1162,7 +1162,6 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
 | 
			
		|||
	t->event = 0;
 | 
			
		||||
	t->last_event_time = 0;
 | 
			
		||||
	init_waitqueue_head(&t->event_wait);
 | 
			
		||||
	kref_init(&t->refcount);
 | 
			
		||||
 | 
			
		||||
	mutex_lock(&group->trigger_lock);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1191,15 +1190,19 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
 | 
			
		|||
	return t;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void psi_trigger_destroy(struct kref *ref)
 | 
			
		||||
void psi_trigger_destroy(struct psi_trigger *t)
 | 
			
		||||
{
 | 
			
		||||
	struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount);
 | 
			
		||||
	struct psi_group *group = t->group;
 | 
			
		||||
	struct psi_group *group;
 | 
			
		||||
	struct task_struct *task_to_destroy = NULL;
 | 
			
		||||
 | 
			
		||||
	if (static_branch_likely(&psi_disabled))
 | 
			
		||||
	/*
 | 
			
		||||
	 * We do not check psi_disabled since it might have been disabled after
 | 
			
		||||
	 * the trigger got created.
 | 
			
		||||
	 */
 | 
			
		||||
	if (!t)
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	group = t->group;
 | 
			
		||||
	/*
 | 
			
		||||
	 * Wakeup waiters to stop polling. Can happen if cgroup is deleted
 | 
			
		||||
	 * from under a polling process.
 | 
			
		||||
| 
						 | 
				
			
			@ -1235,9 +1238,9 @@ static void psi_trigger_destroy(struct kref *ref)
 | 
			
		|||
	mutex_unlock(&group->trigger_lock);
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Wait for both *trigger_ptr from psi_trigger_replace and
 | 
			
		||||
	 * poll_task RCUs to complete their read-side critical sections
 | 
			
		||||
	 * before destroying the trigger and optionally the poll_task
 | 
			
		||||
	 * Wait for psi_schedule_poll_work RCU to complete its read-side
 | 
			
		||||
	 * critical section before destroying the trigger and optionally the
 | 
			
		||||
	 * poll_task.
 | 
			
		||||
	 */
 | 
			
		||||
	synchronize_rcu();
 | 
			
		||||
	/*
 | 
			
		||||
| 
						 | 
				
			
			@ -1254,18 +1257,6 @@ static void psi_trigger_destroy(struct kref *ref)
 | 
			
		|||
	kfree(t);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *new)
 | 
			
		||||
{
 | 
			
		||||
	struct psi_trigger *old = *trigger_ptr;
 | 
			
		||||
 | 
			
		||||
	if (static_branch_likely(&psi_disabled))
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	rcu_assign_pointer(*trigger_ptr, new);
 | 
			
		||||
	if (old)
 | 
			
		||||
		kref_put(&old->refcount, psi_trigger_destroy);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
__poll_t psi_trigger_poll(void **trigger_ptr,
 | 
			
		||||
				struct file *file, poll_table *wait)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -1275,24 +1266,15 @@ __poll_t psi_trigger_poll(void **trigger_ptr,
 | 
			
		|||
	if (static_branch_likely(&psi_disabled))
 | 
			
		||||
		return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
 | 
			
		||||
 | 
			
		||||
	rcu_read_lock();
 | 
			
		||||
 | 
			
		||||
	t = rcu_dereference(*(void __rcu __force **)trigger_ptr);
 | 
			
		||||
	if (!t) {
 | 
			
		||||
		rcu_read_unlock();
 | 
			
		||||
	t = smp_load_acquire(trigger_ptr);
 | 
			
		||||
	if (!t)
 | 
			
		||||
		return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
 | 
			
		||||
	}
 | 
			
		||||
	kref_get(&t->refcount);
 | 
			
		||||
 | 
			
		||||
	rcu_read_unlock();
 | 
			
		||||
 | 
			
		||||
	poll_wait(file, &t->event_wait, wait);
 | 
			
		||||
 | 
			
		||||
	if (cmpxchg(&t->event, 1, 0) == 1)
 | 
			
		||||
		ret |= EPOLLPRI;
 | 
			
		||||
 | 
			
		||||
	kref_put(&t->refcount, psi_trigger_destroy);
 | 
			
		||||
 | 
			
		||||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1316,14 +1298,24 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
 | 
			
		|||
 | 
			
		||||
	buf[buf_size - 1] = '\0';
 | 
			
		||||
 | 
			
		||||
	new = psi_trigger_create(&psi_system, buf, nbytes, res);
 | 
			
		||||
	if (IS_ERR(new))
 | 
			
		||||
		return PTR_ERR(new);
 | 
			
		||||
 | 
			
		||||
	seq = file->private_data;
 | 
			
		||||
 | 
			
		||||
	/* Take seq->lock to protect seq->private from concurrent writes */
 | 
			
		||||
	mutex_lock(&seq->lock);
 | 
			
		||||
	psi_trigger_replace(&seq->private, new);
 | 
			
		||||
 | 
			
		||||
	/* Allow only one trigger per file descriptor */
 | 
			
		||||
	if (seq->private) {
 | 
			
		||||
		mutex_unlock(&seq->lock);
 | 
			
		||||
		return -EBUSY;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	new = psi_trigger_create(&psi_system, buf, nbytes, res);
 | 
			
		||||
	if (IS_ERR(new)) {
 | 
			
		||||
		mutex_unlock(&seq->lock);
 | 
			
		||||
		return PTR_ERR(new);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	smp_store_release(&seq->private, new);
 | 
			
		||||
	mutex_unlock(&seq->lock);
 | 
			
		||||
 | 
			
		||||
	return nbytes;
 | 
			
		||||
| 
						 | 
				
			
			@ -1358,7 +1350,7 @@ static int psi_fop_release(struct inode *inode, struct file *file)
 | 
			
		|||
{
 | 
			
		||||
	struct seq_file *seq = file->private_data;
 | 
			
		||||
 | 
			
		||||
	psi_trigger_replace(&seq->private, NULL);
 | 
			
		||||
	psi_trigger_destroy(seq->private);
 | 
			
		||||
	return single_release(inode, file);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue