mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	block: Fix a race between request queue removal and the block cgroup controller
Prevent the following race from occurring:
blk_cleanup_queue()               blkcg_print_blkgs()
  spin_lock_irq(lock) (1)           spin_lock_irq(blkg->q->queue_lock) (2,5)
    q->queue_lock = &q->__queue_lock (3)
  spin_unlock_irq(lock) (4)
                                    spin_unlock_irq(blkg->q->queue_lock) (6)
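(1) takes the driver lock;
(2) spins waiting for the driver lock;
(3) replaces the driver lock pointer with a pointer to the internal lock;
(4) releases the driver lock;
(5) can now take the driver lock;
(6) but unlocks the internal lock instead, because blkg->q->queue_lock now
    points to it.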
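Fix this by moving the I/O scheduler exit, the blkcg_exit_queue() call and
the bdi_put() call from __blk_release_queue() into blk_cleanup_queue(), so
that all references to the queue are removed from the block cgroup
controller before q->queue_lock is restored.
This change is safe because only the SCSI core and the NVMe core keep
a reference on a request queue after having called blk_cleanup_queue().
Neither driver accesses any of the removed data structures between its
blk_cleanup_queue() and blk_put_queue() calls.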
Reported-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Jan Kara <jack@suse.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
			
			
This commit is contained in:
		
							parent
							
								
									498f6650ae
								
							
						
					
					
						commit
						a063057d7c
					
				
					 2 changed files with 31 additions and 7 deletions
				
			
		| 
						 | 
				
			
			@@ -719,6 +719,37 @@ void blk_cleanup_queue(struct request_queue *q)
 | 
			
		|||
	del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer);
 | 
			
		||||
	blk_sync_queue(q);
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * I/O scheduler exit is only safe after the sysfs scheduler attribute
 | 
			
		||||
	 * has been removed.
 | 
			
		||||
	 */
 | 
			
		||||
	WARN_ON_ONCE(q->kobj.state_in_sysfs);
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Since the I/O scheduler exit code may access cgroup information,
 | 
			
		||||
	 * perform I/O scheduler exit before disassociating from the block
 | 
			
		||||
	 * cgroup controller.
 | 
			
		||||
	 */
 | 
			
		||||
	if (q->elevator) {
 | 
			
		||||
		ioc_clear_queue(q);
 | 
			
		||||
		elevator_exit(q, q->elevator);
 | 
			
		||||
		q->elevator = NULL;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Remove all references to @q from the block cgroup controller before
 | 
			
		||||
	 * restoring @q->queue_lock to avoid that restoring this pointer causes
 | 
			
		||||
	 * e.g. blkcg_print_blkgs() to crash.
 | 
			
		||||
	 */
 | 
			
		||||
	blkcg_exit_queue(q);
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Since the cgroup code may dereference the @q->backing_dev_info
 | 
			
		||||
	 * pointer, only decrease its reference count after having removed the
 | 
			
		||||
	 * association with the block cgroup controller.
 | 
			
		||||
	 */
 | 
			
		||||
	bdi_put(q->backing_dev_info);
 | 
			
		||||
 | 
			
		||||
	if (q->mq_ops)
 | 
			
		||||
		blk_mq_free_queue(q);
 | 
			
		||||
	percpu_ref_exit(&q->q_usage_counter);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@@ -798,13 +798,6 @@ static void __blk_release_queue(struct work_struct *work)
 | 
			
		|||
	if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
 | 
			
		||||
		blk_stat_remove_callback(q, q->poll_cb);
 | 
			
		||||
	blk_stat_free_callback(q->poll_cb);
 | 
			
		||||
	bdi_put(q->backing_dev_info);
 | 
			
		||||
	blkcg_exit_queue(q);
 | 
			
		||||
 | 
			
		||||
	if (q->elevator) {
 | 
			
		||||
		ioc_clear_queue(q);
 | 
			
		||||
		elevator_exit(q, q->elevator);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	blk_free_queue_stats(q->stats);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue