forked from mirrors/linux
		
	block: Fix a race between the cgroup code and request queue initialization
Initialize the request queue lock earlier such that the following
race can no longer occur:
blk_init_queue_node()             blkcg_print_blkgs()
  blk_alloc_queue_node (1)
    q->queue_lock = &q->__queue_lock (2)
    blkcg_init_queue(q) (3)
                                    spin_lock_irq(blkg->q->queue_lock) (4)
  q->queue_lock = lock (5)
                                    spin_unlock_irq(blkg->q->queue_lock) (6)
(1) allocate an uninitialized queue;
(2) initialize queue_lock to its default internal lock;
(3) initialize the blkcg part of the request queue, which will create a blkg
    and then insert it into blkg_list;
(4) traverse blkg_list and find the created blkg, and then take its
    queue lock, here it is the default *internal lock*;
(5) *race window*, now queue_lock is overridden with *driver specified
    lock*;
(6) now unlock *driver specified lock*, not the locked *internal lock*,
    unlock balance breaks.
The changes in this patch are as follows:
- Move the .queue_lock initialization from blk_init_queue_node() into
  blk_alloc_queue_node().
- Only override the .queue_lock pointer for legacy queues because it
  is not useful for blk-mq queues to override this pointer.
- For all block drivers that initialize .queue_lock explicitly,
  change the blk_alloc_queue() call in the driver into a
  blk_alloc_queue_node() call and remove the explicit .queue_lock
  initialization. Additionally, initialize the spin lock that will
  be used as queue lock earlier if necessary.
Reported-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Philipp Reisner <philipp.reisner@linbit.com>
Cc: Ulf Hansson <ulf.hansson@linaro.org>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
			
			
This commit is contained in:
		
							parent
							
								
									5ee0524ba1
								
							
						
					
					
						commit
						498f6650ae
					
				
					 3 changed files with 20 additions and 14 deletions
				
			
		| 
						 | 
					@ -888,6 +888,19 @@ static void blk_rq_timed_out_timer(struct timer_list *t)
 | 
				
			||||||
	kblockd_schedule_work(&q->timeout_work);
 | 
						kblockd_schedule_work(&q->timeout_work);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
 | 
				
			||||||
 | 
					 * blk_alloc_queue_node - allocate a request queue
 | 
				
			||||||
 | 
					 * @gfp_mask: memory allocation flags
 | 
				
			||||||
 | 
					 * @node_id: NUMA node to allocate memory from
 | 
				
			||||||
 | 
					 * @lock: For legacy queues, pointer to a spinlock that will be used to e.g.
 | 
				
			||||||
 | 
					 *        serialize calls to the legacy .request_fn() callback. Ignored for
 | 
				
			||||||
 | 
					 *	  blk-mq request queues.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Note: pass the queue lock as the third argument to this function instead of
 | 
				
			||||||
 | 
					 * setting the queue lock pointer explicitly to avoid triggering a sporadic
 | 
				
			||||||
 | 
					 * crash in the blkcg code. This function namely calls blkcg_init_queue() and
 | 
				
			||||||
 | 
					 * the queue lock pointer must be set before blkcg_init_queue() is called.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
 | 
					struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
 | 
				
			||||||
					   spinlock_t *lock)
 | 
										   spinlock_t *lock)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -940,11 +953,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
 | 
				
			||||||
	mutex_init(&q->sysfs_lock);
 | 
						mutex_init(&q->sysfs_lock);
 | 
				
			||||||
	spin_lock_init(&q->__queue_lock);
 | 
						spin_lock_init(&q->__queue_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						if (!q->mq_ops)
 | 
				
			||||||
	 * By default initialize queue_lock to internal lock and driver can
 | 
							q->queue_lock = lock ? : &q->__queue_lock;
 | 
				
			||||||
	 * override it later if need be.
 | 
					 | 
				
			||||||
	 */
 | 
					 | 
				
			||||||
	q->queue_lock = &q->__queue_lock;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * A queue starts its life with bypass turned on to avoid
 | 
						 * A queue starts its life with bypass turned on to avoid
 | 
				
			||||||
| 
						 | 
					@ -1031,13 +1041,11 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct request_queue *q;
 | 
						struct request_queue *q;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	q = blk_alloc_queue_node(GFP_KERNEL, node_id, NULL);
 | 
						q = blk_alloc_queue_node(GFP_KERNEL, node_id, lock);
 | 
				
			||||||
	if (!q)
 | 
						if (!q)
 | 
				
			||||||
		return NULL;
 | 
							return NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	q->request_fn = rfn;
 | 
						q->request_fn = rfn;
 | 
				
			||||||
	if (lock)
 | 
					 | 
				
			||||||
		q->queue_lock = lock;
 | 
					 | 
				
			||||||
	if (blk_init_allocated_queue(q) < 0) {
 | 
						if (blk_init_allocated_queue(q) < 0) {
 | 
				
			||||||
		blk_cleanup_queue(q);
 | 
							blk_cleanup_queue(q);
 | 
				
			||||||
		return NULL;
 | 
							return NULL;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2816,7 +2816,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	drbd_init_set_defaults(device);
 | 
						drbd_init_set_defaults(device);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	q = blk_alloc_queue(GFP_KERNEL);
 | 
						q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, &resource->req_lock);
 | 
				
			||||||
	if (!q)
 | 
						if (!q)
 | 
				
			||||||
		goto out_no_q;
 | 
							goto out_no_q;
 | 
				
			||||||
	device->rq_queue = q;
 | 
						device->rq_queue = q;
 | 
				
			||||||
| 
						 | 
					@ -2848,7 +2848,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
 | 
				
			||||||
	/* Setting the max_hw_sectors to an odd value of 8kibyte here
 | 
						/* Setting the max_hw_sectors to an odd value of 8kibyte here
 | 
				
			||||||
	   This triggers a max_bio_size message upon first attach or connect */
 | 
						   This triggers a max_bio_size message upon first attach or connect */
 | 
				
			||||||
	blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
 | 
						blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
 | 
				
			||||||
	q->queue_lock = &resource->req_lock;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	device->md_io.page = alloc_page(GFP_KERNEL);
 | 
						device->md_io.page = alloc_page(GFP_KERNEL);
 | 
				
			||||||
	if (!device->md_io.page)
 | 
						if (!device->md_io.page)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -888,13 +888,14 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 | 
				
			||||||
	card->Active = -1;	/* no page is active */
 | 
						card->Active = -1;	/* no page is active */
 | 
				
			||||||
	card->bio = NULL;
 | 
						card->bio = NULL;
 | 
				
			||||||
	card->biotail = &card->bio;
 | 
						card->biotail = &card->bio;
 | 
				
			||||||
 | 
						spin_lock_init(&card->lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	card->queue = blk_alloc_queue(GFP_KERNEL);
 | 
						card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE,
 | 
				
			||||||
 | 
										   &card->lock);
 | 
				
			||||||
	if (!card->queue)
 | 
						if (!card->queue)
 | 
				
			||||||
		goto failed_alloc;
 | 
							goto failed_alloc;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	blk_queue_make_request(card->queue, mm_make_request);
 | 
						blk_queue_make_request(card->queue, mm_make_request);
 | 
				
			||||||
	card->queue->queue_lock = &card->lock;
 | 
					 | 
				
			||||||
	card->queue->queuedata = card;
 | 
						card->queue->queuedata = card;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	tasklet_init(&card->tasklet, process_page, (unsigned long)card);
 | 
						tasklet_init(&card->tasklet, process_page, (unsigned long)card);
 | 
				
			||||||
| 
						 | 
					@ -968,8 +969,6 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 | 
				
			||||||
	dev_printk(KERN_INFO, &card->dev->dev,
 | 
						dev_printk(KERN_INFO, &card->dev->dev,
 | 
				
			||||||
		"Window size %d bytes, IRQ %d\n", data, dev->irq);
 | 
							"Window size %d bytes, IRQ %d\n", data, dev->irq);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	spin_lock_init(&card->lock);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	pci_set_drvdata(dev, card);
 | 
						pci_set_drvdata(dev, card);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (pci_write_cmd != 0x0F) 	/* If not Memory Write & Invalidate */
 | 
						if (pci_write_cmd != 0x0F) 	/* If not Memory Write & Invalidate */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue