forked from mirrors/linux
		
	blk-mq: reduce unnecessary software queue looping
In flush_busy_ctxs() and blk_mq_hctx_has_pending(), regardless of how many ctxs are assigned to one hctx, they will all loop hctx->ctx_map.map_size times. Here hctx->ctx_map.map_size is a constant, ALIGN(nr_cpu_ids, 8) / 8. Notably, flush_busy_ctxs() is in a hot code path, and this looping is unnecessary. Change ->map_size to contain the number of actually mapped software queues, so we only loop for as many iterations as we have to. Also remove the cpumask setting and nr_ctx count in blk_mq_init_cpu_queues(), since they are all re-done in blk_mq_map_swqueue(). Signed-off-by: Chong Yuan <chong.yuan@memblaze.com> Reviewed-by: Wenbo Wang <wenbo.wang@memblaze.com> Updated by me for formatting and commenting. Signed-off-by: Jens Axboe <axboe@fb.com>
This commit is contained in:
		
							parent
							
								
									dc48e56d76
								
							
						
					
					
						commit
						889fa31f00
					
				
					 1 changed file with 9 additions and 4 deletions
				
			
		| 
						 | 
					@ -1522,8 +1522,6 @@ static int blk_mq_alloc_bitmap(struct blk_mq_ctxmap *bitmap, int node)
 | 
				
			||||||
	if (!bitmap->map)
 | 
						if (!bitmap->map)
 | 
				
			||||||
		return -ENOMEM;
 | 
							return -ENOMEM;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	bitmap->map_size = num_maps;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	total = nr_cpu_ids;
 | 
						total = nr_cpu_ids;
 | 
				
			||||||
	for (i = 0; i < num_maps; i++) {
 | 
						for (i = 0; i < num_maps; i++) {
 | 
				
			||||||
		bitmap->map[i].depth = min(total, bitmap->bits_per_word);
 | 
							bitmap->map[i].depth = min(total, bitmap->bits_per_word);
 | 
				
			||||||
| 
						 | 
					@ -1764,8 +1762,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 | 
				
			||||||
			continue;
 | 
								continue;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		hctx = q->mq_ops->map_queue(q, i);
 | 
							hctx = q->mq_ops->map_queue(q, i);
 | 
				
			||||||
		cpumask_set_cpu(i, hctx->cpumask);
 | 
					 | 
				
			||||||
		hctx->nr_ctx++;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
		/*
 | 
							/*
 | 
				
			||||||
		 * Set local node, IFF we have more than one hw queue. If
 | 
							 * Set local node, IFF we have more than one hw queue. If
 | 
				
			||||||
| 
						 | 
					@ -1802,6 +1798,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	queue_for_each_hw_ctx(q, hctx, i) {
 | 
						queue_for_each_hw_ctx(q, hctx, i) {
 | 
				
			||||||
 | 
							struct blk_mq_ctxmap *map = &hctx->ctx_map;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		/*
 | 
							/*
 | 
				
			||||||
		 * If no software queues are mapped to this hardware queue,
 | 
							 * If no software queues are mapped to this hardware queue,
 | 
				
			||||||
		 * disable it and free the request entries.
 | 
							 * disable it and free the request entries.
 | 
				
			||||||
| 
						 | 
					@ -1817,6 +1815,13 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 | 
				
			||||||
			continue;
 | 
								continue;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * Set the map size to the number of mapped software queues.
 | 
				
			||||||
 | 
							 * This is more accurate and more efficient than looping
 | 
				
			||||||
 | 
							 * over all possibly mapped software queues.
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							map->map_size = hctx->nr_ctx / map->bits_per_word;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		/*
 | 
							/*
 | 
				
			||||||
		 * Initialize batch roundrobin counts
 | 
							 * Initialize batch roundrobin counts
 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue