forked from mirrors/linux
		
	Fix congestion_wait() sync/async vs read/write confusion
Commit 1faa16d228 accidentally broke
the bdi congestion wait queue logic, causing us to wait on congestion
for WRITE (== 1) when we really wanted BLK_RW_ASYNC (== 0) instead.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
			
			
This commit is contained in:
		
							parent
							
								
									c2cc49a2f8
								
							
						
					
					
						commit
						8aa7e847d8
					
				
					 16 changed files with 43 additions and 40 deletions
				
			
		| 
						 | 
				
			
			@ -751,7 +751,7 @@ unsigned long __copy_to_user_ll(void __user *to, const void *from,
 | 
			
		|||
 | 
			
		||||
			if (retval == -ENOMEM && is_global_init(current)) {
 | 
			
		||||
				up_read(&current->mm->mmap_sem);
 | 
			
		||||
				congestion_wait(WRITE, HZ/50);
 | 
			
		||||
				congestion_wait(BLK_RW_ASYNC, HZ/50);
 | 
			
		||||
				goto survive;
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1372,8 +1372,10 @@ static int pkt_handle_queue(struct pktcdvd_device *pd)
 | 
			
		|||
	wakeup = (pd->write_congestion_on > 0
 | 
			
		||||
	 		&& pd->bio_queue_size <= pd->write_congestion_off);
 | 
			
		||||
	spin_unlock(&pd->lock);
 | 
			
		||||
	if (wakeup)
 | 
			
		||||
		clear_bdi_congested(&pd->disk->queue->backing_dev_info, WRITE);
 | 
			
		||||
	if (wakeup) {
 | 
			
		||||
		clear_bdi_congested(&pd->disk->queue->backing_dev_info,
 | 
			
		||||
					BLK_RW_ASYNC);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	pkt->sleep_time = max(PACKET_WAIT_TIME, 1);
 | 
			
		||||
	pkt_set_state(pkt, PACKET_WAITING_STATE);
 | 
			
		||||
| 
						 | 
				
			
			@ -2592,10 +2594,10 @@ static int pkt_make_request(struct request_queue *q, struct bio *bio)
 | 
			
		|||
	spin_lock(&pd->lock);
 | 
			
		||||
	if (pd->write_congestion_on > 0
 | 
			
		||||
	    && pd->bio_queue_size >= pd->write_congestion_on) {
 | 
			
		||||
		set_bdi_congested(&q->backing_dev_info, WRITE);
 | 
			
		||||
		set_bdi_congested(&q->backing_dev_info, BLK_RW_ASYNC);
 | 
			
		||||
		do {
 | 
			
		||||
			spin_unlock(&pd->lock);
 | 
			
		||||
			congestion_wait(WRITE, HZ);
 | 
			
		||||
			congestion_wait(BLK_RW_ASYNC, HZ);
 | 
			
		||||
			spin_lock(&pd->lock);
 | 
			
		||||
		} while(pd->bio_queue_size > pd->write_congestion_off);
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -776,7 +776,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 | 
			
		|||
		 * But don't wait if split was due to the io size restriction
 | 
			
		||||
		 */
 | 
			
		||||
		if (unlikely(out_of_pages))
 | 
			
		||||
			congestion_wait(WRITE, HZ/100);
 | 
			
		||||
			congestion_wait(BLK_RW_ASYNC, HZ/100);
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * With async crypto it is unsafe to share the crypto context
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -134,7 +134,7 @@ static int fat_file_release(struct inode *inode, struct file *filp)
 | 
			
		|||
	if ((filp->f_mode & FMODE_WRITE) &&
 | 
			
		||||
	     MSDOS_SB(inode->i_sb)->options.flush) {
 | 
			
		||||
		fat_flush_inodes(inode->i_sb, inode, NULL);
 | 
			
		||||
		congestion_wait(WRITE, HZ/10);
 | 
			
		||||
		congestion_wait(BLK_RW_ASYNC, HZ/10);
 | 
			
		||||
	}
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -286,8 +286,8 @@ __releases(&fc->lock)
 | 
			
		|||
		}
 | 
			
		||||
		if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
 | 
			
		||||
		    fc->connected && fc->bdi_initialized) {
 | 
			
		||||
			clear_bdi_congested(&fc->bdi, READ);
 | 
			
		||||
			clear_bdi_congested(&fc->bdi, WRITE);
 | 
			
		||||
			clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
 | 
			
		||||
			clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
 | 
			
		||||
		}
 | 
			
		||||
		fc->num_background--;
 | 
			
		||||
		fc->active_background--;
 | 
			
		||||
| 
						 | 
				
			
			@ -414,8 +414,8 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
 | 
			
		|||
		fc->blocked = 1;
 | 
			
		||||
	if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
 | 
			
		||||
	    fc->bdi_initialized) {
 | 
			
		||||
		set_bdi_congested(&fc->bdi, READ);
 | 
			
		||||
		set_bdi_congested(&fc->bdi, WRITE);
 | 
			
		||||
		set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
 | 
			
		||||
		set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
 | 
			
		||||
	}
 | 
			
		||||
	list_add_tail(&req->list, &fc->bg_queue);
 | 
			
		||||
	flush_bg_queue(fc);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -202,8 +202,10 @@ static int nfs_set_page_writeback(struct page *page)
 | 
			
		|||
		struct nfs_server *nfss = NFS_SERVER(inode);
 | 
			
		||||
 | 
			
		||||
		if (atomic_long_inc_return(&nfss->writeback) >
 | 
			
		||||
				NFS_CONGESTION_ON_THRESH)
 | 
			
		||||
			set_bdi_congested(&nfss->backing_dev_info, WRITE);
 | 
			
		||||
				NFS_CONGESTION_ON_THRESH) {
 | 
			
		||||
			set_bdi_congested(&nfss->backing_dev_info,
 | 
			
		||||
						BLK_RW_ASYNC);
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -215,7 +217,7 @@ static void nfs_end_page_writeback(struct page *page)
 | 
			
		|||
 | 
			
		||||
	end_page_writeback(page);
 | 
			
		||||
	if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
 | 
			
		||||
		clear_bdi_congested(&nfss->backing_dev_info, WRITE);
 | 
			
		||||
		clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -997,7 +997,7 @@ static int reiserfs_async_progress_wait(struct super_block *s)
 | 
			
		|||
	DEFINE_WAIT(wait);
 | 
			
		||||
	struct reiserfs_journal *j = SB_JOURNAL(s);
 | 
			
		||||
	if (atomic_read(&j->j_async_throttle))
 | 
			
		||||
		congestion_wait(WRITE, HZ / 10);
 | 
			
		||||
		congestion_wait(BLK_RW_ASYNC, HZ / 10);
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -53,7 +53,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
 | 
			
		|||
			printk(KERN_ERR "XFS: possible memory allocation "
 | 
			
		||||
					"deadlock in %s (mode:0x%x)\n",
 | 
			
		||||
					__func__, lflags);
 | 
			
		||||
		congestion_wait(WRITE, HZ/50);
 | 
			
		||||
		congestion_wait(BLK_RW_ASYNC, HZ/50);
 | 
			
		||||
	} while (1);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -130,7 +130,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
 | 
			
		|||
			printk(KERN_ERR "XFS: possible memory allocation "
 | 
			
		||||
					"deadlock in %s (mode:0x%x)\n",
 | 
			
		||||
					__func__, lflags);
 | 
			
		||||
		congestion_wait(WRITE, HZ/50);
 | 
			
		||||
		congestion_wait(BLK_RW_ASYNC, HZ/50);
 | 
			
		||||
	} while (1);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -412,7 +412,7 @@ _xfs_buf_lookup_pages(
 | 
			
		|||
 | 
			
		||||
			XFS_STATS_INC(xb_page_retries);
 | 
			
		||||
			xfsbufd_wakeup(0, gfp_mask);
 | 
			
		||||
			congestion_wait(WRITE, HZ/50);
 | 
			
		||||
			congestion_wait(BLK_RW_ASYNC, HZ/50);
 | 
			
		||||
			goto retry;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -229,9 +229,9 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi)
 | 
			
		|||
				  (1 << BDI_async_congested));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void clear_bdi_congested(struct backing_dev_info *bdi, int rw);
 | 
			
		||||
void set_bdi_congested(struct backing_dev_info *bdi, int rw);
 | 
			
		||||
long congestion_wait(int rw, long timeout);
 | 
			
		||||
void clear_bdi_congested(struct backing_dev_info *bdi, int sync);
 | 
			
		||||
void set_bdi_congested(struct backing_dev_info *bdi, int sync);
 | 
			
		||||
long congestion_wait(int sync, long timeout);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -779,18 +779,18 @@ extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 | 
			
		|||
 * congested queues, and wake up anyone who was waiting for requests to be
 | 
			
		||||
 * put back.
 | 
			
		||||
 */
 | 
			
		||||
static inline void blk_clear_queue_congested(struct request_queue *q, int rw)
 | 
			
		||||
static inline void blk_clear_queue_congested(struct request_queue *q, int sync)
 | 
			
		||||
{
 | 
			
		||||
	clear_bdi_congested(&q->backing_dev_info, rw);
 | 
			
		||||
	clear_bdi_congested(&q->backing_dev_info, sync);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * A queue has just entered congestion.  Flag that in the queue's VM-visible
 | 
			
		||||
 * state flags and increment the global counter of congested queues.
 | 
			
		||||
 */
 | 
			
		||||
static inline void blk_set_queue_congested(struct request_queue *q, int rw)
 | 
			
		||||
static inline void blk_set_queue_congested(struct request_queue *q, int sync)
 | 
			
		||||
{
 | 
			
		||||
	set_bdi_congested(&q->backing_dev_info, rw);
 | 
			
		||||
	set_bdi_congested(&q->backing_dev_info, sync);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern void blk_start_queue(struct request_queue *q);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -283,7 +283,6 @@ static wait_queue_head_t congestion_wqh[2] = {
 | 
			
		|||
		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
 | 
			
		||||
{
 | 
			
		||||
	enum bdi_state bit;
 | 
			
		||||
| 
						 | 
				
			
			@ -308,18 +307,18 @@ EXPORT_SYMBOL(set_bdi_congested);
 | 
			
		|||
 | 
			
		||||
/**
 | 
			
		||||
 * congestion_wait - wait for a backing_dev to become uncongested
 | 
			
		||||
 * @rw: READ or WRITE
 | 
			
		||||
 * @sync: SYNC or ASYNC IO
 | 
			
		||||
 * @timeout: timeout in jiffies
 | 
			
		||||
 *
 | 
			
		||||
 * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit
 | 
			
		||||
 * write congestion.  If no backing_devs are congested then just wait for the
 | 
			
		||||
 * next write to be completed.
 | 
			
		||||
 */
 | 
			
		||||
long congestion_wait(int rw, long timeout)
 | 
			
		||||
long congestion_wait(int sync, long timeout)
 | 
			
		||||
{
 | 
			
		||||
	long ret;
 | 
			
		||||
	DEFINE_WAIT(wait);
 | 
			
		||||
	wait_queue_head_t *wqh = &congestion_wqh[rw];
 | 
			
		||||
	wait_queue_head_t *wqh = &congestion_wqh[sync];
 | 
			
		||||
 | 
			
		||||
	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
 | 
			
		||||
	ret = io_schedule_timeout(timeout);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1973,7 +1973,7 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all)
 | 
			
		|||
		if (!progress) {
 | 
			
		||||
			nr_retries--;
 | 
			
		||||
			/* maybe some writeback is necessary */
 | 
			
		||||
			congestion_wait(WRITE, HZ/10);
 | 
			
		||||
			congestion_wait(BLK_RW_ASYNC, HZ/10);
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -575,7 +575,7 @@ static void balance_dirty_pages(struct address_space *mapping)
 | 
			
		|||
		if (pages_written >= write_chunk)
 | 
			
		||||
			break;		/* We've done our duty */
 | 
			
		||||
 | 
			
		||||
		congestion_wait(WRITE, HZ/10);
 | 
			
		||||
		congestion_wait(BLK_RW_ASYNC, HZ/10);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh &&
 | 
			
		||||
| 
						 | 
				
			
			@ -669,7 +669,7 @@ void throttle_vm_writeout(gfp_t gfp_mask)
 | 
			
		|||
                if (global_page_state(NR_UNSTABLE_NFS) +
 | 
			
		||||
			global_page_state(NR_WRITEBACK) <= dirty_thresh)
 | 
			
		||||
                        	break;
 | 
			
		||||
                congestion_wait(WRITE, HZ/10);
 | 
			
		||||
                congestion_wait(BLK_RW_ASYNC, HZ/10);
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * The caller might hold locks which can prevent IO completion
 | 
			
		||||
| 
						 | 
				
			
			@ -715,7 +715,7 @@ static void background_writeout(unsigned long _min_pages)
 | 
			
		|||
		if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
 | 
			
		||||
			/* Wrote less than expected */
 | 
			
		||||
			if (wbc.encountered_congestion || wbc.more_io)
 | 
			
		||||
				congestion_wait(WRITE, HZ/10);
 | 
			
		||||
				congestion_wait(BLK_RW_ASYNC, HZ/10);
 | 
			
		||||
			else
 | 
			
		||||
				break;
 | 
			
		||||
		}
 | 
			
		||||
| 
						 | 
				
			
			@ -787,7 +787,7 @@ static void wb_kupdate(unsigned long arg)
 | 
			
		|||
		writeback_inodes(&wbc);
 | 
			
		||||
		if (wbc.nr_to_write > 0) {
 | 
			
		||||
			if (wbc.encountered_congestion || wbc.more_io)
 | 
			
		||||
				congestion_wait(WRITE, HZ/10);
 | 
			
		||||
				congestion_wait(BLK_RW_ASYNC, HZ/10);
 | 
			
		||||
			else
 | 
			
		||||
				break;	/* All the old data is written */
 | 
			
		||||
		}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1666,7 +1666,7 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
 | 
			
		|||
			preferred_zone, migratetype);
 | 
			
		||||
 | 
			
		||||
		if (!page && gfp_mask & __GFP_NOFAIL)
 | 
			
		||||
			congestion_wait(WRITE, HZ/50);
 | 
			
		||||
			congestion_wait(BLK_RW_ASYNC, HZ/50);
 | 
			
		||||
	} while (!page && (gfp_mask & __GFP_NOFAIL));
 | 
			
		||||
 | 
			
		||||
	return page;
 | 
			
		||||
| 
						 | 
				
			
			@ -1831,7 +1831,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 | 
			
		|||
	pages_reclaimed += did_some_progress;
 | 
			
		||||
	if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) {
 | 
			
		||||
		/* Wait for some write requests to complete then retry */
 | 
			
		||||
		congestion_wait(WRITE, HZ/50);
 | 
			
		||||
		congestion_wait(BLK_RW_ASYNC, HZ/50);
 | 
			
		||||
		goto rebalance;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1104,7 +1104,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 | 
			
		|||
		 */
 | 
			
		||||
		if (nr_freed < nr_taken && !current_is_kswapd() &&
 | 
			
		||||
		    lumpy_reclaim) {
 | 
			
		||||
			congestion_wait(WRITE, HZ/10);
 | 
			
		||||
			congestion_wait(BLK_RW_ASYNC, HZ/10);
 | 
			
		||||
 | 
			
		||||
			/*
 | 
			
		||||
			 * The attempt at page out may have made some
 | 
			
		||||
| 
						 | 
				
			
			@ -1721,7 +1721,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 | 
			
		|||
 | 
			
		||||
		/* Take a nap, wait for some writeback to complete */
 | 
			
		||||
		if (sc->nr_scanned && priority < DEF_PRIORITY - 2)
 | 
			
		||||
			congestion_wait(WRITE, HZ/10);
 | 
			
		||||
			congestion_wait(BLK_RW_ASYNC, HZ/10);
 | 
			
		||||
	}
 | 
			
		||||
	/* top priority shrink_zones still had more to do? don't OOM, then */
 | 
			
		||||
	if (!sc->all_unreclaimable && scanning_global_lru(sc))
 | 
			
		||||
| 
						 | 
				
			
			@ -1960,7 +1960,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 | 
			
		|||
		 * another pass across the zones.
 | 
			
		||||
		 */
 | 
			
		||||
		if (total_scanned && priority < DEF_PRIORITY - 2)
 | 
			
		||||
			congestion_wait(WRITE, HZ/10);
 | 
			
		||||
			congestion_wait(BLK_RW_ASYNC, HZ/10);
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * We do this so kswapd doesn't build up large priorities for
 | 
			
		||||
| 
						 | 
				
			
			@ -2233,7 +2233,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 | 
			
		|||
				goto out;
 | 
			
		||||
 | 
			
		||||
			if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
 | 
			
		||||
				congestion_wait(WRITE, HZ / 10);
 | 
			
		||||
				congestion_wait(BLK_RW_ASYNC, HZ / 10);
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue