	dm thin: support for non power of 2 pool blocksize
Non power of 2 blocksize support is needed to properly align thinp IO on storage that has non power of 2 optimal IO sizes (e.g. RAID6 10+2).

Use sector_div to support non power of 2 blocksize for the pool's data device. This provides comparable performance to the power of 2 math that was performed until now (as tested on modern x86_64 hardware).

The kernel currently assumes that limits->discard_granularity is a power of two, so the thin target only enables discard support if the block size is a power of two.

Eliminate the pool structure's 'block_shift', 'offset_mask' and remaining 4 byte holes.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
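[Editor's note] Below is a minimal userspace sketch of the block math this commit switches to; it is not kernel code. The model_sector_div() helper is a hypothetical stand-in that mimics the kernel's sector_div(), which divides a sector_t in place and returns the remainder. The 1280-sector block size (640 KiB, e.g. a full stripe of a RAID6 10+2 array with an assumed 64 KiB chunk) is illustrative only: it is not a power of 2, so the old '>> block_shift' and '& offset_mask' math cannot express it, while plain division handles it fine.

/*
 * Userspace sketch, NOT kernel code.  model_sector_div() mimics the
 * kernel's sector_div(): divide a sector_t in place, return remainder.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t sector_t;

static uint32_t model_sector_div(sector_t *n, uint32_t base)
{
	uint32_t rem = (uint32_t)(*n % base);
	*n /= base;
	return rem;
}

int main(void)
{
	/* 1280 sectors = 640 KiB; a multiple of 128 sectors but not a
	 * power of 2, so it has no valid 'block_shift'. */
	uint32_t sectors_per_block = 1280;
	sector_t bi_sector = 1000000;	/* arbitrary bio start sector */

	/* As in get_bio_block(): divide down to a block number,
	 * keeping the offset within the block. */
	sector_t block_nr = bi_sector;
	uint32_t offset = model_sector_div(&block_nr, sectors_per_block);

	/* As in remap(): block * sectors_per_block + offset-in-block. */
	sector_t remapped = block_nr * sectors_per_block + offset;

	printf("sector %llu -> block %llu, offset %u (remaps to %llu)\n",
	       (unsigned long long)bi_sector, (unsigned long long)block_nr,
	       offset, (unsigned long long)remapped);
	return 0;
}

Compiled with 'cc -std=c99', this prints sector 1000000 mapping to block 781 at offset 320, and 781 * 1280 + 320 recovers the original sector, mirroring get_bio_block() and remap() in the diff below.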
This commit is contained in:

parent 33d07c0dfa
commit 55f2b8bdb0

1 changed file with 37 additions and 22 deletions
			
drivers/md/dm-thin.c

@@ -510,10 +510,8 @@ struct pool {
 	struct block_device *md_dev;
 	struct dm_pool_metadata *pmd;
 
-	uint32_t sectors_per_block;
-	unsigned block_shift;
-	dm_block_t offset_mask;
 	dm_block_t low_water_blocks;
+	uint32_t sectors_per_block;
 
 	struct pool_features pf;
 	unsigned low_water_triggered:1;	/* A dm event has been sent */
@@ -526,8 +524,8 @@ struct pool {
 	struct work_struct worker;
 	struct delayed_work waker;
 
-	unsigned ref_count;
 	unsigned long last_commit_jiffies;
+	unsigned ref_count;
 
 	spinlock_t lock;
 	struct bio_list deferred_bios;
@@ -679,16 +677,21 @@ static void requeue_io(struct thin_c *tc)
 
 static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
 {
-	return bio->bi_sector >> tc->pool->block_shift;
+	sector_t block_nr = bio->bi_sector;
+
+	(void) sector_div(block_nr, tc->pool->sectors_per_block);
+
+	return block_nr;
 }
 
 static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
 {
 	struct pool *pool = tc->pool;
+	sector_t bi_sector = bio->bi_sector;
 
 	bio->bi_bdev = tc->pool_dev->bdev;
-	bio->bi_sector = (block << pool->block_shift) +
-		(bio->bi_sector & pool->offset_mask);
+	bio->bi_sector = (block * pool->sectors_per_block) +
+			 sector_div(bi_sector, pool->sectors_per_block);
 }
 
 static void remap_to_origin(struct thin_c *tc, struct bio *bio)
@@ -933,9 +936,10 @@ static void process_prepared(struct pool *pool, struct list_head *head,
  */
 static int io_overlaps_block(struct pool *pool, struct bio *bio)
 {
-	return !(bio->bi_sector & pool->offset_mask) &&
-		(bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT));
+	sector_t bi_sector = bio->bi_sector;
 
+	return !sector_div(bi_sector, pool->sectors_per_block) &&
+		(bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT));
 }
 
 static int io_overwrites_block(struct pool *pool, struct bio *bio)
@@ -1239,8 +1243,8 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
 			 * part of the discard that is in a subsequent
 			 * block.
 			 */
-			sector_t offset = bio->bi_sector - (block << pool->block_shift);
-			unsigned remaining = (pool->sectors_per_block - offset) << 9;
+			sector_t offset = bio->bi_sector - (block * pool->sectors_per_block);
+			unsigned remaining = (pool->sectors_per_block - offset) << SECTOR_SHIFT;
 			bio->bi_size = min(bio->bi_size, remaining);
 
 			cell_release_singleton(cell, bio);
@@ -1722,8 +1726,6 @@ static struct pool *pool_create(struct mapped_device *pool_md,
 
 	pool->pmd = pmd;
 	pool->sectors_per_block = block_size;
-	pool->block_shift = ffs(block_size) - 1;
-	pool->offset_mask = block_size - 1;
 	pool->low_water_blocks = 0;
 	pool_features_init(&pool->pf);
 	pool->prison = prison_create(PRISON_CELLS);
@@ -1971,7 +1973,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	if (kstrtoul(argv[2], 10, &block_size) || !block_size ||
 	    block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
 	    block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
-	    !is_power_of_2(block_size)) {
+	    block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
 		ti->error = "Invalid block size";
 		r = -EINVAL;
 		goto out;
@@ -2018,6 +2020,15 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		goto out_flags_changed;
 	}
 
+	/*
+	 * The block layer requires discard_granularity to be a power of 2.
+	 */
+	if (pf.discard_enabled && !is_power_of_2(block_size)) {
+		ti->error = "Discard support must be disabled when the block size is not a power of 2";
+		r = -EINVAL;
+		goto out_flags_changed;
+	}
+
 	pt->pool = pool;
 	pt->ti = ti;
 	pt->metadata_dev = metadata_dev;
@@ -2097,7 +2108,8 @@ static int pool_preresume(struct dm_target *ti)
 	int r;
 	struct pool_c *pt = ti->private;
 	struct pool *pool = pt->pool;
-	dm_block_t data_size, sb_data_size;
+	sector_t data_size = ti->len;
+	dm_block_t sb_data_size;
 
 	/*
 	 * Take control of the pool object.
@@ -2106,7 +2118,8 @@ static int pool_preresume(struct dm_target *ti)
 	if (r)
 		return r;
 
-	data_size = ti->len >> pool->block_shift;
+	(void) sector_div(data_size, pool->sectors_per_block);
+
 	r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size);
 	if (r) {
 		DMERR("failed to retrieve data device size");
@@ -2115,7 +2128,7 @@ static int pool_preresume(struct dm_target *ti)
 
 	if (data_size < sb_data_size) {
 		DMERR("pool target too small, is %llu blocks (expected %llu)",
-		      data_size, sb_data_size);
+		      (unsigned long long)data_size, sb_data_size);
 		return -EINVAL;
 
 	} else if (data_size > sb_data_size) {
@@ -2764,19 +2777,21 @@ static int thin_status(struct dm_target *ti, status_type_t type,
 static int thin_iterate_devices(struct dm_target *ti,
 				iterate_devices_callout_fn fn, void *data)
 {
-	dm_block_t blocks;
+	sector_t blocks;
 	struct thin_c *tc = ti->private;
+	struct pool *pool = tc->pool;
 
 	/*
	 * We can't call dm_pool_get_data_dev_size() since that blocks.  So
 	 * we follow a more convoluted path through to the pool's target.
 	 */
-	if (!tc->pool->ti)
+	if (!pool->ti)
 		return 0;	/* nothing is bound */
 
-	blocks = tc->pool->ti->len >> tc->pool->block_shift;
+	blocks = pool->ti->len;
+	(void) sector_div(blocks, pool->sectors_per_block);
 	if (blocks)
-		return fn(ti, tc->pool_dev, 0, tc->pool->sectors_per_block * blocks, data);
+		return fn(ti, tc->pool_dev, 0, pool->sectors_per_block * blocks, data);
 
 	return 0;
 }
@@ -2793,7 +2808,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 1, 0},
+	.version = {1, 2, 0},
 	.module	= THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,
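[Editor's note] On the relaxed validation in pool_ctr() above: 'block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)' rejects any block size that is not a multiple of the minimum, relying on the identity x & (m - 1) == x % m for power-of-2 m. The userspace sketch below assumes DATA_DEV_BLOCK_SIZE_MIN_SECTORS is 128 (64 KiB in 512-byte sectors); that value is an assumption for illustration, not taken from this page.

/* Userspace sketch of the constructor's multiple-of-minimum test. */
#include <stdio.h>

/* Assumed value: minimum data block size of 64 KiB = 128 sectors. */
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS 128u

int main(void)
{
	unsigned sizes[] = { 128, 1280, 1000, 65536 };

	for (unsigned i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		unsigned bs = sizes[i];
		/* For power-of-2 m, (bs & (m - 1)) == bs % m, so a zero
		 * result means bs is a multiple of m. */
		int ok = !(bs & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1));
		printf("block_size %6u sectors: %s\n", bs,
		       ok ? "valid (multiple of 128)" : "rejected");
	}
	return 0;
}

So 1280 (10 * 128) now passes even though it is not a power of 2, which is the RAID6 10+2 case the commit message motivates, while discard support additionally requires a power of 2 via the is_power_of_2() check added in pool_ctr().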