mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	ext4: speed-up releasing blocks on commit
Improve mb_free_blocks speed by clearing entire range at once instead of iterating over each bit. Freeing block-by-block also makes buddy bitmap subtree flip twice making most of the work a no-op. Very few bits in buddy bitmap require change, e.g. freeing entire group is a 1 bit flip only. As a result, releasing blocks of 60G file now takes 5ms instead of 2.7s. This is especially good for non-preemptive kernels as there is no rescheduling during release. Signed-off-by: Andrey Sidorov <qrxd43@motorola.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
		
							parent
							
								
									5c1ff33640
								
							
						
					
					
						commit
						eabe0444df
					
				
					 1 changed files with 175 additions and 64 deletions
				
			
		| 
						 | 
					@ -405,6 +405,12 @@ static inline void mb_clear_bit(int bit, void *addr)
 | 
				
			||||||
	ext4_clear_bit(bit, addr);
 | 
						ext4_clear_bit(bit, addr);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline int mb_test_and_clear_bit(int bit, void *addr)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						addr = mb_correct_addr_and_bit(&bit, addr);
 | 
				
			||||||
 | 
						return ext4_test_and_clear_bit(bit, addr);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline int mb_find_next_zero_bit(void *addr, int max, int start)
 | 
					static inline int mb_find_next_zero_bit(void *addr, int max, int start)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int fix = 0, ret, tmpmax;
 | 
						int fix = 0, ret, tmpmax;
 | 
				
			||||||
| 
						 | 
					@ -764,6 +770,24 @@ void ext4_mb_generate_buddy(struct super_block *sb,
 | 
				
			||||||
	spin_unlock(&EXT4_SB(sb)->s_bal_lock);
 | 
						spin_unlock(&EXT4_SB(sb)->s_bal_lock);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void mb_regenerate_buddy(struct ext4_buddy *e4b)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int count;
 | 
				
			||||||
 | 
						int order = 1;
 | 
				
			||||||
 | 
						void *buddy;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						while ((buddy = mb_find_buddy(e4b, order++, &count))) {
 | 
				
			||||||
 | 
							ext4_set_bits(buddy, 0, count);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						e4b->bd_info->bb_fragments = 0;
 | 
				
			||||||
 | 
						memset(e4b->bd_info->bb_counters, 0,
 | 
				
			||||||
 | 
							sizeof(*e4b->bd_info->bb_counters) *
 | 
				
			||||||
 | 
							(e4b->bd_sb->s_blocksize_bits + 2));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
 | 
				
			||||||
 | 
							e4b->bd_bitmap, e4b->bd_group);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* The buddy information is attached the buddy cache inode
 | 
					/* The buddy information is attached the buddy cache inode
 | 
				
			||||||
 * for convenience. The information regarding each group
 | 
					 * for convenience. The information regarding each group
 | 
				
			||||||
 * is loaded via ext4_mb_load_buddy. The information involve
 | 
					 * is loaded via ext4_mb_load_buddy. The information involve
 | 
				
			||||||
| 
						 | 
					@ -1246,6 +1270,33 @@ static void mb_clear_bits(void *bm, int cur, int len)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* clear bits in given range
 | 
				
			||||||
 | 
					 * will return first found zero bit if any, -1 otherwise
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static int mb_test_and_clear_bits(void *bm, int cur, int len)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						__u32 *addr;
 | 
				
			||||||
 | 
						int zero_bit = -1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						len = cur + len;
 | 
				
			||||||
 | 
						while (cur < len) {
 | 
				
			||||||
 | 
							if ((cur & 31) == 0 && (len - cur) >= 32) {
 | 
				
			||||||
 | 
								/* fast path: clear whole word at once */
 | 
				
			||||||
 | 
								addr = bm + (cur >> 3);
 | 
				
			||||||
 | 
								if (*addr != (__u32)(-1) && zero_bit == -1)
 | 
				
			||||||
 | 
									zero_bit = cur + mb_find_next_zero_bit(addr, 32, 0);
 | 
				
			||||||
 | 
								*addr = 0;
 | 
				
			||||||
 | 
								cur += 32;
 | 
				
			||||||
 | 
								continue;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							if (!mb_test_and_clear_bit(cur, bm) && zero_bit == -1)
 | 
				
			||||||
 | 
								zero_bit = cur;
 | 
				
			||||||
 | 
							cur++;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return zero_bit;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ext4_set_bits(void *bm, int cur, int len)
 | 
					void ext4_set_bits(void *bm, int cur, int len)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	__u32 *addr;
 | 
						__u32 *addr;
 | 
				
			||||||
| 
						 | 
					@ -1264,17 +1315,90 @@ void ext4_set_bits(void *bm, int cur, int len)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
 | 
					/*
 | 
				
			||||||
			  int first, int count)
 | 
					 * _________________________________________________________________ */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline int mb_buddy_adjust_border(int* bit, void* bitmap, int side)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int block = 0;
 | 
						if (mb_test_bit(*bit + side, bitmap)) {
 | 
				
			||||||
	int max = 0;
 | 
							mb_clear_bit(*bit, bitmap);
 | 
				
			||||||
	int order;
 | 
							(*bit) -= side;
 | 
				
			||||||
	void *buddy;
 | 
							return 1;
 | 
				
			||||||
	void *buddy2;
 | 
						}
 | 
				
			||||||
 | 
						else {
 | 
				
			||||||
 | 
							(*bit) += side;
 | 
				
			||||||
 | 
							mb_set_bit(*bit, bitmap);
 | 
				
			||||||
 | 
							return -1;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void mb_buddy_mark_free(struct ext4_buddy *e4b, int first, int last)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int max;
 | 
				
			||||||
 | 
						int order = 1;
 | 
				
			||||||
 | 
						void *buddy = mb_find_buddy(e4b, order, &max);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						while (buddy) {
 | 
				
			||||||
 | 
							void *buddy2;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* Bits in range [first; last] are known to be set since
 | 
				
			||||||
 | 
							 * corresponding blocks were allocated. Bits in range
 | 
				
			||||||
 | 
							 * (first; last) will stay set because they form buddies on
 | 
				
			||||||
 | 
							 * upper layer. We just deal with borders if they don't
 | 
				
			||||||
 | 
							 * align with upper layer and then go up.
 | 
				
			||||||
 | 
							 * Releasing entire group is all about clearing
 | 
				
			||||||
 | 
							 * single bit of highest order buddy.
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* Example:
 | 
				
			||||||
 | 
							 * ---------------------------------
 | 
				
			||||||
 | 
							 * |   1   |   1   |   1   |   1   |
 | 
				
			||||||
 | 
							 * ---------------------------------
 | 
				
			||||||
 | 
							 * | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
 | 
				
			||||||
 | 
							 * ---------------------------------
 | 
				
			||||||
 | 
							 *   0   1   2   3   4   5   6   7
 | 
				
			||||||
 | 
							 *      \_____________________/
 | 
				
			||||||
 | 
							 *
 | 
				
			||||||
 | 
							 * Neither [1] nor [6] is aligned to above layer.
 | 
				
			||||||
 | 
							 * Left neighbour [0] is free, so mark it busy,
 | 
				
			||||||
 | 
							 * decrease bb_counters and extend range to
 | 
				
			||||||
 | 
							 * [0; 6]
 | 
				
			||||||
 | 
							 * Right neighbour [7] is busy. It can't be coaleasced with [6], so
 | 
				
			||||||
 | 
							 * mark [6] free, increase bb_counters and shrink range to
 | 
				
			||||||
 | 
							 * [0; 5].
 | 
				
			||||||
 | 
							 * Then shift range to [0; 2], go up and do the same.
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (first & 1)
 | 
				
			||||||
 | 
								e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&first, buddy, -1);
 | 
				
			||||||
 | 
							if (!(last & 1))
 | 
				
			||||||
 | 
								e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&last, buddy, 1);
 | 
				
			||||||
 | 
							if (first > last)
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
							order++;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (first == last || !(buddy2 = mb_find_buddy(e4b, order, &max))) {
 | 
				
			||||||
 | 
								mb_clear_bits(buddy, first, last - first + 1);
 | 
				
			||||||
 | 
								e4b->bd_info->bb_counters[order - 1] += last - first + 1;
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							first >>= 1;
 | 
				
			||||||
 | 
							last >>= 1;
 | 
				
			||||||
 | 
							buddy = buddy2;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
 | 
				
			||||||
 | 
								   int first, int count)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int left_is_free = 0;
 | 
				
			||||||
 | 
						int right_is_free = 0;
 | 
				
			||||||
 | 
						int block;
 | 
				
			||||||
 | 
						int last = first + count - 1;
 | 
				
			||||||
	struct super_block *sb = e4b->bd_sb;
 | 
						struct super_block *sb = e4b->bd_sb;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	BUG_ON(first + count > (sb->s_blocksize << 3));
 | 
						BUG_ON(last >= (sb->s_blocksize << 3));
 | 
				
			||||||
	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
 | 
						assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
 | 
				
			||||||
	mb_check_buddy(e4b);
 | 
						mb_check_buddy(e4b);
 | 
				
			||||||
	mb_free_blocks_double(inode, e4b, first, count);
 | 
						mb_free_blocks_double(inode, e4b, first, count);
 | 
				
			||||||
| 
						 | 
					@ -1283,67 +1407,54 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
 | 
				
			||||||
	if (first < e4b->bd_info->bb_first_free)
 | 
						if (first < e4b->bd_info->bb_first_free)
 | 
				
			||||||
		e4b->bd_info->bb_first_free = first;
 | 
							e4b->bd_info->bb_first_free = first;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* let's maintain fragments counter */
 | 
						/* access memory sequentially: check left neighbour,
 | 
				
			||||||
 | 
						 * clear range and then check right neighbour
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
	if (first != 0)
 | 
						if (first != 0)
 | 
				
			||||||
		block = !mb_test_bit(first - 1, e4b->bd_bitmap);
 | 
							left_is_free = !mb_test_bit(first - 1, e4b->bd_bitmap);
 | 
				
			||||||
	if (first + count < EXT4_SB(sb)->s_mb_maxs[0])
 | 
						block = mb_test_and_clear_bits(e4b->bd_bitmap, first, count);
 | 
				
			||||||
		max = !mb_test_bit(first + count, e4b->bd_bitmap);
 | 
						if (last + 1 < EXT4_SB(sb)->s_mb_maxs[0])
 | 
				
			||||||
	if (block && max)
 | 
							right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (unlikely(block != -1)) {
 | 
				
			||||||
 | 
							ext4_fsblk_t blocknr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
 | 
				
			||||||
 | 
							blocknr += EXT4_C2B(EXT4_SB(sb), block);
 | 
				
			||||||
 | 
							ext4_grp_locked_error(sb, e4b->bd_group,
 | 
				
			||||||
 | 
									      inode ? inode->i_ino : 0,
 | 
				
			||||||
 | 
									      blocknr,
 | 
				
			||||||
 | 
									      "freeing already freed block "
 | 
				
			||||||
 | 
									      "(bit %u)", block);
 | 
				
			||||||
 | 
							mb_regenerate_buddy(e4b);
 | 
				
			||||||
 | 
							goto done;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* let's maintain fragments counter */
 | 
				
			||||||
 | 
						if (left_is_free && right_is_free)
 | 
				
			||||||
		e4b->bd_info->bb_fragments--;
 | 
							e4b->bd_info->bb_fragments--;
 | 
				
			||||||
	else if (!block && !max)
 | 
						else if (!left_is_free && !right_is_free)
 | 
				
			||||||
		e4b->bd_info->bb_fragments++;
 | 
							e4b->bd_info->bb_fragments++;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* let's maintain buddy itself */
 | 
						/* buddy[0] == bd_bitmap is a special case, so handle
 | 
				
			||||||
	while (count-- > 0) {
 | 
						 * it right away and let mb_buddy_mark_free stay free of
 | 
				
			||||||
		block = first++;
 | 
						 * zero order checks.
 | 
				
			||||||
		order = 0;
 | 
						 * Check if neighbours are to be coaleasced,
 | 
				
			||||||
 | 
						 * adjust bitmap bb_counters and borders appropriately.
 | 
				
			||||||
		if (!mb_test_bit(block, e4b->bd_bitmap)) {
 | 
						 */
 | 
				
			||||||
			ext4_fsblk_t blocknr;
 | 
						if (first & 1) {
 | 
				
			||||||
 | 
							first += !left_is_free;
 | 
				
			||||||
			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
 | 
							e4b->bd_info->bb_counters[0] += left_is_free ? -1 : 1;
 | 
				
			||||||
			blocknr += EXT4_C2B(EXT4_SB(sb), block);
 | 
					 | 
				
			||||||
			ext4_grp_locked_error(sb, e4b->bd_group,
 | 
					 | 
				
			||||||
					      inode ? inode->i_ino : 0,
 | 
					 | 
				
			||||||
					      blocknr,
 | 
					 | 
				
			||||||
					      "freeing already freed block "
 | 
					 | 
				
			||||||
					      "(bit %u)", block);
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		mb_clear_bit(block, e4b->bd_bitmap);
 | 
					 | 
				
			||||||
		e4b->bd_info->bb_counters[order]++;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		/* start of the buddy */
 | 
					 | 
				
			||||||
		buddy = mb_find_buddy(e4b, order, &max);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		do {
 | 
					 | 
				
			||||||
			block &= ~1UL;
 | 
					 | 
				
			||||||
			if (mb_test_bit(block, buddy) ||
 | 
					 | 
				
			||||||
					mb_test_bit(block + 1, buddy))
 | 
					 | 
				
			||||||
				break;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
			/* both the buddies are free, try to coalesce them */
 | 
					 | 
				
			||||||
			buddy2 = mb_find_buddy(e4b, order + 1, &max);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
			if (!buddy2)
 | 
					 | 
				
			||||||
				break;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
			if (order > 0) {
 | 
					 | 
				
			||||||
				/* for special purposes, we don't set
 | 
					 | 
				
			||||||
				 * free bits in bitmap */
 | 
					 | 
				
			||||||
				mb_set_bit(block, buddy);
 | 
					 | 
				
			||||||
				mb_set_bit(block + 1, buddy);
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
			e4b->bd_info->bb_counters[order]--;
 | 
					 | 
				
			||||||
			e4b->bd_info->bb_counters[order]--;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
			block = block >> 1;
 | 
					 | 
				
			||||||
			order++;
 | 
					 | 
				
			||||||
			e4b->bd_info->bb_counters[order]++;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
			mb_clear_bit(block, buddy2);
 | 
					 | 
				
			||||||
			buddy = buddy2;
 | 
					 | 
				
			||||||
		} while (1);
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
						if (!(last & 1)) {
 | 
				
			||||||
 | 
							last -= !right_is_free;
 | 
				
			||||||
 | 
							e4b->bd_info->bb_counters[0] += right_is_free ? -1 : 1;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (first <= last)
 | 
				
			||||||
 | 
							mb_buddy_mark_free(e4b, first >> 1, last >> 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					done:
 | 
				
			||||||
	mb_set_largest_free_order(sb, e4b->bd_info);
 | 
						mb_set_largest_free_order(sb, e4b->bd_info);
 | 
				
			||||||
	mb_check_buddy(e4b);
 | 
						mb_check_buddy(e4b);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue