mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	There is no point in reserving more space than actually available on the data device for the worst case scenario that is unlikely to happen. Reserve at most 1/4th of the data device blocks, which is still a heuristic. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
		
			
				
	
	
		
			322 lines
		
	
	
	
		
			8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			322 lines
		
	
	
	
		
			8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
// SPDX-License-Identifier: GPL-2.0-or-later
 | 
						|
/*
 | 
						|
 * Copyright (c) 2018-2024 Oracle.  All Rights Reserved.
 | 
						|
 * Author: Darrick J. Wong <djwong@kernel.org>
 | 
						|
 */
 | 
						|
#include "xfs.h"
 | 
						|
#include "xfs_fs.h"
 | 
						|
#include "xfs_shared.h"
 | 
						|
#include "xfs_format.h"
 | 
						|
#include "xfs_log_format.h"
 | 
						|
#include "xfs_trans_resv.h"
 | 
						|
#include "xfs_bit.h"
 | 
						|
#include "xfs_sb.h"
 | 
						|
#include "xfs_mount.h"
 | 
						|
#include "xfs_defer.h"
 | 
						|
#include "xfs_trans.h"
 | 
						|
#include "xfs_metafile.h"
 | 
						|
#include "xfs_trace.h"
 | 
						|
#include "xfs_inode.h"
 | 
						|
#include "xfs_quota.h"
 | 
						|
#include "xfs_errortag.h"
 | 
						|
#include "xfs_error.h"
 | 
						|
#include "xfs_alloc.h"
 | 
						|
#include "xfs_rtgroup.h"
 | 
						|
#include "xfs_rtrmap_btree.h"
 | 
						|
#include "xfs_rtrefcount_btree.h"
 | 
						|
 | 
						|
static const struct {
 | 
						|
	enum xfs_metafile_type	mtype;
 | 
						|
	const char		*name;
 | 
						|
} xfs_metafile_type_strs[] = { XFS_METAFILE_TYPE_STR };
 | 
						|
 | 
						|
const char *
 | 
						|
xfs_metafile_type_str(enum xfs_metafile_type metatype)
 | 
						|
{
 | 
						|
	unsigned int	i;
 | 
						|
 | 
						|
	for (i = 0; i < ARRAY_SIZE(xfs_metafile_type_strs); i++) {
 | 
						|
		if (xfs_metafile_type_strs[i].mtype == metatype)
 | 
						|
			return xfs_metafile_type_strs[i].name;
 | 
						|
	}
 | 
						|
 | 
						|
	return NULL;
 | 
						|
}
 | 
						|
 | 
						|
/* Set up an inode to be recognized as a metadata directory inode. */
 | 
						|
void
 | 
						|
xfs_metafile_set_iflag(
 | 
						|
	struct xfs_trans	*tp,
 | 
						|
	struct xfs_inode	*ip,
 | 
						|
	enum xfs_metafile_type	metafile_type)
 | 
						|
{
 | 
						|
	VFS_I(ip)->i_mode &= ~0777;
 | 
						|
	VFS_I(ip)->i_uid = GLOBAL_ROOT_UID;
 | 
						|
	VFS_I(ip)->i_gid = GLOBAL_ROOT_GID;
 | 
						|
	if (S_ISDIR(VFS_I(ip)->i_mode))
 | 
						|
		ip->i_diflags |= XFS_METADIR_DIFLAGS;
 | 
						|
	else
 | 
						|
		ip->i_diflags |= XFS_METAFILE_DIFLAGS;
 | 
						|
	ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
 | 
						|
	ip->i_diflags2 |= XFS_DIFLAG2_METADATA;
 | 
						|
	ip->i_metatype = metafile_type;
 | 
						|
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 | 
						|
}
 | 
						|
 | 
						|
/* Clear the metadata directory inode flag. */
 | 
						|
void
 | 
						|
xfs_metafile_clear_iflag(
 | 
						|
	struct xfs_trans	*tp,
 | 
						|
	struct xfs_inode	*ip)
 | 
						|
{
 | 
						|
	ASSERT(xfs_is_metadir_inode(ip));
 | 
						|
	ASSERT(VFS_I(ip)->i_nlink == 0);
 | 
						|
 | 
						|
	ip->i_diflags2 &= ~XFS_DIFLAG2_METADATA;
 | 
						|
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Is the metafile reservations at or beneath a certain threshold?
 | 
						|
 */
 | 
						|
static inline bool
 | 
						|
xfs_metafile_resv_can_cover(
 | 
						|
	struct xfs_mount	*mp,
 | 
						|
	int64_t			rhs)
 | 
						|
{
 | 
						|
	/*
 | 
						|
	 * The amount of space that can be allocated to this metadata file is
 | 
						|
	 * the remaining reservation for the particular metadata file + the
 | 
						|
	 * global free block count.  Take care of the first case to avoid
 | 
						|
	 * touching the per-cpu counter.
 | 
						|
	 */
 | 
						|
	if (mp->m_metafile_resv_avail >= rhs)
 | 
						|
		return true;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * There aren't enough blocks left in the inode's reservation, but it
 | 
						|
	 * isn't critical unless there also isn't enough free space.
 | 
						|
	 */
 | 
						|
	return xfs_compare_freecounter(mp, XC_FREE_BLOCKS,
 | 
						|
			rhs - mp->m_metafile_resv_avail, 2048) >= 0;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Is the metafile reservation critically low on blocks?  For now we'll define
 | 
						|
 * that as the number of blocks we can get our hands on being less than 10% of
 | 
						|
 * what we reserved or less than some arbitrary number (maximum btree height).
 | 
						|
 */
 | 
						|
bool
 | 
						|
xfs_metafile_resv_critical(
 | 
						|
	struct xfs_mount	*mp)
 | 
						|
{
 | 
						|
	ASSERT(xfs_has_metadir(mp));
 | 
						|
 | 
						|
	trace_xfs_metafile_resv_critical(mp, 0);
 | 
						|
 | 
						|
	if (!xfs_metafile_resv_can_cover(mp, mp->m_rtbtree_maxlevels))
 | 
						|
		return true;
 | 
						|
 | 
						|
	if (!xfs_metafile_resv_can_cover(mp,
 | 
						|
			div_u64(mp->m_metafile_resv_target, 10)))
 | 
						|
		return true;
 | 
						|
 | 
						|
	return XFS_TEST_ERROR(false, mp, XFS_ERRTAG_METAFILE_RESV_CRITICAL);
 | 
						|
}
 | 
						|
 | 
						|
/* Allocate a block from the metadata file's reservation. */
 | 
						|
void
 | 
						|
xfs_metafile_resv_alloc_space(
 | 
						|
	struct xfs_inode	*ip,
 | 
						|
	struct xfs_alloc_arg	*args)
 | 
						|
{
 | 
						|
	struct xfs_mount	*mp = ip->i_mount;
 | 
						|
	int64_t			len = args->len;
 | 
						|
 | 
						|
	ASSERT(xfs_is_metadir_inode(ip));
 | 
						|
	ASSERT(args->resv == XFS_AG_RESV_METAFILE);
 | 
						|
 | 
						|
	trace_xfs_metafile_resv_alloc_space(mp, args->len);
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Allocate the blocks from the metadata inode's block reservation
 | 
						|
	 * and update the ondisk sb counter.
 | 
						|
	 */
 | 
						|
	mutex_lock(&mp->m_metafile_resv_lock);
 | 
						|
	if (mp->m_metafile_resv_avail > 0) {
 | 
						|
		int64_t		from_resv;
 | 
						|
 | 
						|
		from_resv = min_t(int64_t, len, mp->m_metafile_resv_avail);
 | 
						|
		mp->m_metafile_resv_avail -= from_resv;
 | 
						|
		xfs_mod_delalloc(ip, 0, -from_resv);
 | 
						|
		xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS,
 | 
						|
				-from_resv);
 | 
						|
		len -= from_resv;
 | 
						|
	}
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Any allocation in excess of the reservation requires in-core and
 | 
						|
	 * on-disk fdblocks updates.  If we can grab @len blocks from the
 | 
						|
	 * in-core fdblocks then all we need to do is update the on-disk
 | 
						|
	 * superblock; if not, then try to steal some from the transaction's
 | 
						|
	 * block reservation.  Overruns are only expected for rmap btrees.
 | 
						|
	 */
 | 
						|
	if (len) {
 | 
						|
		unsigned int	field;
 | 
						|
		int		error;
 | 
						|
 | 
						|
		error = xfs_dec_fdblocks(ip->i_mount, len, true);
 | 
						|
		if (error)
 | 
						|
			field = XFS_TRANS_SB_FDBLOCKS;
 | 
						|
		else
 | 
						|
			field = XFS_TRANS_SB_RES_FDBLOCKS;
 | 
						|
 | 
						|
		xfs_trans_mod_sb(args->tp, field, -len);
 | 
						|
	}
 | 
						|
 | 
						|
	mp->m_metafile_resv_used += args->len;
 | 
						|
	mutex_unlock(&mp->m_metafile_resv_lock);
 | 
						|
 | 
						|
	ip->i_nblocks += args->len;
 | 
						|
	xfs_trans_log_inode(args->tp, ip, XFS_ILOG_CORE);
 | 
						|
}
 | 
						|
 | 
						|
/* Free a block to the metadata file's reservation. */
 | 
						|
void
 | 
						|
xfs_metafile_resv_free_space(
 | 
						|
	struct xfs_inode	*ip,
 | 
						|
	struct xfs_trans	*tp,
 | 
						|
	xfs_filblks_t		len)
 | 
						|
{
 | 
						|
	struct xfs_mount	*mp = ip->i_mount;
 | 
						|
	int64_t			to_resv;
 | 
						|
 | 
						|
	ASSERT(xfs_is_metadir_inode(ip));
 | 
						|
 | 
						|
	trace_xfs_metafile_resv_free_space(mp, len);
 | 
						|
 | 
						|
	ip->i_nblocks -= len;
 | 
						|
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 | 
						|
 | 
						|
	mutex_lock(&mp->m_metafile_resv_lock);
 | 
						|
	mp->m_metafile_resv_used -= len;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Add the freed blocks back into the inode's delalloc reservation
 | 
						|
	 * until it reaches the maximum size.  Update the ondisk fdblocks only.
 | 
						|
	 */
 | 
						|
	to_resv = mp->m_metafile_resv_target -
 | 
						|
		(mp->m_metafile_resv_used + mp->m_metafile_resv_avail);
 | 
						|
	if (to_resv > 0) {
 | 
						|
		to_resv = min_t(int64_t, to_resv, len);
 | 
						|
		mp->m_metafile_resv_avail += to_resv;
 | 
						|
		xfs_mod_delalloc(ip, 0, to_resv);
 | 
						|
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, to_resv);
 | 
						|
		len -= to_resv;
 | 
						|
	}
 | 
						|
	mutex_unlock(&mp->m_metafile_resv_lock);
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Everything else goes back to the filesystem, so update the in-core
 | 
						|
	 * and on-disk counters.
 | 
						|
	 */
 | 
						|
	if (len)
 | 
						|
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len);
 | 
						|
}
 | 
						|
 | 
						|
static void
 | 
						|
__xfs_metafile_resv_free(
 | 
						|
	struct xfs_mount	*mp)
 | 
						|
{
 | 
						|
	if (mp->m_metafile_resv_avail) {
 | 
						|
		xfs_mod_sb_delalloc(mp, -(int64_t)mp->m_metafile_resv_avail);
 | 
						|
		xfs_add_fdblocks(mp, mp->m_metafile_resv_avail);
 | 
						|
	}
 | 
						|
	mp->m_metafile_resv_avail = 0;
 | 
						|
	mp->m_metafile_resv_used = 0;
 | 
						|
	mp->m_metafile_resv_target = 0;
 | 
						|
}
 | 
						|
 | 
						|
/* Release unused metafile space reservation. */
 | 
						|
void
 | 
						|
xfs_metafile_resv_free(
 | 
						|
	struct xfs_mount	*mp)
 | 
						|
{
 | 
						|
	if (!xfs_has_metadir(mp))
 | 
						|
		return;
 | 
						|
 | 
						|
	trace_xfs_metafile_resv_free(mp, 0);
 | 
						|
 | 
						|
	mutex_lock(&mp->m_metafile_resv_lock);
 | 
						|
	__xfs_metafile_resv_free(mp);
 | 
						|
	mutex_unlock(&mp->m_metafile_resv_lock);
 | 
						|
}
 | 
						|
 | 
						|
/* Set up a metafile space reservation. */
 | 
						|
int
 | 
						|
xfs_metafile_resv_init(
 | 
						|
	struct xfs_mount	*mp)
 | 
						|
{
 | 
						|
	struct xfs_rtgroup	*rtg = NULL;
 | 
						|
	xfs_filblks_t		used = 0, target = 0;
 | 
						|
	xfs_filblks_t		hidden_space;
 | 
						|
	xfs_rfsblock_t		dblocks_avail = mp->m_sb.sb_dblocks / 4;
 | 
						|
	int			error = 0;
 | 
						|
 | 
						|
	if (!xfs_has_metadir(mp))
 | 
						|
		return 0;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Free any previous reservation to have a clean slate.
 | 
						|
	 */
 | 
						|
	mutex_lock(&mp->m_metafile_resv_lock);
 | 
						|
	__xfs_metafile_resv_free(mp);
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Currently the only btree metafiles that require reservations are the
 | 
						|
	 * rtrmap and the rtrefcount.  Anything new will have to be added here
 | 
						|
	 * as well.
 | 
						|
	 */
 | 
						|
	while ((rtg = xfs_rtgroup_next(mp, rtg))) {
 | 
						|
		if (xfs_has_rtrmapbt(mp)) {
 | 
						|
			used += rtg_rmap(rtg)->i_nblocks;
 | 
						|
			target += xfs_rtrmapbt_calc_reserves(mp);
 | 
						|
		}
 | 
						|
		if (xfs_has_rtreflink(mp)) {
 | 
						|
			used += rtg_refcount(rtg)->i_nblocks;
 | 
						|
			target += xfs_rtrefcountbt_calc_reserves(mp);
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	if (!target)
 | 
						|
		goto out_unlock;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Space taken by the per-AG metadata btrees are accounted on-disk as
 | 
						|
	 * used space.  We therefore only hide the space that is reserved but
 | 
						|
	 * not used by the trees.
 | 
						|
	 */
 | 
						|
	if (used > target)
 | 
						|
		target = used;
 | 
						|
	else if (target > dblocks_avail)
 | 
						|
		target = dblocks_avail;
 | 
						|
	hidden_space = target - used;
 | 
						|
 | 
						|
	error = xfs_dec_fdblocks(mp, hidden_space, true);
 | 
						|
	if (error) {
 | 
						|
		trace_xfs_metafile_resv_init_error(mp, 0);
 | 
						|
		goto out_unlock;
 | 
						|
	}
 | 
						|
 | 
						|
	xfs_mod_sb_delalloc(mp, hidden_space);
 | 
						|
 | 
						|
	mp->m_metafile_resv_target = target;
 | 
						|
	mp->m_metafile_resv_used = used;
 | 
						|
	mp->m_metafile_resv_avail = hidden_space;
 | 
						|
 | 
						|
	trace_xfs_metafile_resv_init(mp, target);
 | 
						|
 | 
						|
out_unlock:
 | 
						|
	mutex_unlock(&mp->m_metafile_resv_lock);
 | 
						|
	return error;
 | 
						|
}
 |