forked from mirrors/linux
		
	 e9c4d8bfb2
			
		
	
	
		e9c4d8bfb2
		
	
	
	
	
		
			
			Split the lookup and refcount handling of struct xfs_perag into an embedded xfs_group structure that can be reused for the upcoming realtime groups. It will be extended with more features later. Note that he xg_type field will only need a single bit even with realtime group support. For now it fills a hole, but it might be worth to fold it into another field if we can use this space better. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
		
			
				
	
	
		
			403 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			403 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| // SPDX-License-Identifier: GPL-2.0
 | |
| /*
 | |
|  * Copyright (c) 2006-2007 Silicon Graphics, Inc.
 | |
|  * Copyright (c) 2014 Christoph Hellwig.
 | |
|  * All Rights Reserved.
 | |
|  */
 | |
| #include "xfs.h"
 | |
| #include "xfs_shared.h"
 | |
| #include "xfs_format.h"
 | |
| #include "xfs_log_format.h"
 | |
| #include "xfs_trans_resv.h"
 | |
| #include "xfs_mount.h"
 | |
| #include "xfs_inode.h"
 | |
| #include "xfs_bmap.h"
 | |
| #include "xfs_bmap_util.h"
 | |
| #include "xfs_alloc.h"
 | |
| #include "xfs_mru_cache.h"
 | |
| #include "xfs_trace.h"
 | |
| #include "xfs_ag.h"
 | |
| #include "xfs_ag_resv.h"
 | |
| #include "xfs_trans.h"
 | |
| #include "xfs_filestream.h"
 | |
| 
 | |
| struct xfs_fstrm_item {
 | |
| 	struct xfs_mru_cache_elem	mru;
 | |
| 	struct xfs_perag		*pag; /* AG in use for this directory */
 | |
| };
 | |
| 
 | |
| enum xfs_fstrm_alloc {
 | |
| 	XFS_PICK_USERDATA = 1,
 | |
| 	XFS_PICK_LOWSPACE = 2,
 | |
| };
 | |
| 
 | |
| static void
 | |
| xfs_fstrm_free_func(
 | |
| 	void			*data,
 | |
| 	struct xfs_mru_cache_elem *mru)
 | |
| {
 | |
| 	struct xfs_fstrm_item	*item =
 | |
| 		container_of(mru, struct xfs_fstrm_item, mru);
 | |
| 	struct xfs_perag	*pag = item->pag;
 | |
| 
 | |
| 	trace_xfs_filestream_free(pag, mru->key);
 | |
| 	atomic_dec(&pag->pagf_fstrms);
 | |
| 	xfs_perag_rele(pag);
 | |
| 
 | |
| 	kfree(item);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Scan the AGs starting at start_agno looking for an AG that isn't in use and
 | |
|  * has at least minlen blocks free. If no AG is found to match the allocation
 | |
|  * requirements, pick the AG with the most free space in it.
 | |
|  */
 | |
| static int
 | |
| xfs_filestream_pick_ag(
 | |
| 	struct xfs_alloc_arg	*args,
 | |
| 	xfs_ino_t		pino,
 | |
| 	xfs_agnumber_t		start_agno,
 | |
| 	int			flags,
 | |
| 	xfs_extlen_t		*longest)
 | |
| {
 | |
| 	struct xfs_mount	*mp = args->mp;
 | |
| 	struct xfs_perag	*pag;
 | |
| 	struct xfs_perag	*max_pag = NULL;
 | |
| 	xfs_extlen_t		minlen = *longest;
 | |
| 	xfs_extlen_t		minfree, maxfree = 0;
 | |
| 	xfs_agnumber_t		agno;
 | |
| 	bool			first_pass = true;
 | |
| 
 | |
| 	/* 2% of an AG's blocks must be free for it to be chosen. */
 | |
| 	minfree = mp->m_sb.sb_agblocks / 50;
 | |
| 
 | |
| restart:
 | |
| 	for_each_perag_wrap(mp, start_agno, agno, pag) {
 | |
| 		int		err;
 | |
| 
 | |
| 		trace_xfs_filestream_scan(pag, pino);
 | |
| 
 | |
| 		*longest = 0;
 | |
| 		err = xfs_bmap_longest_free_extent(pag, NULL, longest);
 | |
| 		if (err) {
 | |
| 			if (err == -EAGAIN) {
 | |
| 				/* Couldn't lock the AGF, skip this AG. */
 | |
| 				err = 0;
 | |
| 				continue;
 | |
| 			}
 | |
| 			xfs_perag_rele(pag);
 | |
| 			if (max_pag)
 | |
| 				xfs_perag_rele(max_pag);
 | |
| 			return err;
 | |
| 		}
 | |
| 
 | |
| 		/* Keep track of the AG with the most free blocks. */
 | |
| 		if (pag->pagf_freeblks > maxfree) {
 | |
| 			maxfree = pag->pagf_freeblks;
 | |
| 			if (max_pag)
 | |
| 				xfs_perag_rele(max_pag);
 | |
| 			atomic_inc(&pag_group(pag)->xg_active_ref);
 | |
| 			max_pag = pag;
 | |
| 		}
 | |
| 
 | |
| 		/*
 | |
| 		 * The AG reference count does two things: it enforces mutual
 | |
| 		 * exclusion when examining the suitability of an AG in this
 | |
| 		 * loop, and it guards against two filestreams being established
 | |
| 		 * in the same AG as each other.
 | |
| 		 */
 | |
| 		if (atomic_inc_return(&pag->pagf_fstrms) <= 1) {
 | |
| 			if (((minlen && *longest >= minlen) ||
 | |
| 			     (!minlen && pag->pagf_freeblks >= minfree)) &&
 | |
| 			    (!xfs_perag_prefers_metadata(pag) ||
 | |
| 			     !(flags & XFS_PICK_USERDATA) ||
 | |
| 			     (flags & XFS_PICK_LOWSPACE))) {
 | |
| 				/* Break out, retaining the reference on the AG. */
 | |
| 				if (max_pag)
 | |
| 					xfs_perag_rele(max_pag);
 | |
| 				goto done;
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		/* Drop the reference on this AG, it's not usable. */
 | |
| 		atomic_dec(&pag->pagf_fstrms);
 | |
| 	}
 | |
| 
 | |
| 	/*
 | |
| 	 * Allow a second pass to give xfs_bmap_longest_free_extent() another
 | |
| 	 * attempt at locking AGFs that it might have skipped over before we
 | |
| 	 * fail.
 | |
| 	 */
 | |
| 	if (first_pass) {
 | |
| 		first_pass = false;
 | |
| 		goto restart;
 | |
| 	}
 | |
| 
 | |
| 	/*
 | |
| 	 * We must be low on data space, so run a final lowspace optimised
 | |
| 	 * selection pass if we haven't already.
 | |
| 	 */
 | |
| 	if (!(flags & XFS_PICK_LOWSPACE)) {
 | |
| 		flags |= XFS_PICK_LOWSPACE;
 | |
| 		goto restart;
 | |
| 	}
 | |
| 
 | |
| 	/*
 | |
| 	 * No unassociated AGs are available, so select the AG with the most
 | |
| 	 * free space, regardless of whether it's already in use by another
 | |
| 	 * filestream. It none suit, just use whatever AG we can grab.
 | |
| 	 */
 | |
| 	if (!max_pag) {
 | |
| 		for_each_perag_wrap(args->mp, 0, start_agno, pag) {
 | |
| 			max_pag = pag;
 | |
| 			break;
 | |
| 		}
 | |
| 
 | |
| 		/* Bail if there are no AGs at all to select from. */
 | |
| 		if (!max_pag)
 | |
| 			return -ENOSPC;
 | |
| 	}
 | |
| 
 | |
| 	pag = max_pag;
 | |
| 	atomic_inc(&pag->pagf_fstrms);
 | |
| done:
 | |
| 	trace_xfs_filestream_pick(pag, pino);
 | |
| 	args->pag = pag;
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| static struct xfs_inode *
 | |
| xfs_filestream_get_parent(
 | |
| 	struct xfs_inode	*ip)
 | |
| {
 | |
| 	struct inode		*inode = VFS_I(ip), *dir = NULL;
 | |
| 	struct dentry		*dentry, *parent;
 | |
| 
 | |
| 	dentry = d_find_alias(inode);
 | |
| 	if (!dentry)
 | |
| 		goto out;
 | |
| 
 | |
| 	parent = dget_parent(dentry);
 | |
| 	if (!parent)
 | |
| 		goto out_dput;
 | |
| 
 | |
| 	dir = igrab(d_inode(parent));
 | |
| 	dput(parent);
 | |
| 
 | |
| out_dput:
 | |
| 	dput(dentry);
 | |
| out:
 | |
| 	return dir ? XFS_I(dir) : NULL;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Lookup the mru cache for an existing association. If one exists and we can
 | |
|  * use it, return with an active perag reference indicating that the allocation
 | |
|  * will proceed with that association.
 | |
|  *
 | |
|  * If we have no association, or we cannot use the current one and have to
 | |
|  * destroy it, return with longest = 0 to tell the caller to create a new
 | |
|  * association.
 | |
|  */
 | |
| static int
 | |
| xfs_filestream_lookup_association(
 | |
| 	struct xfs_bmalloca	*ap,
 | |
| 	struct xfs_alloc_arg	*args,
 | |
| 	xfs_ino_t		pino,
 | |
| 	xfs_extlen_t		*longest)
 | |
| {
 | |
| 	struct xfs_mount	*mp = args->mp;
 | |
| 	struct xfs_perag	*pag;
 | |
| 	struct xfs_mru_cache_elem *mru;
 | |
| 	int			error = 0;
 | |
| 
 | |
| 	*longest = 0;
 | |
| 	mru = xfs_mru_cache_lookup(mp->m_filestream, pino);
 | |
| 	if (!mru)
 | |
| 		return 0;
 | |
| 	/*
 | |
| 	 * Grab the pag and take an extra active reference for the caller whilst
 | |
| 	 * the mru item cannot go away. This means we'll pin the perag with
 | |
| 	 * the reference we get here even if the filestreams association is torn
 | |
| 	 * down immediately after we mark the lookup as done.
 | |
| 	 */
 | |
| 	pag = container_of(mru, struct xfs_fstrm_item, mru)->pag;
 | |
| 	atomic_inc(&pag_group(pag)->xg_active_ref);
 | |
| 	xfs_mru_cache_done(mp->m_filestream);
 | |
| 
 | |
| 	trace_xfs_filestream_lookup(pag, ap->ip->i_ino);
 | |
| 
 | |
| 	ap->blkno = xfs_agbno_to_fsb(pag, 0);
 | |
| 	xfs_bmap_adjacent(ap);
 | |
| 
 | |
| 	/*
 | |
| 	 * If there is very little free space before we start a filestreams
 | |
| 	 * allocation, we're almost guaranteed to fail to find a large enough
 | |
| 	 * free space available so just use the cached AG.
 | |
| 	 */
 | |
| 	if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
 | |
| 		*longest = 1;
 | |
| 		goto out_done;
 | |
| 	}
 | |
| 
 | |
| 	error = xfs_bmap_longest_free_extent(pag, args->tp, longest);
 | |
| 	if (error == -EAGAIN)
 | |
| 		error = 0;
 | |
| 	if (error || *longest < args->maxlen) {
 | |
| 		/* We aren't going to use this perag */
 | |
| 		*longest = 0;
 | |
| 		xfs_perag_rele(pag);
 | |
| 		return error;
 | |
| 	}
 | |
| 
 | |
| out_done:
 | |
| 	args->pag = pag;
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| static int
 | |
| xfs_filestream_create_association(
 | |
| 	struct xfs_bmalloca	*ap,
 | |
| 	struct xfs_alloc_arg	*args,
 | |
| 	xfs_ino_t		pino,
 | |
| 	xfs_extlen_t		*longest)
 | |
| {
 | |
| 	struct xfs_mount	*mp = args->mp;
 | |
| 	struct xfs_mru_cache_elem *mru;
 | |
| 	struct xfs_fstrm_item	*item;
 | |
| 	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, pino);
 | |
| 	int			flags = 0;
 | |
| 	int			error;
 | |
| 
 | |
| 	/* Changing parent AG association now, so remove the existing one. */
 | |
| 	mru = xfs_mru_cache_remove(mp->m_filestream, pino);
 | |
| 	if (mru) {
 | |
| 		struct xfs_fstrm_item *item =
 | |
| 			container_of(mru, struct xfs_fstrm_item, mru);
 | |
| 
 | |
| 		agno = (pag_agno(item->pag) + 1) % mp->m_sb.sb_agcount;
 | |
| 		xfs_fstrm_free_func(mp, mru);
 | |
| 	} else if (xfs_is_inode32(mp)) {
 | |
| 		xfs_agnumber_t	 rotorstep = xfs_rotorstep;
 | |
| 
 | |
| 		agno = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount;
 | |
| 		mp->m_agfrotor = (mp->m_agfrotor + 1) %
 | |
| 				 (mp->m_sb.sb_agcount * rotorstep);
 | |
| 	}
 | |
| 
 | |
| 	ap->blkno = XFS_AGB_TO_FSB(args->mp, agno, 0);
 | |
| 	xfs_bmap_adjacent(ap);
 | |
| 
 | |
| 	if (ap->datatype & XFS_ALLOC_USERDATA)
 | |
| 		flags |= XFS_PICK_USERDATA;
 | |
| 	if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
 | |
| 		flags |= XFS_PICK_LOWSPACE;
 | |
| 
 | |
| 	*longest = ap->length;
 | |
| 	error = xfs_filestream_pick_ag(args, pino, agno, flags, longest);
 | |
| 	if (error)
 | |
| 		return error;
 | |
| 
 | |
| 	/*
 | |
| 	 * We are going to use this perag now, so create an assoication for it.
 | |
| 	 * xfs_filestream_pick_ag() has already bumped the perag fstrms counter
 | |
| 	 * for us, so all we need to do here is take another active reference to
 | |
| 	 * the perag for the cached association.
 | |
| 	 *
 | |
| 	 * If we fail to store the association, we need to drop the fstrms
 | |
| 	 * counter as well as drop the perag reference we take here for the
 | |
| 	 * item. We do not need to return an error for this failure - as long as
 | |
| 	 * we return a referenced AG, the allocation can still go ahead just
 | |
| 	 * fine.
 | |
| 	 */
 | |
| 	item = kmalloc(sizeof(*item), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 | |
| 	if (!item)
 | |
| 		goto out_put_fstrms;
 | |
| 
 | |
| 	atomic_inc(&pag_group(args->pag)->xg_active_ref);
 | |
| 	item->pag = args->pag;
 | |
| 	error = xfs_mru_cache_insert(mp->m_filestream, pino, &item->mru);
 | |
| 	if (error)
 | |
| 		goto out_free_item;
 | |
| 	return 0;
 | |
| 
 | |
| out_free_item:
 | |
| 	xfs_perag_rele(item->pag);
 | |
| 	kfree(item);
 | |
| out_put_fstrms:
 | |
| 	atomic_dec(&args->pag->pagf_fstrms);
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Search for an allocation group with a single extent large enough for
 | |
|  * the request. First we look for an existing association and use that if it
 | |
|  * is found. Otherwise, we create a new association by selecting an AG that fits
 | |
|  * the allocation criteria.
 | |
|  *
 | |
|  * We return with a referenced perag in args->pag to indicate which AG we are
 | |
|  * allocating into or an error with no references held.
 | |
|  */
 | |
| int
 | |
| xfs_filestream_select_ag(
 | |
| 	struct xfs_bmalloca	*ap,
 | |
| 	struct xfs_alloc_arg	*args,
 | |
| 	xfs_extlen_t		*longest)
 | |
| {
 | |
| 	struct xfs_inode	*pip;
 | |
| 	xfs_ino_t		ino = 0;
 | |
| 	int			error = 0;
 | |
| 
 | |
| 	*longest = 0;
 | |
| 	args->total = ap->total;
 | |
| 	pip = xfs_filestream_get_parent(ap->ip);
 | |
| 	if (pip) {
 | |
| 		ino = pip->i_ino;
 | |
| 		error = xfs_filestream_lookup_association(ap, args, ino,
 | |
| 				longest);
 | |
| 		xfs_irele(pip);
 | |
| 		if (error)
 | |
| 			return error;
 | |
| 		if (*longest >= args->maxlen)
 | |
| 			goto out_select;
 | |
| 		if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
 | |
| 			goto out_select;
 | |
| 	}
 | |
| 
 | |
| 	error = xfs_filestream_create_association(ap, args, ino, longest);
 | |
| 	if (error)
 | |
| 		return error;
 | |
| 
 | |
| out_select:
 | |
| 	ap->blkno = xfs_agbno_to_fsb(args->pag, 0);
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| void
 | |
| xfs_filestream_deassociate(
 | |
| 	struct xfs_inode	*ip)
 | |
| {
 | |
| 	xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino);
 | |
| }
 | |
| 
 | |
| int
 | |
| xfs_filestream_mount(
 | |
| 	xfs_mount_t	*mp)
 | |
| {
 | |
| 	/*
 | |
| 	 * The filestream timer tunable is currently fixed within the range of
 | |
| 	 * one second to four minutes, with five seconds being the default.  The
 | |
| 	 * group count is somewhat arbitrary, but it'd be nice to adhere to the
 | |
| 	 * timer tunable to within about 10 percent.  This requires at least 10
 | |
| 	 * groups.
 | |
| 	 */
 | |
| 	return xfs_mru_cache_create(&mp->m_filestream, mp,
 | |
| 			xfs_fstrm_centisecs * 10, 10, xfs_fstrm_free_func);
 | |
| }
 | |
| 
 | |
| void
 | |
| xfs_filestream_unmount(
 | |
| 	xfs_mount_t	*mp)
 | |
| {
 | |
| 	xfs_mru_cache_destroy(mp->m_filestream);
 | |
| }
 |