mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	When a cookie is allocated that causes fscache_object structs to be
allocated, those objects are initialised with the cookie pointer, but
aren't blessed with a ref on that cookie unless the attachment is
successfully completed in fscache_attach_object().
If attachment fails because the parent object was dying or there was a
collision, fscache_attach_object() returns without incrementing the cookie
counter - but upon failure of this function, the object is released which
then puts the cookie, whether or not a ref was taken on the cookie.
Fix this by taking a ref on the cookie when it is assigned in
fscache_object_init(), even when we're creating a root object.
Analysis from Kiran Kumar:
This bug has been seen in 4.4.0-124-generic #148-Ubuntu kernel
BugLink: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1776277
The fscache cookie ref count is updated incorrectly during fscache object
allocation, resulting in the following Oops.
kernel BUG at /build/linux-Y09MKI/linux-4.4.0/fs/fscache/internal.h:321!
kernel BUG at /build/linux-Y09MKI/linux-4.4.0/fs/fscache/cookie.c:639!
[Cause]
Two threads are trying to operate on a cookie and two objects.
(1) One thread tries to unmount the filesystem and in the process goes over
    a huge list of objects, marking them dead and deleting them.
    cookie->usage is also decremented in the following path:
      nfs_fscache_release_super_cookie
       -> __fscache_relinquish_cookie
        ->__fscache_cookie_put
        ->BUG_ON(atomic_read(&cookie->usage) <= 0);
(2) A second thread tries to look up an object for reading data in the
    following path:
    fscache_alloc_object
    1) cachefiles_alloc_object
        -> fscache_object_init
           -> assign cookie, but usage not bumped.
    2) fscache_attach_object -> fails in cant_attach_object because the
         cookie's backing object or the cookie->parent object is going away
    3) fscache_put_object
        -> cachefiles_put_object
          ->fscache_object_destroy
            ->fscache_cookie_put
               ->BUG_ON(atomic_read(&cookie->usage) <= 0);
[NOTE from dhowells] It's unclear as to the circumstances in which (2) can
take place, given that thread (1) is in nfs_kill_super(), however a
conflicting NFS mount with slightly different parameters that creates a
different superblock would do it.  A backtrace from Kiran seems to show
that this is a possibility:
    kernel BUG at /build/linux-Y09MKI/linux-4.4.0/fs/fscache/cookie.c:639!
    ...
    RIP: __fscache_cookie_put+0x3a/0x40 [fscache]
    Call Trace:
     __fscache_relinquish_cookie+0x87/0x120 [fscache]
     nfs_fscache_release_super_cookie+0x2d/0xb0 [nfs]
     nfs_kill_super+0x29/0x40 [nfs]
     deactivate_locked_super+0x48/0x80
     deactivate_super+0x5c/0x60
     cleanup_mnt+0x3f/0x90
     __cleanup_mnt+0x12/0x20
     task_work_run+0x86/0xb0
     exit_to_usermode_loop+0xc2/0xd0
     syscall_return_slowpath+0x4e/0x60
     int_ret_from_sys_call+0x25/0x9f
[Fix] Bump up the cookie usage in fscache_object_init, when it is first
being assigned a cookie atomically such that the cookie is added and bumped
up if its refcount is not zero.  Remove the assignment in
fscache_attach_object().
[Testcase]
I have run ~100 hours of NFS stress tests and not seen this bug recur.
[Regression Potential]
 - Limited to fscache/cachefiles.
Fixes: ccc4fc3d11 ("FS-Cache: Implement the cookie management part of the netfs API")
Signed-off-by: Kiran Kumar Modukuri <kiran.modukuri@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
		
	
			
		
			
				
	
	
		
			278 lines
		
	
	
	
		
			7 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			278 lines
		
	
	
	
		
			7 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/* Bind and unbind a cache from the filesystem backing it
 | 
						|
 *
 | 
						|
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 | 
						|
 * Written by David Howells (dhowells@redhat.com)
 | 
						|
 *
 | 
						|
 * This program is free software; you can redistribute it and/or
 | 
						|
 * modify it under the terms of the GNU General Public Licence
 | 
						|
 * as published by the Free Software Foundation; either version
 | 
						|
 * 2 of the Licence, or (at your option) any later version.
 | 
						|
 */
 | 
						|
 | 
						|
#include <linux/module.h>
 | 
						|
#include <linux/init.h>
 | 
						|
#include <linux/sched.h>
 | 
						|
#include <linux/completion.h>
 | 
						|
#include <linux/slab.h>
 | 
						|
#include <linux/fs.h>
 | 
						|
#include <linux/file.h>
 | 
						|
#include <linux/namei.h>
 | 
						|
#include <linux/mount.h>
 | 
						|
#include <linux/statfs.h>
 | 
						|
#include <linux/ctype.h>
 | 
						|
#include <linux/xattr.h>
 | 
						|
#include "internal.h"
 | 
						|
 | 
						|
/* forward declaration: defined below, called from cachefiles_daemon_bind() */
static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache);
 | 
						|
 | 
						|
/*
 | 
						|
 * bind a directory as a cache
 | 
						|
 */
 | 
						|
int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args)
 | 
						|
{
 | 
						|
	_enter("{%u,%u,%u,%u,%u,%u},%s",
 | 
						|
	       cache->frun_percent,
 | 
						|
	       cache->fcull_percent,
 | 
						|
	       cache->fstop_percent,
 | 
						|
	       cache->brun_percent,
 | 
						|
	       cache->bcull_percent,
 | 
						|
	       cache->bstop_percent,
 | 
						|
	       args);
 | 
						|
 | 
						|
	/* start by checking things over */
 | 
						|
	ASSERT(cache->fstop_percent >= 0 &&
 | 
						|
	       cache->fstop_percent < cache->fcull_percent &&
 | 
						|
	       cache->fcull_percent < cache->frun_percent &&
 | 
						|
	       cache->frun_percent  < 100);
 | 
						|
 | 
						|
	ASSERT(cache->bstop_percent >= 0 &&
 | 
						|
	       cache->bstop_percent < cache->bcull_percent &&
 | 
						|
	       cache->bcull_percent < cache->brun_percent &&
 | 
						|
	       cache->brun_percent  < 100);
 | 
						|
 | 
						|
	if (*args) {
 | 
						|
		pr_err("'bind' command doesn't take an argument\n");
 | 
						|
		return -EINVAL;
 | 
						|
	}
 | 
						|
 | 
						|
	if (!cache->rootdirname) {
 | 
						|
		pr_err("No cache directory specified\n");
 | 
						|
		return -EINVAL;
 | 
						|
	}
 | 
						|
 | 
						|
	/* don't permit already bound caches to be re-bound */
 | 
						|
	if (test_bit(CACHEFILES_READY, &cache->flags)) {
 | 
						|
		pr_err("Cache already bound\n");
 | 
						|
		return -EBUSY;
 | 
						|
	}
 | 
						|
 | 
						|
	/* make sure we have copies of the tag and dirname strings */
 | 
						|
	if (!cache->tag) {
 | 
						|
		/* the tag string is released by the fops->release()
 | 
						|
		 * function, so we don't release it on error here */
 | 
						|
		cache->tag = kstrdup("CacheFiles", GFP_KERNEL);
 | 
						|
		if (!cache->tag)
 | 
						|
			return -ENOMEM;
 | 
						|
	}
 | 
						|
 | 
						|
	/* add the cache */
 | 
						|
	return cachefiles_daemon_add_cache(cache);
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * add a cache
 | 
						|
 */
 | 
						|
static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
 | 
						|
{
 | 
						|
	struct cachefiles_object *fsdef;
 | 
						|
	struct path path;
 | 
						|
	struct kstatfs stats;
 | 
						|
	struct dentry *graveyard, *cachedir, *root;
 | 
						|
	const struct cred *saved_cred;
 | 
						|
	int ret;
 | 
						|
 | 
						|
	_enter("");
 | 
						|
 | 
						|
	/* we want to work under the module's security ID */
 | 
						|
	ret = cachefiles_get_security_ID(cache);
 | 
						|
	if (ret < 0)
 | 
						|
		return ret;
 | 
						|
 | 
						|
	cachefiles_begin_secure(cache, &saved_cred);
 | 
						|
 | 
						|
	/* allocate the root index object */
 | 
						|
	ret = -ENOMEM;
 | 
						|
 | 
						|
	fsdef = kmem_cache_alloc(cachefiles_object_jar, GFP_KERNEL);
 | 
						|
	if (!fsdef)
 | 
						|
		goto error_root_object;
 | 
						|
 | 
						|
	ASSERTCMP(fsdef->backer, ==, NULL);
 | 
						|
 | 
						|
	atomic_set(&fsdef->usage, 1);
 | 
						|
	fsdef->type = FSCACHE_COOKIE_TYPE_INDEX;
 | 
						|
 | 
						|
	_debug("- fsdef %p", fsdef);
 | 
						|
 | 
						|
	/* look up the directory at the root of the cache */
 | 
						|
	ret = kern_path(cache->rootdirname, LOOKUP_DIRECTORY, &path);
 | 
						|
	if (ret < 0)
 | 
						|
		goto error_open_root;
 | 
						|
 | 
						|
	cache->mnt = path.mnt;
 | 
						|
	root = path.dentry;
 | 
						|
 | 
						|
	/* check parameters */
 | 
						|
	ret = -EOPNOTSUPP;
 | 
						|
	if (d_is_negative(root) ||
 | 
						|
	    !d_backing_inode(root)->i_op->lookup ||
 | 
						|
	    !d_backing_inode(root)->i_op->mkdir ||
 | 
						|
	    !(d_backing_inode(root)->i_opflags & IOP_XATTR) ||
 | 
						|
	    !root->d_sb->s_op->statfs ||
 | 
						|
	    !root->d_sb->s_op->sync_fs)
 | 
						|
		goto error_unsupported;
 | 
						|
 | 
						|
	ret = -EROFS;
 | 
						|
	if (sb_rdonly(root->d_sb))
 | 
						|
		goto error_unsupported;
 | 
						|
 | 
						|
	/* determine the security of the on-disk cache as this governs
 | 
						|
	 * security ID of files we create */
 | 
						|
	ret = cachefiles_determine_cache_security(cache, root, &saved_cred);
 | 
						|
	if (ret < 0)
 | 
						|
		goto error_unsupported;
 | 
						|
 | 
						|
	/* get the cache size and blocksize */
 | 
						|
	ret = vfs_statfs(&path, &stats);
 | 
						|
	if (ret < 0)
 | 
						|
		goto error_unsupported;
 | 
						|
 | 
						|
	ret = -ERANGE;
 | 
						|
	if (stats.f_bsize <= 0)
 | 
						|
		goto error_unsupported;
 | 
						|
 | 
						|
	ret = -EOPNOTSUPP;
 | 
						|
	if (stats.f_bsize > PAGE_SIZE)
 | 
						|
		goto error_unsupported;
 | 
						|
 | 
						|
	cache->bsize = stats.f_bsize;
 | 
						|
	cache->bshift = 0;
 | 
						|
	if (stats.f_bsize < PAGE_SIZE)
 | 
						|
		cache->bshift = PAGE_SHIFT - ilog2(stats.f_bsize);
 | 
						|
 | 
						|
	_debug("blksize %u (shift %u)",
 | 
						|
	       cache->bsize, cache->bshift);
 | 
						|
 | 
						|
	_debug("size %llu, avail %llu",
 | 
						|
	       (unsigned long long) stats.f_blocks,
 | 
						|
	       (unsigned long long) stats.f_bavail);
 | 
						|
 | 
						|
	/* set up caching limits */
 | 
						|
	do_div(stats.f_files, 100);
 | 
						|
	cache->fstop = stats.f_files * cache->fstop_percent;
 | 
						|
	cache->fcull = stats.f_files * cache->fcull_percent;
 | 
						|
	cache->frun  = stats.f_files * cache->frun_percent;
 | 
						|
 | 
						|
	_debug("limits {%llu,%llu,%llu} files",
 | 
						|
	       (unsigned long long) cache->frun,
 | 
						|
	       (unsigned long long) cache->fcull,
 | 
						|
	       (unsigned long long) cache->fstop);
 | 
						|
 | 
						|
	stats.f_blocks >>= cache->bshift;
 | 
						|
	do_div(stats.f_blocks, 100);
 | 
						|
	cache->bstop = stats.f_blocks * cache->bstop_percent;
 | 
						|
	cache->bcull = stats.f_blocks * cache->bcull_percent;
 | 
						|
	cache->brun  = stats.f_blocks * cache->brun_percent;
 | 
						|
 | 
						|
	_debug("limits {%llu,%llu,%llu} blocks",
 | 
						|
	       (unsigned long long) cache->brun,
 | 
						|
	       (unsigned long long) cache->bcull,
 | 
						|
	       (unsigned long long) cache->bstop);
 | 
						|
 | 
						|
	/* get the cache directory and check its type */
 | 
						|
	cachedir = cachefiles_get_directory(cache, root, "cache");
 | 
						|
	if (IS_ERR(cachedir)) {
 | 
						|
		ret = PTR_ERR(cachedir);
 | 
						|
		goto error_unsupported;
 | 
						|
	}
 | 
						|
 | 
						|
	fsdef->dentry = cachedir;
 | 
						|
	fsdef->fscache.cookie = NULL;
 | 
						|
 | 
						|
	ret = cachefiles_check_object_type(fsdef);
 | 
						|
	if (ret < 0)
 | 
						|
		goto error_unsupported;
 | 
						|
 | 
						|
	/* get the graveyard directory */
 | 
						|
	graveyard = cachefiles_get_directory(cache, root, "graveyard");
 | 
						|
	if (IS_ERR(graveyard)) {
 | 
						|
		ret = PTR_ERR(graveyard);
 | 
						|
		goto error_unsupported;
 | 
						|
	}
 | 
						|
 | 
						|
	cache->graveyard = graveyard;
 | 
						|
 | 
						|
	/* publish the cache */
 | 
						|
	fscache_init_cache(&cache->cache,
 | 
						|
			   &cachefiles_cache_ops,
 | 
						|
			   "%s",
 | 
						|
			   fsdef->dentry->d_sb->s_id);
 | 
						|
 | 
						|
	fscache_object_init(&fsdef->fscache, &fscache_fsdef_index,
 | 
						|
			    &cache->cache);
 | 
						|
 | 
						|
	ret = fscache_add_cache(&cache->cache, &fsdef->fscache, cache->tag);
 | 
						|
	if (ret < 0)
 | 
						|
		goto error_add_cache;
 | 
						|
 | 
						|
	/* done */
 | 
						|
	set_bit(CACHEFILES_READY, &cache->flags);
 | 
						|
	dput(root);
 | 
						|
 | 
						|
	pr_info("File cache on %s registered\n", cache->cache.identifier);
 | 
						|
 | 
						|
	/* check how much space the cache has */
 | 
						|
	cachefiles_has_space(cache, 0, 0);
 | 
						|
	cachefiles_end_secure(cache, saved_cred);
 | 
						|
	return 0;
 | 
						|
 | 
						|
error_add_cache:
 | 
						|
	dput(cache->graveyard);
 | 
						|
	cache->graveyard = NULL;
 | 
						|
error_unsupported:
 | 
						|
	mntput(cache->mnt);
 | 
						|
	cache->mnt = NULL;
 | 
						|
	dput(fsdef->dentry);
 | 
						|
	fsdef->dentry = NULL;
 | 
						|
	dput(root);
 | 
						|
error_open_root:
 | 
						|
	kmem_cache_free(cachefiles_object_jar, fsdef);
 | 
						|
error_root_object:
 | 
						|
	cachefiles_end_secure(cache, saved_cred);
 | 
						|
	pr_err("Failed to register: %d\n", ret);
 | 
						|
	return ret;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * unbind a cache on fd release
 | 
						|
 */
 | 
						|
void cachefiles_daemon_unbind(struct cachefiles_cache *cache)
 | 
						|
{
 | 
						|
	_enter("");
 | 
						|
 | 
						|
	if (test_bit(CACHEFILES_READY, &cache->flags)) {
 | 
						|
		pr_info("File cache on %s unregistering\n",
 | 
						|
			cache->cache.identifier);
 | 
						|
 | 
						|
		fscache_withdraw_cache(&cache->cache);
 | 
						|
	}
 | 
						|
 | 
						|
	dput(cache->graveyard);
 | 
						|
	mntput(cache->mnt);
 | 
						|
 | 
						|
	kfree(cache->rootdirname);
 | 
						|
	kfree(cache->secctx);
 | 
						|
	kfree(cache->tag);
 | 
						|
 | 
						|
	_leave("");
 | 
						|
}
 |