forked from mirrors/linux
		
	mm/z3fold.c: fix race between migration and destruction
In z3fold_destroy_pool() we call destroy_workqueue(&pool->compact_wq). However, we have no guarantee that migration isn't happening in the background at that time. Migration directly calls queue_work_on(pool->compact_wq), if destruction wins that race we are using a destroyed workqueue. Link: http://lkml.kernel.org/r/20190809213828.202833-1-henryburns@google.com Signed-off-by: Henry Burns <henryburns@google.com> Cc: Vitaly Wool <vitalywool@gmail.com> Cc: Shakeel Butt <shakeelb@google.com> Cc: Jonathan Adams <jwadams@google.com> Cc: Henry Burns <henrywolfeburns@gmail.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									361469211f
								
							
						
					
					
						commit
						d776aaa989
					
				
					 1 changed files with 89 additions and 0 deletions
				
			
		
							
								
								
									
										89
									
								
								mm/z3fold.c
									
									
									
									
									
								
							
							
						
						
									
										89
									
								
								mm/z3fold.c
									
									
									
									
									
								
							| 
						 | 
					@ -41,6 +41,7 @@
 | 
				
			||||||
#include <linux/workqueue.h>
 | 
					#include <linux/workqueue.h>
 | 
				
			||||||
#include <linux/slab.h>
 | 
					#include <linux/slab.h>
 | 
				
			||||||
#include <linux/spinlock.h>
 | 
					#include <linux/spinlock.h>
 | 
				
			||||||
 | 
					#include <linux/wait.h>
 | 
				
			||||||
#include <linux/zpool.h>
 | 
					#include <linux/zpool.h>
 | 
				
			||||||
#include <linux/magic.h>
 | 
					#include <linux/magic.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -145,6 +146,8 @@ struct z3fold_header {
 | 
				
			||||||
 * @release_wq:	workqueue for safe page release
 | 
					 * @release_wq:	workqueue for safe page release
 | 
				
			||||||
 * @work:	work_struct for safe page release
 | 
					 * @work:	work_struct for safe page release
 | 
				
			||||||
 * @inode:	inode for z3fold pseudo filesystem
 | 
					 * @inode:	inode for z3fold pseudo filesystem
 | 
				
			||||||
 | 
					 * @destroying: bool to stop migration once we start destruction
 | 
				
			||||||
 | 
					 * @isolated: int to count the number of pages currently in isolation
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * This structure is allocated at pool creation time and maintains metadata
 | 
					 * This structure is allocated at pool creation time and maintains metadata
 | 
				
			||||||
 * pertaining to a particular z3fold pool.
 | 
					 * pertaining to a particular z3fold pool.
 | 
				
			||||||
| 
						 | 
					@ -163,8 +166,11 @@ struct z3fold_pool {
 | 
				
			||||||
	const struct zpool_ops *zpool_ops;
 | 
						const struct zpool_ops *zpool_ops;
 | 
				
			||||||
	struct workqueue_struct *compact_wq;
 | 
						struct workqueue_struct *compact_wq;
 | 
				
			||||||
	struct workqueue_struct *release_wq;
 | 
						struct workqueue_struct *release_wq;
 | 
				
			||||||
 | 
						struct wait_queue_head isolate_wait;
 | 
				
			||||||
	struct work_struct work;
 | 
						struct work_struct work;
 | 
				
			||||||
	struct inode *inode;
 | 
						struct inode *inode;
 | 
				
			||||||
 | 
						bool destroying;
 | 
				
			||||||
 | 
						int isolated;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
| 
						 | 
					@ -769,6 +775,7 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
 | 
				
			||||||
		goto out_c;
 | 
							goto out_c;
 | 
				
			||||||
	spin_lock_init(&pool->lock);
 | 
						spin_lock_init(&pool->lock);
 | 
				
			||||||
	spin_lock_init(&pool->stale_lock);
 | 
						spin_lock_init(&pool->stale_lock);
 | 
				
			||||||
 | 
						init_waitqueue_head(&pool->isolate_wait);
 | 
				
			||||||
	pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
 | 
						pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
 | 
				
			||||||
	if (!pool->unbuddied)
 | 
						if (!pool->unbuddied)
 | 
				
			||||||
		goto out_pool;
 | 
							goto out_pool;
 | 
				
			||||||
| 
						 | 
					@ -808,6 +815,15 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
 | 
				
			||||||
	return NULL;
 | 
						return NULL;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static bool pool_isolated_are_drained(struct z3fold_pool *pool)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						bool ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_lock(&pool->lock);
 | 
				
			||||||
 | 
						ret = pool->isolated == 0;
 | 
				
			||||||
 | 
						spin_unlock(&pool->lock);
 | 
				
			||||||
 | 
						return ret;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 * z3fold_destroy_pool() - destroys an existing z3fold pool
 | 
					 * z3fold_destroy_pool() - destroys an existing z3fold pool
 | 
				
			||||||
 * @pool:	the z3fold pool to be destroyed
 | 
					 * @pool:	the z3fold pool to be destroyed
 | 
				
			||||||
| 
						 | 
					@ -817,6 +833,22 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
 | 
				
			||||||
static void z3fold_destroy_pool(struct z3fold_pool *pool)
 | 
					static void z3fold_destroy_pool(struct z3fold_pool *pool)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	kmem_cache_destroy(pool->c_handle);
 | 
						kmem_cache_destroy(pool->c_handle);
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * We set pool->destroying under lock to ensure that
 | 
				
			||||||
 | 
						 * z3fold_page_isolate() sees any changes to destroying. This way we
 | 
				
			||||||
 | 
						 * avoid the need for any memory barriers.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_lock(&pool->lock);
 | 
				
			||||||
 | 
						pool->destroying = true;
 | 
				
			||||||
 | 
						spin_unlock(&pool->lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * We need to ensure that no pages are being migrated while we destroy
 | 
				
			||||||
 | 
						 * these workqueues, as migration can queue work on either of the
 | 
				
			||||||
 | 
						 * workqueues.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						/* wait_event() sleeps until its condition is true: block until no page is isolated. */
						wait_event(pool->isolate_wait, pool_isolated_are_drained(pool));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * We need to destroy pool->compact_wq before pool->release_wq,
 | 
						 * We need to destroy pool->compact_wq before pool->release_wq,
 | 
				
			||||||
| 
						 | 
					@ -1307,6 +1339,28 @@ static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
 | 
				
			||||||
	return atomic64_read(&pool->pages_nr);
 | 
						return atomic64_read(&pool->pages_nr);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * z3fold_dec_isolated() expects to be called while pool->lock is held.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static void z3fold_dec_isolated(struct z3fold_pool *pool)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						assert_spin_locked(&pool->lock);
 | 
				
			||||||
 | 
						VM_BUG_ON(pool->isolated <= 0);
 | 
				
			||||||
 | 
						pool->isolated--;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * If we have no more isolated pages, we have to see if
 | 
				
			||||||
 | 
						 * z3fold_destroy_pool() is waiting for a signal.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (pool->isolated == 0 && waitqueue_active(&pool->isolate_wait))
 | 
				
			||||||
 | 
							wake_up_all(&pool->isolate_wait);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void z3fold_inc_isolated(struct z3fold_pool *pool)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						pool->isolated++;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
 | 
					static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct z3fold_header *zhdr;
 | 
						struct z3fold_header *zhdr;
 | 
				
			||||||
| 
						 | 
					@ -1333,6 +1387,33 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
 | 
				
			||||||
		spin_lock(&pool->lock);
 | 
							spin_lock(&pool->lock);
 | 
				
			||||||
		if (!list_empty(&page->lru))
 | 
							if (!list_empty(&page->lru))
 | 
				
			||||||
			list_del(&page->lru);
 | 
								list_del(&page->lru);
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * We need to check for destruction while holding pool->lock, as
 | 
				
			||||||
 | 
							 * otherwise destruction could see 0 isolated pages, and
 | 
				
			||||||
 | 
							 * proceed.
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							if (unlikely(pool->destroying)) {
 | 
				
			||||||
 | 
								spin_unlock(&pool->lock);
 | 
				
			||||||
 | 
								/*
 | 
				
			||||||
 | 
								 * If this page isn't stale, somebody else holds a
 | 
				
			||||||
 | 
								 * reference to it. Let's drop our refcount so that they
 | 
				
			||||||
 | 
								 * can call the release logic.
 | 
				
			||||||
 | 
								 */
 | 
				
			||||||
 | 
								if (unlikely(kref_put(&zhdr->refcount,
 | 
				
			||||||
 | 
										      release_z3fold_page_locked))) {
 | 
				
			||||||
 | 
									/*
 | 
				
			||||||
 | 
									 * If we get here we have kref problems, so we
 | 
				
			||||||
 | 
									 * should freak out.
 | 
				
			||||||
 | 
									 */
 | 
				
			||||||
 | 
									WARN(1, "Z3fold is experiencing kref problems\n");
 | 
				
			||||||
 | 
									return false;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								z3fold_page_unlock(zhdr);
 | 
				
			||||||
 | 
								return false;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							z3fold_inc_isolated(pool);
 | 
				
			||||||
		spin_unlock(&pool->lock);
 | 
							spin_unlock(&pool->lock);
 | 
				
			||||||
		z3fold_page_unlock(zhdr);
 | 
							z3fold_page_unlock(zhdr);
 | 
				
			||||||
		return true;
 | 
							return true;
 | 
				
			||||||
| 
						 | 
					@ -1401,6 +1482,10 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
 | 
						queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_lock(&pool->lock);
 | 
				
			||||||
 | 
						z3fold_dec_isolated(pool);
 | 
				
			||||||
 | 
						spin_unlock(&pool->lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	page_mapcount_reset(page);
 | 
						page_mapcount_reset(page);
 | 
				
			||||||
	put_page(page);
 | 
						put_page(page);
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
| 
						 | 
					@ -1420,10 +1505,14 @@ static void z3fold_page_putback(struct page *page)
 | 
				
			||||||
	INIT_LIST_HEAD(&page->lru);
 | 
						INIT_LIST_HEAD(&page->lru);
 | 
				
			||||||
	if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
 | 
						if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
 | 
				
			||||||
		atomic64_dec(&pool->pages_nr);
 | 
							atomic64_dec(&pool->pages_nr);
 | 
				
			||||||
 | 
							spin_lock(&pool->lock);
 | 
				
			||||||
 | 
							z3fold_dec_isolated(pool);
 | 
				
			||||||
 | 
							spin_unlock(&pool->lock);
 | 
				
			||||||
		return;
 | 
							return;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	spin_lock(&pool->lock);
 | 
						spin_lock(&pool->lock);
 | 
				
			||||||
	list_add(&page->lru, &pool->lru);
 | 
						list_add(&page->lru, &pool->lru);
 | 
				
			||||||
 | 
						z3fold_dec_isolated(pool);
 | 
				
			||||||
	spin_unlock(&pool->lock);
 | 
						spin_unlock(&pool->lock);
 | 
				
			||||||
	z3fold_page_unlock(zhdr);
 | 
						z3fold_page_unlock(zhdr);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue