forked from mirrors/linux
		
	slab: remove synchronous rcu_barrier() call in memcg cache release path
With kmem cgroup support enabled, kmem_caches can be created and destroyed frequently and a great number of nearly empty kmem_caches can accumulate if there are a lot of transient cgroups and the system is not under memory pressure. When memory reclaim starts under such conditions, it can lead to consecutive deactivation and destruction of many kmem_caches, easily hundreds of thousands on moderately large systems, exposing scalability issues in the current slab management code. This is one of the patches to address the issue. SLAB_DESTROY_BY_RCU caches need to flush all RCU operations before destruction because slab pages are freed through RCU and they need to be able to dereference the associated kmem_cache. Currently, it's done synchronously with rcu_barrier(). As rcu_barrier() is expensive time-wise, slab implements a batching mechanism so that rcu_barrier() can be done for multiple caches at the same time. Unfortunately, the rcu_barrier() is in the synchronous path, which is called while holding cgroup_mutex, and the batching is too limited to be actually helpful. This patch updates the cache release path so that the batching is asynchronous and global. All SLAB_DESTROY_BY_RCU caches are queued globally and a work item consumes the list. The work item calls rcu_barrier() only once for all caches that are currently queued. * release_caches() is removed and shutdown_cache() now either directly releases the cache or queues it and schedules a work item that releases it after rcu_barrier(). This makes the cache inaccessible once shutdown_cache() is called and makes it impossible for shutdown_memcg_caches() to do memcg-specific cleanups afterwards. Move the memcg-specific part into a helper, unlink_memcg_cache(), and make shutdown_cache() call it directly. 
Link: http://lkml.kernel.org/r/20170117235411.9408-4-tj@kernel.org Signed-off-by: Tejun Heo <tj@kernel.org> Reported-by: Jay Vana <jsvana@fb.com> Acked-by: Vladimir Davydov <vdavydov@tarantool.org> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									bf5eb3de38
								
							
						
					
					
						commit
						657dc2f972
					
				
					 1 changed files with 64 additions and 46 deletions
				
			
		
							
								
								
									
										110
									
								
								mm/slab_common.c
									
									
									
									
									
								
							
							
						
						
									
										110
									
								
								mm/slab_common.c
									
									
									
									
									
								
							|  | @ -30,6 +30,11 @@ LIST_HEAD(slab_caches); | |||
| DEFINE_MUTEX(slab_mutex); | ||||
| struct kmem_cache *kmem_cache; | ||||
| 
 | ||||
| static LIST_HEAD(slab_caches_to_rcu_destroy); | ||||
| static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work); | ||||
| static DECLARE_WORK(slab_caches_to_rcu_destroy_work, | ||||
| 		    slab_caches_to_rcu_destroy_workfn); | ||||
| 
 | ||||
| /*
 | ||||
|  * Set of flags that will prevent slab merging | ||||
|  */ | ||||
|  | @ -215,6 +220,11 @@ int memcg_update_all_caches(int num_memcgs) | |||
| 	mutex_unlock(&slab_mutex); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| static void unlink_memcg_cache(struct kmem_cache *s) | ||||
| { | ||||
| 	list_del(&s->memcg_params.list); | ||||
| } | ||||
| #else | ||||
| static inline int init_memcg_params(struct kmem_cache *s, | ||||
| 		struct mem_cgroup *memcg, struct kmem_cache *root_cache) | ||||
|  | @ -225,6 +235,10 @@ static inline int init_memcg_params(struct kmem_cache *s, | |||
| static inline void destroy_memcg_params(struct kmem_cache *s) | ||||
| { | ||||
| } | ||||
| 
 | ||||
| static inline void unlink_memcg_cache(struct kmem_cache *s) | ||||
| { | ||||
| } | ||||
| #endif /* CONFIG_MEMCG && !CONFIG_SLOB */ | ||||
| 
 | ||||
| /*
 | ||||
|  | @ -461,27 +475,30 @@ kmem_cache_create(const char *name, size_t size, size_t align, | |||
| } | ||||
| EXPORT_SYMBOL(kmem_cache_create); | ||||
| 
 | ||||
| static int shutdown_cache(struct kmem_cache *s, | ||||
| 		struct list_head *release, bool *need_rcu_barrier) | ||||
| { | ||||
| 	if (__kmem_cache_shutdown(s) != 0) | ||||
| 		return -EBUSY; | ||||
| 
 | ||||
| 	if (s->flags & SLAB_DESTROY_BY_RCU) | ||||
| 		*need_rcu_barrier = true; | ||||
| 
 | ||||
| 	list_move(&s->list, release); | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static void release_caches(struct list_head *release, bool need_rcu_barrier) | ||||
| static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work) | ||||
| { | ||||
| 	LIST_HEAD(to_destroy); | ||||
| 	struct kmem_cache *s, *s2; | ||||
| 
 | ||||
| 	if (need_rcu_barrier) | ||||
| 		rcu_barrier(); | ||||
| 	/*
 | ||||
| 	 * On destruction, SLAB_DESTROY_BY_RCU kmem_caches are put on the | ||||
| 	 * @slab_caches_to_rcu_destroy list.  The slab pages are freed | ||||
| 	 * through RCU and the associated kmem_cache are dereferenced | ||||
| 	 * while freeing the pages, so the kmem_caches should be freed only | ||||
| 	 * after the pending RCU operations are finished.  As rcu_barrier() | ||||
| 	 * is a pretty slow operation, we batch all pending destructions | ||||
| 	 * asynchronously. | ||||
| 	 */ | ||||
| 	mutex_lock(&slab_mutex); | ||||
| 	list_splice_init(&slab_caches_to_rcu_destroy, &to_destroy); | ||||
| 	mutex_unlock(&slab_mutex); | ||||
| 
 | ||||
| 	list_for_each_entry_safe(s, s2, release, list) { | ||||
| 	if (list_empty(&to_destroy)) | ||||
| 		return; | ||||
| 
 | ||||
| 	rcu_barrier(); | ||||
| 
 | ||||
| 	list_for_each_entry_safe(s, s2, &to_destroy, list) { | ||||
| #ifdef SLAB_SUPPORTS_SYSFS | ||||
| 		sysfs_slab_release(s); | ||||
| #else | ||||
|  | @ -490,6 +507,29 @@ static void release_caches(struct list_head *release, bool need_rcu_barrier) | |||
| 	} | ||||
| } | ||||
| 
 | ||||
| static int shutdown_cache(struct kmem_cache *s) | ||||
| { | ||||
| 	if (__kmem_cache_shutdown(s) != 0) | ||||
| 		return -EBUSY; | ||||
| 
 | ||||
| 	list_del(&s->list); | ||||
| 	if (!is_root_cache(s)) | ||||
| 		unlink_memcg_cache(s); | ||||
| 
 | ||||
| 	if (s->flags & SLAB_DESTROY_BY_RCU) { | ||||
| 		list_add_tail(&s->list, &slab_caches_to_rcu_destroy); | ||||
| 		schedule_work(&slab_caches_to_rcu_destroy_work); | ||||
| 	} else { | ||||
| #ifdef SLAB_SUPPORTS_SYSFS | ||||
| 		sysfs_slab_release(s); | ||||
| #else | ||||
| 		slab_kmem_cache_release(s); | ||||
| #endif | ||||
| 	} | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB) | ||||
| /*
 | ||||
|  * memcg_create_kmem_cache - Create a cache for a memory cgroup. | ||||
|  | @ -602,22 +642,8 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg) | |||
| 	put_online_cpus(); | ||||
| } | ||||
| 
 | ||||
| static int __shutdown_memcg_cache(struct kmem_cache *s, | ||||
| 		struct list_head *release, bool *need_rcu_barrier) | ||||
| { | ||||
| 	BUG_ON(is_root_cache(s)); | ||||
| 
 | ||||
| 	if (shutdown_cache(s, release, need_rcu_barrier)) | ||||
| 		return -EBUSY; | ||||
| 
 | ||||
| 	list_del(&s->memcg_params.list); | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| void memcg_destroy_kmem_caches(struct mem_cgroup *memcg) | ||||
| { | ||||
| 	LIST_HEAD(release); | ||||
| 	bool need_rcu_barrier = false; | ||||
| 	struct kmem_cache *s, *s2; | ||||
| 
 | ||||
| 	get_online_cpus(); | ||||
|  | @ -631,18 +657,15 @@ void memcg_destroy_kmem_caches(struct mem_cgroup *memcg) | |||
| 		 * The cgroup is about to be freed and therefore has no charges | ||||
| 		 * left. Hence, all its caches must be empty by now. | ||||
| 		 */ | ||||
| 		BUG_ON(__shutdown_memcg_cache(s, &release, &need_rcu_barrier)); | ||||
| 		BUG_ON(shutdown_cache(s)); | ||||
| 	} | ||||
| 	mutex_unlock(&slab_mutex); | ||||
| 
 | ||||
| 	put_online_mems(); | ||||
| 	put_online_cpus(); | ||||
| 
 | ||||
| 	release_caches(&release, need_rcu_barrier); | ||||
| } | ||||
| 
 | ||||
| static int shutdown_memcg_caches(struct kmem_cache *s, | ||||
| 		struct list_head *release, bool *need_rcu_barrier) | ||||
| static int shutdown_memcg_caches(struct kmem_cache *s) | ||||
| { | ||||
| 	struct memcg_cache_array *arr; | ||||
| 	struct kmem_cache *c, *c2; | ||||
|  | @ -661,7 +684,7 @@ static int shutdown_memcg_caches(struct kmem_cache *s, | |||
| 		c = arr->entries[i]; | ||||
| 		if (!c) | ||||
| 			continue; | ||||
| 		if (__shutdown_memcg_cache(c, release, need_rcu_barrier)) | ||||
| 		if (shutdown_cache(c)) | ||||
| 			/*
 | ||||
| 			 * The cache still has objects. Move it to a temporary | ||||
| 			 * list so as not to try to destroy it for a second | ||||
|  | @ -684,7 +707,7 @@ static int shutdown_memcg_caches(struct kmem_cache *s, | |||
| 	 */ | ||||
| 	list_for_each_entry_safe(c, c2, &s->memcg_params.list, | ||||
| 				 memcg_params.list) | ||||
| 		__shutdown_memcg_cache(c, release, need_rcu_barrier); | ||||
| 		shutdown_cache(c); | ||||
| 
 | ||||
| 	list_splice(&busy, &s->memcg_params.list); | ||||
| 
 | ||||
|  | @ -697,8 +720,7 @@ static int shutdown_memcg_caches(struct kmem_cache *s, | |||
| 	return 0; | ||||
| } | ||||
| #else | ||||
| static inline int shutdown_memcg_caches(struct kmem_cache *s, | ||||
| 		struct list_head *release, bool *need_rcu_barrier) | ||||
| static inline int shutdown_memcg_caches(struct kmem_cache *s) | ||||
| { | ||||
| 	return 0; | ||||
| } | ||||
|  | @ -714,8 +736,6 @@ void slab_kmem_cache_release(struct kmem_cache *s) | |||
| 
 | ||||
| void kmem_cache_destroy(struct kmem_cache *s) | ||||
| { | ||||
| 	LIST_HEAD(release); | ||||
| 	bool need_rcu_barrier = false; | ||||
| 	int err; | ||||
| 
 | ||||
| 	if (unlikely(!s)) | ||||
|  | @ -731,9 +751,9 @@ void kmem_cache_destroy(struct kmem_cache *s) | |||
| 	if (s->refcount) | ||||
| 		goto out_unlock; | ||||
| 
 | ||||
| 	err = shutdown_memcg_caches(s, &release, &need_rcu_barrier); | ||||
| 	err = shutdown_memcg_caches(s); | ||||
| 	if (!err) | ||||
| 		err = shutdown_cache(s, &release, &need_rcu_barrier); | ||||
| 		err = shutdown_cache(s); | ||||
| 
 | ||||
| 	if (err) { | ||||
| 		pr_err("kmem_cache_destroy %s: Slab cache still has objects\n", | ||||
|  | @ -745,8 +765,6 @@ void kmem_cache_destroy(struct kmem_cache *s) | |||
| 
 | ||||
| 	put_online_mems(); | ||||
| 	put_online_cpus(); | ||||
| 
 | ||||
| 	release_caches(&release, need_rcu_barrier); | ||||
| } | ||||
| EXPORT_SYMBOL(kmem_cache_destroy); | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Tejun Heo
						Tejun Heo