vmscan: per memory cgroup slab shrinkers

This patch adds the SHRINKER_MEMCG_AWARE flag. If a shrinker has this
flag set, it will be called per memory cgroup. The memory cgroup to
scan objects from is passed in shrink_control->memcg. If the memory
cgroup is NULL, a memcg-aware shrinker is supposed to scan objects
from the global list. Unaware shrinkers are only called on global
pressure with memcg=NULL.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Greg Thelen <gthelen@google.com>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 4101b62435
commit cb731d6c62

7 changed files with 79 additions and 49 deletions
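A hypothetical user of the new flag, for illustration only: a cache that keeps one object list per memory cgroup plus a global list would register its shrinker with SHRINKER_MEMCG_AWARE and dispatch on sc->memcg, where NULL selects the global list. The my_cache_* helpers below are assumptions of this sketch, not part of the patch.

static unsigned long my_cache_count(struct shrinker *s,
				    struct shrink_control *sc)
{
	/* sc->memcg == NULL means the global (non-memcg) list */
	return my_cache_count_objects(sc->memcg, sc->nid);
}

static unsigned long my_cache_scan(struct shrinker *s,
				   struct shrink_control *sc)
{
	if (!(sc->gfp_mask & __GFP_FS))
		return SHRINK_STOP;	/* cannot reclaim in this context */

	/* free up to sc->nr_to_scan objects from the requested memcg/node */
	return my_cache_reclaim(sc->memcg, sc->nid, sc->nr_to_scan);
}

static struct shrinker my_cache_shrinker = {
	.count_objects	= my_cache_count,
	.scan_objects	= my_cache_scan,
	.seeks		= DEFAULT_SEEKS,
	.flags		= SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE,
};

/* at init time: register_shrinker(&my_cache_shrinker); */

Unaware shrinkers need no change: under memcg pressure they are skipped entirely, and under global pressure they see sc->memcg == NULL as before.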
fs/drop_caches.c

@@ -37,20 +37,6 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
 	iput(toput_inode);
 }
 
-static void drop_slab(void)
-{
-	int nr_objects;
-
-	do {
-		int nid;
-
-		nr_objects = 0;
-		for_each_online_node(nid)
-			nr_objects += shrink_node_slabs(GFP_KERNEL, nid,
-							1000, 1000);
-	} while (nr_objects > 10);
-}
-
 int drop_caches_sysctl_handler(struct ctl_table *table, int write,
 	void __user *buffer, size_t *length, loff_t *ppos)
 {
include/linux/memcontrol.h

@@ -413,6 +413,8 @@ static inline bool memcg_kmem_enabled(void)
 	return static_key_false(&memcg_kmem_enabled_key);
 }
 
+bool memcg_kmem_is_active(struct mem_cgroup *memcg);
+
 /*
  * In general, we'll do everything in our power to not incur in any overhead
  * for non-memcg users for the kmem functions. Not even a function call, if we
@@ -542,6 +544,11 @@ static inline bool memcg_kmem_enabled(void)
 	return false;
 }
 
+static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg)
+{
+	return false;
+}
+
 static inline bool
 memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
 {
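The !CONFIG_MEMCG_KMEM stub exists so callers can test kmem activity without wrapping the check in #ifdefs; the new shrink_slab() in the mm/vmscan.c hunks further down relies on exactly this pattern:

	/*
	 * With CONFIG_MEMCG_KMEM disabled the stub always returns false,
	 * so every per-memcg call bails out here and only the global
	 * (memcg == NULL) pass does any work.
	 */
	if (memcg && !memcg_kmem_is_active(memcg))
		return 0;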
include/linux/mm.h

@@ -2168,9 +2168,8 @@ int drop_caches_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
 #endif
 
-unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
-				unsigned long nr_scanned,
-				unsigned long nr_eligible);
+void drop_slab(void);
+void drop_slab_node(int nid);
 
 #ifndef CONFIG_MMU
 #define randomize_va_space 0
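With these declarations the shrinker-driving loops live only in mm/vmscan.c; callers just pick an entry point. A sketch of the two call sites (the drop_caches sysctl handler body itself is not part of the hunks shown):

	drop_slab();			/* all online nodes, every memcg */
	drop_slab_node(page_to_nid(p));	/* one node, as shake_page() does below */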
include/linux/shrinker.h

@@ -20,6 +20,9 @@ struct shrink_control {
 
 	/* current node being shrunk (for NUMA aware shrinkers) */
 	int nid;
+
+	/* current memcg being shrunk (for memcg aware shrinkers) */
+	struct mem_cgroup *memcg;
 };
 
 #define SHRINK_STOP (~0UL)
@@ -62,6 +65,7 @@ struct shrinker {
 
 /* Flags */
 #define SHRINKER_NUMA_AWARE	(1 << 0)
+#define SHRINKER_MEMCG_AWARE	(1 << 1)
 
 extern int register_shrinker(struct shrinker *);
 extern void unregister_shrinker(struct shrinker *);
mm/memcontrol.c

@@ -352,7 +352,7 @@ struct mem_cgroup {
 };
 
 #ifdef CONFIG_MEMCG_KMEM
-static bool memcg_kmem_is_active(struct mem_cgroup *memcg)
+bool memcg_kmem_is_active(struct mem_cgroup *memcg)
 {
 	return memcg->kmemcg_id >= 0;
 }
mm/memory-failure.c

@@ -242,15 +242,8 @@ void shake_page(struct page *p, int access)
 	 * Only call shrink_node_slabs here (which would also shrink
 	 * other caches) if access is not potentially fatal.
 	 */
-	if (access) {
-		int nr;
-		int nid = page_to_nid(p);
-		do {
-			nr = shrink_node_slabs(GFP_KERNEL, nid, 1000, 1000);
-			if (page_count(p) == 1)
-				break;
-		} while (nr > 10);
-	}
+	if (access)
+		drop_slab_node(page_to_nid(p));
 }
 EXPORT_SYMBOL_GPL(shake_page);
 
mm/vmscan.c (65 changed lines)

@@ -232,7 +232,7 @@ EXPORT_SYMBOL(unregister_shrinker);
 
 #define SHRINK_BATCH 128
 
-static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
+static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
 				    struct shrinker *shrinker,
 				    unsigned long nr_scanned,
 				    unsigned long nr_eligible)
@@ -344,9 +344,10 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
 }
 
 /**
- * shrink_node_slabs - shrink slab caches of a given node
+ * shrink_slab - shrink slab caches
  * @gfp_mask: allocation context
  * @nid: node whose slab caches to target
+ * @memcg: memory cgroup whose slab caches to target
  * @nr_scanned: pressure numerator
  * @nr_eligible: pressure denominator
  *
@@ -355,6 +356,12 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
  * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set,
  * unaware shrinkers will receive a node id of 0 instead.
  *
+ * @memcg specifies the memory cgroup to target. If it is not NULL,
+ * only shrinkers with SHRINKER_MEMCG_AWARE set will be called to scan
+ * objects from the memory cgroup specified. Otherwise all shrinkers
+ * are called, and memcg aware shrinkers are supposed to scan the
+ * global list then.
+ *
  * @nr_scanned and @nr_eligible form a ratio that indicate how much of
  * the available objects should be scanned.  Page reclaim for example
  * passes the number of pages scanned and the number of pages on the
@@ -365,13 +372,17 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
  *
  * Returns the number of reclaimed slab objects.
  */
-unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
-				unsigned long nr_scanned,
-				unsigned long nr_eligible)
+static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
+				 struct mem_cgroup *memcg,
+				 unsigned long nr_scanned,
+				 unsigned long nr_eligible)
 {
 	struct shrinker *shrinker;
 	unsigned long freed = 0;
 
+	if (memcg && !memcg_kmem_is_active(memcg))
+		return 0;
+
 	if (nr_scanned == 0)
 		nr_scanned = SWAP_CLUSTER_MAX;
 
@@ -390,12 +401,16 @@ unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
 		struct shrink_control sc = {
 			.gfp_mask = gfp_mask,
 			.nid = nid,
+			.memcg = memcg,
 		};
 
+		if (memcg && !(shrinker->flags & SHRINKER_MEMCG_AWARE))
+			continue;
+
 		if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
 			sc.nid = 0;
 
-		freed += shrink_slabs(&sc, shrinker, nr_scanned, nr_eligible);
+		freed += do_shrink_slab(&sc, shrinker, nr_scanned, nr_eligible);
 	}
 
 	up_read(&shrinker_rwsem);
@@ -404,6 +419,29 @@ unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
 	return freed;
 }
 
+void drop_slab_node(int nid)
+{
+	unsigned long freed;
+
+	do {
+		struct mem_cgroup *memcg = NULL;
+
+		freed = 0;
+		do {
+			freed += shrink_slab(GFP_KERNEL, nid, memcg,
+					     1000, 1000);
+		} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
+	} while (freed > 10);
+}
+
+void drop_slab(void)
+{
+	int nid;
+
+	for_each_online_node(nid)
+		drop_slab_node(nid);
+}
+
 static inline int is_page_cache_freeable(struct page *page)
 {
 	/*
@@ -2276,6 +2314,7 @@ static inline bool should_continue_reclaim(struct zone *zone,
 static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 			bool is_classzone)
 {
+	struct reclaim_state *reclaim_state = current->reclaim_state;
 	unsigned long nr_reclaimed, nr_scanned;
 	bool reclaimable = false;
 
@@ -2294,6 +2333,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 		memcg = mem_cgroup_iter(root, NULL, &reclaim);
 		do {
 			unsigned long lru_pages;
+			unsigned long scanned;
 			struct lruvec *lruvec;
 			int swappiness;
 
@@ -2305,10 +2345,16 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 
 			lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 			swappiness = mem_cgroup_swappiness(memcg);
+			scanned = sc->nr_scanned;
 
 			shrink_lruvec(lruvec, swappiness, sc, &lru_pages);
 			zone_lru_pages += lru_pages;
 
+			if (memcg && is_classzone)
+				shrink_slab(sc->gfp_mask, zone_to_nid(zone),
+					    memcg, sc->nr_scanned - scanned,
+					    lru_pages);
+
 			/*
 			 * Direct reclaim and kswapd have to scan all memory
 			 * cgroups to fulfill the overall scan target for the
@@ -2330,20 +2376,15 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 		 * Shrink the slab caches in the same proportion that
 		 * the eligible LRU pages were scanned.
 		 */
-		if (global_reclaim(sc) && is_classzone) {
-			struct reclaim_state *reclaim_state;
-
-			shrink_node_slabs(sc->gfp_mask, zone_to_nid(zone),
-					  sc->nr_scanned - nr_scanned,
-					  zone_lru_pages);
-
-			reclaim_state = current->reclaim_state;
-			if (reclaim_state) {
-				sc->nr_reclaimed +=
-					reclaim_state->reclaimed_slab;
-				reclaim_state->reclaimed_slab = 0;
-			}
-		}
+		if (global_reclaim(sc) && is_classzone)
+			shrink_slab(sc->gfp_mask, zone_to_nid(zone), NULL,
+				    sc->nr_scanned - nr_scanned,
+				    zone_lru_pages);
+
+		if (reclaim_state) {
+			sc->nr_reclaimed += reclaim_state->reclaimed_slab;
+			reclaim_state->reclaimed_slab = 0;
+		}
 
 		vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
 			   sc->nr_scanned - nr_scanned,
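For a sense of how the 1000/1000 ratio used by drop_slab_node() behaves: inside do_shrink_slab() (not shown in these hunks; the fragment below is quoted from the 3.19-era code and should be read as an approximation, not part of this patch) the scan target is derived from the pressure ratio:

	freeable = shrinker->count_objects(shrinker, shrinkctl);

	delta = (4 * nr_scanned) / shrinker->seeks;
	delta *= freeable;
	do_div(delta, nr_eligible + 1);

With nr_scanned == nr_eligible == 1000 and seeks == DEFAULT_SEEKS (2), each shrinker is asked to scan roughly 2 * freeable objects per pass, i.e. effectively its whole cache, and drop_slab_node() keeps iterating over all memcgs until a full pass frees no more than 10 objects.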