	memcg: dynamically allocate lruvec_stats
To decouple the dependency of lruvec_stats on NR_VM_NODE_STAT_ITEMS, we
need to dynamically allocate lruvec_stats in the mem_cgroup_per_node
structure.  Also move the definitions of lruvec_stats_percpu and
lruvec_stats and the related functions to memcontrol.c to facilitate
later patches.  No functional changes in this patch.

Link: https://lkml.kernel.org/r/20240501172617.678560-3-shakeel.butt@linux.dev
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
Reviewed-by: Yosry Ahmed <yosryahmed@google.com>
Reviewed-by: T.J. Mercier <tjmercier@google.com>
Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 59142d87ab
commit 70a64b7919
2 changed files with 81 additions and 68 deletions
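The core of the change is a data-structure conversion: the struct lruvec_stats embedded in mem_cgroup_per_node (whose size scales with NR_VM_NODE_STAT_ITEMS) becomes a pointer filled in by a NUMA-node-local allocation at init time. Here is a minimal standalone sketch of that pattern, using hypothetical names (stats, per_node_info) rather than the kernel's own; the real structures and allocation site are in the diff below.

/*
 * Sketch only: embedding an array sized by a stat-item constant makes
 * sizeof(struct per_node_info) depend on that constant; holding a
 * pointer and allocating at init time removes the dependency.
 */
#include <linux/slab.h>

struct stats {
	long state[64];			/* stand-in for NR_VM_NODE_STAT_ITEMS */
};

struct per_node_info {
	struct stats *stats;		/* was: struct stats stats; (embedded) */
};

static int per_node_info_init(struct per_node_info *pn, int node)
{
	/* kzalloc_node() keeps the stats memory on the owning NUMA node */
	pn->stats = kzalloc_node(sizeof(*pn->stats), GFP_KERNEL, node);
	if (!pn->stats)
		return -ENOMEM;
	return 0;
}

static void per_node_info_exit(struct per_node_info *pn)
{
	kfree(pn->stats);
	pn->stats = NULL;
}

A side effect of the conversion is that callers now reach the stat arrays through a pointer, so the header only needs forward declarations of the stats types; that is what lets the struct definitions move out of the header and into memcontrol.c.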
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -83,6 +83,8 @@ enum mem_cgroup_events_target {
 
 struct memcg_vmstats_percpu;
 struct memcg_vmstats;
+struct lruvec_stats_percpu;
+struct lruvec_stats;
 
 struct mem_cgroup_reclaim_iter {
 	struct mem_cgroup *position;
@@ -90,25 +92,6 @@ struct mem_cgroup_reclaim_iter {
 	unsigned int generation;
 };
 
-struct lruvec_stats_percpu {
-	/* Local (CPU and cgroup) state */
-	long state[NR_VM_NODE_STAT_ITEMS];
-
-	/* Delta calculation for lockless upward propagation */
-	long state_prev[NR_VM_NODE_STAT_ITEMS];
-};
-
-struct lruvec_stats {
-	/* Aggregated (CPU and subtree) state */
-	long state[NR_VM_NODE_STAT_ITEMS];
-
-	/* Non-hierarchical (CPU aggregated) state */
-	long state_local[NR_VM_NODE_STAT_ITEMS];
-
-	/* Pending child counts during tree propagation */
-	long state_pending[NR_VM_NODE_STAT_ITEMS];
-};
-
 /*
  * per-node information in memory controller.
  */
@@ -116,7 +99,7 @@ struct mem_cgroup_per_node {
 	struct lruvec		lruvec;
 
 	struct lruvec_stats_percpu __percpu	*lruvec_stats_percpu;
-	struct lruvec_stats			lruvec_stats;
+	struct lruvec_stats			*lruvec_stats;
 
 	unsigned long		lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
 
@@ -1037,42 +1020,9 @@ static inline void mod_memcg_page_state(struct page *page,
 }
 
 unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx);
-
-static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
-					      enum node_stat_item idx)
-{
-	struct mem_cgroup_per_node *pn;
-	long x;
-
-	if (mem_cgroup_disabled())
-		return node_page_state(lruvec_pgdat(lruvec), idx);
-
-	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-	x = READ_ONCE(pn->lruvec_stats.state[idx]);
-#ifdef CONFIG_SMP
-	if (x < 0)
-		x = 0;
-#endif
-	return x;
-}
-
-static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
-						    enum node_stat_item idx)
-{
-	struct mem_cgroup_per_node *pn;
-	long x = 0;
-
-	if (mem_cgroup_disabled())
-		return node_page_state(lruvec_pgdat(lruvec), idx);
-
-	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-	x = READ_ONCE(pn->lruvec_stats.state_local[idx]);
-#ifdef CONFIG_SMP
-	if (x < 0)
-		x = 0;
-#endif
-	return x;
-}
+unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx);
+unsigned long lruvec_page_state_local(struct lruvec *lruvec,
+				      enum node_stat_item idx);
 
 void mem_cgroup_flush_stats(struct mem_cgroup *memcg);
 void mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -575,6 +575,60 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
 	return mz;
 }
 
+struct lruvec_stats_percpu {
+	/* Local (CPU and cgroup) state */
+	long state[NR_VM_NODE_STAT_ITEMS];
+
+	/* Delta calculation for lockless upward propagation */
+	long state_prev[NR_VM_NODE_STAT_ITEMS];
+};
+
+struct lruvec_stats {
+	/* Aggregated (CPU and subtree) state */
+	long state[NR_VM_NODE_STAT_ITEMS];
+
+	/* Non-hierarchical (CPU aggregated) state */
+	long state_local[NR_VM_NODE_STAT_ITEMS];
+
+	/* Pending child counts during tree propagation */
+	long state_pending[NR_VM_NODE_STAT_ITEMS];
+};
+
+unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx)
+{
+	struct mem_cgroup_per_node *pn;
+	long x;
+
+	if (mem_cgroup_disabled())
+		return node_page_state(lruvec_pgdat(lruvec), idx);
+
+	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+	x = READ_ONCE(pn->lruvec_stats->state[idx]);
+#ifdef CONFIG_SMP
+	if (x < 0)
+		x = 0;
+#endif
+	return x;
+}
+
+unsigned long lruvec_page_state_local(struct lruvec *lruvec,
+				      enum node_stat_item idx)
+{
+	struct mem_cgroup_per_node *pn;
+	long x = 0;
+
+	if (mem_cgroup_disabled())
+		return node_page_state(lruvec_pgdat(lruvec), idx);
+
+	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+	x = READ_ONCE(pn->lruvec_stats->state_local[idx]);
+#ifdef CONFIG_SMP
+	if (x < 0)
+		x = 0;
+#endif
+	return x;
+}
+
 /* Subset of vm_event_item to report for memcg event stats */
 static const unsigned int memcg_vm_event_stat[] = {
 	PGPGIN,
@@ -5486,18 +5540,25 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 	if (!pn)
 		return 1;
 
+	pn->lruvec_stats = kzalloc_node(sizeof(struct lruvec_stats), GFP_KERNEL,
+					node);
+	if (!pn->lruvec_stats)
+		goto fail;
+
 	pn->lruvec_stats_percpu = alloc_percpu_gfp(struct lruvec_stats_percpu,
 						   GFP_KERNEL_ACCOUNT);
-	if (!pn->lruvec_stats_percpu) {
-		kfree(pn);
-		return 1;
-	}
+	if (!pn->lruvec_stats_percpu)
+		goto fail;
 
 	lruvec_init(&pn->lruvec);
 	pn->memcg = memcg;
 
 	memcg->nodeinfo[node] = pn;
 	return 0;
+fail:
+	kfree(pn->lruvec_stats);
+	kfree(pn);
+	return 1;
 }
 
 static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
@@ -5508,6 +5569,7 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 		return;
 
 	free_percpu(pn->lruvec_stats_percpu);
+	kfree(pn->lruvec_stats);
 	kfree(pn);
 }
 
@@ -5860,18 +5922,19 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 
 	for_each_node_state(nid, N_MEMORY) {
 		struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
-		struct mem_cgroup_per_node *ppn = NULL;
+		struct lruvec_stats *lstats = pn->lruvec_stats;
+		struct lruvec_stats *plstats = NULL;
 		struct lruvec_stats_percpu *lstatc;
 
 		if (parent)
-			ppn = parent->nodeinfo[nid];
+			plstats = parent->nodeinfo[nid]->lruvec_stats;
 
 		lstatc = per_cpu_ptr(pn->lruvec_stats_percpu, cpu);
 
 		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
-			delta = pn->lruvec_stats.state_pending[i];
+			delta = lstats->state_pending[i];
 			if (delta)
-				pn->lruvec_stats.state_pending[i] = 0;
+				lstats->state_pending[i] = 0;
 
 			delta_cpu = 0;
 			v = READ_ONCE(lstatc->state[i]);
@@ -5882,12 +5945,12 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 			}
 
 			if (delta_cpu)
-				pn->lruvec_stats.state_local[i] += delta_cpu;
+				lstats->state_local[i] += delta_cpu;
 
 			if (delta) {
-				pn->lruvec_stats.state[i] += delta;
-				if (ppn)
-					ppn->lruvec_stats.state_pending[i] += delta;
+				lstats->state[i] += delta;
+				if (plstats)
+					plstats->state_pending[i] += delta;
 			}
 		}
 	}
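The mem_cgroup_css_rstat_flush() changes above keep the existing lockless propagation scheme and only redirect it through the new pointer. The scheme itself: each flush computes the per-CPU delta against the state_prev snapshot, folds it into the local and subtree aggregates, and queues it in the parent's state_pending to be picked up when the parent flushes. A simplified single-counter sketch of that scheme follows, with hypothetical names (flush_one_cpu, struct counter); it is not the kernel code, and the snapshot update in the middle of the loop is paraphrased from the surrounding context.

#include <linux/compiler.h>	/* READ_ONCE() */

struct pcpu_counter {
	long state;		/* updated on the hot path by one CPU */
	long state_prev;	/* snapshot taken at the previous flush */
};

struct counter {
	long state;		/* subtree-aggregated value */
	long state_local;	/* this cgroup only, summed over CPUs */
	long state_pending;	/* deltas queued here by children */
};

static void flush_one_cpu(struct counter *c, struct counter *parent,
			  struct pcpu_counter *pc)
{
	long delta, delta_cpu, v;

	/* take over whatever the children have queued for us */
	delta = c->state_pending;
	if (delta)
		c->state_pending = 0;

	/* lockless delta: current per-CPU value minus the last snapshot */
	v = READ_ONCE(pc->state);
	delta_cpu = v - pc->state_prev;
	if (delta_cpu) {
		pc->state_prev = v;
		delta += delta_cpu;
	}

	if (delta_cpu)
		c->state_local += delta_cpu;

	if (delta) {
		c->state += delta;
		/* defer upward propagation to the parent's own next flush */
		if (parent)
			parent->state_pending += delta;
	}
}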