	mm: memcontrol: fix excessive complexity in memory.stat reporting
We've seen memory.stat reads in top-level cgroups take up to fourteen
seconds during a userspace bug that created tens of thousands of ghost
cgroups pinned by lingering page cache.

Even with a more reasonable number of cgroups, aggregating memory.stat
is unnecessarily heavy. The complexity is this:

	nr_cgroups * nr_stat_items * nr_possible_cpus

where the stat items are ~70 at this point. With 128 cgroups and 128
CPUs - decent, not enormous, setups - reading the top-level memory.stat
has to aggregate over a million per-cpu counters. This doesn't scale.

Instead of spreading the source of truth across all CPUs, use the
per-cpu counters merely to batch updates to shared atomic counters.
This is the same as the per-cpu stocks we use for charging memory to
the shared atomic page_counters, and also the way the global vmstat
counters are implemented.

Vmstat has elaborate spilling thresholds that depend on the number of
CPUs, amount of memory, and memory pressure - carefully balancing the
cost of counter updates with the amount of per-cpu error. That's
because the vmstat counters are system-wide, but also used for
decisions inside the kernel (e.g. NR_FREE_PAGES in the allocator).
Neither is true for the memory controller. Use the same static batch
size we already use for page_counter updates during charging. The
per-cpu error in the stats will be 128k, which is an acceptable ratio
of cores to memory accounting granularity.

[hannes@cmpxchg.org: fix warning in __this_cpu_xchg() calls]
Link: http://lkml.kernel.org/r/20171201135750.GB8097@cmpxchg.org
Link: http://lkml.kernel.org/r/20171103153336.24044-3-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
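For illustration, here is a minimal userspace C sketch of the scheme the
commit describes: per-CPU deltas are folded into a single shared atomic
counter once they exceed a fixed batch, so a reader does one atomic load
instead of summing over all possible CPUs. The names (BATCH, NCPU,
counter_add, counter_read) are invented for the sketch and are not kernel
API; the kernel uses this_cpu ops on percpu memory and atomic_long_t.
Note the error bound: with a batch of 32 pages and the common 4 kB page
size, each CPU can hold back at most 32 * 4096 bytes = 128 kB, which is
where the "128k" figure above comes from.

	/* Sketch only: assumes one thread per "cpu" slot, so plain
	 * reads/writes of pcpu[] stand in for the kernel's this_cpu ops. */
	#include <stdatomic.h>
	#include <stdlib.h>

	#define BATCH 32		/* plays the role of MEMCG_CHARGE_BATCH */
	#define NCPU  4

	static atomic_long shared;	/* source of truth, one per stat item */
	static long pcpu[NCPU];		/* pending per-cpu deltas */

	static void counter_add(int cpu, long val)
	{
		long x = pcpu[cpu] + val;

		/* Spill to the shared atomic only when the pending delta
		 * exceeds the batch; cheap local arithmetic otherwise. */
		if (labs(x) > BATCH) {
			atomic_fetch_add(&shared, x);
			x = 0;
		}
		pcpu[cpu] = x;
	}

	static long counter_read(void)
	{
		/* One atomic load, off by at most BATCH per CPU; clamp the
		 * transiently negative case like memcg_page_state() does. */
		long x = atomic_load(&shared);

		return x < 0 ? 0 : x;
	}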
parent 284542656e
commit a983b5ebee

2 changed files with 113 additions and 84 deletions
include/linux/memcontrol.h

@@ -108,7 +108,10 @@ struct lruvec_stat {
  */
 struct mem_cgroup_per_node {
 	struct lruvec		lruvec;
-	struct lruvec_stat __percpu *lruvec_stat;
+
+	struct lruvec_stat __percpu *lruvec_stat_cpu;
+	atomic_long_t		lruvec_stat[NR_VM_NODE_STAT_ITEMS];
+
 	unsigned long		lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
 
 	struct mem_cgroup_reclaim_iter	iter[DEF_PRIORITY + 1];
@@ -227,10 +230,10 @@ struct mem_cgroup {
 	spinlock_t		move_lock;
 	struct task_struct	*move_lock_task;
 	unsigned long		move_lock_flags;
-	/*
-	 * percpu counter.
-	 */
-	struct mem_cgroup_stat_cpu __percpu *stat;
+
+	struct mem_cgroup_stat_cpu __percpu *stat_cpu;
+	atomic_long_t		stat[MEMCG_NR_STAT];
+	atomic_long_t		events[MEMCG_NR_EVENTS];
 
 	unsigned long		socket_pressure;
 
@@ -265,6 +268,12 @@ struct mem_cgroup {
 	/* WARNING: nodeinfo must be the last member here */
 };
 
+/*
+ * size of first charge trial. "32" comes from vmscan.c's magic value.
+ * TODO: maybe necessary to use big numbers in big irons.
+ */
+#define MEMCG_CHARGE_BATCH 32U
+
 extern struct mem_cgroup *root_mem_cgroup;
 
 static inline bool mem_cgroup_disabled(void)
@@ -485,32 +494,38 @@ void unlock_page_memcg(struct page *page);
 static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
 					     int idx)
 {
-	long val = 0;
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		val += per_cpu(memcg->stat->count[idx], cpu);
-
-	if (val < 0)
-		val = 0;
-
-	return val;
+	long x = atomic_long_read(&memcg->stat[idx]);
+#ifdef CONFIG_SMP
+	if (x < 0)
+		x = 0;
+#endif
+	return x;
 }
 
 /* idx can be of type enum memcg_stat_item or node_stat_item */
 static inline void __mod_memcg_state(struct mem_cgroup *memcg,
 				     int idx, int val)
 {
-	if (!mem_cgroup_disabled())
-		__this_cpu_add(memcg->stat->count[idx], val);
+	long x;
+
+	if (mem_cgroup_disabled())
+		return;
+
+	x = val + __this_cpu_read(memcg->stat_cpu->count[idx]);
+	if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
+		atomic_long_add(x, &memcg->stat[idx]);
+		x = 0;
+	}
+	__this_cpu_write(memcg->stat_cpu->count[idx], x);
 }
 
 /* idx can be of type enum memcg_stat_item or node_stat_item */
 static inline void mod_memcg_state(struct mem_cgroup *memcg,
 				   int idx, int val)
 {
-	if (!mem_cgroup_disabled())
-		this_cpu_add(memcg->stat->count[idx], val);
+	preempt_disable();
+	__mod_memcg_state(memcg, idx, val);
+	preempt_enable();
 }
 
 /**
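A note on the CONFIG_SMP clamp introduced in memcg_page_state() above: a
negative flush can hit the shared atomic while matching positive deltas
are still sitting unflushed in other CPUs' batches, so a reader can catch
the counter transiently below zero even though its true value never is.
A single-threaded driver for the userspace sketch in the commit message
shows the same effect (illustrative code, not kernel API):

	int main(void)
	{
		counter_add(0, 32);	/* stays in cpu 0's batch: 32 is not > BATCH */
		counter_add(1, 1);	/* stays in cpu 1's batch */
		counter_add(2, -33);	/* exceeds BATCH, flushes: shared == -33 */

		/* True value is 0; the raw atomic reads -33, but the
		 * clamped reader reports 0. */
		return counter_read() == 0 ? 0 : 1;
	}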
@@ -548,26 +563,25 @@ static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
 					      enum node_stat_item idx)
 {
 	struct mem_cgroup_per_node *pn;
-	long val = 0;
-	int cpu;
+	long x;
 
 	if (mem_cgroup_disabled())
 		return node_page_state(lruvec_pgdat(lruvec), idx);
 
 	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-	for_each_possible_cpu(cpu)
-		val += per_cpu(pn->lruvec_stat->count[idx], cpu);
-
-	if (val < 0)
-		val = 0;
-
-	return val;
+	x = atomic_long_read(&pn->lruvec_stat[idx]);
+#ifdef CONFIG_SMP
+	if (x < 0)
+		x = 0;
+#endif
+	return x;
 }
 
 static inline void __mod_lruvec_state(struct lruvec *lruvec,
 				      enum node_stat_item idx, int val)
 {
 	struct mem_cgroup_per_node *pn;
+	long x;
 
 	/* Update node */
 	__mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
@@ -581,7 +595,12 @@ static inline void __mod_lruvec_state(struct lruvec *lruvec,
 	__mod_memcg_state(pn->memcg, idx, val);
 
 	/* Update lruvec */
-	__this_cpu_add(pn->lruvec_stat->count[idx], val);
+	x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]);
+	if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
+		atomic_long_add(x, &pn->lruvec_stat[idx]);
+		x = 0;
+	}
+	__this_cpu_write(pn->lruvec_stat_cpu->count[idx], x);
 }
 
 static inline void mod_lruvec_state(struct lruvec *lruvec,
@@ -624,16 +643,25 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 static inline void __count_memcg_events(struct mem_cgroup *memcg,
 					int idx, unsigned long count)
 {
-	if (!mem_cgroup_disabled())
-		__this_cpu_add(memcg->stat->events[idx], count);
+	unsigned long x;
+
+	if (mem_cgroup_disabled())
+		return;
+
+	x = count + __this_cpu_read(memcg->stat_cpu->events[idx]);
+	if (unlikely(x > MEMCG_CHARGE_BATCH)) {
+		atomic_long_add(x, &memcg->events[idx]);
+		x = 0;
+	}
+	__this_cpu_write(memcg->stat_cpu->events[idx], x);
 }
 
-/* idx can be of type enum memcg_event_item or vm_event_item */
 static inline void count_memcg_events(struct mem_cgroup *memcg,
 				      int idx, unsigned long count)
 {
-	if (!mem_cgroup_disabled())
-		this_cpu_add(memcg->stat->events[idx], count);
+	preempt_disable();
+	__count_memcg_events(memcg, idx, count);
+	preempt_enable();
 }
 
 /* idx can be of type enum memcg_event_item or vm_event_item */

mm/memcontrol.c
@@ -542,39 +542,10 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
 	return mz;
 }
 
-/*
- * Return page count for single (non recursive) @memcg.
- *
- * Implementation Note: reading percpu statistics for memcg.
- *
- * Both of vmstat[] and percpu_counter has threshold and do periodic
- * synchronization to implement "quick" read. There are trade-off between
- * reading cost and precision of value. Then, we may have a chance to implement
- * a periodic synchronization of counter in memcg's counter.
- *
- * But this _read() function is used for user interface now. The user accounts
- * memory usage by memory cgroup and he _always_ requires exact value because
- * he accounts memory. Even if we provide quick-and-fuzzy read, we always
- * have to visit all online cpus and make sum. So, for now, unnecessary
- * synchronization is not implemented. (just implemented for cpu hotplug)
- *
- * If there are kernel internal actions which can make use of some not-exact
- * value, and reading all cpu value can be performance bottleneck in some
- * common workload, threshold and synchronization as vmstat[] should be
- * implemented.
- *
- * The parameter idx can be of type enum memcg_event_item or vm_event_item.
- */
-
 static unsigned long memcg_sum_events(struct mem_cgroup *memcg,
 				      int event)
 {
-	unsigned long val = 0;
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		val += per_cpu(memcg->stat->events[event], cpu);
-
-	return val;
+	return atomic_long_read(&memcg->events[event]);
 }
 
 static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
@@ -606,7 +577,7 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
 		nr_pages = -nr_pages; /* for event */
 	}
 
-	__this_cpu_add(memcg->stat->nr_page_events, nr_pages);
+	__this_cpu_add(memcg->stat_cpu->nr_page_events, nr_pages);
 }
 
 unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
@@ -642,8 +613,8 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
 {
 	unsigned long val, next;
 
-	val = __this_cpu_read(memcg->stat->nr_page_events);
-	next = __this_cpu_read(memcg->stat->targets[target]);
+	val = __this_cpu_read(memcg->stat_cpu->nr_page_events);
+	next = __this_cpu_read(memcg->stat_cpu->targets[target]);
 	/* from time_after() in jiffies.h */
 	if ((long)(next - val) < 0) {
 		switch (target) {
@@ -659,7 +630,7 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
 		default:
 			break;
 		}
-		__this_cpu_write(memcg->stat->targets[target], next);
+		__this_cpu_write(memcg->stat_cpu->targets[target], next);
 		return true;
 	}
 	return false;
@@ -1707,11 +1678,6 @@ void unlock_page_memcg(struct page *page)
 }
 EXPORT_SYMBOL(unlock_page_memcg);
 
-/*
- * size of first charge trial. "32" comes from vmscan.c's magic value.
- * TODO: maybe necessary to use big numbers in big irons.
- */
-#define CHARGE_BATCH	32U
 struct memcg_stock_pcp {
 	struct mem_cgroup *cached; /* this never be root cgroup */
 	unsigned int nr_pages;
@@ -1739,7 +1705,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 	unsigned long flags;
 	bool ret = false;
 
-	if (nr_pages > CHARGE_BATCH)
+	if (nr_pages > MEMCG_CHARGE_BATCH)
 		return ret;
 
 	local_irq_save(flags);
@@ -1808,7 +1774,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 	}
 	stock->nr_pages += nr_pages;
 
-	if (stock->nr_pages > CHARGE_BATCH)
+	if (stock->nr_pages > MEMCG_CHARGE_BATCH)
 		drain_stock(stock);
 
 	local_irq_restore(flags);
@@ -1858,9 +1824,44 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
 static int memcg_hotplug_cpu_dead(unsigned int cpu)
 {
 	struct memcg_stock_pcp *stock;
+	struct mem_cgroup *memcg;
 
 	stock = &per_cpu(memcg_stock, cpu);
 	drain_stock(stock);
 
+	for_each_mem_cgroup(memcg) {
+		int i;
+
+		for (i = 0; i < MEMCG_NR_STAT; i++) {
+			int nid;
+			long x;
+
+			x = this_cpu_xchg(memcg->stat_cpu->count[i], 0);
+			if (x)
+				atomic_long_add(x, &memcg->stat[i]);
+
+			if (i >= NR_VM_NODE_STAT_ITEMS)
+				continue;
+
+			for_each_node(nid) {
+				struct mem_cgroup_per_node *pn;
+
+				pn = mem_cgroup_nodeinfo(memcg, nid);
+				x = this_cpu_xchg(pn->lruvec_stat_cpu->count[i], 0);
+				if (x)
+					atomic_long_add(x, &pn->lruvec_stat[i]);
+			}
+		}
+
+		for (i = 0; i < MEMCG_NR_EVENTS; i++) {
+			long x;
+
+			x = this_cpu_xchg(memcg->stat_cpu->events[i], 0);
+			if (x)
+				atomic_long_add(x, &memcg->events[i]);
+		}
+	}
+
 	return 0;
 }
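The memcg_hotplug_cpu_dead() hunk above is the other half of the batching
scheme: when a CPU goes away, the deltas still parked in its per-cpu
batches must be folded into the shared atomics or they would be lost. In
the userspace sketch from the commit message, the equivalent would be
something like the following (counter_flush is an invented name; the
kernel does the read-and-clear with this_cpu_xchg(), per the warning fix
noted in the changelog):

	static void counter_flush(int cpu)
	{
		/* Assumes no concurrent writer on the dead cpu's slot. */
		long x = pcpu[cpu];

		pcpu[cpu] = 0;
		if (x)
			atomic_fetch_add(&shared, x);
	}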
@@ -1881,7 +1882,7 @@ static void high_work_func(struct work_struct *work)
 	struct mem_cgroup *memcg;
 
 	memcg = container_of(work, struct mem_cgroup, high_work);
-	reclaim_high(memcg, CHARGE_BATCH, GFP_KERNEL);
+	reclaim_high(memcg, MEMCG_CHARGE_BATCH, GFP_KERNEL);
 }
 
 /*
@@ -1905,7 +1906,7 @@ void mem_cgroup_handle_over_high(void)
 static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 		      unsigned int nr_pages)
 {
-	unsigned int batch = max(CHARGE_BATCH, nr_pages);
+	unsigned int batch = max(MEMCG_CHARGE_BATCH, nr_pages);
 	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
 	struct mem_cgroup *mem_over_limit;
 	struct page_counter *counter;
@@ -4161,8 +4162,8 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 	if (!pn)
 		return 1;
 
-	pn->lruvec_stat = alloc_percpu(struct lruvec_stat);
-	if (!pn->lruvec_stat) {
+	pn->lruvec_stat_cpu = alloc_percpu(struct lruvec_stat);
+	if (!pn->lruvec_stat_cpu) {
 		kfree(pn);
 		return 1;
 	}
@@ -4180,7 +4181,7 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 {
 	struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
 
-	free_percpu(pn->lruvec_stat);
+	free_percpu(pn->lruvec_stat_cpu);
 	kfree(pn);
 }
 
@@ -4190,7 +4191,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 
 	for_each_node(node)
 		free_mem_cgroup_per_node_info(memcg, node);
-	free_percpu(memcg->stat);
+	free_percpu(memcg->stat_cpu);
 	kfree(memcg);
 }
 
@@ -4219,8 +4220,8 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 	if (memcg->id.id < 0)
 		goto fail;
 
-	memcg->stat = alloc_percpu(struct mem_cgroup_stat_cpu);
-	if (!memcg->stat)
+	memcg->stat_cpu = alloc_percpu(struct mem_cgroup_stat_cpu);
+	if (!memcg->stat_cpu)
 		goto fail;
 
 	for_each_node(node)
@@ -5638,7 +5639,7 @@ static void uncharge_batch(const struct uncharge_gather *ug)
 	__mod_memcg_state(ug->memcg, MEMCG_RSS_HUGE, -ug->nr_huge);
 	__mod_memcg_state(ug->memcg, NR_SHMEM, -ug->nr_shmem);
 	__count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout);
-	__this_cpu_add(ug->memcg->stat->nr_page_events, nr_pages);
+	__this_cpu_add(ug->memcg->stat_cpu->nr_page_events, nr_pages);
 	memcg_check_events(ug->memcg, ug->dummy_page);
 	local_irq_restore(flags);
 