mirror of https://github.com/torvalds/linux.git (synced 2025-11-04 10:40:15 +02:00)
	mm: fix Committed_AS underflow on large NR_CPUS environment
The Committed_AS field can underflow in certain situations:

>       # while true; do cat /proc/meminfo | grep _AS; sleep 1; done | uniq -c
>             1 Committed_AS: 18446744073709323392 kB
>            11 Committed_AS: 18446744073709455488 kB
>             6 Committed_AS:            35136 kB
>             5 Committed_AS: 18446744073709454400 kB
>             7 Committed_AS:            35904 kB
>             3 Committed_AS: 18446744073709453248 kB
>             2 Committed_AS:            34752 kB
>             9 Committed_AS: 18446744073709453248 kB
>             8 Committed_AS:            34752 kB
>             3 Committed_AS: 18446744073709320960 kB
>             7 Committed_AS: 18446744073709454080 kB
>             3 Committed_AS: 18446744073709320960 kB
>             5 Committed_AS: 18446744073709454080 kB
>             6 Committed_AS: 18446744073709320960 kB

This happens because NR_CPUS can be greater than 1000 and meminfo_proc_show() does not check for underflow.

More fundamentally, scaling the per-CPU batch with NR_CPUS is the wrong calculation: the likelihood of lock contention is proportional to the number of online CPUs, not to the theoretical maximum (NR_CPUS). The kernel now provides generic percpu-counter infrastructure, and using it is the right fix: it simplifies the code, and percpu_counter_read_positive() cannot return an underflowed value.

Reported-by: Dave Hansen <dave@linux.vnet.ibm.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Eric B Munson <ebmunson@us.ibm.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: <stable@kernel.org>		[All kernel versions]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
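---

The mechanics are easy to model in userspace. The sketch below is an illustration only, not the kernel implementation: the pcpu_counter, pcpu_add() and pcpu_read_positive() names, NCPUS, and BATCH are invented for the example, and the kB conversion assumes 4 KiB pages. It shows how a batched per-CPU counter's shared value can transiently dip below zero, why printing that value as unsigned yields numbers like those quoted above, and how a clamped read in the style of percpu_counter_read_positive() avoids reporting them.

/*
 * Illustration only -- NOT kernel code. A minimal userspace model of
 * the percpu-counter batching idea: each "CPU" accumulates a local
 * delta and folds it into the shared count once it crosses a batch
 * threshold, so the shared count only approximates the true sum.
 */
#include <stdio.h>

#define NCPUS	4	/* stand-in for the number of online CPUs */
#define BATCH	32	/* per-CPU slop tolerated before folding */

struct pcpu_counter {
	long count;		/* shared, approximate global value */
	long local[NCPUS];	/* per-CPU deltas not yet folded in */
};

static void pcpu_add(struct pcpu_counter *c, int cpu, long amount)
{
	c->local[cpu] += amount;
	if (c->local[cpu] > BATCH || c->local[cpu] < -BATCH) {
		c->count += c->local[cpu];	/* fold the batch */
		c->local[cpu] = 0;
	}
}

/* Clamp at zero, in the style of percpu_counter_read_positive(). */
static long pcpu_read_positive(const struct pcpu_counter *c)
{
	return c->count > 0 ? c->count : 0;
}

int main(void)
{
	struct pcpu_counter c = { 0 };

	pcpu_add(&c, 0, 40);	/* CPU 0 commits 40 pages: folds +40 */
	pcpu_add(&c, 1, -40);	/* CPU 1 uncommits 40: folds -40 */
	pcpu_add(&c, 1, -40);	/* ...and 40 more: shared count is now -40 */

	printf("raw shared count: %ld pages\n", c.count);
	/* The old report: negative pages converted to kB and printed as
	 * unsigned, giving a huge number (assumes 4 KiB pages). */
	printf("buggy report:     %lu kB\n", (unsigned long)c.count * 4);
	printf("clamped report:   %ld pages\n", pcpu_read_positive(&c));
	return 0;
}

The real percpu_counter additionally folds under a spinlock and, as the changelog notes, sizes its batch from the number of online CPUs rather than NR_CPUS.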
commit 00a62ce91e (parent 0763ed2355)

 fs/proc/meminfo.c    |    2 +-
 include/linux/mman.h |    9 +++------
 mm/mmap.c            |   12 ++++++------
 mm/nommu.c           |   13 +++++++------
 mm/swap.c            |   46 ----------------------------------------------
 5 files changed, 17 insertions(+), 65 deletions(-)
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -35,7 +35,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 #define K(x) ((x) << (PAGE_SHIFT - 10))
 	si_meminfo(&i);
 	si_swapinfo(&i);
-	committed = atomic_long_read(&vm_committed_space);
+	committed = percpu_counter_read_positive(&vm_committed_as);
 	allowed = ((totalram_pages - hugetlb_total_pages())
 		* sysctl_overcommit_ratio / 100) + total_swap_pages;
 
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -12,21 +12,18 @@
 
 #ifdef __KERNEL__
 #include <linux/mm.h>
+#include <linux/percpu_counter.h>
 
 #include <asm/atomic.h>
 
 extern int sysctl_overcommit_memory;
 extern int sysctl_overcommit_ratio;
-extern atomic_long_t vm_committed_space;
+extern struct percpu_counter vm_committed_as;
 
-#ifdef CONFIG_SMP
-extern void vm_acct_memory(long pages);
-#else
 static inline void vm_acct_memory(long pages)
 {
-	atomic_long_add(pages, &vm_committed_space);
+	percpu_counter_add(&vm_committed_as, pages);
 }
-#endif
 
 static inline void vm_unacct_memory(long pages)
 {

--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -85,7 +85,7 @@ EXPORT_SYMBOL(vm_get_page_prot);
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS;  /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50;	/* default is 50% */
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
+struct percpu_counter vm_committed_as;
 
 /*
  * Check that a process has enough memory to allocate a new virtual
@@ -179,11 +179,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 	if (mm)
 		allowed -= mm->total_vm / 32;
 
-	/*
-	 * cast `allowed' as a signed long because vm_committed_space
-	 * sometimes has a negative value
-	 */
-	if (atomic_long_read(&vm_committed_space) < (long)allowed)
+	if (percpu_counter_read_positive(&vm_committed_as) < allowed)
 		return 0;
 error:
 	vm_unacct_memory(pages);
@@ -2481,4 +2477,8 @@ void mm_drop_all_locks(struct mm_struct *mm)
  */
 void __init mmap_init(void)
 {
+	int ret;
+
+	ret = percpu_counter_init(&vm_committed_as, 0);
+	VM_BUG_ON(ret);
 }
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -62,7 +62,7 @@ void *high_memory;
 struct page *mem_map;
 unsigned long max_mapnr;
 unsigned long num_physpages;
-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
+struct percpu_counter vm_committed_as;
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50; /* default is 50% */
 int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
@@ -463,6 +463,10 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
  */
 void __init mmap_init(void)
 {
+	int ret;
+
+	ret = percpu_counter_init(&vm_committed_as, 0);
+	VM_BUG_ON(ret);
 	vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC);
 }
 
@@ -1847,12 +1851,9 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 	if (mm)
 		allowed -= mm->total_vm / 32;
 
-	/*
-	 * cast `allowed' as a signed long because vm_committed_space
-	 * sometimes has a negative value
-	 */
-	if (atomic_long_read(&vm_committed_space) < (long)allowed)
+	if (percpu_counter_read_positive(&vm_committed_as) < allowed)
 		return 0;
+
 error:
 	vm_unacct_memory(pages);
 
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -491,49 +491,6 @@ unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
 
 EXPORT_SYMBOL(pagevec_lookup_tag);
 
-#ifdef CONFIG_SMP
-/*
- * We tolerate a little inaccuracy to avoid ping-ponging the counter between
- * CPUs
- */
-#define ACCT_THRESHOLD	max(16, NR_CPUS * 2)
-
-static DEFINE_PER_CPU(long, committed_space);
-
-void vm_acct_memory(long pages)
-{
-	long *local;
-
-	preempt_disable();
-	local = &__get_cpu_var(committed_space);
-	*local += pages;
-	if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) {
-		atomic_long_add(*local, &vm_committed_space);
-		*local = 0;
-	}
-	preempt_enable();
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-/* Drop the CPU's cached committed space back into the central pool. */
-static int cpu_swap_callback(struct notifier_block *nfb,
-			     unsigned long action,
-			     void *hcpu)
-{
-	long *committed;
-
-	committed = &per_cpu(committed_space, (long)hcpu);
-	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-		atomic_long_add(*committed, &vm_committed_space);
-		*committed = 0;
-		drain_cpu_pagevecs((long)hcpu);
-	}
-	return NOTIFY_OK;
-}
-#endif /* CONFIG_HOTPLUG_CPU */
-#endif /* CONFIG_SMP */
-
 /*
  * Perform any setup for the swap system
  */
@@ -554,7 +511,4 @@ void __init swap_setup(void)
 	 * Right now other parts of the system means that we
 	 * _really_ don't want to cluster much more
 	 */
-#ifdef CONFIG_HOTPLUG_CPU
-	hotcpu_notifier(cpu_swap_callback, 0);
-#endif
 }
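---

A note on the mm/swap.c removal: under the old scheme each CPU could hold up to ACCT_THRESHOLD = max(16, NR_CPUS * 2) unfolded pages, so with NR_CPUS = 4096 (an example value) the global atomic could lag the true sum by as much as 4096 * 8192 pages, i.e. 128 GiB of address space at 4 KiB per page. The quoted 18446744073709323392 kB is simply -228224 kB (about 223 MiB below zero) printed as an unsigned 64-bit value. The cpu_swap_callback() hotplug handler can go as well, because the generic percpu_counter code of this era registers its own CPU-hotplug callback that folds a dead CPU's local count back into the shared total.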