mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-03 18:20:25 +02:00 
			
		
		
		
	Memory controller: add per cgroup LRU and reclaim
Add the page_cgroup to the per cgroup LRU. The reclaim algorithm has been modified to make the isolate_lru_pages() as a pluggable component. The scan_control data structure now accepts the cgroup on behalf of which reclaims are carried out. try_to_free_pages() has been extended to become cgroup aware. [akpm@linux-foundation.org: fix warning] [Lee.Schermerhorn@hp.com: initialize all scan_control's isolate_pages member] [bunk@kernel.org: make do_try_to_free_pages() static] [hugh@veritas.com: memcgroup: fix try_to_free order] [kamezawa.hiroyu@jp.fujitsu.com: this unlock_page_cgroup() is unnecessary] Signed-off-by: Pavel Emelianov <xemul@openvz.org> Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com> Cc: Paul Menage <menage@google.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Cc: Kirill Korotaev <dev@sw.ru> Cc: Herbert Poetzl <herbert@13thfloor.at> Cc: David Rientjes <rientjes@google.com> Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Signed-off-by: Hugh Dickins <hugh@veritas.com> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									67e465a77b
								
							
						
					
					
						commit
						66e1707bc3
					
				
					 6 changed files with 286 additions and 30 deletions
				
			
		| 
						 | 
				
			
			@ -32,6 +32,13 @@ extern void page_assign_page_cgroup(struct page *page,
 | 
			
		|||
extern struct page_cgroup *page_get_page_cgroup(struct page *page);
 | 
			
		||||
extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm);
 | 
			
		||||
extern void mem_cgroup_uncharge(struct page_cgroup *pc);
 | 
			
		||||
extern void mem_cgroup_move_lists(struct page_cgroup *pc, bool active);
 | 
			
		||||
extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 | 
			
		||||
					struct list_head *dst,
 | 
			
		||||
					unsigned long *scanned, int order,
 | 
			
		||||
					int mode, struct zone *z,
 | 
			
		||||
					struct mem_cgroup *mem_cont,
 | 
			
		||||
					int active);
 | 
			
		||||
 | 
			
		||||
static inline void mem_cgroup_uncharge_page(struct page *page)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -71,6 +78,11 @@ static inline void mem_cgroup_uncharge_page(struct page *page)
 | 
			
		|||
{
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline void mem_cgroup_move_lists(struct page_cgroup *pc,
 | 
			
		||||
						bool active)
 | 
			
		||||
{
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif /* CONFIG_CGROUP_MEM_CONT */
 | 
			
		||||
 | 
			
		||||
#endif /* _LINUX_MEMCONTROL_H */
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -99,4 +99,27 @@ int res_counter_charge(struct res_counter *counter, unsigned long val);
 | 
			
		|||
void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
 | 
			
		||||
void res_counter_uncharge(struct res_counter *counter, unsigned long val);
 | 
			
		||||
 | 
			
		||||
static inline bool res_counter_limit_check_locked(struct res_counter *cnt)
 | 
			
		||||
{
 | 
			
		||||
	if (cnt->usage < cnt->limit)
 | 
			
		||||
		return true;
 | 
			
		||||
 | 
			
		||||
	return false;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Helper function to detect if the cgroup is within its limit or
 * not. It's currently called from cgroup_rss_prepare()
 */
static inline bool res_counter_check_under_limit(struct res_counter *cnt)
{
	bool ret;
	unsigned long flags;

	/*
	 * The counter can be updated from reclaim/IRQ context, so take the
	 * lock with interrupts disabled before sampling usage vs. limit.
	 */
	spin_lock_irqsave(&cnt->lock, flags);
	ret = res_counter_limit_check_locked(cnt);
	spin_unlock_irqrestore(&cnt->lock, flags);
	return ret;
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -5,6 +5,7 @@
 | 
			
		|||
#include <linux/linkage.h>
 | 
			
		||||
#include <linux/mmzone.h>
 | 
			
		||||
#include <linux/list.h>
 | 
			
		||||
#include <linux/memcontrol.h>
 | 
			
		||||
#include <linux/sched.h>
 | 
			
		||||
 | 
			
		||||
#include <asm/atomic.h>
 | 
			
		||||
| 
						 | 
				
			
			@ -182,6 +183,8 @@ extern void swap_setup(void);
 | 
			
		|||
/* linux/mm/vmscan.c */
 | 
			
		||||
extern unsigned long try_to_free_pages(struct zone **zones, int order,
 | 
			
		||||
					gfp_t gfp_mask);
 | 
			
		||||
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem);
 | 
			
		||||
extern int __isolate_lru_page(struct page *page, int mode);
 | 
			
		||||
extern unsigned long shrink_all_memory(unsigned long nr_pages);
 | 
			
		||||
extern int vm_swappiness;
 | 
			
		||||
extern int remove_mapping(struct address_space *mapping, struct page *page);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										148
									
								
								mm/memcontrol.c
									
									
									
									
									
								
							
							
						
						
									
										148
									
								
								mm/memcontrol.c
									
									
									
									
									
								
							| 
						 | 
				
			
			@ -22,10 +22,15 @@
 | 
			
		|||
#include <linux/cgroup.h>
 | 
			
		||||
#include <linux/mm.h>
 | 
			
		||||
#include <linux/page-flags.h>
 | 
			
		||||
#include <linux/backing-dev.h>
 | 
			
		||||
#include <linux/bit_spinlock.h>
 | 
			
		||||
#include <linux/rcupdate.h>
 | 
			
		||||
#include <linux/swap.h>
 | 
			
		||||
#include <linux/spinlock.h>
 | 
			
		||||
#include <linux/fs.h>
 | 
			
		||||
 | 
			
		||||
struct cgroup_subsys mem_cgroup_subsys;
 | 
			
		||||
static const int MEM_CGROUP_RECLAIM_RETRIES = 5;
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * The memory controller data structure. The memory controller controls both
 | 
			
		||||
| 
						 | 
				
			
			@ -51,6 +56,10 @@ struct mem_cgroup {
 | 
			
		|||
	 */
 | 
			
		||||
	struct list_head active_list;
 | 
			
		||||
	struct list_head inactive_list;
 | 
			
		||||
	/*
 | 
			
		||||
	 * spin_lock to protect the per cgroup LRU
 | 
			
		||||
	 */
 | 
			
		||||
	spinlock_t lru_lock;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
| 
						 | 
				
			
			@ -141,6 +150,94 @@ void __always_inline unlock_page_cgroup(struct page *page)
 | 
			
		|||
	bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Move @pc onto its owning cgroup's active or inactive LRU list,
 * according to @active.  Caller must hold the cgroup's lru_lock.
 */
void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
{
	struct list_head *head;

	head = active ? &pc->mem_cgroup->active_list
		      : &pc->mem_cgroup->inactive_list;
	list_move(&pc->lru, head);
}
 | 
			
		||||
 | 
			
		||||
/*
 * This routine assumes that the appropriate zone's lru lock is already held
 */
void mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
{
	struct mem_cgroup *mem;
	/* Uncharged pages have no page_cgroup; nothing to move. */
	if (!pc)
		return;

	mem = pc->mem_cgroup;

	/*
	 * The per-cgroup LRU has its own spinlock, taken nested inside the
	 * zone lru_lock the caller already holds.
	 */
	spin_lock(&mem->lru_lock);
	__mem_cgroup_move_lists(pc, active);
	spin_unlock(&mem->lru_lock);
}
 | 
			
		||||
 | 
			
		||||
/*
 * Scan up to @nr_to_scan entries of @mem_cont's active or inactive
 * per-cgroup LRU (selected by @active) and isolate the pages that belong
 * to zone @z onto @dst, using __isolate_lru_page(@mode) for the actual
 * isolation.  The number of entries examined is returned through
 * @scanned; the return value is the number of pages actually taken.
 *
 * This is the memory-cgroup counterpart of isolate_lru_pages() and is
 * plugged into scan_control->isolate_pages.  @order is accepted for
 * signature compatibility; lumpy reclaim is not attempted here.
 */
unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
					struct list_head *dst,
					unsigned long *scanned, int order,
					int mode, struct zone *z,
					struct mem_cgroup *mem_cont,
					int active)
{
	unsigned long nr_taken = 0;
	struct page *page;
	unsigned long scan;
	LIST_HEAD(pc_list);
	struct list_head *src;
	struct page_cgroup *pc;

	if (active)
		src = &mem_cont->active_list;
	else
		src = &mem_cont->inactive_list;

	spin_lock(&mem_cont->lru_lock);
	/* Scan from the tail (oldest entries) of the chosen list. */
	for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
		pc = list_entry(src->prev, struct page_cgroup, lru);
		page = pc->page;
		VM_BUG_ON(!pc);

		/*
		 * A page whose PG_active state disagrees with the list it is
		 * on gets re-filed to the matching cgroup list instead of
		 * being isolated; scan-- so it doesn't count against the
		 * scan budget.
		 */
		if (PageActive(page) && !active) {
			__mem_cgroup_move_lists(pc, true);
			scan--;
			continue;
		}
		if (!PageActive(page) && active) {
			__mem_cgroup_move_lists(pc, false);
			scan--;
			continue;
		}

		/*
		 * Reclaim, per zone
		 * TODO: make the active/inactive lists per zone
		 */
		if (page_zone(page) != z)
			continue;

		/*
		 * Check if the meta page went away from under us
		 */
		if (!list_empty(&pc->lru))
			list_move(&pc->lru, &pc_list);
		else
			continue;

		if (__isolate_lru_page(page, mode) == 0) {
			list_move(&page->lru, dst);
			nr_taken++;
		}
	}

	/* Return the examined-but-not-taken entries to the source list. */
	list_splice(&pc_list, src);
	spin_unlock(&mem_cont->lru_lock);

	*scanned = scan;
	return nr_taken;
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Charge the memory controller for page usage.
 | 
			
		||||
 * Return
 | 
			
		||||
| 
						 | 
				
			
			@ -151,6 +248,8 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
 | 
			
		|||
{
 | 
			
		||||
	struct mem_cgroup *mem;
 | 
			
		||||
	struct page_cgroup *pc, *race_pc;
 | 
			
		||||
	unsigned long flags;
 | 
			
		||||
	unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Should page_cgroup's go to their own slab?
 | 
			
		||||
| 
						 | 
				
			
			@ -159,14 +258,20 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
 | 
			
		|||
	 * to see if the cgroup page already has a page_cgroup associated
 | 
			
		||||
	 * with it
 | 
			
		||||
	 */
 | 
			
		||||
retry:
 | 
			
		||||
	lock_page_cgroup(page);
 | 
			
		||||
	pc = page_get_page_cgroup(page);
 | 
			
		||||
	/*
 | 
			
		||||
	 * The page_cgroup exists and the page has already been accounted
 | 
			
		||||
	 */
 | 
			
		||||
	if (pc) {
 | 
			
		||||
		atomic_inc(&pc->ref_cnt);
 | 
			
		||||
		goto done;
 | 
			
		||||
		if (unlikely(!atomic_inc_not_zero(&pc->ref_cnt))) {
 | 
			
		||||
			/* this page is under being uncharged ? */
 | 
			
		||||
			unlock_page_cgroup(page);
 | 
			
		||||
			cpu_relax();
 | 
			
		||||
			goto retry;
 | 
			
		||||
		} else
 | 
			
		||||
			goto done;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	unlock_page_cgroup(page);
 | 
			
		||||
| 
						 | 
				
			
			@ -197,7 +302,32 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
 | 
			
		|||
	 * If we created the page_cgroup, we should free it on exceeding
 | 
			
		||||
	 * the cgroup limit.
 | 
			
		||||
	 */
 | 
			
		||||
	if (res_counter_charge(&mem->res, 1)) {
 | 
			
		||||
	while (res_counter_charge(&mem->res, 1)) {
 | 
			
		||||
		if (try_to_free_mem_cgroup_pages(mem))
 | 
			
		||||
			continue;
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
 		 * try_to_free_mem_cgroup_pages() might not give us a full
 | 
			
		||||
 		 * picture of reclaim. Some pages are reclaimed and might be
 | 
			
		||||
 		 * moved to swap cache or just unmapped from the cgroup.
 | 
			
		||||
 		 * Check the limit again to see if the reclaim reduced the
 | 
			
		||||
 		 * current usage of the cgroup before giving up
 | 
			
		||||
 		 */
 | 
			
		||||
		if (res_counter_check_under_limit(&mem->res))
 | 
			
		||||
			continue;
 | 
			
		||||
			/*
 | 
			
		||||
			 * Since we control both RSS and cache, we end up with a
 | 
			
		||||
			 * very interesting scenario where we end up reclaiming
 | 
			
		||||
			 * memory (essentially RSS), since the memory is pushed
 | 
			
		||||
			 * to swap cache, we eventually end up adding those
 | 
			
		||||
			 * pages back to our list. Hence we give ourselves a
 | 
			
		||||
			 * few chances before we fail
 | 
			
		||||
			 */
 | 
			
		||||
		else if (nr_retries--) {
 | 
			
		||||
			congestion_wait(WRITE, HZ/10);
 | 
			
		||||
			continue;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		css_put(&mem->css);
 | 
			
		||||
		goto free_pc;
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -221,14 +351,16 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
 | 
			
		|||
	pc->page = page;
 | 
			
		||||
	page_assign_page_cgroup(page, pc);
 | 
			
		||||
 | 
			
		||||
	spin_lock_irqsave(&mem->lru_lock, flags);
 | 
			
		||||
	list_add(&pc->lru, &mem->active_list);
 | 
			
		||||
	spin_unlock_irqrestore(&mem->lru_lock, flags);
 | 
			
		||||
 | 
			
		||||
done:
 | 
			
		||||
	unlock_page_cgroup(page);
 | 
			
		||||
	return 0;
 | 
			
		||||
free_pc:
 | 
			
		||||
	kfree(pc);
 | 
			
		||||
	return -ENOMEM;
 | 
			
		||||
err:
 | 
			
		||||
	unlock_page_cgroup(page);
 | 
			
		||||
	return -ENOMEM;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -240,6 +372,7 @@ void mem_cgroup_uncharge(struct page_cgroup *pc)
 | 
			
		|||
{
 | 
			
		||||
	struct mem_cgroup *mem;
 | 
			
		||||
	struct page *page;
 | 
			
		||||
	unsigned long flags;
 | 
			
		||||
 | 
			
		||||
	if (!pc)
 | 
			
		||||
		return;
 | 
			
		||||
| 
						 | 
				
			
			@ -252,6 +385,10 @@ void mem_cgroup_uncharge(struct page_cgroup *pc)
 | 
			
		|||
		page_assign_page_cgroup(page, NULL);
 | 
			
		||||
		unlock_page_cgroup(page);
 | 
			
		||||
		res_counter_uncharge(&mem->res, 1);
 | 
			
		||||
 | 
			
		||||
 		spin_lock_irqsave(&mem->lru_lock, flags);
 | 
			
		||||
 		list_del_init(&pc->lru);
 | 
			
		||||
 		spin_unlock_irqrestore(&mem->lru_lock, flags);
 | 
			
		||||
		kfree(pc);
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -310,6 +447,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 | 
			
		|||
	res_counter_init(&mem->res);
 | 
			
		||||
	INIT_LIST_HEAD(&mem->active_list);
 | 
			
		||||
	INIT_LIST_HEAD(&mem->inactive_list);
 | 
			
		||||
	spin_lock_init(&mem->lru_lock);
 | 
			
		||||
	return &mem->css;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -29,6 +29,7 @@
 | 
			
		|||
#include <linux/cpu.h>
 | 
			
		||||
#include <linux/notifier.h>
 | 
			
		||||
#include <linux/backing-dev.h>
 | 
			
		||||
#include <linux/memcontrol.h>
 | 
			
		||||
 | 
			
		||||
/* How many pages do we try to swap or page in/out together? */
 | 
			
		||||
int page_cluster;
 | 
			
		||||
| 
						 | 
				
			
			@ -175,6 +176,7 @@ void activate_page(struct page *page)
 | 
			
		|||
		SetPageActive(page);
 | 
			
		||||
		add_page_to_active_list(zone, page);
 | 
			
		||||
		__count_vm_event(PGACTIVATE);
 | 
			
		||||
		mem_cgroup_move_lists(page_get_page_cgroup(page), true);
 | 
			
		||||
	}
 | 
			
		||||
	spin_unlock_irq(&zone->lru_lock);
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										128
									
								
								mm/vmscan.c
									
									
									
									
									
								
							
							
						
						
									
										128
									
								
								mm/vmscan.c
									
									
									
									
									
								
							| 
						 | 
				
			
			@ -37,6 +37,7 @@
 | 
			
		|||
#include <linux/delay.h>
 | 
			
		||||
#include <linux/kthread.h>
 | 
			
		||||
#include <linux/freezer.h>
 | 
			
		||||
#include <linux/memcontrol.h>
 | 
			
		||||
 | 
			
		||||
#include <asm/tlbflush.h>
 | 
			
		||||
#include <asm/div64.h>
 | 
			
		||||
| 
						 | 
				
			
			@ -68,6 +69,15 @@ struct scan_control {
 | 
			
		|||
	int all_unreclaimable;
 | 
			
		||||
 | 
			
		||||
	int order;
 | 
			
		||||
 | 
			
		||||
	/* Which cgroup do we reclaim from */
 | 
			
		||||
	struct mem_cgroup *mem_cgroup;
 | 
			
		||||
 | 
			
		||||
	/* Pluggable isolate pages callback */
 | 
			
		||||
	unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst,
 | 
			
		||||
			unsigned long *scanned, int order, int mode,
 | 
			
		||||
			struct zone *z, struct mem_cgroup *mem_cont,
 | 
			
		||||
			int active);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
 | 
			
		||||
| 
						 | 
				
			
			@ -626,7 +636,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 | 
			
		|||
 *
 | 
			
		||||
 * returns 0 on success, -ve errno on failure.
 | 
			
		||||
 */
 | 
			
		||||
static int __isolate_lru_page(struct page *page, int mode)
 | 
			
		||||
int __isolate_lru_page(struct page *page, int mode)
 | 
			
		||||
{
 | 
			
		||||
	int ret = -EINVAL;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -760,6 +770,21 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 | 
			
		|||
	return nr_taken;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static unsigned long isolate_pages_global(unsigned long nr,
 | 
			
		||||
					struct list_head *dst,
 | 
			
		||||
					unsigned long *scanned, int order,
 | 
			
		||||
					int mode, struct zone *z,
 | 
			
		||||
					struct mem_cgroup *mem_cont,
 | 
			
		||||
					int active)
 | 
			
		||||
{
 | 
			
		||||
	if (active)
 | 
			
		||||
		return isolate_lru_pages(nr, &z->active_list, dst,
 | 
			
		||||
						scanned, order, mode);
 | 
			
		||||
	else
 | 
			
		||||
		return isolate_lru_pages(nr, &z->inactive_list, dst,
 | 
			
		||||
						scanned, order, mode);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * clear_active_flags() is a helper for shrink_active_list(), clearing
 | 
			
		||||
 * any active bits from the pages in the list.
 | 
			
		||||
| 
						 | 
				
			
			@ -801,11 +826,11 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 | 
			
		|||
		unsigned long nr_freed;
 | 
			
		||||
		unsigned long nr_active;
 | 
			
		||||
 | 
			
		||||
		nr_taken = isolate_lru_pages(sc->swap_cluster_max,
 | 
			
		||||
			     &zone->inactive_list,
 | 
			
		||||
		nr_taken = sc->isolate_pages(sc->swap_cluster_max,
 | 
			
		||||
			     &page_list, &nr_scan, sc->order,
 | 
			
		||||
			     (sc->order > PAGE_ALLOC_COSTLY_ORDER)?
 | 
			
		||||
					     ISOLATE_BOTH : ISOLATE_INACTIVE);
 | 
			
		||||
					     ISOLATE_BOTH : ISOLATE_INACTIVE,
 | 
			
		||||
				zone, sc->mem_cgroup, 0);
 | 
			
		||||
		nr_active = clear_active_flags(&page_list);
 | 
			
		||||
		__count_vm_events(PGDEACTIVATE, nr_active);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1018,8 +1043,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 | 
			
		|||
 | 
			
		||||
	lru_add_drain();
 | 
			
		||||
	spin_lock_irq(&zone->lru_lock);
 | 
			
		||||
	pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
 | 
			
		||||
			    &l_hold, &pgscanned, sc->order, ISOLATE_ACTIVE);
 | 
			
		||||
	pgmoved = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order,
 | 
			
		||||
					ISOLATE_ACTIVE, zone,
 | 
			
		||||
					sc->mem_cgroup, 1);
 | 
			
		||||
	zone->pages_scanned += pgscanned;
 | 
			
		||||
	__mod_zone_page_state(zone, NR_ACTIVE, -pgmoved);
 | 
			
		||||
	spin_unlock_irq(&zone->lru_lock);
 | 
			
		||||
| 
						 | 
				
			
			@ -1051,6 +1077,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 | 
			
		|||
		ClearPageActive(page);
 | 
			
		||||
 | 
			
		||||
		list_move(&page->lru, &zone->inactive_list);
 | 
			
		||||
		mem_cgroup_move_lists(page_get_page_cgroup(page), false);
 | 
			
		||||
		pgmoved++;
 | 
			
		||||
		if (!pagevec_add(&pvec, page)) {
 | 
			
		||||
			__mod_zone_page_state(zone, NR_INACTIVE, pgmoved);
 | 
			
		||||
| 
						 | 
				
			
			@ -1079,6 +1106,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 | 
			
		|||
		SetPageLRU(page);
 | 
			
		||||
		VM_BUG_ON(!PageActive(page));
 | 
			
		||||
		list_move(&page->lru, &zone->active_list);
 | 
			
		||||
		mem_cgroup_move_lists(page_get_page_cgroup(page), true);
 | 
			
		||||
		pgmoved++;
 | 
			
		||||
		if (!pagevec_add(&pvec, page)) {
 | 
			
		||||
			__mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
 | 
			
		||||
| 
						 | 
				
			
			@ -1206,7 +1234,8 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
 | 
			
		|||
 * holds filesystem locks which prevent writeout this might not work, and the
 | 
			
		||||
 * allocation attempt will fail.
 | 
			
		||||
 */
 | 
			
		||||
unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
 | 
			
		||||
static unsigned long do_try_to_free_pages(struct zone **zones, gfp_t gfp_mask,
 | 
			
		||||
					  struct scan_control *sc)
 | 
			
		||||
{
 | 
			
		||||
	int priority;
 | 
			
		||||
	int ret = 0;
 | 
			
		||||
| 
						 | 
				
			
			@ -1215,14 +1244,6 @@ unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
 | 
			
		|||
	struct reclaim_state *reclaim_state = current->reclaim_state;
 | 
			
		||||
	unsigned long lru_pages = 0;
 | 
			
		||||
	int i;
 | 
			
		||||
	struct scan_control sc = {
 | 
			
		||||
		.gfp_mask = gfp_mask,
 | 
			
		||||
		.may_writepage = !laptop_mode,
 | 
			
		||||
		.swap_cluster_max = SWAP_CLUSTER_MAX,
 | 
			
		||||
		.may_swap = 1,
 | 
			
		||||
		.swappiness = vm_swappiness,
 | 
			
		||||
		.order = order,
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
	count_vm_event(ALLOCSTALL);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1237,17 +1258,22 @@ unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
 | 
			
		|||
	}
 | 
			
		||||
 | 
			
		||||
	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
 | 
			
		||||
		sc.nr_scanned = 0;
 | 
			
		||||
		sc->nr_scanned = 0;
 | 
			
		||||
		if (!priority)
 | 
			
		||||
			disable_swap_token();
 | 
			
		||||
		nr_reclaimed += shrink_zones(priority, zones, &sc);
 | 
			
		||||
		shrink_slab(sc.nr_scanned, gfp_mask, lru_pages);
 | 
			
		||||
		nr_reclaimed += shrink_zones(priority, zones, sc);
 | 
			
		||||
		/*
 | 
			
		||||
		 * Don't shrink slabs when reclaiming memory from
 | 
			
		||||
		 * over limit cgroups
 | 
			
		||||
		 */
 | 
			
		||||
		if (sc->mem_cgroup == NULL)
 | 
			
		||||
			shrink_slab(sc->nr_scanned, gfp_mask, lru_pages);
 | 
			
		||||
		if (reclaim_state) {
 | 
			
		||||
			nr_reclaimed += reclaim_state->reclaimed_slab;
 | 
			
		||||
			reclaim_state->reclaimed_slab = 0;
 | 
			
		||||
		}
 | 
			
		||||
		total_scanned += sc.nr_scanned;
 | 
			
		||||
		if (nr_reclaimed >= sc.swap_cluster_max) {
 | 
			
		||||
		total_scanned += sc->nr_scanned;
 | 
			
		||||
		if (nr_reclaimed >= sc->swap_cluster_max) {
 | 
			
		||||
			ret = 1;
 | 
			
		||||
			goto out;
 | 
			
		||||
		}
 | 
			
		||||
| 
						 | 
				
			
			@ -1259,18 +1285,18 @@ unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
 | 
			
		|||
		 * that's undesirable in laptop mode, where we *want* lumpy
 | 
			
		||||
		 * writeout.  So in laptop mode, write out the whole world.
 | 
			
		||||
		 */
 | 
			
		||||
		if (total_scanned > sc.swap_cluster_max +
 | 
			
		||||
					sc.swap_cluster_max / 2) {
 | 
			
		||||
		if (total_scanned > sc->swap_cluster_max +
 | 
			
		||||
					sc->swap_cluster_max / 2) {
 | 
			
		||||
			wakeup_pdflush(laptop_mode ? 0 : total_scanned);
 | 
			
		||||
			sc.may_writepage = 1;
 | 
			
		||||
			sc->may_writepage = 1;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		/* Take a nap, wait for some writeback to complete */
 | 
			
		||||
		if (sc.nr_scanned && priority < DEF_PRIORITY - 2)
 | 
			
		||||
		if (sc->nr_scanned && priority < DEF_PRIORITY - 2)
 | 
			
		||||
			congestion_wait(WRITE, HZ/10);
 | 
			
		||||
	}
 | 
			
		||||
	/* top priority shrink_caches still had more to do? don't OOM, then */
 | 
			
		||||
	if (!sc.all_unreclaimable)
 | 
			
		||||
	if (!sc->all_unreclaimable && sc->mem_cgroup == NULL)
 | 
			
		||||
		ret = 1;
 | 
			
		||||
out:
 | 
			
		||||
	/*
 | 
			
		||||
| 
						 | 
				
			
			@ -1293,6 +1319,54 @@ unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
 | 
			
		|||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Entry point for global (non-cgroup) direct reclaim: build a
 * scan_control for whole-system reclaim and hand it to
 * do_try_to_free_pages().  mem_cgroup is NULL and isolate_pages is the
 * global LRU isolator, which also re-enables slab shrinking in
 * do_try_to_free_pages().
 */
unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
{
	struct scan_control sc = {
		.gfp_mask = gfp_mask,
		.may_writepage = !laptop_mode,
		.swap_cluster_max = SWAP_CLUSTER_MAX,
		.may_swap = 1,
		.swappiness = vm_swappiness,
		.order = order,
		.mem_cgroup = NULL,
		.isolate_pages = isolate_pages_global,
	};

	return do_try_to_free_pages(zones, gfp_mask, &sc);
}
 | 
			
		||||
 | 
			
		||||
#ifdef CONFIG_CGROUP_MEM_CONT
 | 
			
		||||
 | 
			
		||||
#ifdef CONFIG_HIGHMEM
 | 
			
		||||
#define ZONE_USERPAGES ZONE_HIGHMEM
 | 
			
		||||
#else
 | 
			
		||||
#define ZONE_USERPAGES ZONE_NORMAL
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont)
 | 
			
		||||
{
 | 
			
		||||
	struct scan_control sc = {
 | 
			
		||||
		.gfp_mask = GFP_KERNEL,
 | 
			
		||||
		.may_writepage = !laptop_mode,
 | 
			
		||||
		.may_swap = 1,
 | 
			
		||||
		.swap_cluster_max = SWAP_CLUSTER_MAX,
 | 
			
		||||
		.swappiness = vm_swappiness,
 | 
			
		||||
		.order = 0,
 | 
			
		||||
		.mem_cgroup = mem_cont,
 | 
			
		||||
		.isolate_pages = mem_cgroup_isolate_pages,
 | 
			
		||||
	};
 | 
			
		||||
	int node;
 | 
			
		||||
	struct zone **zones;
 | 
			
		||||
 | 
			
		||||
	for_each_online_node(node) {
 | 
			
		||||
		zones = NODE_DATA(node)->node_zonelists[ZONE_USERPAGES].zones;
 | 
			
		||||
		if (do_try_to_free_pages(zones, sc.gfp_mask, &sc))
 | 
			
		||||
			return 1;
 | 
			
		||||
	}
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * For kswapd, balance_pgdat() will work across all this node's zones until
 | 
			
		||||
 * they are all at pages_high.
 | 
			
		||||
| 
						 | 
				
			
			@ -1328,6 +1402,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 | 
			
		|||
		.swap_cluster_max = SWAP_CLUSTER_MAX,
 | 
			
		||||
		.swappiness = vm_swappiness,
 | 
			
		||||
		.order = order,
 | 
			
		||||
		.mem_cgroup = NULL,
 | 
			
		||||
		.isolate_pages = isolate_pages_global,
 | 
			
		||||
	};
 | 
			
		||||
	/*
 | 
			
		||||
	 * temp_priority is used to remember the scanning priority at which
 | 
			
		||||
| 
						 | 
				
			
			@ -1649,6 +1725,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 | 
			
		|||
		.swap_cluster_max = nr_pages,
 | 
			
		||||
		.may_writepage = 1,
 | 
			
		||||
		.swappiness = vm_swappiness,
 | 
			
		||||
		.isolate_pages = isolate_pages_global,
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
	current->reclaim_state = &reclaim_state;
 | 
			
		||||
| 
						 | 
				
			
			@ -1834,6 +1911,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 | 
			
		|||
					SWAP_CLUSTER_MAX),
 | 
			
		||||
		.gfp_mask = gfp_mask,
 | 
			
		||||
		.swappiness = vm_swappiness,
 | 
			
		||||
		.isolate_pages = isolate_pages_global,
 | 
			
		||||
	};
 | 
			
		||||
	unsigned long slab_reclaimable;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue