mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	mm: modify swap_map and add SWAP_HAS_CACHE flag
This is a part of the patches for fixing memcg's swap accountinf leak. But, IMHO, not a bad patch even if no memcg. There are 2 kinds of references to swap. - reference from swap entry - reference from swap cache Then, - If there is swap cache && swap's refcnt is 1, there is only swap cache. (*) swapcount(entry) == 1 && find_get_page(swapper_space, entry) != NULL This counting logic have worked well for a long time. But considering that we cannot know there is a _real_ reference or not by swap_map[], current usage of counter is not very good. This patch adds a flag SWAP_HAS_CACHE and recored information that a swap entry has a cache or not. This will remove -1 magic used in swapfile.c and be a help to avoid unnecessary find_get_page(). Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Tested-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Dhaval Giani <dhaval@linux.vnet.ibm.com> Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									cb4b86ba47
								
							
						
					
					
						commit
						355cfa73dd
					
				
					 3 changed files with 171 additions and 60 deletions
				
			
		| 
						 | 
					@ -129,9 +129,10 @@ enum {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define SWAP_CLUSTER_MAX 32
 | 
					#define SWAP_CLUSTER_MAX 32
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define SWAP_MAP_MAX	0x7fff
 | 
					#define SWAP_MAP_MAX	0x7ffe
 | 
				
			||||||
#define SWAP_MAP_BAD	0x8000
 | 
					#define SWAP_MAP_BAD	0x7fff
 | 
				
			||||||
 | 
					#define SWAP_HAS_CACHE  0x8000		/* There is a swap cache of entry. */
 | 
				
			||||||
 | 
					#define SWAP_COUNT_MASK (~SWAP_HAS_CACHE)
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * The in-memory structure used to track swap areas.
 | 
					 * The in-memory structure used to track swap areas.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
| 
						 | 
					@ -281,7 +282,7 @@ extern long total_swap_pages;
 | 
				
			||||||
extern void si_swapinfo(struct sysinfo *);
 | 
					extern void si_swapinfo(struct sysinfo *);
 | 
				
			||||||
extern swp_entry_t get_swap_page(void);
 | 
					extern swp_entry_t get_swap_page(void);
 | 
				
			||||||
extern swp_entry_t get_swap_page_of_type(int);
 | 
					extern swp_entry_t get_swap_page_of_type(int);
 | 
				
			||||||
extern int swap_duplicate(swp_entry_t);
 | 
					extern void swap_duplicate(swp_entry_t);
 | 
				
			||||||
extern int swapcache_prepare(swp_entry_t);
 | 
					extern int swapcache_prepare(swp_entry_t);
 | 
				
			||||||
extern int valid_swaphandles(swp_entry_t, unsigned long *);
 | 
					extern int valid_swaphandles(swp_entry_t, unsigned long *);
 | 
				
			||||||
extern void swap_free(swp_entry_t);
 | 
					extern void swap_free(swp_entry_t);
 | 
				
			||||||
| 
						 | 
					@ -353,9 +354,12 @@ static inline void show_swap_cache_info(void)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define free_swap_and_cache(swp)	is_migration_entry(swp)
 | 
					#define free_swap_and_cache(swp)	is_migration_entry(swp)
 | 
				
			||||||
#define swap_duplicate(swp)		is_migration_entry(swp)
 | 
					 | 
				
			||||||
#define swapcache_prepare(swp)		is_migration_entry(swp)
 | 
					#define swapcache_prepare(swp)		is_migration_entry(swp)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void swap_duplicate(swp_entry_t swp)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline void swap_free(swp_entry_t swp)
 | 
					static inline void swap_free(swp_entry_t swp)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -292,7 +292,10 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 | 
				
			||||||
		/*
 | 
							/*
 | 
				
			||||||
		 * Swap entry may have been freed since our caller observed it.
 | 
							 * Swap entry may have been freed since our caller observed it.
 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
		if (!swapcache_prepare(entry))
 | 
							err = swapcache_prepare(entry);
 | 
				
			||||||
 | 
							if (err == -EEXIST) /* seems racy */
 | 
				
			||||||
 | 
								continue;
 | 
				
			||||||
 | 
							if (err)           /* swp entry is obsolete ? */
 | 
				
			||||||
			break;
 | 
								break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		/*
 | 
							/*
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										212
									
								
								mm/swapfile.c
									
									
									
									
									
								
							
							
						
						
									
										212
									
								
								mm/swapfile.c
									
									
									
									
									
								
							| 
						 | 
					@ -53,6 +53,33 @@ static struct swap_info_struct swap_info[MAX_SWAPFILES];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static DEFINE_MUTEX(swapon_mutex);
 | 
					static DEFINE_MUTEX(swapon_mutex);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* For reference count accounting in swap_map */
 | 
				
			||||||
 | 
					/* enum for swap_map[] handling. internal use only */
 | 
				
			||||||
 | 
					enum {
 | 
				
			||||||
 | 
						SWAP_MAP = 0,	/* ops for reference from swap users */
 | 
				
			||||||
 | 
						SWAP_CACHE,	/* ops for reference from swap cache */
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline int swap_count(unsigned short ent)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return ent & SWAP_COUNT_MASK;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline bool swap_has_cache(unsigned short ent)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return !!(ent & SWAP_HAS_CACHE);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline unsigned short encode_swapmap(int count, bool has_cache)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						unsigned short ret = count;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (has_cache)
 | 
				
			||||||
 | 
							return SWAP_HAS_CACHE | ret;
 | 
				
			||||||
 | 
						return ret;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * We need this because the bdev->unplug_fn can sleep and we cannot
 | 
					 * We need this because the bdev->unplug_fn can sleep and we cannot
 | 
				
			||||||
 * hold swap_lock while calling the unplug_fn. And swap_lock
 | 
					 * hold swap_lock while calling the unplug_fn. And swap_lock
 | 
				
			||||||
| 
						 | 
					@ -167,7 +194,8 @@ static int wait_for_discard(void *word)
 | 
				
			||||||
#define SWAPFILE_CLUSTER	256
 | 
					#define SWAPFILE_CLUSTER	256
 | 
				
			||||||
#define LATENCY_LIMIT		256
 | 
					#define LATENCY_LIMIT		256
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline unsigned long scan_swap_map(struct swap_info_struct *si)
 | 
					static inline unsigned long scan_swap_map(struct swap_info_struct *si,
 | 
				
			||||||
 | 
										  int cache)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	unsigned long offset;
 | 
						unsigned long offset;
 | 
				
			||||||
	unsigned long scan_base;
 | 
						unsigned long scan_base;
 | 
				
			||||||
| 
						 | 
					@ -285,7 +313,10 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si)
 | 
				
			||||||
		si->lowest_bit = si->max;
 | 
							si->lowest_bit = si->max;
 | 
				
			||||||
		si->highest_bit = 0;
 | 
							si->highest_bit = 0;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	si->swap_map[offset] = 1;
 | 
						if (cache == SWAP_CACHE) /* at usual swap-out via vmscan.c */
 | 
				
			||||||
 | 
							si->swap_map[offset] = encode_swapmap(0, true);
 | 
				
			||||||
 | 
						else /* at suspend */
 | 
				
			||||||
 | 
							si->swap_map[offset] = encode_swapmap(1, false);
 | 
				
			||||||
	si->cluster_next = offset + 1;
 | 
						si->cluster_next = offset + 1;
 | 
				
			||||||
	si->flags -= SWP_SCANNING;
 | 
						si->flags -= SWP_SCANNING;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -401,7 +432,8 @@ swp_entry_t get_swap_page(void)
 | 
				
			||||||
			continue;
 | 
								continue;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		swap_list.next = next;
 | 
							swap_list.next = next;
 | 
				
			||||||
		offset = scan_swap_map(si);
 | 
							/* This is called for allocating swap entry for cache */
 | 
				
			||||||
 | 
							offset = scan_swap_map(si, SWAP_CACHE);
 | 
				
			||||||
		if (offset) {
 | 
							if (offset) {
 | 
				
			||||||
			spin_unlock(&swap_lock);
 | 
								spin_unlock(&swap_lock);
 | 
				
			||||||
			return swp_entry(type, offset);
 | 
								return swp_entry(type, offset);
 | 
				
			||||||
| 
						 | 
					@ -415,6 +447,7 @@ swp_entry_t get_swap_page(void)
 | 
				
			||||||
	return (swp_entry_t) {0};
 | 
						return (swp_entry_t) {0};
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* The only caller of this function is now susupend routine */
 | 
				
			||||||
swp_entry_t get_swap_page_of_type(int type)
 | 
					swp_entry_t get_swap_page_of_type(int type)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct swap_info_struct *si;
 | 
						struct swap_info_struct *si;
 | 
				
			||||||
| 
						 | 
					@ -424,7 +457,8 @@ swp_entry_t get_swap_page_of_type(int type)
 | 
				
			||||||
	si = swap_info + type;
 | 
						si = swap_info + type;
 | 
				
			||||||
	if (si->flags & SWP_WRITEOK) {
 | 
						if (si->flags & SWP_WRITEOK) {
 | 
				
			||||||
		nr_swap_pages--;
 | 
							nr_swap_pages--;
 | 
				
			||||||
		offset = scan_swap_map(si);
 | 
							/* This is called for allocating swap entry, not cache */
 | 
				
			||||||
 | 
							offset = scan_swap_map(si, SWAP_MAP);
 | 
				
			||||||
		if (offset) {
 | 
							if (offset) {
 | 
				
			||||||
			spin_unlock(&swap_lock);
 | 
								spin_unlock(&swap_lock);
 | 
				
			||||||
			return swp_entry(type, offset);
 | 
								return swp_entry(type, offset);
 | 
				
			||||||
| 
						 | 
					@ -471,25 +505,38 @@ static struct swap_info_struct * swap_info_get(swp_entry_t entry)
 | 
				
			||||||
	return NULL;
 | 
						return NULL;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int swap_entry_free(struct swap_info_struct *p, swp_entry_t ent)
 | 
					static int swap_entry_free(struct swap_info_struct *p,
 | 
				
			||||||
 | 
								   swp_entry_t ent, int cache)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	unsigned long offset = swp_offset(ent);
 | 
						unsigned long offset = swp_offset(ent);
 | 
				
			||||||
	int count = p->swap_map[offset];
 | 
						int count = swap_count(p->swap_map[offset]);
 | 
				
			||||||
 | 
						bool has_cache;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (count < SWAP_MAP_MAX) {
 | 
						has_cache = swap_has_cache(p->swap_map[offset]);
 | 
				
			||||||
		count--;
 | 
					
 | 
				
			||||||
		p->swap_map[offset] = count;
 | 
						if (cache == SWAP_MAP) { /* dropping usage count of swap */
 | 
				
			||||||
		if (!count) {
 | 
							if (count < SWAP_MAP_MAX) {
 | 
				
			||||||
			if (offset < p->lowest_bit)
 | 
								count--;
 | 
				
			||||||
				p->lowest_bit = offset;
 | 
								p->swap_map[offset] = encode_swapmap(count, has_cache);
 | 
				
			||||||
			if (offset > p->highest_bit)
 | 
					 | 
				
			||||||
				p->highest_bit = offset;
 | 
					 | 
				
			||||||
			if (p->prio > swap_info[swap_list.next].prio)
 | 
					 | 
				
			||||||
				swap_list.next = p - swap_info;
 | 
					 | 
				
			||||||
			nr_swap_pages++;
 | 
					 | 
				
			||||||
			p->inuse_pages--;
 | 
					 | 
				
			||||||
			mem_cgroup_uncharge_swap(ent);
 | 
					 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
						} else { /* dropping swap cache flag */
 | 
				
			||||||
 | 
							VM_BUG_ON(!has_cache);
 | 
				
			||||||
 | 
							p->swap_map[offset] = encode_swapmap(count, false);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						/* return code. */
 | 
				
			||||||
 | 
						count = p->swap_map[offset];
 | 
				
			||||||
 | 
						/* free if no reference */
 | 
				
			||||||
 | 
						if (!count) {
 | 
				
			||||||
 | 
							if (offset < p->lowest_bit)
 | 
				
			||||||
 | 
								p->lowest_bit = offset;
 | 
				
			||||||
 | 
							if (offset > p->highest_bit)
 | 
				
			||||||
 | 
								p->highest_bit = offset;
 | 
				
			||||||
 | 
							if (p->prio > swap_info[swap_list.next].prio)
 | 
				
			||||||
 | 
								swap_list.next = p - swap_info;
 | 
				
			||||||
 | 
							nr_swap_pages++;
 | 
				
			||||||
 | 
							p->inuse_pages--;
 | 
				
			||||||
 | 
							mem_cgroup_uncharge_swap(ent);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	return count;
 | 
						return count;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -504,7 +551,7 @@ void swap_free(swp_entry_t entry)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	p = swap_info_get(entry);
 | 
						p = swap_info_get(entry);
 | 
				
			||||||
	if (p) {
 | 
						if (p) {
 | 
				
			||||||
		swap_entry_free(p, entry);
 | 
							swap_entry_free(p, entry, SWAP_MAP);
 | 
				
			||||||
		spin_unlock(&swap_lock);
 | 
							spin_unlock(&swap_lock);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -514,9 +561,16 @@ void swap_free(swp_entry_t entry)
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
void swapcache_free(swp_entry_t entry, struct page *page)
 | 
					void swapcache_free(swp_entry_t entry, struct page *page)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
						struct swap_info_struct *p;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (page)
 | 
						if (page)
 | 
				
			||||||
		mem_cgroup_uncharge_swapcache(page, entry);
 | 
							mem_cgroup_uncharge_swapcache(page, entry);
 | 
				
			||||||
	return swap_free(entry);
 | 
						p = swap_info_get(entry);
 | 
				
			||||||
 | 
						if (p) {
 | 
				
			||||||
 | 
							swap_entry_free(p, entry, SWAP_CACHE);
 | 
				
			||||||
 | 
							spin_unlock(&swap_lock);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						return;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
| 
						 | 
					@ -531,8 +585,7 @@ static inline int page_swapcount(struct page *page)
 | 
				
			||||||
	entry.val = page_private(page);
 | 
						entry.val = page_private(page);
 | 
				
			||||||
	p = swap_info_get(entry);
 | 
						p = swap_info_get(entry);
 | 
				
			||||||
	if (p) {
 | 
						if (p) {
 | 
				
			||||||
		/* Subtract the 1 for the swap cache itself */
 | 
							count = swap_count(p->swap_map[swp_offset(entry)]);
 | 
				
			||||||
		count = p->swap_map[swp_offset(entry)] - 1;
 | 
					 | 
				
			||||||
		spin_unlock(&swap_lock);
 | 
							spin_unlock(&swap_lock);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	return count;
 | 
						return count;
 | 
				
			||||||
| 
						 | 
					@ -594,7 +647,7 @@ int free_swap_and_cache(swp_entry_t entry)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	p = swap_info_get(entry);
 | 
						p = swap_info_get(entry);
 | 
				
			||||||
	if (p) {
 | 
						if (p) {
 | 
				
			||||||
		if (swap_entry_free(p, entry) == 1) {
 | 
							if (swap_entry_free(p, entry, SWAP_MAP) == SWAP_HAS_CACHE) {
 | 
				
			||||||
			page = find_get_page(&swapper_space, entry.val);
 | 
								page = find_get_page(&swapper_space, entry.val);
 | 
				
			||||||
			if (page && !trylock_page(page)) {
 | 
								if (page && !trylock_page(page)) {
 | 
				
			||||||
				page_cache_release(page);
 | 
									page_cache_release(page);
 | 
				
			||||||
| 
						 | 
					@ -901,7 +954,7 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
 | 
				
			||||||
			i = 1;
 | 
								i = 1;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		count = si->swap_map[i];
 | 
							count = si->swap_map[i];
 | 
				
			||||||
		if (count && count != SWAP_MAP_BAD)
 | 
							if (count && swap_count(count) != SWAP_MAP_BAD)
 | 
				
			||||||
			break;
 | 
								break;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	return i;
 | 
						return i;
 | 
				
			||||||
| 
						 | 
					@ -1005,13 +1058,13 @@ static int try_to_unuse(unsigned int type)
 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
		shmem = 0;
 | 
							shmem = 0;
 | 
				
			||||||
		swcount = *swap_map;
 | 
							swcount = *swap_map;
 | 
				
			||||||
		if (swcount > 1) {
 | 
							if (swap_count(swcount)) {
 | 
				
			||||||
			if (start_mm == &init_mm)
 | 
								if (start_mm == &init_mm)
 | 
				
			||||||
				shmem = shmem_unuse(entry, page);
 | 
									shmem = shmem_unuse(entry, page);
 | 
				
			||||||
			else
 | 
								else
 | 
				
			||||||
				retval = unuse_mm(start_mm, entry, page);
 | 
									retval = unuse_mm(start_mm, entry, page);
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		if (*swap_map > 1) {
 | 
							if (swap_count(*swap_map)) {
 | 
				
			||||||
			int set_start_mm = (*swap_map >= swcount);
 | 
								int set_start_mm = (*swap_map >= swcount);
 | 
				
			||||||
			struct list_head *p = &start_mm->mmlist;
 | 
								struct list_head *p = &start_mm->mmlist;
 | 
				
			||||||
			struct mm_struct *new_start_mm = start_mm;
 | 
								struct mm_struct *new_start_mm = start_mm;
 | 
				
			||||||
| 
						 | 
					@ -1021,7 +1074,7 @@ static int try_to_unuse(unsigned int type)
 | 
				
			||||||
			atomic_inc(&new_start_mm->mm_users);
 | 
								atomic_inc(&new_start_mm->mm_users);
 | 
				
			||||||
			atomic_inc(&prev_mm->mm_users);
 | 
								atomic_inc(&prev_mm->mm_users);
 | 
				
			||||||
			spin_lock(&mmlist_lock);
 | 
								spin_lock(&mmlist_lock);
 | 
				
			||||||
			while (*swap_map > 1 && !retval && !shmem &&
 | 
								while (swap_count(*swap_map) && !retval && !shmem &&
 | 
				
			||||||
					(p = p->next) != &start_mm->mmlist) {
 | 
										(p = p->next) != &start_mm->mmlist) {
 | 
				
			||||||
				mm = list_entry(p, struct mm_struct, mmlist);
 | 
									mm = list_entry(p, struct mm_struct, mmlist);
 | 
				
			||||||
				if (!atomic_inc_not_zero(&mm->mm_users))
 | 
									if (!atomic_inc_not_zero(&mm->mm_users))
 | 
				
			||||||
| 
						 | 
					@ -1033,14 +1086,16 @@ static int try_to_unuse(unsigned int type)
 | 
				
			||||||
				cond_resched();
 | 
									cond_resched();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
				swcount = *swap_map;
 | 
									swcount = *swap_map;
 | 
				
			||||||
				if (swcount <= 1)
 | 
									if (!swap_count(swcount)) /* any usage ? */
 | 
				
			||||||
					;
 | 
										;
 | 
				
			||||||
				else if (mm == &init_mm) {
 | 
									else if (mm == &init_mm) {
 | 
				
			||||||
					set_start_mm = 1;
 | 
										set_start_mm = 1;
 | 
				
			||||||
					shmem = shmem_unuse(entry, page);
 | 
										shmem = shmem_unuse(entry, page);
 | 
				
			||||||
				} else
 | 
									} else
 | 
				
			||||||
					retval = unuse_mm(mm, entry, page);
 | 
										retval = unuse_mm(mm, entry, page);
 | 
				
			||||||
				if (set_start_mm && *swap_map < swcount) {
 | 
					
 | 
				
			||||||
 | 
									if (set_start_mm &&
 | 
				
			||||||
 | 
									    swap_count(*swap_map) < swcount) {
 | 
				
			||||||
					mmput(new_start_mm);
 | 
										mmput(new_start_mm);
 | 
				
			||||||
					atomic_inc(&mm->mm_users);
 | 
										atomic_inc(&mm->mm_users);
 | 
				
			||||||
					new_start_mm = mm;
 | 
										new_start_mm = mm;
 | 
				
			||||||
| 
						 | 
					@ -1067,21 +1122,25 @@ static int try_to_unuse(unsigned int type)
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		/*
 | 
							/*
 | 
				
			||||||
		 * How could swap count reach 0x7fff when the maximum
 | 
							 * How could swap count reach 0x7ffe ?
 | 
				
			||||||
		 * pid is 0x7fff, and there's no way to repeat a swap
 | 
							 * There's no way to repeat a swap page within an mm
 | 
				
			||||||
		 * page within an mm (except in shmem, where it's the
 | 
							 * (except in shmem, where it's the shared object which takes
 | 
				
			||||||
		 * shared object which takes the reference count)?
 | 
							 * the reference count)?
 | 
				
			||||||
		 * We believe SWAP_MAP_MAX cannot occur in Linux 2.4.
 | 
							 * We believe SWAP_MAP_MAX cannot occur.(if occur, unsigned
 | 
				
			||||||
		 *
 | 
							 * short is too small....)
 | 
				
			||||||
		 * If that's wrong, then we should worry more about
 | 
							 * If that's wrong, then we should worry more about
 | 
				
			||||||
		 * exit_mmap() and do_munmap() cases described above:
 | 
							 * exit_mmap() and do_munmap() cases described above:
 | 
				
			||||||
		 * we might be resetting SWAP_MAP_MAX too early here.
 | 
							 * we might be resetting SWAP_MAP_MAX too early here.
 | 
				
			||||||
		 * We know "Undead"s can happen, they're okay, so don't
 | 
							 * We know "Undead"s can happen, they're okay, so don't
 | 
				
			||||||
		 * report them; but do report if we reset SWAP_MAP_MAX.
 | 
							 * report them; but do report if we reset SWAP_MAP_MAX.
 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
		if (*swap_map == SWAP_MAP_MAX) {
 | 
							/* We might release the lock_page() in unuse_mm(). */
 | 
				
			||||||
 | 
							if (!PageSwapCache(page) || page_private(page) != entry.val)
 | 
				
			||||||
 | 
								goto retry;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (swap_count(*swap_map) == SWAP_MAP_MAX) {
 | 
				
			||||||
			spin_lock(&swap_lock);
 | 
								spin_lock(&swap_lock);
 | 
				
			||||||
			*swap_map = 1;
 | 
								*swap_map = encode_swapmap(0, true);
 | 
				
			||||||
			spin_unlock(&swap_lock);
 | 
								spin_unlock(&swap_lock);
 | 
				
			||||||
			reset_overflow = 1;
 | 
								reset_overflow = 1;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
| 
						 | 
					@ -1099,7 +1158,8 @@ static int try_to_unuse(unsigned int type)
 | 
				
			||||||
		 * pages would be incorrect if swap supported "shared
 | 
							 * pages would be incorrect if swap supported "shared
 | 
				
			||||||
		 * private" pages, but they are handled by tmpfs files.
 | 
							 * private" pages, but they are handled by tmpfs files.
 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
		if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) {
 | 
							if (swap_count(*swap_map) &&
 | 
				
			||||||
 | 
							     PageDirty(page) && PageSwapCache(page)) {
 | 
				
			||||||
			struct writeback_control wbc = {
 | 
								struct writeback_control wbc = {
 | 
				
			||||||
				.sync_mode = WB_SYNC_NONE,
 | 
									.sync_mode = WB_SYNC_NONE,
 | 
				
			||||||
			};
 | 
								};
 | 
				
			||||||
| 
						 | 
					@ -1126,6 +1186,7 @@ static int try_to_unuse(unsigned int type)
 | 
				
			||||||
		 * mark page dirty so shrink_page_list will preserve it.
 | 
							 * mark page dirty so shrink_page_list will preserve it.
 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
		SetPageDirty(page);
 | 
							SetPageDirty(page);
 | 
				
			||||||
 | 
					retry:
 | 
				
			||||||
		unlock_page(page);
 | 
							unlock_page(page);
 | 
				
			||||||
		page_cache_release(page);
 | 
							page_cache_release(page);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1952,15 +2013,23 @@ void si_swapinfo(struct sysinfo *val)
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * Note: if swap_map[] reaches SWAP_MAP_MAX the entries are treated as
 | 
					 * Note: if swap_map[] reaches SWAP_MAP_MAX the entries are treated as
 | 
				
			||||||
 * "permanent", but will be reclaimed by the next swapoff.
 | 
					 * "permanent", but will be reclaimed by the next swapoff.
 | 
				
			||||||
 | 
					 * Returns error code in following case.
 | 
				
			||||||
 | 
					 * - success -> 0
 | 
				
			||||||
 | 
					 * - swp_entry is invalid -> EINVAL
 | 
				
			||||||
 | 
					 * - swp_entry is migration entry -> EINVAL
 | 
				
			||||||
 | 
					 * - swap-cache reference is requested but there is already one. -> EEXIST
 | 
				
			||||||
 | 
					 * - swap-cache reference is requested but the entry is not used. -> ENOENT
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
int swap_duplicate(swp_entry_t entry)
 | 
					static int __swap_duplicate(swp_entry_t entry, bool cache)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct swap_info_struct * p;
 | 
						struct swap_info_struct * p;
 | 
				
			||||||
	unsigned long offset, type;
 | 
						unsigned long offset, type;
 | 
				
			||||||
	int result = 0;
 | 
						int result = -EINVAL;
 | 
				
			||||||
 | 
						int count;
 | 
				
			||||||
 | 
						bool has_cache;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (is_migration_entry(entry))
 | 
						if (is_migration_entry(entry))
 | 
				
			||||||
		return 1;
 | 
							return -EINVAL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	type = swp_type(entry);
 | 
						type = swp_type(entry);
 | 
				
			||||||
	if (type >= nr_swapfiles)
 | 
						if (type >= nr_swapfiles)
 | 
				
			||||||
| 
						 | 
					@ -1969,17 +2038,40 @@ int swap_duplicate(swp_entry_t entry)
 | 
				
			||||||
	offset = swp_offset(entry);
 | 
						offset = swp_offset(entry);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	spin_lock(&swap_lock);
 | 
						spin_lock(&swap_lock);
 | 
				
			||||||
	if (offset < p->max && p->swap_map[offset]) {
 | 
					
 | 
				
			||||||
		if (p->swap_map[offset] < SWAP_MAP_MAX - 1) {
 | 
						if (unlikely(offset >= p->max))
 | 
				
			||||||
			p->swap_map[offset]++;
 | 
							goto unlock_out;
 | 
				
			||||||
			result = 1;
 | 
					
 | 
				
			||||||
		} else if (p->swap_map[offset] <= SWAP_MAP_MAX) {
 | 
						count = swap_count(p->swap_map[offset]);
 | 
				
			||||||
 | 
						has_cache = swap_has_cache(p->swap_map[offset]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (cache == SWAP_CACHE) { /* called for swapcache/swapin-readahead */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* set SWAP_HAS_CACHE if there is no cache and entry is used */
 | 
				
			||||||
 | 
							if (!has_cache && count) {
 | 
				
			||||||
 | 
								p->swap_map[offset] = encode_swapmap(count, true);
 | 
				
			||||||
 | 
								result = 0;
 | 
				
			||||||
 | 
							} else if (has_cache) /* someone added cache */
 | 
				
			||||||
 | 
								result = -EEXIST;
 | 
				
			||||||
 | 
							else if (!count) /* no users */
 | 
				
			||||||
 | 
								result = -ENOENT;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						} else if (count || has_cache) {
 | 
				
			||||||
 | 
							if (count < SWAP_MAP_MAX - 1) {
 | 
				
			||||||
 | 
								p->swap_map[offset] = encode_swapmap(count + 1,
 | 
				
			||||||
 | 
												     has_cache);
 | 
				
			||||||
 | 
								result = 0;
 | 
				
			||||||
 | 
							} else if (count <= SWAP_MAP_MAX) {
 | 
				
			||||||
			if (swap_overflow++ < 5)
 | 
								if (swap_overflow++ < 5)
 | 
				
			||||||
				printk(KERN_WARNING "swap_dup: swap entry overflow\n");
 | 
									printk(KERN_WARNING
 | 
				
			||||||
			p->swap_map[offset] = SWAP_MAP_MAX;
 | 
									       "swap_dup: swap entry overflow\n");
 | 
				
			||||||
			result = 1;
 | 
								p->swap_map[offset] = encode_swapmap(SWAP_MAP_MAX,
 | 
				
			||||||
 | 
												      has_cache);
 | 
				
			||||||
 | 
								result = 0;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						} else
 | 
				
			||||||
 | 
							result = -ENOENT; /* unused swap entry */
 | 
				
			||||||
 | 
					unlock_out:
 | 
				
			||||||
	spin_unlock(&swap_lock);
 | 
						spin_unlock(&swap_lock);
 | 
				
			||||||
out:
 | 
					out:
 | 
				
			||||||
	return result;
 | 
						return result;
 | 
				
			||||||
| 
						 | 
					@ -1988,13 +2080,25 @@ int swap_duplicate(swp_entry_t entry)
 | 
				
			||||||
	printk(KERN_ERR "swap_dup: %s%08lx\n", Bad_file, entry.val);
 | 
						printk(KERN_ERR "swap_dup: %s%08lx\n", Bad_file, entry.val);
 | 
				
			||||||
	goto out;
 | 
						goto out;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * increase reference count of swap entry by 1.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					void swap_duplicate(swp_entry_t entry)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						__swap_duplicate(entry, SWAP_MAP);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 | 
					 * @entry: swap entry for which we allocate swap cache.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 * Called when allocating swap cache for exising swap entry,
 | 
					 * Called when allocating swap cache for exising swap entry,
 | 
				
			||||||
 | 
					 * This can return error codes. Returns 0 at success.
 | 
				
			||||||
 | 
					 * -EBUSY means there is a swap cache.
 | 
				
			||||||
 | 
					 * Note: return code is different from swap_duplicate().
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
int swapcache_prepare(swp_entry_t entry)
 | 
					int swapcache_prepare(swp_entry_t entry)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	return swap_duplicate(entry);
 | 
						return __swap_duplicate(entry, SWAP_CACHE);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2035,7 +2139,7 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
 | 
				
			||||||
		/* Don't read in free or bad pages */
 | 
							/* Don't read in free or bad pages */
 | 
				
			||||||
		if (!si->swap_map[toff])
 | 
							if (!si->swap_map[toff])
 | 
				
			||||||
			break;
 | 
								break;
 | 
				
			||||||
		if (si->swap_map[toff] == SWAP_MAP_BAD)
 | 
							if (swap_count(si->swap_map[toff]) == SWAP_MAP_BAD)
 | 
				
			||||||
			break;
 | 
								break;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	/* Count contiguous allocated slots below our target */
 | 
						/* Count contiguous allocated slots below our target */
 | 
				
			||||||
| 
						 | 
					@ -2043,7 +2147,7 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
 | 
				
			||||||
		/* Don't read in free or bad pages */
 | 
							/* Don't read in free or bad pages */
 | 
				
			||||||
		if (!si->swap_map[toff])
 | 
							if (!si->swap_map[toff])
 | 
				
			||||||
			break;
 | 
								break;
 | 
				
			||||||
		if (si->swap_map[toff] == SWAP_MAP_BAD)
 | 
							if (swap_count(si->swap_map[toff]) == SWAP_MAP_BAD)
 | 
				
			||||||
			break;
 | 
								break;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	spin_unlock(&swap_lock);
 | 
						spin_unlock(&swap_lock);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue