	hugetlbfs: extend hugetlb_vma_lock to private VMAs
Extend the locking scheme used to protect shared hugetlb mappings from
truncate vs page fault races, in order to protect private hugetlb mappings
(with resv_map) against MADV_DONTNEED.
Add a read-write semaphore to the resv_map data structure, and use that
from the hugetlb_vma_(un)lock_* functions, in preparation for closing the
race between MADV_DONTNEED and page faults.
Link: https://lkml.kernel.org/r/20231006040020.3677377-3-riel@surriel.com
Fixes: 04ada095dc ("hugetlb: don't delete vma_lock in hugetlb MADV_DONTNEED processing")
Signed-off-by: Rik van Riel <riel@surriel.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
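
Stripped to its essence, the scheme is classic reader/writer exclusion: page faults hold the per-VMA lock shared, while remove operations such as MADV_DONTNEED hold it exclusive. The sketch below is a minimal, runnable userspace analogue of that pattern, using a POSIX rwlock as a stand-in for the new rw_sema; it is illustrative only, not kernel code, and the thread functions are hypothetical stand-ins for the kernel paths named in the comments.

#include <pthread.h>
#include <stdio.h>

/* Stand-in for resv_map->rw_sema / hugetlb_vma_lock->rw_sema. */
static pthread_rwlock_t rw_sema = PTHREAD_RWLOCK_INITIALIZER;

/* Page-fault path: readers may run concurrently with each other. */
static void *fault_path(void *arg)
{
	(void)arg;
	pthread_rwlock_rdlock(&rw_sema);	/* hugetlb_vma_lock_read() */
	puts("fault: reservation state is stable under the read lock");
	pthread_rwlock_unlock(&rw_sema);	/* hugetlb_vma_unlock_read() */
	return NULL;
}

/* MADV_DONTNEED-style zap path: the writer runs alone. */
static void *zap_path(void *arg)
{
	(void)arg;
	pthread_rwlock_wrlock(&rw_sema);	/* hugetlb_vma_lock_write() */
	puts("zap: no fault can run until the write lock is dropped");
	pthread_rwlock_unlock(&rw_sema);	/* hugetlb_vma_unlock_write() */
	return NULL;
}

int main(void)
{
	pthread_t fault, zap;

	pthread_create(&fault, NULL, fault_path, NULL);
	pthread_create(&zap, NULL, zap_path, NULL);
	pthread_join(fault, NULL);
	pthread_join(zap, NULL);
	return 0;
}

The write side draining all readers before it proceeds is exactly the property the follow-up patch relies on to keep MADV_DONTNEED from pulling pages out from under a concurrent fault on a private mapping.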
			
			
commit bf4916922c (parent 92fe9dcbe4)

2 changed files with 43 additions and 4 deletions
include/linux/hugetlb.h (6 changes)

@@ -60,6 +60,7 @@ struct resv_map {
 	long adds_in_progress;
 	struct list_head region_cache;
 	long region_cache_count;
+	struct rw_semaphore rw_sema;
 #ifdef CONFIG_CGROUP_HUGETLB
 	/*
 	 * On private mappings, the counter to uncharge reservations is stored
@@ -1233,6 +1234,11 @@ static inline bool __vma_shareable_lock(struct vm_area_struct *vma)
 	return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data;
 }
 
+static inline bool __vma_private_lock(struct vm_area_struct *vma)
+{
+	return (!(vma->vm_flags & VM_MAYSHARE)) && vma->vm_private_data;
+}
+
 /*
  * Safe version of huge_pte_offset() to check the locks.  See comments
  * above huge_pte_offset().

mm/hugetlb.c (41 changes)
@@ -97,6 +97,7 @@ static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
 static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
 static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
 		unsigned long start, unsigned long end);
+static struct resv_map *vma_resv_map(struct vm_area_struct *vma);
 
 static inline bool subpool_is_free(struct hugepage_subpool *spool)
 {
@@ -267,6 +268,10 @@ void hugetlb_vma_lock_read(struct vm_area_struct *vma)
 		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
 		down_read(&vma_lock->rw_sema);
+	} else if (__vma_private_lock(vma)) {
+		struct resv_map *resv_map = vma_resv_map(vma);
+
+		down_read(&resv_map->rw_sema);
 	}
 }
 
@@ -276,6 +281,10 @@ void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
 		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
 		up_read(&vma_lock->rw_sema);
+	} else if (__vma_private_lock(vma)) {
+		struct resv_map *resv_map = vma_resv_map(vma);
+
+		up_read(&resv_map->rw_sema);
 	}
 }
 
@@ -285,6 +294,10 @@ void hugetlb_vma_lock_write(struct vm_area_struct *vma)
 		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
 		down_write(&vma_lock->rw_sema);
+	} else if (__vma_private_lock(vma)) {
+		struct resv_map *resv_map = vma_resv_map(vma);
+
+		down_write(&resv_map->rw_sema);
 	}
 }
 
@@ -294,17 +307,27 @@ void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
 		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
 		up_write(&vma_lock->rw_sema);
+	} else if (__vma_private_lock(vma)) {
+		struct resv_map *resv_map = vma_resv_map(vma);
+
+		up_write(&resv_map->rw_sema);
 	}
 }
 
 int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
 {
-	struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
-	if (!__vma_shareable_lock(vma))
-		return 1;
+	if (__vma_shareable_lock(vma)) {
+		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
-	return down_write_trylock(&vma_lock->rw_sema);
+		return down_write_trylock(&vma_lock->rw_sema);
+	} else if (__vma_private_lock(vma)) {
+		struct resv_map *resv_map = vma_resv_map(vma);
+
+		return down_write_trylock(&resv_map->rw_sema);
+	}
+
+	return 1;
 }
 
 void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
@@ -313,6 +336,10 @@ void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
 		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
 		lockdep_assert_held(&vma_lock->rw_sema);
+	} else if (__vma_private_lock(vma)) {
+		struct resv_map *resv_map = vma_resv_map(vma);
+
+		lockdep_assert_held(&resv_map->rw_sema);
 	}
 }
 
@@ -345,6 +372,11 @@ static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma)
 		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
 		__hugetlb_vma_unlock_write_put(vma_lock);
+	} else if (__vma_private_lock(vma)) {
+		struct resv_map *resv_map = vma_resv_map(vma);
+
+		/* no free for anon vmas, but still need to unlock */
+		up_write(&resv_map->rw_sema);
 	}
 }
 
@@ -1068,6 +1100,7 @@ struct resv_map *resv_map_alloc(void)
 	kref_init(&resv_map->refs);
 	spin_lock_init(&resv_map->lock);
 	INIT_LIST_HEAD(&resv_map->regions);
+	init_rwsem(&resv_map->rw_sema);
 
 	resv_map->adds_in_progress = 0;
 	/*