forked from mirrors/linux
		
	mm: convert mm_lock_seq to a proper seqcount
Convert mm_lock_seq to be seqcount_t and change all mmap_write_lock variants to increment it, in line with the usual seqcount usage pattern. This lets us check whether the mmap_lock is write-locked by checking the mm_lock_seq.sequence counter (odd=locked, even=unlocked). This will be used when implementing mmap_lock speculation functions. As a result, vm_lock_seq is also changed to be unsigned to match the type of mm_lock_seq.sequence. Link: https://lkml.kernel.org/r/20241122174416.1367052-2-surenb@google.com Suggested-by: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Suren Baghdasaryan <surenb@google.com> Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com> Cc: Christian Brauner <brauner@kernel.org> Cc: David Hildenbrand <david@redhat.com> Cc: David Howells <dhowells@redhat.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Hillf Danton <hdanton@sina.com> Cc: Hugh Dickins <hughd@google.com> Cc: Jann Horn <jannh@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Cc: Mateusz Guzik <mjguzik@gmail.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Mel Gorman <mgorman@techsingularity.net> Cc: Michal Hocko <mhocko@suse.com> Cc: Minchan Kim <minchan@google.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Pasha Tatashin <pasha.tatashin@soleen.com> Cc: Paul E. McKenney <paulmck@kernel.org> Cc: Peter Xu <peterx@redhat.com> Cc: Shakeel Butt <shakeel.butt@linux.dev> Cc: Sourav Panda <souravpanda@google.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Wei Yang <richard.weiyang@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									dba4761a3e
								
							
						
					
					
						commit
						e5e7fb278e
					
				
					 7 changed files with 74 additions and 57 deletions
				
			
		|  | @ -711,7 +711,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma) | ||||||
| 	 * we don't rely on for anything - the mm_lock_seq read against which we | 	 * we don't rely on for anything - the mm_lock_seq read against which we | ||||||
| 	 * need ordering is below. | 	 * need ordering is below. | ||||||
| 	 */ | 	 */ | ||||||
| 	if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq)) | 	if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq.sequence)) | ||||||
| 		return false; | 		return false; | ||||||
| 
 | 
 | ||||||
| 	if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0)) | 	if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0)) | ||||||
|  | @ -728,7 +728,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma) | ||||||
| 	 * after it has been unlocked. | 	 * after it has been unlocked. | ||||||
| 	 * This pairs with RELEASE semantics in vma_end_write_all(). | 	 * This pairs with RELEASE semantics in vma_end_write_all(). | ||||||
| 	 */ | 	 */ | ||||||
| 	if (unlikely(vma->vm_lock_seq == smp_load_acquire(&vma->vm_mm->mm_lock_seq))) { | 	if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&vma->vm_mm->mm_lock_seq))) { | ||||||
| 		up_read(&vma->vm_lock->lock); | 		up_read(&vma->vm_lock->lock); | ||||||
| 		return false; | 		return false; | ||||||
| 	} | 	} | ||||||
|  | @ -743,7 +743,7 @@ static inline void vma_end_read(struct vm_area_struct *vma) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /* WARNING! Can only be used if mmap_lock is expected to be write-locked */ | /* WARNING! Can only be used if mmap_lock is expected to be write-locked */ | ||||||
| static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq) | static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq) | ||||||
| { | { | ||||||
| 	mmap_assert_write_locked(vma->vm_mm); | 	mmap_assert_write_locked(vma->vm_mm); | ||||||
| 
 | 
 | ||||||
|  | @ -751,7 +751,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq) | ||||||
| 	 * current task is holding mmap_write_lock, both vma->vm_lock_seq and | 	 * current task is holding mmap_write_lock, both vma->vm_lock_seq and | ||||||
| 	 * mm->mm_lock_seq can't be concurrently modified. | 	 * mm->mm_lock_seq can't be concurrently modified. | ||||||
| 	 */ | 	 */ | ||||||
| 	*mm_lock_seq = vma->vm_mm->mm_lock_seq; | 	*mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence; | ||||||
| 	return (vma->vm_lock_seq == *mm_lock_seq); | 	return (vma->vm_lock_seq == *mm_lock_seq); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -762,7 +762,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq) | ||||||
|  */ |  */ | ||||||
| static inline void vma_start_write(struct vm_area_struct *vma) | static inline void vma_start_write(struct vm_area_struct *vma) | ||||||
| { | { | ||||||
| 	int mm_lock_seq; | 	unsigned int mm_lock_seq; | ||||||
| 
 | 
 | ||||||
| 	if (__is_vma_write_locked(vma, &mm_lock_seq)) | 	if (__is_vma_write_locked(vma, &mm_lock_seq)) | ||||||
| 		return; | 		return; | ||||||
|  | @ -780,7 +780,7 @@ static inline void vma_start_write(struct vm_area_struct *vma) | ||||||
| 
 | 
 | ||||||
| static inline void vma_assert_write_locked(struct vm_area_struct *vma) | static inline void vma_assert_write_locked(struct vm_area_struct *vma) | ||||||
| { | { | ||||||
| 	int mm_lock_seq; | 	unsigned int mm_lock_seq; | ||||||
| 
 | 
 | ||||||
| 	VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma); | 	VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -729,7 +729,7 @@ struct vm_area_struct { | ||||||
| 	 * counter reuse can only lead to occasional unnecessary use of the | 	 * counter reuse can only lead to occasional unnecessary use of the | ||||||
| 	 * slowpath. | 	 * slowpath. | ||||||
| 	 */ | 	 */ | ||||||
| 	int vm_lock_seq; | 	unsigned int vm_lock_seq; | ||||||
| 	/* Unstable RCU readers are allowed to read this. */ | 	/* Unstable RCU readers are allowed to read this. */ | ||||||
| 	struct vma_lock *vm_lock; | 	struct vma_lock *vm_lock; | ||||||
| #endif | #endif | ||||||
|  | @ -923,6 +923,9 @@ struct mm_struct { | ||||||
| 		 * Roughly speaking, incrementing the sequence number is | 		 * Roughly speaking, incrementing the sequence number is | ||||||
| 		 * equivalent to releasing locks on VMAs; reading the sequence | 		 * equivalent to releasing locks on VMAs; reading the sequence | ||||||
| 		 * number can be part of taking a read lock on a VMA. | 		 * number can be part of taking a read lock on a VMA. | ||||||
|  | 		 * Incremented every time mmap_lock is write-locked/unlocked. | ||||||
|  | 		 * Initialized to 0, therefore odd values indicate mmap_lock | ||||||
|  | 		 * is write-locked and even values that it's released. | ||||||
| 		 * | 		 * | ||||||
| 		 * Can be modified under write mmap_lock using RELEASE | 		 * Can be modified under write mmap_lock using RELEASE | ||||||
| 		 * semantics. | 		 * semantics. | ||||||
|  | @ -931,7 +934,7 @@ struct mm_struct { | ||||||
| 		 * Can be read with ACQUIRE semantics if not holding write | 		 * Can be read with ACQUIRE semantics if not holding write | ||||||
| 		 * mmap_lock. | 		 * mmap_lock. | ||||||
| 		 */ | 		 */ | ||||||
| 		int mm_lock_seq; | 		seqcount_t mm_lock_seq; | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -71,6 +71,62 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #ifdef CONFIG_PER_VMA_LOCK | #ifdef CONFIG_PER_VMA_LOCK | ||||||
|  | static inline void mm_lock_seqcount_init(struct mm_struct *mm) | ||||||
|  | { | ||||||
|  | 	seqcount_init(&mm->mm_lock_seq); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline void mm_lock_seqcount_begin(struct mm_struct *mm) | ||||||
|  | { | ||||||
|  | 	do_raw_write_seqcount_begin(&mm->mm_lock_seq); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline void mm_lock_seqcount_end(struct mm_struct *mm) | ||||||
|  | { | ||||||
|  | 	ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq); | ||||||
|  | 	do_raw_write_seqcount_end(&mm->mm_lock_seq); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | #else | ||||||
|  | static inline void mm_lock_seqcount_init(struct mm_struct *mm) {} | ||||||
|  | static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {} | ||||||
|  | static inline void mm_lock_seqcount_end(struct mm_struct *mm) {} | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | static inline void mmap_init_lock(struct mm_struct *mm) | ||||||
|  | { | ||||||
|  | 	init_rwsem(&mm->mmap_lock); | ||||||
|  | 	mm_lock_seqcount_init(mm); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline void mmap_write_lock(struct mm_struct *mm) | ||||||
|  | { | ||||||
|  | 	__mmap_lock_trace_start_locking(mm, true); | ||||||
|  | 	down_write(&mm->mmap_lock); | ||||||
|  | 	mm_lock_seqcount_begin(mm); | ||||||
|  | 	__mmap_lock_trace_acquire_returned(mm, true, true); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass) | ||||||
|  | { | ||||||
|  | 	__mmap_lock_trace_start_locking(mm, true); | ||||||
|  | 	down_write_nested(&mm->mmap_lock, subclass); | ||||||
|  | 	mm_lock_seqcount_begin(mm); | ||||||
|  | 	__mmap_lock_trace_acquire_returned(mm, true, true); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline int mmap_write_lock_killable(struct mm_struct *mm) | ||||||
|  | { | ||||||
|  | 	int ret; | ||||||
|  | 
 | ||||||
|  | 	__mmap_lock_trace_start_locking(mm, true); | ||||||
|  | 	ret = down_write_killable(&mm->mmap_lock); | ||||||
|  | 	if (!ret) | ||||||
|  | 		mm_lock_seqcount_begin(mm); | ||||||
|  | 	__mmap_lock_trace_acquire_returned(mm, true, ret == 0); | ||||||
|  | 	return ret; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Drop all currently-held per-VMA locks. |  * Drop all currently-held per-VMA locks. | ||||||
|  * This is called from the mmap_lock implementation directly before releasing |  * This is called from the mmap_lock implementation directly before releasing | ||||||
|  | @ -82,46 +138,7 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm) | ||||||
| static inline void vma_end_write_all(struct mm_struct *mm) | static inline void vma_end_write_all(struct mm_struct *mm) | ||||||
| { | { | ||||||
| 	mmap_assert_write_locked(mm); | 	mmap_assert_write_locked(mm); | ||||||
| 	/*
 | 	mm_lock_seqcount_end(mm); | ||||||
| 	 * Nobody can concurrently modify mm->mm_lock_seq due to exclusive |  | ||||||
| 	 * mmap_lock being held. |  | ||||||
| 	 * We need RELEASE semantics here to ensure that preceding stores into |  | ||||||
| 	 * the VMA take effect before we unlock it with this store. |  | ||||||
| 	 * Pairs with ACQUIRE semantics in vma_start_read(). |  | ||||||
| 	 */ |  | ||||||
| 	smp_store_release(&mm->mm_lock_seq, mm->mm_lock_seq + 1); |  | ||||||
| } |  | ||||||
| #else |  | ||||||
| static inline void vma_end_write_all(struct mm_struct *mm) {} |  | ||||||
| #endif |  | ||||||
| 
 |  | ||||||
| static inline void mmap_init_lock(struct mm_struct *mm) |  | ||||||
| { |  | ||||||
| 	init_rwsem(&mm->mmap_lock); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static inline void mmap_write_lock(struct mm_struct *mm) |  | ||||||
| { |  | ||||||
| 	__mmap_lock_trace_start_locking(mm, true); |  | ||||||
| 	down_write(&mm->mmap_lock); |  | ||||||
| 	__mmap_lock_trace_acquire_returned(mm, true, true); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass) |  | ||||||
| { |  | ||||||
| 	__mmap_lock_trace_start_locking(mm, true); |  | ||||||
| 	down_write_nested(&mm->mmap_lock, subclass); |  | ||||||
| 	__mmap_lock_trace_acquire_returned(mm, true, true); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static inline int mmap_write_lock_killable(struct mm_struct *mm) |  | ||||||
| { |  | ||||||
| 	int ret; |  | ||||||
| 
 |  | ||||||
| 	__mmap_lock_trace_start_locking(mm, true); |  | ||||||
| 	ret = down_write_killable(&mm->mmap_lock); |  | ||||||
| 	__mmap_lock_trace_acquire_returned(mm, true, ret == 0); |  | ||||||
| 	return ret; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline void mmap_write_unlock(struct mm_struct *mm) | static inline void mmap_write_unlock(struct mm_struct *mm) | ||||||
|  |  | ||||||
|  | @ -448,7 +448,7 @@ static bool vma_lock_alloc(struct vm_area_struct *vma) | ||||||
| 		return false; | 		return false; | ||||||
| 
 | 
 | ||||||
| 	init_rwsem(&vma->vm_lock->lock); | 	init_rwsem(&vma->vm_lock->lock); | ||||||
| 	vma->vm_lock_seq = -1; | 	vma->vm_lock_seq = UINT_MAX; | ||||||
| 
 | 
 | ||||||
| 	return true; | 	return true; | ||||||
| } | } | ||||||
|  | @ -1262,9 +1262,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, | ||||||
| 	seqcount_init(&mm->write_protect_seq); | 	seqcount_init(&mm->write_protect_seq); | ||||||
| 	mmap_init_lock(mm); | 	mmap_init_lock(mm); | ||||||
| 	INIT_LIST_HEAD(&mm->mmlist); | 	INIT_LIST_HEAD(&mm->mmlist); | ||||||
| #ifdef CONFIG_PER_VMA_LOCK |  | ||||||
| 	mm->mm_lock_seq = 0; |  | ||||||
| #endif |  | ||||||
| 	mm_pgtables_bytes_init(mm); | 	mm_pgtables_bytes_init(mm); | ||||||
| 	mm->map_count = 0; | 	mm->map_count = 0; | ||||||
| 	mm->locked_vm = 0; | 	mm->locked_vm = 0; | ||||||
|  |  | ||||||
|  | @ -40,7 +40,7 @@ struct mm_struct init_mm = { | ||||||
| 	.arg_lock	=  __SPIN_LOCK_UNLOCKED(init_mm.arg_lock), | 	.arg_lock	=  __SPIN_LOCK_UNLOCKED(init_mm.arg_lock), | ||||||
| 	.mmlist		= LIST_HEAD_INIT(init_mm.mmlist), | 	.mmlist		= LIST_HEAD_INIT(init_mm.mmlist), | ||||||
| #ifdef CONFIG_PER_VMA_LOCK | #ifdef CONFIG_PER_VMA_LOCK | ||||||
| 	.mm_lock_seq	= 0, | 	.mm_lock_seq	= SEQCNT_ZERO(init_mm.mm_lock_seq), | ||||||
| #endif | #endif | ||||||
| 	.user_ns	= &init_user_ns, | 	.user_ns	= &init_user_ns, | ||||||
| 	.cpu_bitmap	= CPU_BITS_NONE, | 	.cpu_bitmap	= CPU_BITS_NONE, | ||||||
|  |  | ||||||
|  | @ -100,7 +100,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm, | ||||||
| 	 * begun. Linking to the tree will have caused this to be incremented, | 	 * begun. Linking to the tree will have caused this to be incremented, | ||||||
| 	 * which means we will get a false positive otherwise. | 	 * which means we will get a false positive otherwise. | ||||||
| 	 */ | 	 */ | ||||||
| 	vma->vm_lock_seq = -1; | 	vma->vm_lock_seq = UINT_MAX; | ||||||
| 
 | 
 | ||||||
| 	return vma; | 	return vma; | ||||||
| } | } | ||||||
|  | @ -225,7 +225,7 @@ static bool vma_write_started(struct vm_area_struct *vma) | ||||||
| 	int seq = vma->vm_lock_seq; | 	int seq = vma->vm_lock_seq; | ||||||
| 
 | 
 | ||||||
| 	/* We reset after each check. */ | 	/* We reset after each check. */ | ||||||
| 	vma->vm_lock_seq = -1; | 	vma->vm_lock_seq = UINT_MAX; | ||||||
| 
 | 
 | ||||||
| 	/* The vma_start_write() stub simply increments this value. */ | 	/* The vma_start_write() stub simply increments this value. */ | ||||||
| 	return seq > -1; | 	return seq > -1; | ||||||
|  |  | ||||||
|  | @ -281,7 +281,7 @@ struct vm_area_struct { | ||||||
| 	 * counter reuse can only lead to occasional unnecessary use of the | 	 * counter reuse can only lead to occasional unnecessary use of the | ||||||
| 	 * slowpath. | 	 * slowpath. | ||||||
| 	 */ | 	 */ | ||||||
| 	int vm_lock_seq; | 	unsigned int vm_lock_seq; | ||||||
| 	struct vma_lock *vm_lock; | 	struct vma_lock *vm_lock; | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|  | @ -467,7 +467,7 @@ static inline bool vma_lock_alloc(struct vm_area_struct *vma) | ||||||
| 		return false; | 		return false; | ||||||
| 
 | 
 | ||||||
| 	init_rwsem(&vma->vm_lock->lock); | 	init_rwsem(&vma->vm_lock->lock); | ||||||
| 	vma->vm_lock_seq = -1; | 	vma->vm_lock_seq = UINT_MAX; | ||||||
| 
 | 
 | ||||||
| 	return true; | 	return true; | ||||||
| } | } | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Suren Baghdasaryan
						Suren Baghdasaryan