forked from mirrors/linux
		
	perf: Fix perf ring buffer memory ordering
The PPC64 people noticed a missing memory barrier and crufty old comments in the perf ring buffer code. So update all the comments and add the missing barrier. When the architecture implements local_t using atomic_long_t there will be double barriers issued; but short of introducing more conditional barrier primitives this is the best we can do. Reported-by: Victor Kaplansky <victork@il.ibm.com> Tested-by: Victor Kaplansky <victork@il.ibm.com> Signed-off-by: Peter Zijlstra <peterz@infradead.org> Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> Cc: michael@ellerman.id.au Cc: Paul McKenney <paulmck@linux.vnet.ibm.com> Cc: Michael Neuling <mikey@neuling.org> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: anton@samba.org Cc: benh@kernel.crashing.org Link: http://lkml.kernel.org/r/20131025173749.GG19466@laptop.lan Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
		
							parent
							
								
									cd65718712
								
							
						
					
					
						commit
						bf378d341e
					
				
					 2 changed files with 34 additions and 9 deletions
				
			
		|  | @ -456,13 +456,15 @@ struct perf_event_mmap_page { | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * Control data for the mmap() data buffer. | 	 * Control data for the mmap() data buffer. | ||||||
| 	 * | 	 * | ||||||
| 	 * User-space reading the @data_head value should issue an rmb(), on | 	 * User-space reading the @data_head value should issue an smp_rmb(), | ||||||
| 	 * SMP capable platforms, after reading this value -- see | 	 * after reading this value. | ||||||
| 	 * perf_event_wakeup(). |  | ||||||
| 	 * | 	 * | ||||||
| 	 * When the mapping is PROT_WRITE the @data_tail value should be | 	 * When the mapping is PROT_WRITE the @data_tail value should be | ||||||
| 	 * written by userspace to reflect the last read data. In this case | 	 * written by userspace to reflect the last read data, after issueing | ||||||
| 	 * the kernel will not over-write unread data. | 	 * an smp_mb() to separate the data read from the ->data_tail store. | ||||||
|  | 	 * In this case the kernel will not over-write unread data. | ||||||
|  | 	 * | ||||||
|  | 	 * See perf_output_put_handle() for the data ordering. | ||||||
| 	 */ | 	 */ | ||||||
| 	__u64   data_head;		/* head in the data section */ | 	__u64   data_head;		/* head in the data section */ | ||||||
| 	__u64	data_tail;		/* user-space written tail */ | 	__u64	data_tail;		/* user-space written tail */ | ||||||
|  |  | ||||||
|  | @ -87,10 +87,31 @@ static void perf_output_put_handle(struct perf_output_handle *handle) | ||||||
| 		goto out; | 		goto out; | ||||||
| 
 | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * Publish the known good head. Rely on the full barrier implied | 	 * Since the mmap() consumer (userspace) can run on a different CPU: | ||||||
| 	 * by atomic_dec_and_test() order the rb->head read and this | 	 * | ||||||
| 	 * write. | 	 *   kernel				user | ||||||
|  | 	 * | ||||||
|  | 	 *   READ ->data_tail			READ ->data_head | ||||||
|  | 	 *   smp_mb()	(A)			smp_rmb()	(C) | ||||||
|  | 	 *   WRITE $data			READ $data | ||||||
|  | 	 *   smp_wmb()	(B)			smp_mb()	(D) | ||||||
|  | 	 *   STORE ->data_head			WRITE ->data_tail | ||||||
|  | 	 * | ||||||
|  | 	 * Where A pairs with D, and B pairs with C. | ||||||
|  | 	 * | ||||||
|  | 	 * I don't think A needs to be a full barrier because we won't in fact | ||||||
|  | 	 * write data until we see the store from userspace. So we simply don't | ||||||
|  | 	 * issue the data WRITE until we observe it. Be conservative for now. | ||||||
|  | 	 * | ||||||
|  | 	 * OTOH, D needs to be a full barrier since it separates the data READ | ||||||
|  | 	 * from the tail WRITE. | ||||||
|  | 	 * | ||||||
|  | 	 * For B a WMB is sufficient since it separates two WRITEs, and for C | ||||||
|  | 	 * an RMB is sufficient since it separates two READs. | ||||||
|  | 	 * | ||||||
|  | 	 * See perf_output_begin(). | ||||||
| 	 */ | 	 */ | ||||||
|  | 	smp_wmb(); | ||||||
| 	rb->user_page->data_head = head; | 	rb->user_page->data_head = head; | ||||||
| 
 | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
|  | @ -154,9 +175,11 @@ int perf_output_begin(struct perf_output_handle *handle, | ||||||
| 		 * Userspace could choose to issue a mb() before updating the | 		 * Userspace could choose to issue a mb() before updating the | ||||||
| 		 * tail pointer. So that all reads will be completed before the | 		 * tail pointer. So that all reads will be completed before the | ||||||
| 		 * write is issued. | 		 * write is issued. | ||||||
|  | 		 * | ||||||
|  | 		 * See perf_output_put_handle(). | ||||||
| 		 */ | 		 */ | ||||||
| 		tail = ACCESS_ONCE(rb->user_page->data_tail); | 		tail = ACCESS_ONCE(rb->user_page->data_tail); | ||||||
| 		smp_rmb(); | 		smp_mb(); | ||||||
| 		offset = head = local_read(&rb->head); | 		offset = head = local_read(&rb->head); | ||||||
| 		head += size; | 		head += size; | ||||||
| 		if (unlikely(!perf_output_space(rb, tail, offset, head))) | 		if (unlikely(!perf_output_space(rb, tail, offset, head))) | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Peter Zijlstra
						Peter Zijlstra