forked from mirrors/linux
		
	perf: Fix perf ring buffer memory ordering
The PPC64 people noticed a missing memory barrier and crufty old comments in the perf ring buffer code. So update all the comments and add the missing barrier. When the architecture implements local_t using atomic_long_t there will be double barriers issued; but short of introducing more conditional barrier primitives this is the best we can do. Reported-by: Victor Kaplansky <victork@il.ibm.com> Tested-by: Victor Kaplansky <victork@il.ibm.com> Signed-off-by: Peter Zijlstra <peterz@infradead.org> Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> Cc: michael@ellerman.id.au Cc: Paul McKenney <paulmck@linux.vnet.ibm.com> Cc: Michael Neuling <mikey@neuling.org> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: anton@samba.org Cc: benh@kernel.crashing.org Link: http://lkml.kernel.org/r/20131025173749.GG19466@laptop.lan Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
		
							parent
							
								
									cd65718712
								
							
						
					
					
						commit
						bf378d341e
					
				
					 2 changed files with 34 additions and 9 deletions
				
			
		|  | @ -456,13 +456,15 @@ struct perf_event_mmap_page { | |||
| 	/*
 | ||||
| 	 * Control data for the mmap() data buffer. | ||||
| 	 * | ||||
| 	 * User-space reading the @data_head value should issue an rmb(), on | ||||
| 	 * SMP capable platforms, after reading this value -- see | ||||
| 	 * perf_event_wakeup(). | ||||
| 	 * User-space reading the @data_head value should issue an smp_rmb(), | ||||
| 	 * after reading this value. | ||||
| 	 * | ||||
| 	 * When the mapping is PROT_WRITE the @data_tail value should be | ||||
| 	 * written by userspace to reflect the last read data. In this case | ||||
| 	 * the kernel will not over-write unread data. | ||||
| 	 * written by userspace to reflect the last read data, after issueing | ||||
| 	 * an smp_mb() to separate the data read from the ->data_tail store. | ||||
| 	 * In this case the kernel will not over-write unread data. | ||||
| 	 * | ||||
| 	 * See perf_output_put_handle() for the data ordering. | ||||
| 	 */ | ||||
| 	__u64   data_head;		/* head in the data section */ | ||||
| 	__u64	data_tail;		/* user-space written tail */ | ||||
|  |  | |||
|  | @ -87,10 +87,31 @@ static void perf_output_put_handle(struct perf_output_handle *handle) | |||
| 		goto out; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Publish the known good head. Rely on the full barrier implied | ||||
| 	 * by atomic_dec_and_test() order the rb->head read and this | ||||
| 	 * write. | ||||
| 	 * Since the mmap() consumer (userspace) can run on a different CPU: | ||||
| 	 * | ||||
| 	 *   kernel				user | ||||
| 	 * | ||||
| 	 *   READ ->data_tail			READ ->data_head | ||||
| 	 *   smp_mb()	(A)			smp_rmb()	(C) | ||||
| 	 *   WRITE $data			READ $data | ||||
| 	 *   smp_wmb()	(B)			smp_mb()	(D) | ||||
| 	 *   STORE ->data_head			WRITE ->data_tail | ||||
| 	 * | ||||
| 	 * Where A pairs with D, and B pairs with C. | ||||
| 	 * | ||||
| 	 * I don't think A needs to be a full barrier because we won't in fact | ||||
| 	 * write data until we see the store from userspace. So we simply don't | ||||
| 	 * issue the data WRITE until we observe it. Be conservative for now. | ||||
| 	 * | ||||
| 	 * OTOH, D needs to be a full barrier since it separates the data READ | ||||
| 	 * from the tail WRITE. | ||||
| 	 * | ||||
| 	 * For B a WMB is sufficient since it separates two WRITEs, and for C | ||||
| 	 * an RMB is sufficient since it separates two READs. | ||||
| 	 * | ||||
| 	 * See perf_output_begin(). | ||||
| 	 */ | ||||
| 	smp_wmb(); | ||||
| 	rb->user_page->data_head = head; | ||||
| 
 | ||||
| 	/*
 | ||||
|  | @ -154,9 +175,11 @@ int perf_output_begin(struct perf_output_handle *handle, | |||
| 		 * Userspace could choose to issue a mb() before updating the | ||||
| 		 * tail pointer. So that all reads will be completed before the | ||||
| 		 * write is issued. | ||||
| 		 * | ||||
| 		 * See perf_output_put_handle(). | ||||
| 		 */ | ||||
| 		tail = ACCESS_ONCE(rb->user_page->data_tail); | ||||
| 		smp_rmb(); | ||||
| 		smp_mb(); | ||||
| 		offset = head = local_read(&rb->head); | ||||
| 		head += size; | ||||
| 		if (unlikely(!perf_output_space(rb, tail, offset, head))) | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Peter Zijlstra
						Peter Zijlstra