	ring-buffer: Fix recursion protection transitions between interrupt context
The recursion protection of the ring buffer depends on preempt_count() being
correct. But it is possible that the ring buffer gets called after an
interrupt comes in but before it updates the preempt_count(). This will
trigger a false positive in the recursion code.

Use the same trick from the ftrace function callback recursion code, which
uses a "transition" bit that gets set, to allow a single extra recursion to
handle transitions between contexts.
Cc: stable@vger.kernel.org
Fixes: 567cd4da54 ("ring-buffer: User context bit recursion checking")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
			
			
commit b02414c8f0
parent 906695e593
1 changed file with 46 additions and 12 deletions
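
Before reading the diff, it may help to see the idea in isolation. Below is a minimal, self-contained C sketch of the transition-bit scheme, not the kernel code: struct rb_ctx, ctx_recursive_lock() and ctx_recursive_unlock() are invented names for this illustration, and the caller passes the context bit explicitly instead of deriving it from preempt_count(). It only models how one extra level of nesting is absorbed by the TRANSITION bit and how unlock clears the lowest set bit.

/*
 * Minimal user-space model of the ring buffer recursion protection
 * described in this commit.  Illustration only: struct rb_ctx,
 * ctx_recursive_lock() and ctx_recursive_unlock() are invented names,
 * and the context bit is passed in rather than derived from
 * preempt_count().
 */
#include <stdio.h>

enum {
	CTX_TRANSITION,		/* absorbs one context-transition false positive */
	CTX_NMI,
	CTX_IRQ,
	CTX_SOFTIRQ,
	CTX_NORMAL,
	CTX_MAX
};

struct rb_ctx {
	unsigned int current_context;
	unsigned int nest;
};

/* Return 0 if tracing may proceed, 1 if this is a genuine recursion. */
static int ctx_recursive_lock(struct rb_ctx *c, int bit)
{
	unsigned int val = c->current_context;

	if (val & (1U << (bit + c->nest))) {
		/*
		 * The bit for this context is already set.  It may just be
		 * a transition (an interrupt arrived before preempt_count()
		 * was updated), so allow exactly one extra level by falling
		 * back to the TRANSITION bit.
		 */
		bit = CTX_TRANSITION;
		if (val & (1U << (bit + c->nest)))
			return 1;
	}

	c->current_context = val | (1U << (bit + c->nest));
	return 0;
}

static void ctx_recursive_unlock(struct rb_ctx *c)
{
	/* Clear the least significant set bit at or above 'nest'. */
	c->current_context &= c->current_context - (1U << c->nest);
}

int main(void)
{
	struct rb_ctx c = { 0, 0 };

	ctx_recursive_lock(&c, CTX_NORMAL);	/* normal-context event */

	/*
	 * An interrupt fires before preempt_count() is updated, so its
	 * event still computes CTX_NORMAL: the first check trips, and the
	 * TRANSITION bit absorbs the false positive.
	 */
	if (!ctx_recursive_lock(&c, CTX_NORMAL))
		printf("transition allowed, context = %#x\n", c.current_context);

	/* A second event in the same window is treated as real recursion. */
	if (ctx_recursive_lock(&c, CTX_NORMAL))
		printf("second transition rejected\n");

	ctx_recursive_unlock(&c);	/* clears TRANSITION first */
	ctx_recursive_unlock(&c);	/* then clears NORMAL */
	return 0;
}

The real trace_recursive_lock() derives the context bit from preempt_count() (NMI_MASK, HARDIRQ_MASK, and so on) and keeps current_context in struct ring_buffer_per_cpu, as the diff below shows.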
				
			
@@ -438,14 +438,16 @@ enum {
 };
 /*
  * Used for which event context the event is in.
- *  NMI     = 0
- *  IRQ     = 1
- *  SOFTIRQ = 2
- *  NORMAL  = 3
+ *  TRANSITION = 0
+ *  NMI     = 1
+ *  IRQ     = 2
+ *  SOFTIRQ = 3
+ *  NORMAL  = 4
  *
  * See trace_recursive_lock() comment below for more details.
  */
 enum {
+	RB_CTX_TRANSITION,
 	RB_CTX_NMI,
 	RB_CTX_IRQ,
 	RB_CTX_SOFTIRQ,
@@ -3014,10 +3016,10 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
  * a bit of overhead in something as critical as function tracing,
  * we use a bitmask trick.
  *
- *  bit 0 =  NMI context
- *  bit 1 =  IRQ context
- *  bit 2 =  SoftIRQ context
- *  bit 3 =  normal context.
+ *  bit 1 =  NMI context
+ *  bit 2 =  IRQ context
+ *  bit 3 =  SoftIRQ context
+ *  bit 4 =  normal context.
  *
  * This works because this is the order of contexts that can
  * preempt other contexts. A SoftIRQ never preempts an IRQ
@@ -3040,6 +3042,30 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
  * The least significant bit can be cleared this way, and it
  * just so happens that it is the same bit corresponding to
  * the current context.
+ *
+ * Now the TRANSITION bit breaks the above slightly. The TRANSITION bit
+ * is set when a recursion is detected at the current context, and if
+ * the TRANSITION bit is already set, it will fail the recursion.
+ * This is needed because there's a lag between the changing of
+ * interrupt context and updating the preempt count. In this case,
+ * a false positive will be found. To handle this, one extra recursion
+ * is allowed, and this is done by the TRANSITION bit. If the TRANSITION
+ * bit is already set, then it is considered a recursion and the function
+ * ends. Otherwise, the TRANSITION bit is set, and that bit is returned.
+ *
+ * On the trace_recursive_unlock(), the TRANSITION bit will be the first
+ * to be cleared. Even if it wasn't the context that set it. That is,
+ * if an interrupt comes in while NORMAL bit is set and the ring buffer
+ * is called before preempt_count() is updated, since the check will
+ * be on the NORMAL bit, the TRANSITION bit will then be set. If an
+ * NMI then comes in, it will set the NMI bit, but when the NMI code
+ * does the trace_recursive_unlock() it will clear the TRANSITION bit
+ * and leave the NMI bit set. But this is fine, because the interrupt
+ * code that set the TRANSITION bit will then clear the NMI bit when it
+ * calls trace_recursive_unlock(). If another NMI comes in, it will
+ * set the TRANSITION bit and continue.
+ *
+ * Note: The TRANSITION bit only handles a single transition between context.
  */
 
 static __always_inline int
@@ -3055,8 +3081,16 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
 		bit = pc & NMI_MASK ? RB_CTX_NMI :
 			pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ;
 
-	if (unlikely(val & (1 << (bit + cpu_buffer->nest))))
+	if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) {
+		/*
+		 * It is possible that this was called by transitioning
+		 * between interrupt context, and preempt_count() has not
+		 * been updated yet. In this case, use the TRANSITION bit.
+		 */
+		bit = RB_CTX_TRANSITION;
+		if (val & (1 << (bit + cpu_buffer->nest)))
 			return 1;
+	}
 
 	val |= (1 << (bit + cpu_buffer->nest));
 	cpu_buffer->current_context = val;
@@ -3071,8 +3105,8 @@ trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
 		cpu_buffer->current_context - (1 << cpu_buffer->nest);
 }
 
-/* The recursive locking above uses 4 bits */
-#define NESTED_BITS 4
+/* The recursive locking above uses 5 bits */
+#define NESTED_BITS 5
 
 /**
  * ring_buffer_nest_start - Allow to trace while nested
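
To make the hand-off described in the new comment block concrete (bit numbers as in the new enum, with nest at its default of 0): a NORMAL-context event sets bit 4, so current_context = 0b10000. An interrupt arrives before preempt_count() is updated, its event still computes the NORMAL bit, finds it already set, and falls back to the TRANSITION bit: 0b10001. An NMI then traces and sets its own bit: 0b10011. The NMI's trace_recursive_unlock() does current_context &= current_context - (1 << nest), here current_context - 1, which clears the lowest set bit, TRANSITION, leaving 0b10010. The interrupt's unlock then clears the NMI bit (0b10000), and the outer NORMAL unlock clears the last bit. The bits are not always cleared by the context that set them, but the count stays balanced, which is why a single TRANSITION bit suffices for one transition.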