Mirror of https://github.com/torvalds/linux.git (synced 2025-11-04 10:40:15 +02:00)
ring-buffer: Use context bit recursion checking
Using context bit recursion checking, we can help increase the
performance of the ring buffer.

Before this patch:

 # echo function > /debug/tracing/current_tracer
 # for i in `seq 10`; do ./hackbench 50; done
 Time: 10.285
 Time: 10.407
 Time: 10.243
 Time: 10.372
 Time: 10.380
 Time: 10.198
 Time: 10.272
 Time: 10.354
 Time: 10.248
 Time: 10.253
 (average: 10.3012)

Now we have:

 # echo function > /debug/tracing/current_tracer
 # for i in `seq 10`; do ./hackbench 50; done
 Time: 9.712
 Time: 9.824
 Time: 9.861
 Time: 9.827
 Time: 9.962
 Time: 9.905
 Time: 9.886
 Time: 10.088
 Time: 9.861
 Time: 9.834
 (average: 9.876)

A 4% savings!

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
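For reference, the quoted savings follows directly from the two averages above (an editorial note, not part of the original changelog):

 (10.3012 - 9.876) / 10.3012 ≈ 0.041

i.e. roughly a 4% reduction in average hackbench run time.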
commit 567cd4da54
parent 897f68a48b

2 changed files with 69 additions and 33 deletions
kernel/trace/ring_buffer.c
@@ -2432,41 +2432,76 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 
 #ifdef CONFIG_TRACING
 
-#define TRACE_RECURSIVE_DEPTH 16
+/*
+ * The lock and unlock are done within a preempt disable section.
+ * The current_context per_cpu variable can only be modified
+ * by the current task between lock and unlock. But it can
+ * be modified more than once via an interrupt. To pass this
+ * information from the lock to the unlock without having to
+ * access the 'in_interrupt()' functions again (which do show
+ * a bit of overhead in something as critical as function tracing,
+ * we use a bitmask trick.
+ *
+ *  bit 0 =  NMI context
+ *  bit 1 =  IRQ context
+ *  bit 2 =  SoftIRQ context
+ *  bit 3 =  normal context.
+ *
+ * This works because this is the order of contexts that can
+ * preempt other contexts. A SoftIRQ never preempts an IRQ
+ * context.
+ *
+ * When the context is determined, the corresponding bit is
+ * checked and set (if it was set, then a recursion of that context
+ * happened).
+ *
+ * On unlock, we need to clear this bit. To do so, just subtract
+ * 1 from the current_context and AND it to itself.
+ *
+ * (binary)
+ *  101 - 1 = 100
+ *  101 & 100 = 100 (clearing bit zero)
+ *
+ *  1010 - 1 = 1001
+ *  1010 & 1001 = 1000 (clearing bit 1)
+ *
+ * The least significant bit can be cleared this way, and it
+ * just so happens that it is the same bit corresponding to
+ * the current context.
+ */
+
+static DEFINE_PER_CPU(unsigned int, current_context);
 
-/* Keep this code out of the fast path cache */
-static noinline void trace_recursive_fail(void)
+static __always_inline int trace_recursive_lock(void)
 {
-	/* Disable all tracing before we do anything else */
-	tracing_off_permanent();
-
-	printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
-		    "HC[%lu]:SC[%lu]:NMI[%lu]\n",
-		    trace_recursion_buffer(),
-		    hardirq_count() >> HARDIRQ_SHIFT,
-		    softirq_count() >> SOFTIRQ_SHIFT,
-		    in_nmi());
-
-	WARN_ON_ONCE(1);
-}
-
-static inline int trace_recursive_lock(void)
-{
-	trace_recursion_inc();
+	unsigned int val = this_cpu_read(current_context);
+	int bit;
 
-	if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH))
-		return 0;
+	if (in_interrupt()) {
+		if (in_nmi())
+			bit = 0;
+		else if (in_irq())
+			bit = 1;
+		else
+			bit = 2;
+	} else
+		bit = 3;
 
-	trace_recursive_fail();
+	if (unlikely(val & (1 << bit)))
+		return 1;
 
-	return -1;
+	val |= (1 << bit);
+	this_cpu_write(current_context, val);
+
+	return 0;
 }
 
-static inline void trace_recursive_unlock(void)
+static __always_inline void trace_recursive_unlock(void)
 {
-	WARN_ON_ONCE(!trace_recursion_buffer());
+	unsigned int val = this_cpu_read(current_context);
 
-	trace_recursion_dec();
+	val--;
+	val &= this_cpu_read(current_context);
+	this_cpu_write(current_context, val);
 }
 
 #else
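The per-CPU bitmask trick above is the heart of the patch. What follows is a minimal user-space sketch (an editorial illustration, not the kernel code) showing why clearing the least significant set bit on unlock always releases the most recently locked context: contexts can only nest in a fixed order (NMI preempts IRQ preempts SoftIRQ preempts normal), so the newest lock always owns the lowest set bit.

/* context_bits_demo.c -- illustrative only, not the kernel implementation */
#include <assert.h>
#include <stdio.h>

static unsigned int current_context;	/* stand-in for the per-CPU variable */

/* bit 0 = NMI, bit 1 = IRQ, bit 2 = SoftIRQ, bit 3 = normal context */
static int recursive_lock(int bit)
{
	if (current_context & (1u << bit))
		return 1;			/* recursion in this context */
	current_context |= (1u << bit);
	return 0;
}

static void recursive_unlock(void)
{
	/* clear the least significant set bit: the most recently set one */
	current_context &= current_context - 1;
}

int main(void)
{
	/* normal context (bit 3) reserves an event, then an IRQ (bit 1) nests */
	assert(recursive_lock(3) == 0);
	assert(recursive_lock(1) == 0);		/* current_context == 1010 (binary) */

	/* a second reservation from IRQ context is detected as recursion */
	assert(recursive_lock(1) == 1);

	recursive_unlock();			/* 1010 & 1001 = 1000 */
	assert(current_context == (1u << 3));

	recursive_unlock();			/* clears bit 3 */
	assert(current_context == 0);

	printf("context-bit lock/unlock behaves as described\n");
	return 0;
}

The asserts mirror the binary worked example in the comment block; any C compiler will do (e.g. cc context_bits_demo.c && ./a.out).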
kernel/trace/trace.h
@@ -291,11 +291,6 @@ struct tracer {
 
 
 /* Only current can touch trace_recursion */
-#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
-#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
-
-/* Ring buffer has the 10 LSB bits to count */
-#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
 
 /*
  * For function tracing recursion:
@@ -323,7 +318,13 @@ struct tracer {
  * caller, and we can skip the current check.
  */
 enum {
-	TRACE_FTRACE_BIT = 11,
+	TRACE_BUFFER_BIT,
+	TRACE_BUFFER_NMI_BIT,
+	TRACE_BUFFER_IRQ_BIT,
+	TRACE_BUFFER_SIRQ_BIT,
+
+	/* Start of function recursion bits */
+	TRACE_FTRACE_BIT,
 	TRACE_FTRACE_NMI_BIT,
 	TRACE_FTRACE_IRQ_BIT,
 	TRACE_FTRACE_SIRQ_BIT,
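A quick reading aid for the reordered enum (editorial, not part of the patch): because the enum is anonymous and the explicit "= 11" is dropped, the ring-buffer bits now take the lowest positions and the function-tracing bits follow immediately after them, for example:

/* Illustrative only: bit positions implied by the reordered enum. */
#include <stdio.h>

enum {
	TRACE_BUFFER_BIT,		/* 0 */
	TRACE_BUFFER_NMI_BIT,		/* 1 */
	TRACE_BUFFER_IRQ_BIT,		/* 2 */
	TRACE_BUFFER_SIRQ_BIT,		/* 3 */

	/* Start of function recursion bits */
	TRACE_FTRACE_BIT,		/* 4 (was hard-coded to 11) */
	TRACE_FTRACE_NMI_BIT,		/* 5 */
	TRACE_FTRACE_IRQ_BIT,		/* 6 */
	TRACE_FTRACE_SIRQ_BIT,		/* 7 */
};

int main(void)
{
	printf("TRACE_BUFFER_BIT=%d, TRACE_FTRACE_BIT=%d\n",
	       TRACE_BUFFER_BIT, TRACE_FTRACE_BIT);
	return 0;
}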