mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	ftrace: Optimize the function tracer list loop
There are lots of places that perform:
       op = rcu_dereference_raw(ftrace_control_list);
       while (op != &ftrace_list_end) {
Add a helper macro to do this, and also optimize for a single
entity. That is, gcc will optimize a loop for either no iterations
or more than one iteration. But usually only a single callback
is registered to the function tracer, thus the optimized case
should be a single pass. To do this we now do:
	op = rcu_dereference_raw(list);
	do {
		[...]
	} while (likely(op = rcu_dereference_raw((op)->next)) &&
	       unlikely((op) != &ftrace_list_end));
An op is always registered (ftrace_list_end when no callbacks are
registered), thus when a single callback is registered, the linked
list looks like:
 top => callback => ftrace_list_end => NULL.
The likely(op = op->next) still must be performed due to the race
of removing the callback, where the first op assignment could
equal ftrace_list_end. In that case, the op->next would be NULL.
But this is unlikely (only happens in a race condition when
removing the callback).
But it is very likely that the next op would be ftrace_list_end,
unless more than one callback has been registered. This tells
gcc what the most common case is and makes the fast path with
the least amount of branches.
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
			
			
This commit is contained in:
		
							parent
							
								
									9640388b63
								
							
						
					
					
						commit
						0a016409e4
					
				
					 1 changed files with 26 additions and 22 deletions
				
			
		| 
						 | 
					@ -111,6 +111,26 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
 | 
				
			||||||
#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
 | 
					#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Traverse the ftrace_global_list, invoking all entries.  The reason that we
 | 
				
			||||||
 | 
					 * can use rcu_dereference_raw() is that elements removed from this list
 | 
				
			||||||
 | 
					 * are simply leaked, so there is no need to interact with a grace-period
 | 
				
			||||||
 | 
					 * mechanism.  The rcu_dereference_raw() calls are needed to handle
 | 
				
			||||||
 | 
					 * concurrent insertions into the ftrace_global_list.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Silly Alpha and silly pointer-speculation compiler optimizations!
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					#define do_for_each_ftrace_op(op, list)			\
 | 
				
			||||||
 | 
						op = rcu_dereference_raw(list);			\
 | 
				
			||||||
 | 
						do
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Optimized for just a single item in the list (as that is the normal case).
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					#define while_for_each_ftrace_op(op)				\
 | 
				
			||||||
 | 
						while (likely(op = rcu_dereference_raw((op)->next)) &&	\
 | 
				
			||||||
 | 
						       unlikely((op) != &ftrace_list_end))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 * ftrace_nr_registered_ops - return number of ops registered
 | 
					 * ftrace_nr_registered_ops - return number of ops registered
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
| 
						 | 
					@ -132,15 +152,6 @@ int ftrace_nr_registered_ops(void)
 | 
				
			||||||
	return cnt;
 | 
						return cnt;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					 | 
				
			||||||
 * Traverse the ftrace_global_list, invoking all entries.  The reason that we
 | 
					 | 
				
			||||||
 * can use rcu_dereference_raw() is that elements removed from this list
 | 
					 | 
				
			||||||
 * are simply leaked, so there is no need to interact with a grace-period
 | 
					 | 
				
			||||||
 * mechanism.  The rcu_dereference_raw() calls are needed to handle
 | 
					 | 
				
			||||||
 * concurrent insertions into the ftrace_global_list.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * Silly Alpha and silly pointer-speculation compiler optimizations!
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
static void
 | 
					static void
 | 
				
			||||||
ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
 | 
					ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
 | 
				
			||||||
			struct ftrace_ops *op, struct pt_regs *regs)
 | 
								struct ftrace_ops *op, struct pt_regs *regs)
 | 
				
			||||||
| 
						 | 
					@ -149,11 +160,9 @@ ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
 | 
				
			||||||
		return;
 | 
							return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	trace_recursion_set(TRACE_GLOBAL_BIT);
 | 
						trace_recursion_set(TRACE_GLOBAL_BIT);
 | 
				
			||||||
	op = rcu_dereference_raw(ftrace_global_list); /*see above*/
 | 
						do_for_each_ftrace_op(op, ftrace_global_list) {
 | 
				
			||||||
	while (op != &ftrace_list_end) {
 | 
					 | 
				
			||||||
		op->func(ip, parent_ip, op, regs);
 | 
							op->func(ip, parent_ip, op, regs);
 | 
				
			||||||
		op = rcu_dereference_raw(op->next); /*see above*/
 | 
						} while_for_each_ftrace_op(op);
 | 
				
			||||||
	};
 | 
					 | 
				
			||||||
	trace_recursion_clear(TRACE_GLOBAL_BIT);
 | 
						trace_recursion_clear(TRACE_GLOBAL_BIT);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4104,14 +4113,11 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	preempt_disable_notrace();
 | 
						preempt_disable_notrace();
 | 
				
			||||||
	trace_recursion_set(TRACE_CONTROL_BIT);
 | 
						trace_recursion_set(TRACE_CONTROL_BIT);
 | 
				
			||||||
	op = rcu_dereference_raw(ftrace_control_list);
 | 
						do_for_each_ftrace_op(op, ftrace_control_list) {
 | 
				
			||||||
	while (op != &ftrace_list_end) {
 | 
					 | 
				
			||||||
		if (!ftrace_function_local_disabled(op) &&
 | 
							if (!ftrace_function_local_disabled(op) &&
 | 
				
			||||||
		    ftrace_ops_test(op, ip))
 | 
							    ftrace_ops_test(op, ip))
 | 
				
			||||||
			op->func(ip, parent_ip, op, regs);
 | 
								op->func(ip, parent_ip, op, regs);
 | 
				
			||||||
 | 
						} while_for_each_ftrace_op(op);
 | 
				
			||||||
		op = rcu_dereference_raw(op->next);
 | 
					 | 
				
			||||||
	};
 | 
					 | 
				
			||||||
	trace_recursion_clear(TRACE_CONTROL_BIT);
 | 
						trace_recursion_clear(TRACE_CONTROL_BIT);
 | 
				
			||||||
	preempt_enable_notrace();
 | 
						preempt_enable_notrace();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -4139,12 +4145,10 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
 | 
				
			||||||
	 * they must be freed after a synchronize_sched().
 | 
						 * they must be freed after a synchronize_sched().
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	preempt_disable_notrace();
 | 
						preempt_disable_notrace();
 | 
				
			||||||
	op = rcu_dereference_raw(ftrace_ops_list);
 | 
						do_for_each_ftrace_op(op, ftrace_ops_list) {
 | 
				
			||||||
	while (op != &ftrace_list_end) {
 | 
					 | 
				
			||||||
		if (ftrace_ops_test(op, ip))
 | 
							if (ftrace_ops_test(op, ip))
 | 
				
			||||||
			op->func(ip, parent_ip, op, regs);
 | 
								op->func(ip, parent_ip, op, regs);
 | 
				
			||||||
		op = rcu_dereference_raw(op->next);
 | 
						} while_for_each_ftrace_op(op);
 | 
				
			||||||
	};
 | 
					 | 
				
			||||||
	preempt_enable_notrace();
 | 
						preempt_enable_notrace();
 | 
				
			||||||
	trace_recursion_clear(TRACE_INTERNAL_BIT);
 | 
						trace_recursion_clear(TRACE_INTERNAL_BIT);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue