forked from mirrors/linux
		
	tracing: profile likely and unlikely annotations
Impact: new unlikely/likely profiler
Andrew Morton recently suggested having an in-kernel way to profile
likely and unlikely macros. This patch achieves that goal.
When configured, every(*) likely and unlikely macro gets a counter attached
to it. When the condition is hit, the hit and misses of that condition
are recorded. These numbers can later be retrieved by:
  /debugfs/tracing/profile_likely    - All likely markers
  /debugfs/tracing/profile_unlikely  - All unlikely markers.
# cat /debug/tracing/profile_unlikely | head
 correct incorrect  %        Function                  File              Line
 ------- ---------  -        --------                  ----              ----
    2167        0   0 do_arch_prctl                  process_64.c         832
       0        0   0 do_arch_prctl                  process_64.c         804
    2670        0   0 IS_ERR                         err.h                34
   71230     5693   7 __switch_to                    process_64.c         673
   76919        0   0 __switch_to                    process_64.c         639
   43184    33743  43 __switch_to                    process_64.c         624
   12740    64181  83 __switch_to                    process_64.c         594
   12740    64174  83 __switch_to                    process_64.c         590
# cat /debug/tracing/profile_unlikely | \
  awk '{ if ($3 > 25) print $0; }' |head -20
   44963    35259  43 __switch_to                    process_64.c         624
   12762    67454  84 __switch_to                    process_64.c         594
   12762    67447  84 __switch_to                    process_64.c         590
    1478      595  28 syscall_get_error              syscall.h            51
       0     2821 100 syscall_trace_leave            ptrace.c             1567
       0        1 100 native_smp_prepare_cpus        smpboot.c            1237
   86338   265881  75 calc_delta_fair                sched_fair.c         408
  210410   108540  34 calc_delta_mine                sched.c              1267
       0    54550 100 sched_info_queued              sched_stats.h        222
   51899    66435  56 pick_next_task_fair            sched_fair.c         1422
       6       10  62 yield_task_fair                sched_fair.c         982
    7325     2692  26 rt_policy                      sched.c              144
       0     1270 100 pre_schedule_rt                sched_rt.c           1261
    1268    48073  97 pick_next_task_rt              sched_rt.c           884
       0    45181 100 sched_info_dequeued            sched_stats.h        177
       0       15 100 sched_move_task                sched.c              8700
       0       15 100 sched_move_task                sched.c              8690
   53167    33217  38 schedule                       sched.c              4457
       0    80208 100 sched_info_switch              sched_stats.h        270
   30585    49631  61 context_switch                 sched.c              2619
# cat /debug/tracing/profile_likely | awk '{ if ($3 > 25) print $0; }'
   39900    36577  47 pick_next_task                 sched.c              4397
   20824    15233  42 switch_mm                      mmu_context_64.h     18
       0        7 100 __cancel_work_timer            workqueue.c          560
     617    66484  99 clocksource_adjust             timekeeping.c        456
       0   346340 100 audit_syscall_exit             auditsc.c            1570
      38   347350  99 audit_get_context              auditsc.c            732
       0   345244 100 audit_syscall_entry            auditsc.c            1541
      38     1017  96 audit_free                     auditsc.c            1446
       0     1090 100 audit_alloc                    auditsc.c            862
    2618     1090  29 audit_alloc                    auditsc.c            858
       0        6 100 move_masked_irq                migration.c          9
       1      198  99 probe_sched_wakeup             trace_sched_switch.c 58
       2        2  50 probe_wakeup                   trace_sched_wakeup.c 227
       0        2 100 probe_wakeup_sched_switch      trace_sched_wakeup.c 144
    4514     2090  31 __grab_cache_page              filemap.c            2149
   12882   228786  94 mapping_unevictable            pagemap.h            50
       4       11  73 __flush_cpu_slab               slub.c               1466
  627757   330451  34 slab_free                      slub.c               1731
    2959    61245  95 dentry_lru_del_init            dcache.c             153
     946     1217  56 load_elf_binary                binfmt_elf.c         904
     102       82  44 disk_put_part                  genhd.h              206
       1        1  50 dst_gc_task                    dst.c                82
       0       19 100 tcp_mss_split_point            tcp_output.c         1126
As you can see by the above, there's a bit of work to do in rethinking
the use of some unlikelys and likelys. Note: the unlikely case had 71 hits
that were more than 25%.
Note:  After submitting my first version of this patch, Andrew Morton
  showed me a version written by Daniel Walker, where I picked up
  the following ideas from:
  1)  Using __builtin_constant_p to avoid profiling fixed values.
  2)  Using __FILE__ instead of instruction pointers.
  3)  Using the preprocessor to stop all profiling of likely
       annotations from vsyscall_64.c.
Thanks to Andrew Morton, Arjan van de Ven, Theodore Tso and Ingo Molnar
for their feed back on this patch.
(*) Not ever unlikely is recorded, those that are used by vsyscalls
 (a few of them) had to have profiling disabled.
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Theodore Tso <tytso@mit.edu>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
			
			
This commit is contained in:
		
							parent
							
								
									cb9382e5a9
								
							
						
					
					
						commit
						1f0d69a9fc
					
				
					 6 changed files with 261 additions and 3 deletions
				
			
		|  | @ -17,6 +17,14 @@ | |||
|  *  want per guest time just set the kernel.vsyscall64 sysctl to 0. | ||||
|  */ | ||||
| 
 | ||||
| /* Protect userspace from profiling */ | ||||
| #ifdef CONFIG_TRACE_UNLIKELY_PROFILE | ||||
| # undef likely | ||||
| # undef unlikely | ||||
| # define likely(x)		likely_notrace(x) | ||||
| # define unlikely(x)		unlikely_notrace(x) | ||||
| #endif | ||||
| 
 | ||||
| #include <linux/time.h> | ||||
| #include <linux/init.h> | ||||
| #include <linux/kernel.h> | ||||
|  |  | |||
|  | @ -45,6 +45,17 @@ | |||
| #define MCOUNT_REC() | ||||
| #endif | ||||
| 
 | ||||
| #ifdef CONFIG_TRACE_UNLIKELY_PROFILE | ||||
| #define LIKELY_PROFILE()	VMLINUX_SYMBOL(__start_likely_profile) = .;   \ | ||||
| 				*(_ftrace_likely)			      \ | ||||
| 				VMLINUX_SYMBOL(__stop_likely_profile) = .;    \ | ||||
| 				VMLINUX_SYMBOL(__start_unlikely_profile) = .; \ | ||||
| 				*(_ftrace_unlikely)			      \ | ||||
| 				VMLINUX_SYMBOL(__stop_unlikely_profile) = .; | ||||
| #else | ||||
| #define LIKELY_PROFILE() | ||||
| #endif | ||||
| 
 | ||||
| /* .data section */ | ||||
| #define DATA_DATA							\ | ||||
| 	*(.data)							\ | ||||
|  | @ -62,7 +73,8 @@ | |||
| 	VMLINUX_SYMBOL(__stop___markers) = .;				\ | ||||
| 	VMLINUX_SYMBOL(__start___tracepoints) = .;			\ | ||||
| 	*(__tracepoints)						\ | ||||
| 	VMLINUX_SYMBOL(__stop___tracepoints) = .; | ||||
| 	VMLINUX_SYMBOL(__stop___tracepoints) = .;			\ | ||||
| 	LIKELY_PROFILE() | ||||
| 
 | ||||
| #define RO_DATA(align)							\ | ||||
| 	. = ALIGN((align));						\ | ||||
|  |  | |||
|  | @ -59,8 +59,65 @@ extern void __chk_io_ptr(const volatile void __iomem *); | |||
|  * specific implementations come from the above header files | ||||
|  */ | ||||
| 
 | ||||
| #define likely(x)	__builtin_expect(!!(x), 1) | ||||
| #define unlikely(x)	__builtin_expect(!!(x), 0) | ||||
| #ifdef CONFIG_TRACE_UNLIKELY_PROFILE | ||||
| struct ftrace_likely_data { | ||||
| 	const char *func; | ||||
| 	const char *file; | ||||
| 	unsigned line; | ||||
| 	unsigned long correct; | ||||
| 	unsigned long incorrect; | ||||
| }; | ||||
| void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect); | ||||
| 
 | ||||
| #define likely_notrace(x)	__builtin_expect(!!(x), 1) | ||||
| #define unlikely_notrace(x)	__builtin_expect(!!(x), 0) | ||||
| 
 | ||||
| #define likely_check(x) ({						\ | ||||
| 			int ______r;					\ | ||||
| 			static struct ftrace_likely_data		\ | ||||
| 				__attribute__((__aligned__(4)))		\ | ||||
| 				__attribute__((section("_ftrace_likely"))) \ | ||||
| 				______f = {				\ | ||||
| 				.func = __func__,			\ | ||||
| 				.file = __FILE__,			\ | ||||
| 				.line = __LINE__,			\ | ||||
| 			};						\ | ||||
| 			______f.line = __LINE__;			\ | ||||
| 			______r = likely_notrace(x);			\ | ||||
| 			ftrace_likely_update(&______f, ______r, 1);	\ | ||||
| 			______r;					\ | ||||
| 		}) | ||||
| #define unlikely_check(x) ({						\ | ||||
| 			int ______r;					\ | ||||
| 			static struct ftrace_likely_data		\ | ||||
| 				__attribute__((__aligned__(4)))		\ | ||||
| 				__attribute__((section("_ftrace_unlikely"))) \ | ||||
| 				______f = {				\ | ||||
| 				.func = __func__,			\ | ||||
| 				.file = __FILE__,			\ | ||||
| 				.line = __LINE__,			\ | ||||
| 			};						\ | ||||
| 			______f.line = __LINE__;			\ | ||||
| 			______r = unlikely_notrace(x);			\ | ||||
| 			ftrace_likely_update(&______f, ______r, 0);	\ | ||||
| 			______r;					\ | ||||
| 		}) | ||||
| 
 | ||||
| /*
 | ||||
|  * Using __builtin_constant_p(x) to ignore cases where the return | ||||
|  * value is always the same.  This idea is taken from a similar patch | ||||
|  * written by Daniel Walker. | ||||
|  */ | ||||
| # ifndef likely | ||||
| #  define likely(x)	(__builtin_constant_p(x) ? !!(x) : likely_check(x)) | ||||
| # endif | ||||
| # ifndef unlikely | ||||
| #  define unlikely(x)	(__builtin_constant_p(x) ? !!(x) : unlikely_check(x)) | ||||
| # endif | ||||
| #else | ||||
| # define likely(x)	__builtin_expect(!!(x), 1) | ||||
| # define unlikely(x)	__builtin_expect(!!(x), 0) | ||||
| #endif | ||||
| 
 | ||||
| /* Optimization barrier */ | ||||
| #ifndef barrier | ||||
|  |  | |||
|  | @ -159,6 +159,22 @@ config BOOT_TRACER | |||
| 	    selected, because the self-tests are an initcall as well and that | ||||
| 	    would invalidate the boot trace. ) | ||||
| 
 | ||||
| config TRACE_UNLIKELY_PROFILE | ||||
| 	bool "Trace likely/unlikely profiler" | ||||
| 	depends on DEBUG_KERNEL | ||||
| 	select TRACING | ||||
| 	help | ||||
| 	  This tracer profiles all the the likely and unlikely macros | ||||
| 	  in the kernel. It will display the results in: | ||||
| 
 | ||||
| 	  /debugfs/tracing/profile_likely | ||||
| 	  /debugfs/tracing/profile_unlikely | ||||
| 
 | ||||
| 	  Note: this will add a significant overhead, only turn this | ||||
| 	  on if you need to profile the system's use of these macros. | ||||
| 
 | ||||
| 	  Say N if unsure. | ||||
| 
 | ||||
| config STACK_TRACER | ||||
| 	bool "Trace max stack" | ||||
| 	depends on HAVE_FUNCTION_TRACER | ||||
|  |  | |||
|  | @ -25,5 +25,6 @@ obj-$(CONFIG_STACK_TRACER) += trace_stack.o | |||
| obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o | ||||
| obj-$(CONFIG_BOOT_TRACER) += trace_boot.o | ||||
| obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o | ||||
| obj-$(CONFIG_TRACE_UNLIKELY_PROFILE) += trace_unlikely.o | ||||
| 
 | ||||
| libftrace-y := ftrace.o | ||||
|  |  | |||
							
								
								
									
										164
									
								
								kernel/trace/trace_unlikely.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										164
									
								
								kernel/trace/trace_unlikely.c
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,164 @@ | |||
| /*
 | ||||
|  * unlikely profiler | ||||
|  * | ||||
|  * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> | ||||
|  */ | ||||
| #include <linux/kallsyms.h> | ||||
| #include <linux/seq_file.h> | ||||
| #include <linux/spinlock.h> | ||||
| #include <linux/debugfs.h> | ||||
| #include <linux/uaccess.h> | ||||
| #include <linux/module.h> | ||||
| #include <linux/ftrace.h> | ||||
| #include <linux/hash.h> | ||||
| #include <linux/fs.h> | ||||
| #include <asm/local.h> | ||||
| #include "trace.h" | ||||
| 
 | ||||
| void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect) | ||||
| { | ||||
| 	/* FIXME: Make this atomic! */ | ||||
| 	if (val == expect) | ||||
| 		f->correct++; | ||||
| 	else | ||||
| 		f->incorrect++; | ||||
| } | ||||
| EXPORT_SYMBOL(ftrace_likely_update); | ||||
| 
 | ||||
| struct ftrace_pointer { | ||||
| 	void		*start; | ||||
| 	void		*stop; | ||||
| }; | ||||
| 
 | ||||
| static void * | ||||
| t_next(struct seq_file *m, void *v, loff_t *pos) | ||||
| { | ||||
| 	struct ftrace_pointer *f = m->private; | ||||
| 	struct ftrace_likely_data *p = v; | ||||
| 
 | ||||
| 	(*pos)++; | ||||
| 
 | ||||
| 	if (v == (void *)1) | ||||
| 		return f->start; | ||||
| 
 | ||||
| 	++p; | ||||
| 
 | ||||
| 	if ((void *)p >= (void *)f->stop) | ||||
| 		return NULL; | ||||
| 
 | ||||
| 	return p; | ||||
| } | ||||
| 
 | ||||
| static void *t_start(struct seq_file *m, loff_t *pos) | ||||
| { | ||||
| 	void *t = (void *)1; | ||||
| 	loff_t l = 0; | ||||
| 
 | ||||
| 	for (; t && l < *pos; t = t_next(m, t, &l)) | ||||
| 		; | ||||
| 
 | ||||
| 	return t; | ||||
| } | ||||
| 
 | ||||
| static void t_stop(struct seq_file *m, void *p) | ||||
| { | ||||
| } | ||||
| 
 | ||||
| static int t_show(struct seq_file *m, void *v) | ||||
| { | ||||
| 	struct ftrace_likely_data *p = v; | ||||
| 	const char *f; | ||||
| 	unsigned long percent; | ||||
| 
 | ||||
| 	if (v == (void *)1) { | ||||
| 		seq_printf(m, " correct incorrect  %% " | ||||
| 			      "       Function                " | ||||
| 			      "  File              Line\n" | ||||
| 			      " ------- ---------  - " | ||||
| 			      "       --------                " | ||||
| 			      "  ----              ----\n"); | ||||
| 		return 0; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Only print the file, not the path */ | ||||
| 	f = p->file + strlen(p->file); | ||||
| 	while (f >= p->file && *f != '/') | ||||
| 		f--; | ||||
| 	f++; | ||||
| 
 | ||||
| 	if (p->correct) { | ||||
| 		percent = p->incorrect * 100; | ||||
| 		percent /= p->correct + p->incorrect; | ||||
| 	} else | ||||
| 		percent = p->incorrect ? 100 : 0; | ||||
| 
 | ||||
| 	seq_printf(m, "%8lu %8lu %3lu ", p->correct, p->incorrect, percent); | ||||
| 	seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line); | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static struct seq_operations tracing_likely_seq_ops = { | ||||
| 	.start		= t_start, | ||||
| 	.next		= t_next, | ||||
| 	.stop		= t_stop, | ||||
| 	.show		= t_show, | ||||
| }; | ||||
| 
 | ||||
| static int tracing_likely_open(struct inode *inode, struct file *file) | ||||
| { | ||||
| 	int ret; | ||||
| 
 | ||||
| 	ret = seq_open(file, &tracing_likely_seq_ops); | ||||
| 	if (!ret) { | ||||
| 		struct seq_file *m = file->private_data; | ||||
| 		m->private = (void *)inode->i_private; | ||||
| 	} | ||||
| 
 | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| static struct file_operations tracing_likely_fops = { | ||||
| 	.open		= tracing_likely_open, | ||||
| 	.read		= seq_read, | ||||
| 	.llseek		= seq_lseek, | ||||
| }; | ||||
| 
 | ||||
| extern unsigned long __start_likely_profile[]; | ||||
| extern unsigned long __stop_likely_profile[]; | ||||
| extern unsigned long __start_unlikely_profile[]; | ||||
| extern unsigned long __stop_unlikely_profile[]; | ||||
| 
 | ||||
| static struct ftrace_pointer ftrace_likely_pos = { | ||||
| 	.start			= __start_likely_profile, | ||||
| 	.stop			= __stop_likely_profile, | ||||
| }; | ||||
| 
 | ||||
| static struct ftrace_pointer ftrace_unlikely_pos = { | ||||
| 	.start			= __start_unlikely_profile, | ||||
| 	.stop			= __stop_unlikely_profile, | ||||
| }; | ||||
| 
 | ||||
| static __init int ftrace_unlikely_init(void) | ||||
| { | ||||
| 	struct dentry *d_tracer; | ||||
| 	struct dentry *entry; | ||||
| 
 | ||||
| 	d_tracer = tracing_init_dentry(); | ||||
| 
 | ||||
| 	entry = debugfs_create_file("profile_likely", 0444, d_tracer, | ||||
| 				    &ftrace_likely_pos, | ||||
| 				    &tracing_likely_fops); | ||||
| 	if (!entry) | ||||
| 		pr_warning("Could not create debugfs 'profile_likely' entry\n"); | ||||
| 
 | ||||
| 	entry = debugfs_create_file("profile_unlikely", 0444, d_tracer, | ||||
| 				    &ftrace_unlikely_pos, | ||||
| 				    &tracing_likely_fops); | ||||
| 	if (!entry) | ||||
| 		pr_warning("Could not create debugfs" | ||||
| 			   " 'profile_unlikely' entry\n"); | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| device_initcall(ftrace_unlikely_init); | ||||
		Loading…
	
		Reference in a new issue
	
	 Steven Rostedt
						Steven Rostedt