mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	tracing: Allow synthetic events to pass around stacktraces
Allow a stacktrace from one event to be displayed by the end event of a
synthetic event. This is very useful when looking for the longest latency
of a sleep or something blocked on I/O.
 # cd /sys/kernel/tracing/
 # echo 's:block_lat pid_t pid; u64 delta; unsigned long[] stack;' > dynamic_events
 # echo 'hist:keys=next_pid:ts=common_timestamp.usecs,st=stacktrace  if prev_state == 1||prev_state == 2' > events/sched/sched_switch/trigger
 # echo 'hist:keys=prev_pid:delta=common_timestamp.usecs-$ts,s=$st:onmax($delta).trace(block_lat,prev_pid,$delta,$s)' >> events/sched/sched_switch/trigger
The above creates a "block_lat" synthetic event that take the stacktrace of
when a task schedules out in either the interruptible or uninterruptible
states, and on a new per process max $delta (the time it was scheduled
out), will print the process id and the stacktrace.
  # echo 1 > events/synthetic/block_lat/enable
  # cat trace
 #           TASK-PID     CPU#  |||||  TIMESTAMP  FUNCTION
 #              | |         |   |||||     |         |
    kworker/u16:0-767     [006] d..4.   560.645045: block_lat: pid=767 delta=66 stack=STACK:
 => __schedule
 => schedule
 => pipe_read
 => vfs_read
 => ksys_read
 => do_syscall_64
 => 0x966000aa
           <idle>-0       [003] d..4.   561.132117: block_lat: pid=0 delta=413787 stack=STACK:
 => __schedule
 => schedule
 => schedule_hrtimeout_range_clock
 => do_sys_poll
 => __x64_sys_poll
 => do_syscall_64
 => 0x966000aa
            <...>-153     [006] d..4.   562.068407: block_lat: pid=153 delta=54 stack=STACK:
 => __schedule
 => schedule
 => io_schedule
 => rq_qos_wait
 => wbt_wait
 => __rq_qos_throttle
 => blk_mq_submit_bio
 => submit_bio_noacct_nocheck
 => ext4_bio_write_page
 => mpage_submit_page
 => mpage_process_page_bufs
 => mpage_prepare_extent_to_map
 => ext4_do_writepages
 => ext4_writepages
 => do_writepages
 => __writeback_single_inode
Link: https://lkml.kernel.org/r/20230117152236.010941267@goodmis.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Tom Zanussi <zanussi@kernel.org>
Cc: Ross Zwisler <zwisler@google.com>
Cc: Ching-lin Yu <chinglinyu@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
			
			
This commit is contained in:
		
							parent
							
								
									288709c9f3
								
							
						
					
					
						commit
						00cf3d672a
					
				
					 4 changed files with 87 additions and 5 deletions
				
			
		| 
						 | 
					@ -113,6 +113,10 @@ enum trace_type {
 | 
				
			||||||
#define MEM_FAIL(condition, fmt, ...)					\
 | 
					#define MEM_FAIL(condition, fmt, ...)					\
 | 
				
			||||||
	DO_ONCE_LITE_IF(condition, pr_err, "ERROR: " fmt, ##__VA_ARGS__)
 | 
						DO_ONCE_LITE_IF(condition, pr_err, "ERROR: " fmt, ##__VA_ARGS__)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define HIST_STACKTRACE_DEPTH	16
 | 
				
			||||||
 | 
					#define HIST_STACKTRACE_SIZE	(HIST_STACKTRACE_DEPTH * sizeof(unsigned long))
 | 
				
			||||||
 | 
					#define HIST_STACKTRACE_SKIP	5
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * syscalls are special, and need special handling, this is why
 | 
					 * syscalls are special, and need special handling, this is why
 | 
				
			||||||
 * they are not included in trace_entries.h
 | 
					 * they are not included in trace_entries.h
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -480,10 +480,6 @@ DEFINE_HIST_FIELD_FN(u8);
 | 
				
			||||||
#define for_each_hist_key_field(i, hist_data)	\
 | 
					#define for_each_hist_key_field(i, hist_data)	\
 | 
				
			||||||
	for ((i) = (hist_data)->n_vals; (i) < (hist_data)->n_fields; (i)++)
 | 
						for ((i) = (hist_data)->n_vals; (i) < (hist_data)->n_fields; (i)++)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define HIST_STACKTRACE_DEPTH	16
 | 
					 | 
				
			||||||
#define HIST_STACKTRACE_SIZE	(HIST_STACKTRACE_DEPTH * sizeof(unsigned long))
 | 
					 | 
				
			||||||
#define HIST_STACKTRACE_SKIP	5
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define HITCOUNT_IDX		0
 | 
					#define HITCOUNT_IDX		0
 | 
				
			||||||
#define HIST_KEY_SIZE_MAX	(MAX_FILTER_STR_VAL + HIST_STACKTRACE_SIZE)
 | 
					#define HIST_KEY_SIZE_MAX	(MAX_FILTER_STR_VAL + HIST_STACKTRACE_SIZE)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3869,6 +3865,9 @@ static int check_synth_field(struct synth_event *event,
 | 
				
			||||||
	    && field->is_dynamic)
 | 
						    && field->is_dynamic)
 | 
				
			||||||
		return 0;
 | 
							return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (strstr(hist_field->type, "long[") && field->is_stack)
 | 
				
			||||||
 | 
							return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (strcmp(field->type, hist_field->type) != 0) {
 | 
						if (strcmp(field->type, hist_field->type) != 0) {
 | 
				
			||||||
		if (field->size != hist_field->size ||
 | 
							if (field->size != hist_field->size ||
 | 
				
			||||||
		    (!field->is_string && field->is_signed != hist_field->is_signed))
 | 
							    (!field->is_string && field->is_signed != hist_field->is_signed))
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -173,6 +173,14 @@ static int synth_field_is_string(char *type)
 | 
				
			||||||
	return false;
 | 
						return false;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int synth_field_is_stack(char *type)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						if (strstr(type, "long[") != NULL)
 | 
				
			||||||
 | 
							return true;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return false;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int synth_field_string_size(char *type)
 | 
					static int synth_field_string_size(char *type)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	char buf[4], *end, *start;
 | 
						char buf[4], *end, *start;
 | 
				
			||||||
| 
						 | 
					@ -248,6 +256,8 @@ static int synth_field_size(char *type)
 | 
				
			||||||
		size = sizeof(gfp_t);
 | 
							size = sizeof(gfp_t);
 | 
				
			||||||
	else if (synth_field_is_string(type))
 | 
						else if (synth_field_is_string(type))
 | 
				
			||||||
		size = synth_field_string_size(type);
 | 
							size = synth_field_string_size(type);
 | 
				
			||||||
 | 
						else if (synth_field_is_stack(type))
 | 
				
			||||||
 | 
							size = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return size;
 | 
						return size;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -292,6 +302,8 @@ static const char *synth_field_fmt(char *type)
 | 
				
			||||||
		fmt = "%x";
 | 
							fmt = "%x";
 | 
				
			||||||
	else if (synth_field_is_string(type))
 | 
						else if (synth_field_is_string(type))
 | 
				
			||||||
		fmt = "%.*s";
 | 
							fmt = "%.*s";
 | 
				
			||||||
 | 
						else if (synth_field_is_stack(type))
 | 
				
			||||||
 | 
							fmt = "%s";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return fmt;
 | 
						return fmt;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -371,6 +383,23 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
 | 
				
			||||||
						 i == se->n_fields - 1 ? "" : " ");
 | 
											 i == se->n_fields - 1 ? "" : " ");
 | 
				
			||||||
				n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
 | 
									n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
							} else if (se->fields[i]->is_stack) {
 | 
				
			||||||
 | 
								u32 offset, data_offset, len;
 | 
				
			||||||
 | 
								unsigned long *p, *end;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								offset = (u32)entry->fields[n_u64];
 | 
				
			||||||
 | 
								data_offset = offset & 0xffff;
 | 
				
			||||||
 | 
								len = offset >> 16;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								p = (void *)entry + data_offset;
 | 
				
			||||||
 | 
								end = (void *)p + len - (sizeof(long) - 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								trace_seq_printf(s, "%s=STACK:\n", se->fields[i]->name);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								for (; *p && p < end; p++)
 | 
				
			||||||
 | 
									trace_seq_printf(s, "=> %pS\n", (void *)*p);
 | 
				
			||||||
 | 
								n_u64++;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		} else {
 | 
							} else {
 | 
				
			||||||
			struct trace_print_flags __flags[] = {
 | 
								struct trace_print_flags __flags[] = {
 | 
				
			||||||
			    __def_gfpflag_names, {-1, NULL} };
 | 
								    __def_gfpflag_names, {-1, NULL} };
 | 
				
			||||||
| 
						 | 
					@ -446,6 +475,43 @@ static unsigned int trace_string(struct synth_trace_event *entry,
 | 
				
			||||||
	return len;
 | 
						return len;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static unsigned int trace_stack(struct synth_trace_event *entry,
 | 
				
			||||||
 | 
									 struct synth_event *event,
 | 
				
			||||||
 | 
									 long *stack,
 | 
				
			||||||
 | 
									 unsigned int data_size,
 | 
				
			||||||
 | 
									 unsigned int *n_u64)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						unsigned int len;
 | 
				
			||||||
 | 
						u32 data_offset;
 | 
				
			||||||
 | 
						void *data_loc;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						data_offset = struct_size(entry, fields, event->n_u64);
 | 
				
			||||||
 | 
						data_offset += data_size;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for (len = 0; len < HIST_STACKTRACE_DEPTH; len++) {
 | 
				
			||||||
 | 
							if (!stack[len])
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Include the zero'd element if it fits */
 | 
				
			||||||
 | 
						if (len < HIST_STACKTRACE_DEPTH)
 | 
				
			||||||
 | 
							len++;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						len *= sizeof(long);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Find the dynamic section to copy the stack into. */
 | 
				
			||||||
 | 
						data_loc = (void *)entry + data_offset;
 | 
				
			||||||
 | 
						memcpy(data_loc, stack, len);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Fill in the field that holds the offset/len combo */
 | 
				
			||||||
 | 
						data_offset |= len << 16;
 | 
				
			||||||
 | 
						*(u32 *)&entry->fields[*n_u64] = data_offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						(*n_u64)++;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return len;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static notrace void trace_event_raw_event_synth(void *__data,
 | 
					static notrace void trace_event_raw_event_synth(void *__data,
 | 
				
			||||||
						u64 *var_ref_vals,
 | 
											u64 *var_ref_vals,
 | 
				
			||||||
						unsigned int *var_ref_idx)
 | 
											unsigned int *var_ref_idx)
 | 
				
			||||||
| 
						 | 
					@ -498,6 +564,12 @@ static notrace void trace_event_raw_event_synth(void *__data,
 | 
				
			||||||
					   event->fields[i]->is_dynamic,
 | 
										   event->fields[i]->is_dynamic,
 | 
				
			||||||
					   data_size, &n_u64);
 | 
										   data_size, &n_u64);
 | 
				
			||||||
			data_size += len; /* only dynamic string increments */
 | 
								data_size += len; /* only dynamic string increments */
 | 
				
			||||||
 | 
							} if (event->fields[i]->is_stack) {
 | 
				
			||||||
 | 
							        long *stack = (long *)(long)var_ref_vals[val_idx];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								len = trace_stack(entry, event, stack,
 | 
				
			||||||
 | 
										   data_size, &n_u64);
 | 
				
			||||||
 | 
								data_size += len;
 | 
				
			||||||
		} else {
 | 
							} else {
 | 
				
			||||||
			struct synth_field *field = event->fields[i];
 | 
								struct synth_field *field = event->fields[i];
 | 
				
			||||||
			u64 val = var_ref_vals[val_idx];
 | 
								u64 val = var_ref_vals[val_idx];
 | 
				
			||||||
| 
						 | 
					@ -560,6 +632,9 @@ static int __set_synth_event_print_fmt(struct synth_event *event,
 | 
				
			||||||
		    event->fields[i]->is_dynamic)
 | 
							    event->fields[i]->is_dynamic)
 | 
				
			||||||
			pos += snprintf(buf + pos, LEN_OR_ZERO,
 | 
								pos += snprintf(buf + pos, LEN_OR_ZERO,
 | 
				
			||||||
				", __get_str(%s)", event->fields[i]->name);
 | 
									", __get_str(%s)", event->fields[i]->name);
 | 
				
			||||||
 | 
							else if (event->fields[i]->is_stack)
 | 
				
			||||||
 | 
								pos += snprintf(buf + pos, LEN_OR_ZERO,
 | 
				
			||||||
 | 
									", __get_stacktrace(%s)", event->fields[i]->name);
 | 
				
			||||||
		else
 | 
							else
 | 
				
			||||||
			pos += snprintf(buf + pos, LEN_OR_ZERO,
 | 
								pos += snprintf(buf + pos, LEN_OR_ZERO,
 | 
				
			||||||
					", REC->%s", event->fields[i]->name);
 | 
										", REC->%s", event->fields[i]->name);
 | 
				
			||||||
| 
						 | 
					@ -696,7 +771,8 @@ static struct synth_field *parse_synth_field(int argc, char **argv,
 | 
				
			||||||
		ret = -EINVAL;
 | 
							ret = -EINVAL;
 | 
				
			||||||
		goto free;
 | 
							goto free;
 | 
				
			||||||
	} else if (size == 0) {
 | 
						} else if (size == 0) {
 | 
				
			||||||
		if (synth_field_is_string(field->type)) {
 | 
							if (synth_field_is_string(field->type) ||
 | 
				
			||||||
 | 
							    synth_field_is_stack(field->type)) {
 | 
				
			||||||
			char *type;
 | 
								char *type;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			len = sizeof("__data_loc ") + strlen(field->type) + 1;
 | 
								len = sizeof("__data_loc ") + strlen(field->type) + 1;
 | 
				
			||||||
| 
						 | 
					@ -727,6 +803,8 @@ static struct synth_field *parse_synth_field(int argc, char **argv,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (synth_field_is_string(field->type))
 | 
						if (synth_field_is_string(field->type))
 | 
				
			||||||
		field->is_string = true;
 | 
							field->is_string = true;
 | 
				
			||||||
 | 
						else if (synth_field_is_stack(field->type))
 | 
				
			||||||
 | 
							field->is_stack = true;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	field->is_signed = synth_field_signed(field->type);
 | 
						field->is_signed = synth_field_signed(field->type);
 | 
				
			||||||
 out:
 | 
					 out:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -18,6 +18,7 @@ struct synth_field {
 | 
				
			||||||
	bool is_signed;
 | 
						bool is_signed;
 | 
				
			||||||
	bool is_string;
 | 
						bool is_string;
 | 
				
			||||||
	bool is_dynamic;
 | 
						bool is_dynamic;
 | 
				
			||||||
 | 
						bool is_stack;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct synth_event {
 | 
					struct synth_event {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue