mirror of
https://github.com/torvalds/linux.git
synced 2025-11-07 03:59:22 +02:00
perf stat: Add user_time and system_time events
It bothered me that during benchmarking using 'perf stat' (to collect
for example CPU cache events) I could not simultaneously retrieve the
times spend in user or kernel mode in a machine readable format.
When running 'perf stat' the output for humans contains the times
reported by rusage and wait4.
$ perf stat -e cache-misses:u -- true
Performance counter stats for 'true':
4,206 cache-misses:u
0.001113619 seconds time elapsed
0.001175000 seconds user
0.000000000 seconds sys
But 'perf stat's machine-readable format does not provide this information.
$ perf stat -x, -e cache-misses:u -- true
4282,,cache-misses:u,492859,100.00,,
I found no way to retrieve this information using the available events
while using machine-readable output.
This patch adds two new tool internal events 'user_time' and
'system_time', similarly to the already present 'duration_time' event.
Both events use the already collected rusage information obtained by
wait4 and tracked in the global ru_stats.
Examples presenting cache-misses and rusage information in both human
and machine-readable form:
$ perf stat -e duration_time,user_time,system_time,cache-misses -- grep -q -r duration_time .
Performance counter stats for 'grep -q -r duration_time .':
67,422,542 ns duration_time:u
50,517,000 ns user_time:u
16,839,000 ns system_time:u
30,937 cache-misses:u
0.067422542 seconds time elapsed
0.050517000 seconds user
0.016839000 seconds sys
$ perf stat -x, -e duration_time,user_time,system_time,cache-misses -- grep -q -r duration_time .
72134524,ns,duration_time:u,72134524,100.00,,
65225000,ns,user_time:u,65225000,100.00,,
6865000,ns,system_time:u,6865000,100.00,,
38705,,cache-misses:u,71189328,100.00,,
Signed-off-by: Florian Fischer <florian.fischer@muhq.space>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20220420102354.468173-3-florian.fischer@muhq.space
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
c735b0a521
commit
b03b89b350
4 changed files with 37 additions and 9 deletions
|
|
@ -342,7 +342,8 @@ static int evsel__write_stat_event(struct evsel *counter, int cpu_map_idx, u32 t
|
|||
static int read_single_counter(struct evsel *counter, int cpu_map_idx,
|
||||
int thread, struct timespec *rs)
|
||||
{
|
||||
if (counter->tool_event == PERF_TOOL_DURATION_TIME) {
|
||||
switch(counter->tool_event) {
|
||||
case PERF_TOOL_DURATION_TIME: {
|
||||
u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL;
|
||||
struct perf_counts_values *count =
|
||||
perf_counts(counter->counts, cpu_map_idx, thread);
|
||||
|
|
@ -350,7 +351,26 @@ static int read_single_counter(struct evsel *counter, int cpu_map_idx,
|
|||
count->val = val;
|
||||
return 0;
|
||||
}
|
||||
case PERF_TOOL_USER_TIME:
|
||||
case PERF_TOOL_SYSTEM_TIME: {
|
||||
u64 val;
|
||||
struct perf_counts_values *count =
|
||||
perf_counts(counter->counts, cpu_map_idx, thread);
|
||||
if (counter->tool_event == PERF_TOOL_USER_TIME)
|
||||
val = ru_stats.ru_utime_usec_stat.mean;
|
||||
else
|
||||
val = ru_stats.ru_stime_usec_stat.mean;
|
||||
count->ena = count->run = val;
|
||||
count->val = val;
|
||||
return 0;
|
||||
}
|
||||
default:
|
||||
case PERF_TOOL_NONE:
|
||||
return evsel__read_counter(counter, cpu_map_idx, thread);
|
||||
case PERF_TOOL_MAX:
|
||||
/* This should never be reached */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -30,6 +30,10 @@ typedef int (evsel__sb_cb_t)(union perf_event *event, void *data);
|
|||
enum perf_tool_event {
|
||||
PERF_TOOL_NONE = 0,
|
||||
PERF_TOOL_DURATION_TIME = 1,
|
||||
PERF_TOOL_USER_TIME = 2,
|
||||
PERF_TOOL_SYSTEM_TIME = 3,
|
||||
|
||||
PERF_TOOL_MAX,
|
||||
};
|
||||
|
||||
/** struct evsel - event selector
|
||||
|
|
|
|||
|
|
@ -402,7 +402,9 @@ static int add_event_tool(struct list_head *list, int *idx,
|
|||
if (!evsel)
|
||||
return -ENOMEM;
|
||||
evsel->tool_event = tool_event;
|
||||
if (tool_event == PERF_TOOL_DURATION_TIME) {
|
||||
if (tool_event == PERF_TOOL_DURATION_TIME
|
||||
|| tool_event == PERF_TOOL_USER_TIME
|
||||
|| tool_event == PERF_TOOL_SYSTEM_TIME) {
|
||||
free((char *)evsel->unit);
|
||||
evsel->unit = strdup("ns");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -353,6 +353,8 @@ alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_AL
|
|||
emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
|
||||
dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
|
||||
duration_time { return tool(yyscanner, PERF_TOOL_DURATION_TIME); }
|
||||
user_time { return tool(yyscanner, PERF_TOOL_USER_TIME); }
|
||||
system_time { return tool(yyscanner, PERF_TOOL_SYSTEM_TIME); }
|
||||
bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
|
||||
cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); }
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue