mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	This is a breakdown of perf_mem_data_src.mem_dtlb values.  It assumes
PMU drivers set the PERF_MEM_TLB_HIT bit together with the appropriate level.
Having PERF_MEM_TLB_MISS set means the lookup failed to find an entry in
any level of the TLB.  For now, it doesn't use the PERF_MEM_TLB_{WK,OS} bits.
Also, it seems Intel machines don't distinguish L1 from L2 precisely, so I
added ANY_HIT (printed as "L?-Hit") to handle that case.
  $ perf mem report -F overhead,dtlb,dso --stdio
  ...
  #           --- D-TLB ----
  # Overhead   L?-Hit   Miss  Shared Object
  # ........  ..............  .................
  #
      67.03%    99.5%   0.5%  [unknown]
      31.23%    99.2%   0.8%  [kernel.kallsyms]
       1.08%    97.8%   2.2%  [i915]
       0.36%   100.0%   0.0%  [JIT] tid 6853
       0.12%   100.0%   0.0%  [drm]
       0.05%   100.0%   0.0%  [drm_kms_helper]
       0.05%   100.0%   0.0%  [ext4]
       0.02%   100.0%   0.0%  [aesni_intel]
       0.02%   100.0%   0.0%  [crc32c_intel]
       0.02%   100.0%   0.0%  [dm_crypt]
       ...
Committer testing:
  # perf report --header | grep cpudesc
  # cpudesc : AMD Ryzen 9 9950X3D 16-Core Processor
  # perf mem report -F overhead,dtlb,dso --stdio | head -20
  # To display the perf.data header info, please use --header/--header-only options.
  #
  #
  # Total Lost Samples: 0
  #
  # Samples: 2K of event 'cycles:P'
  # Total weight : 2637
  # Sort order   : local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc
  #
  #           ---------- D-TLB -----------
  # Overhead   L1-Hit L2-Hit   Miss  Other  Shared Object
  # ........  ............................  .................................
  #
      77.47%    18.4%   0.1%   0.6%  80.9%  [kernel.kallsyms]
       5.61%    36.5%   0.7%   1.4%  61.5%  libxul.so
       2.77%    39.7%   0.0%  12.3%  47.9%  libc.so.6
       2.01%    34.0%   1.9%   1.9%  62.3%  libglib-2.0.so.0.8400.1
       1.93%    31.4%   2.0%   2.0%  64.7%  [amdgpu]
       1.63%    48.8%   0.0%   0.0%  51.2%  [JIT] tid 60168
       1.14%     3.3%   0.0%   0.0%  96.7%  [vdso]
  #
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lore.kernel.org/r/20250430205548.789750-12-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
		
	
			
		
			
				
	
	
		
			149 lines
		
	
	
	
		
			4.9 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			149 lines
		
	
	
	
		
			4.9 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/* SPDX-License-Identifier: GPL-2.0 */
 | 
						|
#ifndef __PERF_MEM_EVENTS_H
 | 
						|
#define __PERF_MEM_EVENTS_H
 | 
						|
 | 
						|
#include <stdbool.h>
 | 
						|
#include <linux/types.h>
 | 
						|
 | 
						|
struct perf_mem_event {
 | 
						|
	bool		supported;
 | 
						|
	bool		ldlat;
 | 
						|
	u32		aux_event;
 | 
						|
	const char	*tag;
 | 
						|
	const char	*name;
 | 
						|
	const char	*event_name;
 | 
						|
};
 | 
						|
 | 
						|
/*
 * Kinds of memory events.  PERF_MEM_EVENTS__MAX is not an event itself: it is
 * the number of kinds and is used as the dimension of perf_mem_events[] and
 * perf_mem_record[].
 */
enum {
	PERF_MEM_EVENTS__LOAD,
	PERF_MEM_EVENTS__STORE,
	PERF_MEM_EVENTS__LOAD_STORE,
	PERF_MEM_EVENTS__MAX,	/* count of the entries above; keep last */
};
 | 
						|
 | 
						|
/* Forward declarations: this header only uses pointers to these types. */
struct evsel;
struct mem_info;
struct perf_pmu;
 | 
						|
 | 
						|
extern unsigned int perf_mem_events__loads_ldlat;
 | 
						|
extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX];
 | 
						|
extern bool perf_mem_record[PERF_MEM_EVENTS__MAX];
 | 
						|
 | 
						|
/*
 * Per-PMU memory event helpers.
 *
 * NOTE(review): only the prototypes are visible here.  Return-value
 * conventions (e.g. 0 / negative on error for the int-returning functions)
 * and pointer ownership are not established by this header; check the
 * definitions before relying on them.
 */
int perf_pmu__mem_events_parse(struct perf_pmu *pmu, const char *str);
int perf_pmu__mem_events_init(void);

/* presumably returns the event of kind @i (a PERF_MEM_EVENTS__* value) for @pmu */
struct perf_mem_event *perf_pmu__mem_events_ptr(struct perf_pmu *pmu, int i);
struct perf_pmu *perf_mem_events_find_pmu(void);
int perf_pmu__mem_events_num_mem_pmus(struct perf_pmu *pmu);
bool is_mem_loads_aux_event(struct evsel *leader);

void perf_pmu__mem_events_list(struct perf_pmu *pmu);
/*
 * NOTE(review): @rec_argv / @argv_nr look like an argument vector and its
 * in/out element count, and @event_name_storage_out like storage handed
 * back to the caller -- confirm who frees it.
 */
int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
				 char **event_name_storage_out);
 | 
						|
 | 
						|
/*
 * Formatters that take a destination buffer (@out, capacity @sz) and the
 * sample's @mem_info, which they do not modify (const).
 *
 * NOTE(review): the "scnprintf" suffix suggests the return value is the
 * number of characters written; confirm in the definitions.  The tlb/lvl/
 * snp/lck/blk parts of the names presumably select the TLB, memory level,
 * snoop, lock and blocked fields of the data source.
 */
int perf_mem__tlb_scnprintf(char *out, size_t sz, const struct mem_info *mem_info);
int perf_mem__lvl_scnprintf(char *out, size_t sz, const struct mem_info *mem_info);
int perf_mem__snp_scnprintf(char *out, size_t sz, const struct mem_info *mem_info);
int perf_mem__lck_scnprintf(char *out, size_t sz, const struct mem_info *mem_info);
int perf_mem__blk_scnprintf(char *out, size_t sz, const struct mem_info *mem_info);

/* Same buffer/size/mem_info shape as above; presumably used by perf script. */
int perf_script__meminfo_scnprintf(char *bf, size_t size, const struct mem_info *mem_info);
 | 
						|
 | 
						|
struct c2c_stats {
 | 
						|
	u32	nr_entries;
 | 
						|
 | 
						|
	u32	locks;               /* count of 'lock' transactions */
 | 
						|
	u32	store;               /* count of all stores in trace */
 | 
						|
	u32	st_uncache;          /* stores to uncacheable address */
 | 
						|
	u32	st_noadrs;           /* cacheable store with no address */
 | 
						|
	u32	st_l1hit;            /* count of stores that hit L1D */
 | 
						|
	u32	st_l1miss;           /* count of stores that miss L1D */
 | 
						|
	u32	st_na;               /* count of stores with memory level is not available */
 | 
						|
	u32	load;                /* count of all loads in trace */
 | 
						|
	u32	ld_excl;             /* exclusive loads, rmt/lcl DRAM - snp none/miss */
 | 
						|
	u32	ld_shared;           /* shared loads, rmt/lcl DRAM - snp hit */
 | 
						|
	u32	ld_uncache;          /* loads to uncacheable address */
 | 
						|
	u32	ld_io;               /* loads to io address */
 | 
						|
	u32	ld_miss;             /* loads miss */
 | 
						|
	u32	ld_noadrs;           /* cacheable load with no address */
 | 
						|
	u32	ld_fbhit;            /* count of loads hitting Fill Buffer */
 | 
						|
	u32	ld_l1hit;            /* count of loads that hit L1D */
 | 
						|
	u32	ld_l2hit;            /* count of loads that hit L2D */
 | 
						|
	u32	ld_llchit;           /* count of loads that hit LLC */
 | 
						|
	u32	lcl_hitm;            /* count of loads with local HITM  */
 | 
						|
	u32	rmt_hitm;            /* count of loads with remote HITM */
 | 
						|
	u32	tot_hitm;            /* count of loads with local and remote HITM */
 | 
						|
	u32	lcl_peer;            /* count of loads with local peer cache */
 | 
						|
	u32	rmt_peer;            /* count of loads with remote peer cache */
 | 
						|
	u32	tot_peer;            /* count of loads with local and remote peer cache */
 | 
						|
	u32	rmt_hit;             /* count of loads with remote hit clean; */
 | 
						|
	u32	lcl_dram;            /* count of loads miss to local DRAM */
 | 
						|
	u32	rmt_dram;            /* count of loads miss to remote DRAM */
 | 
						|
	u32	blk_data;            /* count of loads blocked by data */
 | 
						|
	u32	blk_addr;            /* count of loads blocked by address conflict */
 | 
						|
	u32	nomap;               /* count of load/stores with no phys addrs */
 | 
						|
	u32	noparse;             /* count of unparsable data sources */
 | 
						|
};
 | 
						|
 | 
						|
struct hist_entry;
/*
 * NOTE(review): bodies are not visible here.  By name, c2c_decode_stats()
 * updates @stats from the sample described by @mi, and c2c_add_stats()
 * accumulates the counters of @add into @stats -- confirm in the
 * definitions, including the meaning of the int return value.
 */
int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi);
void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add);
 | 
						|
 | 
						|
/*
 * Which memory-stat breakdown is being asked for.  Passed as @mst to
 * mem_stat_index() and mem_stat_name().  Each value has a same-named
 * enum mem_stat_<type> in this header listing its categories.
 */
enum mem_stat_type {
	PERF_MEM_STAT_OP,	/* see enum mem_stat_op */
	PERF_MEM_STAT_CACHE,	/* see enum mem_stat_cache */
	PERF_MEM_STAT_MEMORY,	/* see enum mem_stat_memory */
	PERF_MEM_STAT_SNOOP,	/* see enum mem_stat_snoop */
	PERF_MEM_STAT_DTLB,	/* see enum mem_stat_dtlb */
};
 | 
						|
 | 
						|
#define MEM_STAT_PRINT_LEN  7  /* 1 space + 5 characters (e.g. "100.0") + 1 percent sign */
 | 
						|
 | 
						|
/*
 * Categories of the PERF_MEM_STAT_OP breakdown.
 * NOTE(review): presumably these are the index values mem_stat_index()
 * returns for that type, so the order should be kept stable -- confirm.
 */
enum mem_stat_op {
	MEM_STAT_OP_LOAD,
	MEM_STAT_OP_STORE,
	MEM_STAT_OP_LDST,
	MEM_STAT_OP_PFETCH,
	MEM_STAT_OP_EXEC,
	MEM_STAT_OP_OTHER,
};
 | 
						|
 | 
						|
/*
 * Categories of the PERF_MEM_STAT_CACHE breakdown.
 * NOTE(review): presumably these are the index values mem_stat_index()
 * returns for that type, so the order should be kept stable -- confirm.
 */
enum mem_stat_cache {
	MEM_STAT_CACHE_L1,
	MEM_STAT_CACHE_L2,
	MEM_STAT_CACHE_L3,
	MEM_STAT_CACHE_L4,
	MEM_STAT_CACHE_L1_BUF,
	MEM_STAT_CACHE_L2_BUF,
	MEM_STAT_CACHE_OTHER,
};
 | 
						|
 | 
						|
/*
 * Categories of the PERF_MEM_STAT_MEMORY breakdown.
 * NOTE(review): presumably these are the index values mem_stat_index()
 * returns for that type, so the order should be kept stable -- confirm.
 */
enum mem_stat_memory {
	MEM_STAT_MEMORY_RAM,
	MEM_STAT_MEMORY_MSC,
	MEM_STAT_MEMORY_UNC,
	MEM_STAT_MEMORY_CXL,
	MEM_STAT_MEMORY_IO,
	MEM_STAT_MEMORY_PMEM,
	MEM_STAT_MEMORY_OTHER,
};
 | 
						|
 | 
						|
/*
 * Categories of the PERF_MEM_STAT_SNOOP breakdown.
 * NOTE(review): presumably these are the index values mem_stat_index()
 * returns for that type, so the order should be kept stable -- confirm.
 */
enum mem_stat_snoop {
	MEM_STAT_SNOOP_HIT,
	MEM_STAT_SNOOP_HITM,
	MEM_STAT_SNOOP_MISS,
	MEM_STAT_SNOOP_OTHER,
};
 | 
						|
 | 
						|
/*
 * Categories of the PERF_MEM_STAT_DTLB breakdown (data-TLB result of the
 * sampled access, taken from perf_mem_data_src.mem_dtlb).
 * NOTE(review): presumably these are the index values mem_stat_index()
 * returns for that type, so the order should be kept stable -- confirm.
 */
enum mem_stat_dtlb {
	MEM_STAT_DTLB_L1_HIT,	/* reported as "L1-Hit" */
	MEM_STAT_DTLB_L2_HIT,	/* reported as "L2-Hit" */
	MEM_STAT_DTLB_ANY_HIT,	/* hit without a precise L1/L2 level; reported as "L?-Hit" */
	MEM_STAT_DTLB_MISS,	/* not found in any TLB level */
	MEM_STAT_DTLB_OTHER,	/* anything else; reported as "Other" */
};
 | 
						|
 | 
						|
/*
 * NOTE(review): bodies are not visible here.  By signature,
 * mem_stat_index() maps a raw @data_src value to an int for breakdown @mst
 * (presumably one of the matching enum mem_stat_<type> values), and
 * mem_stat_name() returns a string for index @idx of breakdown @mst
 * (presumably the column header) -- confirm in the definitions, including
 * what is returned for out-of-range input.
 */
int mem_stat_index(const enum mem_stat_type mst, const u64 data_src);
const char *mem_stat_name(const enum mem_stat_type mst, const int idx);
 | 
						|
 | 
						|
#endif /* __PERF_MEM_EVENTS_H */
 |