mirror of
				https://github.com/torvalds/linux.git
				synced 2025-10-31 08:38:45 +02:00 
			
		
		
		
	 24f1e32c60
			
		
	
	
		24f1e32c60
		
	
	
	
	
		
			
This patch rebases the implementation of the breakpoints API on top of
perf events instances.
Each breakpoint is now a perf event that handles the
register scheduling, thread/cpu attachment, etc..
The new layering is now made as follows:
       ptrace       kgdb      ftrace   perf syscall
          \          |          /         /
           \         |         /         /
                                        /
            Core breakpoint API        /
                                      /
                     |               /
                     |              /
              Breakpoints perf events
                     |
                     |
               Breakpoints PMU ---- Debug Register constraints handling
                                    (Part of core breakpoint API)
                     |
                     |
             Hardware debug registers
Reasons of this rewrite:
- Use the centralized/optimized pmu registers scheduling,
  implying an easier arch integration
- More powerful register handling: perf attributes (pinned/flexible
  events, exclusive/non-exclusive, tunable period, etc...)
Impact:
- New perf ABI: the hardware breakpoints counters
- Ptrace breakpoints setting remains tricky and still needs some per
  thread breakpoints references.
Todo (in the order):
- Support breakpoints perf counter events for perf tools (ie: implement
  perf_bpcounter_event())
- Support from perf tools
Changes in v2:
- Follow the perf "event " rename
- The ptrace regression has been fixed (ptrace breakpoint perf events
  weren't released when a task ended)
- Drop the struct hw_breakpoint and store generic fields in
  perf_event_attr.
- Separate core and arch specific headers, drop
  asm-generic/hw_breakpoint.h and create linux/hw_breakpoint.h
- Use new generic len/type for breakpoint
- Handle off case: when breakpoints api is not supported by an arch
Changes in v3:
- Fix broken CONFIG_KVM, we need to propagate the breakpoint api
  changes to kvm when we exit the guest and restore the bp registers
  to the host.
Changes in v4:
- Drop the hw_breakpoint_restore() stub as it is only used by KVM
- EXPORT_SYMBOL_GPL hw_breakpoint_restore() as KVM can be built as a
  module
- Restore the breakpoints unconditionally on kvm guest exit:
  TIF_DEBUG_THREAD no longer covers every case of running
  breakpoints and vcpu->arch.switch_db_regs might not always be
  set when the guest used debug registers.
  (Waiting for a reliable optimization)
Changes in v5:
- Split-up the asm-generic/hw-breakpoint.h moving to
  linux/hw_breakpoint.h into a separate patch
- Optimize the breakpoints restoring while switching from kvm guest
  to host. We only want to restore the state if we have active
  breakpoints to the host, otherwise we don't care about messed-up
  address registers.
- Add asm/hw_breakpoint.h to Kbuild
- Fix bad breakpoint type in trace_selftest.c
Changes in v6:
- Fix wrong header inclusion in trace.h (triggered a build
  error with CONFIG_FTRACE_SELFTEST)
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Prasad <prasad@linux.vnet.ibm.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jan Kiszka <jan.kiszka@web.de>
Cc: Jiri Slaby <jirislaby@gmail.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Paul Mundt <lethal@linux-sh.org>
		
	
			
		
			
				
	
	
		
			904 lines
		
	
	
	
		
			22 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			904 lines
		
	
	
	
		
			22 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * Performance events:
 | |
|  *
 | |
|  *    Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
 | |
|  *    Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar
 | |
|  *    Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra
 | |
|  *
 | |
|  * Data type definitions, declarations, prototypes.
 | |
|  *
 | |
|  *    Started by: Thomas Gleixner and Ingo Molnar
 | |
|  *
 | |
|  * For licencing details see kernel-base/COPYING
 | |
|  */
 | |
| #ifndef _LINUX_PERF_EVENT_H
 | |
| #define _LINUX_PERF_EVENT_H
 | |
| 
 | |
| #include <linux/types.h>
 | |
| #include <linux/ioctl.h>
 | |
| #include <asm/byteorder.h>
 | |
| 
 | |
| #ifdef CONFIG_HAVE_HW_BREAKPOINT
 | |
| #include <asm/hw_breakpoint.h>
 | |
| #endif
 | |
| 
 | |
| /*
 | |
|  * User-space ABI bits:
 | |
|  */
 | |
| 
 | |
| /*
 | |
|  * attr.type
 | |
|  */
 | |
| enum perf_type_id {
 | |
| 	PERF_TYPE_HARDWARE			= 0,
 | |
| 	PERF_TYPE_SOFTWARE			= 1,
 | |
| 	PERF_TYPE_TRACEPOINT			= 2,
 | |
| 	PERF_TYPE_HW_CACHE			= 3,
 | |
| 	PERF_TYPE_RAW				= 4,
 | |
| 	PERF_TYPE_BREAKPOINT			= 5,
 | |
| 
 | |
| 	PERF_TYPE_MAX,				/* non-ABI */
 | |
| };
 | |
| 
 | |
| /*
 | |
|  * Generalized performance event event_id types, used by the
 | |
|  * attr.event_id parameter of the sys_perf_event_open()
 | |
|  * syscall:
 | |
|  */
 | |
| enum perf_hw_id {
 | |
| 	/*
 | |
| 	 * Common hardware events, generalized by the kernel:
 | |
| 	 */
 | |
| 	PERF_COUNT_HW_CPU_CYCLES		= 0,
 | |
| 	PERF_COUNT_HW_INSTRUCTIONS		= 1,
 | |
| 	PERF_COUNT_HW_CACHE_REFERENCES		= 2,
 | |
| 	PERF_COUNT_HW_CACHE_MISSES		= 3,
 | |
| 	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
 | |
| 	PERF_COUNT_HW_BRANCH_MISSES		= 5,
 | |
| 	PERF_COUNT_HW_BUS_CYCLES		= 6,
 | |
| 
 | |
| 	PERF_COUNT_HW_MAX,			/* non-ABI */
 | |
| };
 | |
| 
 | |
| /*
 | |
|  * Generalized hardware cache events:
 | |
|  *
 | |
|  *       { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
 | |
|  *       { read, write, prefetch } x
 | |
|  *       { accesses, misses }
 | |
|  */
 | |
| enum perf_hw_cache_id {
 | |
| 	PERF_COUNT_HW_CACHE_L1D			= 0,
 | |
| 	PERF_COUNT_HW_CACHE_L1I			= 1,
 | |
| 	PERF_COUNT_HW_CACHE_LL			= 2,
 | |
| 	PERF_COUNT_HW_CACHE_DTLB		= 3,
 | |
| 	PERF_COUNT_HW_CACHE_ITLB		= 4,
 | |
| 	PERF_COUNT_HW_CACHE_BPU			= 5,
 | |
| 
 | |
| 	PERF_COUNT_HW_CACHE_MAX,		/* non-ABI */
 | |
| };
 | |
| 
 | |
| enum perf_hw_cache_op_id {
 | |
| 	PERF_COUNT_HW_CACHE_OP_READ		= 0,
 | |
| 	PERF_COUNT_HW_CACHE_OP_WRITE		= 1,
 | |
| 	PERF_COUNT_HW_CACHE_OP_PREFETCH		= 2,
 | |
| 
 | |
| 	PERF_COUNT_HW_CACHE_OP_MAX,		/* non-ABI */
 | |
| };
 | |
| 
 | |
| enum perf_hw_cache_op_result_id {
 | |
| 	PERF_COUNT_HW_CACHE_RESULT_ACCESS	= 0,
 | |
| 	PERF_COUNT_HW_CACHE_RESULT_MISS		= 1,
 | |
| 
 | |
| 	PERF_COUNT_HW_CACHE_RESULT_MAX,		/* non-ABI */
 | |
| };
 | |
| 
 | |
| /*
 | |
|  * Special "software" events provided by the kernel, even if the hardware
 | |
|  * does not support performance events. These events measure various
 | |
|  * physical and sw events of the kernel (and allow the profiling of them as
 | |
|  * well):
 | |
|  */
 | |
| enum perf_sw_ids {
 | |
| 	PERF_COUNT_SW_CPU_CLOCK			= 0,
 | |
| 	PERF_COUNT_SW_TASK_CLOCK		= 1,
 | |
| 	PERF_COUNT_SW_PAGE_FAULTS		= 2,
 | |
| 	PERF_COUNT_SW_CONTEXT_SWITCHES		= 3,
 | |
| 	PERF_COUNT_SW_CPU_MIGRATIONS		= 4,
 | |
| 	PERF_COUNT_SW_PAGE_FAULTS_MIN		= 5,
 | |
| 	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
 | |
| 
 | |
| 	PERF_COUNT_SW_MAX,			/* non-ABI */
 | |
| };
 | |
| 
 | |
| /*
 | |
|  * Bits that can be set in attr.sample_type to request information
 | |
|  * in the overflow packets.
 | |
|  */
 | |
| enum perf_event_sample_format {
 | |
| 	PERF_SAMPLE_IP				= 1U << 0,
 | |
| 	PERF_SAMPLE_TID				= 1U << 1,
 | |
| 	PERF_SAMPLE_TIME			= 1U << 2,
 | |
| 	PERF_SAMPLE_ADDR			= 1U << 3,
 | |
| 	PERF_SAMPLE_READ			= 1U << 4,
 | |
| 	PERF_SAMPLE_CALLCHAIN			= 1U << 5,
 | |
| 	PERF_SAMPLE_ID				= 1U << 6,
 | |
| 	PERF_SAMPLE_CPU				= 1U << 7,
 | |
| 	PERF_SAMPLE_PERIOD			= 1U << 8,
 | |
| 	PERF_SAMPLE_STREAM_ID			= 1U << 9,
 | |
| 	PERF_SAMPLE_RAW				= 1U << 10,
 | |
| 
 | |
| 	PERF_SAMPLE_MAX = 1U << 11,		/* non-ABI */
 | |
| };
 | |
| 
 | |
| /*
 | |
|  * The format of the data returned by read() on a perf event fd,
 | |
|  * as specified by attr.read_format:
 | |
|  *
 | |
|  * struct read_format {
 | |
|  *	{ u64		value;
 | |
|  *	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
 | |
|  *	  { u64		time_running; } && PERF_FORMAT_RUNNING
 | |
|  *	  { u64		id;           } && PERF_FORMAT_ID
 | |
|  *	} && !PERF_FORMAT_GROUP
 | |
|  *
 | |
|  *	{ u64		nr;
 | |
|  *	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
 | |
|  *	  { u64		time_running; } && PERF_FORMAT_RUNNING
 | |
|  *	  { u64		value;
 | |
|  *	    { u64	id;           } && PERF_FORMAT_ID
 | |
|  *	  }		cntr[nr];
 | |
|  *	} && PERF_FORMAT_GROUP
 | |
|  * };
 | |
|  */
 | |
| enum perf_event_read_format {
 | |
| 	PERF_FORMAT_TOTAL_TIME_ENABLED		= 1U << 0,
 | |
| 	PERF_FORMAT_TOTAL_TIME_RUNNING		= 1U << 1,
 | |
| 	PERF_FORMAT_ID				= 1U << 2,
 | |
| 	PERF_FORMAT_GROUP			= 1U << 3,
 | |
| 
 | |
| 	PERF_FORMAT_MAX = 1U << 4,		/* non-ABI */
 | |
| };
 | |
| 
 | |
| #define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */
 | |
| 
 | |
/*
 * Hardware event_id to monitor via a performance monitoring event:
 */
struct perf_event_attr {

	/*
	 * Major type: hardware/software/tracepoint/etc.
	 * One of enum perf_type_id.
	 */
	__u32			type;

	/*
	 * Size of the attr structure, for fwd/bwd compat.
	 * Should be set to PERF_ATTR_SIZE_VER0 by current userspace.
	 */
	__u32			size;

	/*
	 * Type specific configuration information.
	 * e.g. an enum perf_hw_id value for PERF_TYPE_HARDWARE,
	 * or a raw PMU encoding for PERF_TYPE_RAW.
	 */
	__u64			config;

	/*
	 * Sampling pace: the 'freq' bit below selects which union
	 * member is meaningful (period in events vs frequency in Hz).
	 */
	union {
		__u64		sample_period;
		__u64		sample_freq;
	};

	/* Bitmask of enum perf_event_sample_format values. */
	__u64			sample_type;
	/* Bitmask of enum perf_event_read_format values. */
	__u64			read_format;

	__u64			disabled       :  1, /* off by default        */
				inherit	       :  1, /* children inherit it   */
				pinned	       :  1, /* must always be on PMU */
				exclusive      :  1, /* only group on PMU     */
				exclude_user   :  1, /* don't count user      */
				exclude_kernel :  1, /* ditto kernel          */
				exclude_hv     :  1, /* ditto hypervisor      */
				exclude_idle   :  1, /* don't count when idle */
				mmap           :  1, /* include mmap data     */
				comm	       :  1, /* include comm data     */
				freq           :  1, /* use freq, not period  */
				inherit_stat   :  1, /* per task counts       */
				enable_on_exec :  1, /* next exec enables     */
				task           :  1, /* trace fork/exit       */
				watermark      :  1, /* wakeup_watermark      */

				__reserved_1   : 49;

	/*
	 * Wakeup policy for the mmap ring-buffer: the 'watermark'
	 * bit above selects which union member applies.
	 */
	union {
		__u32		wakeup_events;	  /* wakeup every n events */
		__u32		wakeup_watermark; /* bytes before wakeup   */
	};

	/*
	 * Type-specific extra configuration; currently only the
	 * hardware breakpoint fields (used with PERF_TYPE_BREAKPOINT).
	 */
	union {
		struct { /* Hardware breakpoint info */
			__u64		bp_addr;
			__u32		bp_type;
			__u32		bp_len;
		};
	};

	/* NOTE(review): reserved fields — presumably must be zero; confirm against perf core. */
	__u32			__reserved_2;

	__u64			__reserved_3;
};
 | |
| 
 | |
| /*
 | |
|  * Ioctls that can be done on a perf event fd:
 | |
|  */
 | |
/*
 * PERF_EVENT_IOC_PERIOD must encode the userspace-visible __u64, not the
 * kernel-internal u64: this header is consumed by userspace (the ioctl
 * definitions sit outside the #ifdef __KERNEL__ section) and 'u64' is not
 * defined there.  sizeof(__u64) == sizeof(u64), so the encoded ioctl
 * number — and therefore the ABI — is unchanged.
 */
#define PERF_EVENT_IOC_ENABLE		_IO ('$', 0)
#define PERF_EVENT_IOC_DISABLE		_IO ('$', 1)
#define PERF_EVENT_IOC_REFRESH		_IO ('$', 2)
#define PERF_EVENT_IOC_RESET		_IO ('$', 3)
#define PERF_EVENT_IOC_PERIOD		_IOW('$', 4, __u64)
#define PERF_EVENT_IOC_SET_OUTPUT	_IO ('$', 5)
#define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
 | |
| 
 | |
| enum perf_event_ioc_flags {
 | |
| 	PERF_IOC_FLAG_GROUP		= 1U << 0,
 | |
| };
 | |
| 
 | |
| /*
 | |
|  * Structure of the page that can be mapped via mmap
 | |
|  */
 | |
| struct perf_event_mmap_page {
 | |
| 	__u32	version;		/* version number of this structure */
 | |
| 	__u32	compat_version;		/* lowest version this is compat with */
 | |
| 
 | |
| 	/*
 | |
| 	 * Bits needed to read the hw events in user-space.
 | |
| 	 *
 | |
| 	 *   u32 seq;
 | |
| 	 *   s64 count;
 | |
| 	 *
 | |
| 	 *   do {
 | |
| 	 *     seq = pc->lock;
 | |
| 	 *
 | |
| 	 *     barrier()
 | |
| 	 *     if (pc->index) {
 | |
| 	 *       count = pmc_read(pc->index - 1);
 | |
| 	 *       count += pc->offset;
 | |
| 	 *     } else
 | |
| 	 *       goto regular_read;
 | |
| 	 *
 | |
| 	 *     barrier();
 | |
| 	 *   } while (pc->lock != seq);
 | |
| 	 *
 | |
| 	 * NOTE: for obvious reason this only works on self-monitoring
 | |
| 	 *       processes.
 | |
| 	 */
 | |
| 	__u32	lock;			/* seqlock for synchronization */
 | |
| 	__u32	index;			/* hardware event identifier */
 | |
| 	__s64	offset;			/* add to hardware event value */
 | |
| 	__u64	time_enabled;		/* time event active */
 | |
| 	__u64	time_running;		/* time event on cpu */
 | |
| 
 | |
| 		/*
 | |
| 		 * Hole for extension of the self monitor capabilities
 | |
| 		 */
 | |
| 
 | |
| 	__u64	__reserved[123];	/* align to 1k */
 | |
| 
 | |
| 	/*
 | |
| 	 * Control data for the mmap() data buffer.
 | |
| 	 *
 | |
| 	 * User-space reading the @data_head value should issue an rmb(), on
 | |
| 	 * SMP capable platforms, after reading this value -- see
 | |
| 	 * perf_event_wakeup().
 | |
| 	 *
 | |
| 	 * When the mapping is PROT_WRITE the @data_tail value should be
 | |
| 	 * written by userspace to reflect the last read data. In this case
 | |
| 	 * the kernel will not over-write unread data.
 | |
| 	 */
 | |
| 	__u64   data_head;		/* head in the data section */
 | |
| 	__u64	data_tail;		/* user-space written tail */
 | |
| };
 | |
| 
 | |
| #define PERF_RECORD_MISC_CPUMODE_MASK		(3 << 0)
 | |
| #define PERF_RECORD_MISC_CPUMODE_UNKNOWN		(0 << 0)
 | |
| #define PERF_RECORD_MISC_KERNEL			(1 << 0)
 | |
| #define PERF_RECORD_MISC_USER			(2 << 0)
 | |
| #define PERF_RECORD_MISC_HYPERVISOR		(3 << 0)
 | |
| 
 | |
/* Common header preceding every record in the mmap data buffer. */
struct perf_event_header {
	__u32	type;		/* one of enum perf_event_type */
	__u16	misc;		/* PERF_RECORD_MISC_* flags */
	__u16	size;		/* total record size, including this header */
};
 | |
| 
 | |
| enum perf_event_type {
 | |
| 
 | |
| 	/*
 | |
| 	 * The MMAP events record the PROT_EXEC mappings so that we can
 | |
| 	 * correlate userspace IPs to code. They have the following structure:
 | |
| 	 *
 | |
| 	 * struct {
 | |
| 	 *	struct perf_event_header	header;
 | |
| 	 *
 | |
| 	 *	u32				pid, tid;
 | |
| 	 *	u64				addr;
 | |
| 	 *	u64				len;
 | |
| 	 *	u64				pgoff;
 | |
| 	 *	char				filename[];
 | |
| 	 * };
 | |
| 	 */
 | |
| 	PERF_RECORD_MMAP			= 1,
 | |
| 
 | |
| 	/*
 | |
| 	 * struct {
 | |
| 	 *	struct perf_event_header	header;
 | |
| 	 *	u64				id;
 | |
| 	 *	u64				lost;
 | |
| 	 * };
 | |
| 	 */
 | |
| 	PERF_RECORD_LOST			= 2,
 | |
| 
 | |
| 	/*
 | |
| 	 * struct {
 | |
| 	 *	struct perf_event_header	header;
 | |
| 	 *
 | |
| 	 *	u32				pid, tid;
 | |
| 	 *	char				comm[];
 | |
| 	 * };
 | |
| 	 */
 | |
| 	PERF_RECORD_COMM			= 3,
 | |
| 
 | |
| 	/*
 | |
| 	 * struct {
 | |
| 	 *	struct perf_event_header	header;
 | |
| 	 *	u32				pid, ppid;
 | |
| 	 *	u32				tid, ptid;
 | |
| 	 *	u64				time;
 | |
| 	 * };
 | |
| 	 */
 | |
| 	PERF_RECORD_EXIT			= 4,
 | |
| 
 | |
| 	/*
 | |
| 	 * struct {
 | |
| 	 *	struct perf_event_header	header;
 | |
| 	 *	u64				time;
 | |
| 	 *	u64				id;
 | |
| 	 *	u64				stream_id;
 | |
| 	 * };
 | |
| 	 */
 | |
| 	PERF_RECORD_THROTTLE		= 5,
 | |
| 	PERF_RECORD_UNTHROTTLE		= 6,
 | |
| 
 | |
| 	/*
 | |
| 	 * struct {
 | |
| 	 *	struct perf_event_header	header;
 | |
| 	 *	u32				pid, ppid;
 | |
| 	 *	u32				tid, ptid;
 | |
| 	 *	u64				time;
 | |
| 	 * };
 | |
| 	 */
 | |
| 	PERF_RECORD_FORK			= 7,
 | |
| 
 | |
| 	/*
 | |
| 	 * struct {
 | |
| 	 * 	struct perf_event_header	header;
 | |
| 	 * 	u32				pid, tid;
 | |
| 	 *
 | |
| 	 * 	struct read_format		values;
 | |
| 	 * };
 | |
| 	 */
 | |
| 	PERF_RECORD_READ			= 8,
 | |
| 
 | |
| 	/*
 | |
| 	 * struct {
 | |
| 	 *	struct perf_event_header	header;
 | |
| 	 *
 | |
| 	 *	{ u64			ip;	  } && PERF_SAMPLE_IP
 | |
| 	 *	{ u32			pid, tid; } && PERF_SAMPLE_TID
 | |
| 	 *	{ u64			time;     } && PERF_SAMPLE_TIME
 | |
| 	 *	{ u64			addr;     } && PERF_SAMPLE_ADDR
 | |
| 	 *	{ u64			id;	  } && PERF_SAMPLE_ID
 | |
| 	 *	{ u64			stream_id;} && PERF_SAMPLE_STREAM_ID
 | |
| 	 *	{ u32			cpu, res; } && PERF_SAMPLE_CPU
 | |
| 	 *	{ u64			period;   } && PERF_SAMPLE_PERIOD
 | |
| 	 *
 | |
| 	 *	{ struct read_format	values;	  } && PERF_SAMPLE_READ
 | |
| 	 *
 | |
| 	 *	{ u64			nr,
 | |
| 	 *	  u64			ips[nr];  } && PERF_SAMPLE_CALLCHAIN
 | |
| 	 *
 | |
| 	 *	#
 | |
| 	 *	# The RAW record below is opaque data wrt the ABI
 | |
| 	 *	#
 | |
| 	 *	# That is, the ABI doesn't make any promises wrt to
 | |
| 	 *	# the stability of its content, it may vary depending
 | |
| 	 *	# on event, hardware, kernel version and phase of
 | |
| 	 *	# the moon.
 | |
| 	 *	#
 | |
| 	 *	# In other words, PERF_SAMPLE_RAW contents are not an ABI.
 | |
| 	 *	#
 | |
| 	 *
 | |
| 	 *	{ u32			size;
 | |
| 	 *	  char                  data[size];}&& PERF_SAMPLE_RAW
 | |
| 	 * };
 | |
| 	 */
 | |
| 	PERF_RECORD_SAMPLE		= 9,
 | |
| 
 | |
| 	PERF_RECORD_MAX,			/* non-ABI */
 | |
| };
 | |
| 
 | |
| enum perf_callchain_context {
 | |
| 	PERF_CONTEXT_HV			= (__u64)-32,
 | |
| 	PERF_CONTEXT_KERNEL		= (__u64)-128,
 | |
| 	PERF_CONTEXT_USER		= (__u64)-512,
 | |
| 
 | |
| 	PERF_CONTEXT_GUEST		= (__u64)-2048,
 | |
| 	PERF_CONTEXT_GUEST_KERNEL	= (__u64)-2176,
 | |
| 	PERF_CONTEXT_GUEST_USER		= (__u64)-2560,
 | |
| 
 | |
| 	PERF_CONTEXT_MAX		= (__u64)-4095,
 | |
| };
 | |
| 
 | |
| #define PERF_FLAG_FD_NO_GROUP	(1U << 0)
 | |
| #define PERF_FLAG_FD_OUTPUT	(1U << 1)
 | |
| 
 | |
| #ifdef __KERNEL__
 | |
| /*
 | |
|  * Kernel-internal data types and definitions:
 | |
|  */
 | |
| 
 | |
| #ifdef CONFIG_PERF_EVENTS
 | |
| # include <asm/perf_event.h>
 | |
| #endif
 | |
| 
 | |
| #include <linux/list.h>
 | |
| #include <linux/mutex.h>
 | |
| #include <linux/rculist.h>
 | |
| #include <linux/rcupdate.h>
 | |
| #include <linux/spinlock.h>
 | |
| #include <linux/hrtimer.h>
 | |
| #include <linux/fs.h>
 | |
| #include <linux/pid_namespace.h>
 | |
| #include <linux/workqueue.h>
 | |
| #include <asm/atomic.h>
 | |
| 
 | |
| #define PERF_MAX_STACK_DEPTH		255
 | |
| 
 | |
| struct perf_callchain_entry {
 | |
| 	__u64				nr;
 | |
| 	__u64				ip[PERF_MAX_STACK_DEPTH];
 | |
| };
 | |
| 
 | |
| struct perf_raw_record {
 | |
| 	u32				size;
 | |
| 	void				*data;
 | |
| };
 | |
| 
 | |
| struct task_struct;
 | |
| 
 | |
| /**
 | |
|  * struct hw_perf_event - performance event hardware details:
 | |
|  */
 | |
| struct hw_perf_event {
 | |
| #ifdef CONFIG_PERF_EVENTS
 | |
| 	union {
 | |
| 		struct { /* hardware */
 | |
| 			u64		config;
 | |
| 			unsigned long	config_base;
 | |
| 			unsigned long	event_base;
 | |
| 			int		idx;
 | |
| 		};
 | |
| 		union { /* software */
 | |
| 			atomic64_t	count;
 | |
| 			struct hrtimer	hrtimer;
 | |
| 		};
 | |
| #ifdef CONFIG_HAVE_HW_BREAKPOINT
 | |
| 		union { /* breakpoint */
 | |
| 			struct arch_hw_breakpoint	info;
 | |
| 		};
 | |
| #endif
 | |
| 	};
 | |
| 	atomic64_t			prev_count;
 | |
| 	u64				sample_period;
 | |
| 	u64				last_period;
 | |
| 	atomic64_t			period_left;
 | |
| 	u64				interrupts;
 | |
| 
 | |
| 	u64				freq_count;
 | |
| 	u64				freq_interrupts;
 | |
| 	u64				freq_stamp;
 | |
| #endif
 | |
| };
 | |
| 
 | |
| struct perf_event;
 | |
| 
 | |
/**
 * struct pmu - generic performance monitoring unit
 *
 * Arch code provides one of these per PMU type; the core calls into it
 * to schedule events on and off the hardware.
 */
struct pmu {
	int (*enable)			(struct perf_event *event);	/* schedule event onto the PMU; 0 on success */
	void (*disable)			(struct perf_event *event);	/* remove event from the PMU */
	void (*read)			(struct perf_event *event);	/* update event->count from hardware */
	void (*unthrottle)		(struct perf_event *event);	/* re-enable after interrupt throttling */
};
 | |
| 
 | |
| /**
 | |
|  * enum perf_event_active_state - the states of a event
 | |
|  */
 | |
| enum perf_event_active_state {
 | |
| 	PERF_EVENT_STATE_ERROR		= -2,
 | |
| 	PERF_EVENT_STATE_OFF		= -1,
 | |
| 	PERF_EVENT_STATE_INACTIVE	=  0,
 | |
| 	PERF_EVENT_STATE_ACTIVE		=  1,
 | |
| };
 | |
| 
 | |
| struct file;
 | |
| 
 | |
| struct perf_mmap_data {
 | |
| 	struct rcu_head			rcu_head;
 | |
| #ifdef CONFIG_PERF_USE_VMALLOC
 | |
| 	struct work_struct		work;
 | |
| #endif
 | |
| 	int				data_order;
 | |
| 	int				nr_pages;	/* nr of data pages  */
 | |
| 	int				writable;	/* are we writable   */
 | |
| 	int				nr_locked;	/* nr pages mlocked  */
 | |
| 
 | |
| 	atomic_t			poll;		/* POLL_ for wakeups */
 | |
| 	atomic_t			events;		/* event_id limit       */
 | |
| 
 | |
| 	atomic_long_t			head;		/* write position    */
 | |
| 	atomic_long_t			done_head;	/* completed head    */
 | |
| 
 | |
| 	atomic_t			lock;		/* concurrent writes */
 | |
| 	atomic_t			wakeup;		/* needs a wakeup    */
 | |
| 	atomic_t			lost;		/* nr records lost   */
 | |
| 
 | |
| 	long				watermark;	/* wakeup watermark  */
 | |
| 
 | |
| 	struct perf_event_mmap_page	*user_page;
 | |
| 	void				*data_pages[0];
 | |
| };
 | |
| 
 | |
| struct perf_pending_entry {
 | |
| 	struct perf_pending_entry *next;
 | |
| 	void (*func)(struct perf_pending_entry *);
 | |
| };
 | |
| 
 | |
| typedef void (*perf_callback_t)(struct perf_event *, void *);
 | |
| 
 | |
| /**
 | |
|  * struct perf_event - performance event kernel representation:
 | |
|  */
 | |
| struct perf_event {
 | |
| #ifdef CONFIG_PERF_EVENTS
 | |
| 	struct list_head		group_entry;
 | |
| 	struct list_head		event_entry;
 | |
| 	struct list_head		sibling_list;
 | |
| 	int				nr_siblings;
 | |
| 	struct perf_event		*group_leader;
 | |
| 	struct perf_event		*output;
 | |
| 	const struct pmu		*pmu;
 | |
| 
 | |
| 	enum perf_event_active_state	state;
 | |
| 	atomic64_t			count;
 | |
| 
 | |
| 	/*
 | |
| 	 * These are the total time in nanoseconds that the event
 | |
| 	 * has been enabled (i.e. eligible to run, and the task has
 | |
| 	 * been scheduled in, if this is a per-task event)
 | |
| 	 * and running (scheduled onto the CPU), respectively.
 | |
| 	 *
 | |
| 	 * They are computed from tstamp_enabled, tstamp_running and
 | |
| 	 * tstamp_stopped when the event is in INACTIVE or ACTIVE state.
 | |
| 	 */
 | |
| 	u64				total_time_enabled;
 | |
| 	u64				total_time_running;
 | |
| 
 | |
| 	/*
 | |
| 	 * These are timestamps used for computing total_time_enabled
 | |
| 	 * and total_time_running when the event is in INACTIVE or
 | |
| 	 * ACTIVE state, measured in nanoseconds from an arbitrary point
 | |
| 	 * in time.
 | |
| 	 * tstamp_enabled: the notional time when the event was enabled
 | |
| 	 * tstamp_running: the notional time when the event was scheduled on
 | |
| 	 * tstamp_stopped: in INACTIVE state, the notional time when the
 | |
| 	 *	event was scheduled off.
 | |
| 	 */
 | |
| 	u64				tstamp_enabled;
 | |
| 	u64				tstamp_running;
 | |
| 	u64				tstamp_stopped;
 | |
| 
 | |
| 	struct perf_event_attr		attr;
 | |
| 	struct hw_perf_event		hw;
 | |
| 
 | |
| 	struct perf_event_context	*ctx;
 | |
| 	struct file			*filp;
 | |
| 
 | |
| 	/*
 | |
| 	 * These accumulate total time (in nanoseconds) that children
 | |
| 	 * events have been enabled and running, respectively.
 | |
| 	 */
 | |
| 	atomic64_t			child_total_time_enabled;
 | |
| 	atomic64_t			child_total_time_running;
 | |
| 
 | |
| 	/*
 | |
| 	 * Protect attach/detach and child_list:
 | |
| 	 */
 | |
| 	struct mutex			child_mutex;
 | |
| 	struct list_head		child_list;
 | |
| 	struct perf_event		*parent;
 | |
| 
 | |
| 	int				oncpu;
 | |
| 	int				cpu;
 | |
| 
 | |
| 	struct list_head		owner_entry;
 | |
| 	struct task_struct		*owner;
 | |
| 
 | |
| 	/* mmap bits */
 | |
| 	struct mutex			mmap_mutex;
 | |
| 	atomic_t			mmap_count;
 | |
| 	struct perf_mmap_data		*data;
 | |
| 
 | |
| 	/* poll related */
 | |
| 	wait_queue_head_t		waitq;
 | |
| 	struct fasync_struct		*fasync;
 | |
| 
 | |
| 	/* delayed work for NMIs and such */
 | |
| 	int				pending_wakeup;
 | |
| 	int				pending_kill;
 | |
| 	int				pending_disable;
 | |
| 	struct perf_pending_entry	pending;
 | |
| 
 | |
| 	atomic_t			event_limit;
 | |
| 
 | |
| 	void (*destroy)(struct perf_event *);
 | |
| 	struct rcu_head			rcu_head;
 | |
| 
 | |
| 	struct pid_namespace		*ns;
 | |
| 	u64				id;
 | |
| 
 | |
| #ifdef CONFIG_EVENT_PROFILE
 | |
| 	struct event_filter		*filter;
 | |
| #endif
 | |
| 
 | |
| 	perf_callback_t			callback;
 | |
| 
 | |
| 	perf_callback_t			event_callback;
 | |
| 
 | |
| #endif /* CONFIG_PERF_EVENTS */
 | |
| };
 | |
| 
 | |
| /**
 | |
|  * struct perf_event_context - event context structure
 | |
|  *
 | |
|  * Used as a container for task events and CPU events as well:
 | |
|  */
 | |
| struct perf_event_context {
 | |
| 	/*
 | |
| 	 * Protect the states of the events in the list,
 | |
| 	 * nr_active, and the list:
 | |
| 	 */
 | |
| 	spinlock_t			lock;
 | |
| 	/*
 | |
| 	 * Protect the list of events.  Locking either mutex or lock
 | |
| 	 * is sufficient to ensure the list doesn't change; to change
 | |
| 	 * the list you need to lock both the mutex and the spinlock.
 | |
| 	 */
 | |
| 	struct mutex			mutex;
 | |
| 
 | |
| 	struct list_head		group_list;
 | |
| 	struct list_head		event_list;
 | |
| 	int				nr_events;
 | |
| 	int				nr_active;
 | |
| 	int				is_active;
 | |
| 	int				nr_stat;
 | |
| 	atomic_t			refcount;
 | |
| 	struct task_struct		*task;
 | |
| 
 | |
| 	/*
 | |
| 	 * Context clock, runs when context enabled.
 | |
| 	 */
 | |
| 	u64				time;
 | |
| 	u64				timestamp;
 | |
| 
 | |
| 	/*
 | |
| 	 * These fields let us detect when two contexts have both
 | |
| 	 * been cloned (inherited) from a common ancestor.
 | |
| 	 */
 | |
| 	struct perf_event_context	*parent_ctx;
 | |
| 	u64				parent_gen;
 | |
| 	u64				generation;
 | |
| 	int				pin_count;
 | |
| 	struct rcu_head			rcu_head;
 | |
| };
 | |
| 
 | |
| /**
 | |
|  * struct perf_event_cpu_context - per cpu event context structure
 | |
|  */
 | |
| struct perf_cpu_context {
 | |
| 	struct perf_event_context	ctx;
 | |
| 	struct perf_event_context	*task_ctx;
 | |
| 	int				active_oncpu;
 | |
| 	int				max_pertask;
 | |
| 	int				exclusive;
 | |
| 
 | |
| 	/*
 | |
| 	 * Recursion avoidance:
 | |
| 	 *
 | |
| 	 * task, softirq, irq, nmi context
 | |
| 	 */
 | |
| 	int				recursion[4];
 | |
| };
 | |
| 
 | |
| struct perf_output_handle {
 | |
| 	struct perf_event		*event;
 | |
| 	struct perf_mmap_data		*data;
 | |
| 	unsigned long			head;
 | |
| 	unsigned long			offset;
 | |
| 	int				nmi;
 | |
| 	int				sample;
 | |
| 	int				locked;
 | |
| 	unsigned long			flags;
 | |
| };
 | |
| 
 | |
| #ifdef CONFIG_PERF_EVENTS
 | |
| 
 | |
| /*
 | |
|  * Set by architecture code:
 | |
|  */
 | |
| extern int perf_max_events;
 | |
| 
 | |
| extern const struct pmu *hw_perf_event_init(struct perf_event *event);
 | |
| 
 | |
| extern void perf_event_task_sched_in(struct task_struct *task, int cpu);
 | |
| extern void perf_event_task_sched_out(struct task_struct *task,
 | |
| 					struct task_struct *next, int cpu);
 | |
| extern void perf_event_task_tick(struct task_struct *task, int cpu);
 | |
| extern int perf_event_init_task(struct task_struct *child);
 | |
| extern void perf_event_exit_task(struct task_struct *child);
 | |
| extern void perf_event_free_task(struct task_struct *task);
 | |
| extern void set_perf_event_pending(void);
 | |
| extern void perf_event_do_pending(void);
 | |
| extern void perf_event_print_debug(void);
 | |
| extern void __perf_disable(void);
 | |
| extern bool __perf_enable(void);
 | |
| extern void perf_disable(void);
 | |
| extern void perf_enable(void);
 | |
| extern int perf_event_task_disable(void);
 | |
| extern int perf_event_task_enable(void);
 | |
| extern int hw_perf_group_sched_in(struct perf_event *group_leader,
 | |
| 	       struct perf_cpu_context *cpuctx,
 | |
| 	       struct perf_event_context *ctx, int cpu);
 | |
| extern void perf_event_update_userpage(struct perf_event *event);
 | |
| extern int perf_event_release_kernel(struct perf_event *event);
 | |
| extern struct perf_event *
 | |
| perf_event_create_kernel_counter(struct perf_event_attr *attr,
 | |
| 				int cpu,
 | |
| 				pid_t pid,
 | |
| 				perf_callback_t callback);
 | |
| extern u64 perf_event_read_value(struct perf_event *event);
 | |
| 
 | |
| struct perf_sample_data {
 | |
| 	u64				type;
 | |
| 
 | |
| 	u64				ip;
 | |
| 	struct {
 | |
| 		u32	pid;
 | |
| 		u32	tid;
 | |
| 	}				tid_entry;
 | |
| 	u64				time;
 | |
| 	u64				addr;
 | |
| 	u64				id;
 | |
| 	u64				stream_id;
 | |
| 	struct {
 | |
| 		u32	cpu;
 | |
| 		u32	reserved;
 | |
| 	}				cpu_entry;
 | |
| 	u64				period;
 | |
| 	struct perf_callchain_entry	*callchain;
 | |
| 	struct perf_raw_record		*raw;
 | |
| };
 | |
| 
 | |
| extern void perf_output_sample(struct perf_output_handle *handle,
 | |
| 			       struct perf_event_header *header,
 | |
| 			       struct perf_sample_data *data,
 | |
| 			       struct perf_event *event);
 | |
| extern void perf_prepare_sample(struct perf_event_header *header,
 | |
| 				struct perf_sample_data *data,
 | |
| 				struct perf_event *event,
 | |
| 				struct pt_regs *regs);
 | |
| 
 | |
| extern int perf_event_overflow(struct perf_event *event, int nmi,
 | |
| 				 struct perf_sample_data *data,
 | |
| 				 struct pt_regs *regs);
 | |
| 
 | |
| /*
 | |
|  * Return 1 for a software event, 0 for a hardware event
 | |
|  */
 | |
| static inline int is_software_event(struct perf_event *event)
 | |
| {
 | |
| 	return (event->attr.type != PERF_TYPE_RAW) &&
 | |
| 		(event->attr.type != PERF_TYPE_HARDWARE) &&
 | |
| 		(event->attr.type != PERF_TYPE_HW_CACHE);
 | |
| }
 | |
| 
 | |
| extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
 | |
| 
 | |
extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);

/*
 * Fire a software event (context switch, page fault, ...).
 *
 * Fast path: perf_swevent_enabled[] counts active counters per sw event
 * id, so hot kernel paths only pay for the out-of-line call when someone
 * is actually listening for this event.
 */
static inline void
perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
{
	if (atomic_read(&perf_swevent_enabled[event_id]))
		__perf_sw_event(event_id, nr, nmi, regs, addr);
}
 | |
| 
 | |
| extern void __perf_event_mmap(struct vm_area_struct *vma);
 | |
| 
 | |
| static inline void perf_event_mmap(struct vm_area_struct *vma)
 | |
| {
 | |
| 	if (vma->vm_flags & VM_EXEC)
 | |
| 		__perf_event_mmap(vma);
 | |
| }
 | |
| 
 | |
| extern void perf_event_comm(struct task_struct *tsk);
 | |
| extern void perf_event_fork(struct task_struct *tsk);
 | |
| 
 | |
| extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
 | |
| 
 | |
| extern int sysctl_perf_event_paranoid;
 | |
| extern int sysctl_perf_event_mlock;
 | |
| extern int sysctl_perf_event_sample_rate;
 | |
| 
 | |
| extern void perf_event_init(void);
 | |
| extern void perf_tp_event(int event_id, u64 addr, u64 count,
 | |
| 				 void *record, int entry_size);
 | |
| extern void perf_bp_event(struct perf_event *event, void *data);
 | |
| 
 | |
| #ifndef perf_misc_flags
 | |
| #define perf_misc_flags(regs)	(user_mode(regs) ? PERF_RECORD_MISC_USER : \
 | |
| 				 PERF_RECORD_MISC_KERNEL)
 | |
| #define perf_instruction_pointer(regs)	instruction_pointer(regs)
 | |
| #endif
 | |
| 
 | |
| extern int perf_output_begin(struct perf_output_handle *handle,
 | |
| 			     struct perf_event *event, unsigned int size,
 | |
| 			     int nmi, int sample);
 | |
| extern void perf_output_end(struct perf_output_handle *handle);
 | |
| extern void perf_output_copy(struct perf_output_handle *handle,
 | |
| 			     const void *buf, unsigned int len);
 | |
| #else
 | |
| static inline void
 | |
| perf_event_task_sched_in(struct task_struct *task, int cpu)		{ }
 | |
| static inline void
 | |
| perf_event_task_sched_out(struct task_struct *task,
 | |
| 			    struct task_struct *next, int cpu)		{ }
 | |
| static inline void
 | |
| perf_event_task_tick(struct task_struct *task, int cpu)			{ }
 | |
| static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
 | |
| static inline void perf_event_exit_task(struct task_struct *child)	{ }
 | |
| static inline void perf_event_free_task(struct task_struct *task)	{ }
 | |
| static inline void perf_event_do_pending(void)				{ }
 | |
| static inline void perf_event_print_debug(void)				{ }
 | |
| static inline void perf_disable(void)					{ }
 | |
| static inline void perf_enable(void)					{ }
 | |
| static inline int perf_event_task_disable(void)				{ return -EINVAL; }
 | |
| static inline int perf_event_task_enable(void)				{ return -EINVAL; }
 | |
| 
 | |
| static inline void
 | |
| perf_sw_event(u32 event_id, u64 nr, int nmi,
 | |
| 		     struct pt_regs *regs, u64 addr)			{ }
 | |
| static inline void
 | |
| perf_bp_event(struct perf_event *event, void *data)		{ }
 | |
| 
 | |
| static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
 | |
| static inline void perf_event_comm(struct task_struct *tsk)		{ }
 | |
| static inline void perf_event_fork(struct task_struct *tsk)		{ }
 | |
| static inline void perf_event_init(void)				{ }
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #define perf_output_put(handle, x) \
 | |
| 	perf_output_copy((handle), &(x), sizeof(x))
 | |
| 
 | |
| #endif /* __KERNEL__ */
 | |
| #endif /* _LINUX_PERF_EVENT_H */
 |