mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	drm/vc4: Expose performance counters to userspace
The V3D engine has various hardware counters which might be interesting to userspace performance analysis tools. Expose new ioctls to create/destroy a performance monitor object and query the counter values of this performance monitor. Note that a performance monitor is given an ID that is only valid on the file descriptor it has been allocated from. A performance monitor can be attached to a CL submission and the driver will enable HW counters for this request and update the performance monitor values at the end of the job. Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com> Reviewed-by: Eric Anholt <eric@anholt.net> Signed-off-by: Eric Anholt <eric@anholt.net> Link: https://patchwork.freedesktop.org/patch/msgid/20180112090926.12538-1-boris.brezillon@free-electrons.com
This commit is contained in:
		
							parent
							
								
									9c950e468c
								
							
						
					
					
						commit
						65101d8c91
					
				
					 9 changed files with 474 additions and 72 deletions
				
			
		| 
						 | 
				
			
			@ -15,6 +15,7 @@ vc4-y := \
 | 
			
		|||
	vc4_vec.o \
 | 
			
		||||
	vc4_hvs.o \
 | 
			
		||||
	vc4_irq.o \
 | 
			
		||||
	vc4_perfmon.o \
 | 
			
		||||
	vc4_plane.o \
 | 
			
		||||
	vc4_render_cl.o \
 | 
			
		||||
	vc4_trace_points.o \
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -101,6 +101,7 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
 | 
			
		|||
	case DRM_VC4_PARAM_SUPPORTS_THREADED_FS:
 | 
			
		||||
	case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER:
 | 
			
		||||
	case DRM_VC4_PARAM_SUPPORTS_MADVISE:
 | 
			
		||||
	case DRM_VC4_PARAM_SUPPORTS_PERFMON:
 | 
			
		||||
		args->value = true;
 | 
			
		||||
		break;
 | 
			
		||||
	default:
 | 
			
		||||
| 
						 | 
				
			
			@ -111,6 +112,26 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
 | 
			
		|||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int vc4_open(struct drm_device *dev, struct drm_file *file)
 | 
			
		||||
{
 | 
			
		||||
	struct vc4_file *vc4file;
 | 
			
		||||
 | 
			
		||||
	vc4file = kzalloc(sizeof(*vc4file), GFP_KERNEL);
 | 
			
		||||
	if (!vc4file)
 | 
			
		||||
		return -ENOMEM;
 | 
			
		||||
 | 
			
		||||
	vc4_perfmon_open_file(vc4file);
 | 
			
		||||
	file->driver_priv = vc4file;
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void vc4_close(struct drm_device *dev, struct drm_file *file)
 | 
			
		||||
{
 | 
			
		||||
	struct vc4_file *vc4file = file->driver_priv;
 | 
			
		||||
 | 
			
		||||
	vc4_perfmon_close_file(vc4file);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static const struct vm_operations_struct vc4_vm_ops = {
 | 
			
		||||
	.fault = vc4_fault,
 | 
			
		||||
	.open = drm_gem_vm_open,
 | 
			
		||||
| 
						 | 
				
			
			@ -143,6 +164,9 @@ static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
 | 
			
		|||
	DRM_IOCTL_DEF_DRV(VC4_GET_TILING, vc4_get_tiling_ioctl, DRM_RENDER_ALLOW),
 | 
			
		||||
	DRM_IOCTL_DEF_DRV(VC4_LABEL_BO, vc4_label_bo_ioctl, DRM_RENDER_ALLOW),
 | 
			
		||||
	DRM_IOCTL_DEF_DRV(VC4_GEM_MADVISE, vc4_gem_madvise_ioctl, DRM_RENDER_ALLOW),
 | 
			
		||||
	DRM_IOCTL_DEF_DRV(VC4_PERFMON_CREATE, vc4_perfmon_create_ioctl, DRM_RENDER_ALLOW),
 | 
			
		||||
	DRM_IOCTL_DEF_DRV(VC4_PERFMON_DESTROY, vc4_perfmon_destroy_ioctl, DRM_RENDER_ALLOW),
 | 
			
		||||
	DRM_IOCTL_DEF_DRV(VC4_PERFMON_GET_VALUES, vc4_perfmon_get_values_ioctl, DRM_RENDER_ALLOW),
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static struct drm_driver vc4_drm_driver = {
 | 
			
		||||
| 
						 | 
				
			
			@ -153,6 +177,8 @@ static struct drm_driver vc4_drm_driver = {
 | 
			
		|||
			    DRIVER_RENDER |
 | 
			
		||||
			    DRIVER_PRIME),
 | 
			
		||||
	.lastclose = drm_fb_helper_lastclose,
 | 
			
		||||
	.open = vc4_open,
 | 
			
		||||
	.postclose = vc4_close,
 | 
			
		||||
	.irq_handler = vc4_irq,
 | 
			
		||||
	.irq_preinstall = vc4_irq_preinstall,
 | 
			
		||||
	.irq_postinstall = vc4_irq_postinstall,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -11,6 +11,8 @@
 | 
			
		|||
#include <drm/drm_encoder.h>
 | 
			
		||||
#include <drm/drm_gem_cma_helper.h>
 | 
			
		||||
 | 
			
		||||
#include "uapi/drm/vc4_drm.h"
 | 
			
		||||
 | 
			
		||||
/* Don't forget to update vc4_bo.c: bo_type_names[] when adding to
 | 
			
		||||
 * this.
 | 
			
		||||
 */
 | 
			
		||||
| 
						 | 
				
			
			@ -29,6 +31,36 @@ enum vc4_kernel_bo_type {
 | 
			
		|||
	VC4_BO_TYPE_COUNT
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/* Performance monitor object. The perform lifetime is controlled by userspace
 | 
			
		||||
 * using perfmon related ioctls. A perfmon can be attached to a submit_cl
 | 
			
		||||
 * request, and when this is the case, HW perf counters will be activated just
 | 
			
		||||
 * before the submit_cl is submitted to the GPU and disabled when the job is
 | 
			
		||||
 * done. This way, only events related to a specific job will be counted.
 | 
			
		||||
 */
 | 
			
		||||
struct vc4_perfmon {
 | 
			
		||||
	/* Tracks the number of users of the perfmon, when this counter reaches
 | 
			
		||||
	 * zero the perfmon is destroyed.
 | 
			
		||||
	 */
 | 
			
		||||
	refcount_t refcnt;
 | 
			
		||||
 | 
			
		||||
	/* Number of counters activated in this perfmon instance
 | 
			
		||||
	 * (should be less than DRM_VC4_MAX_PERF_COUNTERS).
 | 
			
		||||
	 */
 | 
			
		||||
	u8 ncounters;
 | 
			
		||||
 | 
			
		||||
	/* Events counted by the HW perf counters. */
 | 
			
		||||
	u8 events[DRM_VC4_MAX_PERF_COUNTERS];
 | 
			
		||||
 | 
			
		||||
	/* Storage for counter values. Counters are incremented by the HW
 | 
			
		||||
	 * perf counter values every time the perfmon is attached to a GPU job.
 | 
			
		||||
	 * This way, perfmon users don't have to retrieve the results after
 | 
			
		||||
	 * each job if they want to track events covering several submissions.
 | 
			
		||||
	 * Note that counter values can't be reset, but you can fake a reset by
 | 
			
		||||
	 * destroying the perfmon and creating a new one.
 | 
			
		||||
	 */
 | 
			
		||||
	u64 counters[0];
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct vc4_dev {
 | 
			
		||||
	struct drm_device *dev;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -121,6 +153,11 @@ struct vc4_dev {
 | 
			
		|||
	wait_queue_head_t job_wait_queue;
 | 
			
		||||
	struct work_struct job_done_work;
 | 
			
		||||
 | 
			
		||||
	/* Used to track the active perfmon if any. Access to this field is
 | 
			
		||||
	 * protected by job_lock.
 | 
			
		||||
	 */
 | 
			
		||||
	struct vc4_perfmon *active_perfmon;
 | 
			
		||||
 | 
			
		||||
	/* List of struct vc4_seqno_cb for callbacks to be made from a
 | 
			
		||||
	 * workqueue when the given seqno is passed.
 | 
			
		||||
	 */
 | 
			
		||||
| 
						 | 
				
			
			@ -406,6 +443,21 @@ struct vc4_exec_info {
 | 
			
		|||
	void *uniforms_v;
 | 
			
		||||
	uint32_t uniforms_p;
 | 
			
		||||
	uint32_t uniforms_size;
 | 
			
		||||
 | 
			
		||||
	/* Pointer to a performance monitor object if the user requested it,
 | 
			
		||||
	 * NULL otherwise.
 | 
			
		||||
	 */
 | 
			
		||||
	struct vc4_perfmon *perfmon;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/* Per-open file private data. Any driver-specific resource that has to be
 | 
			
		||||
 * released when the DRM file is closed should be placed here.
 | 
			
		||||
 */
 | 
			
		||||
struct vc4_file {
 | 
			
		||||
	struct {
 | 
			
		||||
		struct idr idr;
 | 
			
		||||
		struct mutex lock;
 | 
			
		||||
	} perfmon;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static inline struct vc4_exec_info *
 | 
			
		||||
| 
						 | 
				
			
			@ -646,3 +698,19 @@ bool vc4_check_tex_size(struct vc4_exec_info *exec,
 | 
			
		|||
/* vc4_validate_shader.c */
 | 
			
		||||
struct vc4_validated_shader_info *
 | 
			
		||||
vc4_validate_shader(struct drm_gem_cma_object *shader_obj);
 | 
			
		||||
 | 
			
		||||
/* vc4_perfmon.c */
 | 
			
		||||
void vc4_perfmon_get(struct vc4_perfmon *perfmon);
 | 
			
		||||
void vc4_perfmon_put(struct vc4_perfmon *perfmon);
 | 
			
		||||
void vc4_perfmon_start(struct vc4_dev *vc4, struct vc4_perfmon *perfmon);
 | 
			
		||||
void vc4_perfmon_stop(struct vc4_dev *vc4, struct vc4_perfmon *perfmon,
 | 
			
		||||
		      bool capture);
 | 
			
		||||
struct vc4_perfmon *vc4_perfmon_find(struct vc4_file *vc4file, int id);
 | 
			
		||||
void vc4_perfmon_open_file(struct vc4_file *vc4file);
 | 
			
		||||
void vc4_perfmon_close_file(struct vc4_file *vc4file);
 | 
			
		||||
int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data,
 | 
			
		||||
			     struct drm_file *file_priv);
 | 
			
		||||
int vc4_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
 | 
			
		||||
			      struct drm_file *file_priv);
 | 
			
		||||
int vc4_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
 | 
			
		||||
				 struct drm_file *file_priv);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -454,13 +454,29 @@ vc4_submit_next_bin_job(struct drm_device *dev)
 | 
			
		|||
 | 
			
		||||
	vc4_flush_caches(dev);
 | 
			
		||||
 | 
			
		||||
	/* Only start the perfmon if it was not already started by a previous
 | 
			
		||||
	 * job.
 | 
			
		||||
	 */
 | 
			
		||||
	if (exec->perfmon && vc4->active_perfmon != exec->perfmon)
 | 
			
		||||
		vc4_perfmon_start(vc4, exec->perfmon);
 | 
			
		||||
 | 
			
		||||
	/* Either put the job in the binner if it uses the binner, or
 | 
			
		||||
	 * immediately move it to the to-be-rendered queue.
 | 
			
		||||
	 */
 | 
			
		||||
	if (exec->ct0ca != exec->ct0ea) {
 | 
			
		||||
		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
 | 
			
		||||
	} else {
 | 
			
		||||
		struct vc4_exec_info *next;
 | 
			
		||||
 | 
			
		||||
		vc4_move_job_to_render(dev, exec);
 | 
			
		||||
		next = vc4_first_bin_job(vc4);
 | 
			
		||||
 | 
			
		||||
		/* We can't start the next bin job if the previous job had a
 | 
			
		||||
		 * different perfmon instance attached to it. The same goes
 | 
			
		||||
		 * if one of them had a perfmon attached to it and the other
 | 
			
		||||
		 * one doesn't.
 | 
			
		||||
		 */
 | 
			
		||||
		if (next && next->perfmon == exec->perfmon)
 | 
			
		||||
			goto again;
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -621,6 +637,7 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
 | 
			
		|||
		 struct ww_acquire_ctx *acquire_ctx)
 | 
			
		||||
{
 | 
			
		||||
	struct vc4_dev *vc4 = to_vc4_dev(dev);
 | 
			
		||||
	struct vc4_exec_info *renderjob;
 | 
			
		||||
	uint64_t seqno;
 | 
			
		||||
	unsigned long irqflags;
 | 
			
		||||
	struct vc4_fence *fence;
 | 
			
		||||
| 
						 | 
				
			
			@ -646,11 +663,14 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
 | 
			
		|||
 | 
			
		||||
	list_add_tail(&exec->head, &vc4->bin_job_list);
 | 
			
		||||
 | 
			
		||||
	/* If no job was executing, kick ours off.  Otherwise, it'll
 | 
			
		||||
	 * get started when the previous job's flush done interrupt
 | 
			
		||||
	 * occurs.
 | 
			
		||||
	/* If no bin job was executing and if the render job (if any) has the
 | 
			
		||||
	 * same perfmon as our job attached to it (or if both jobs don't have
 | 
			
		||||
	 * perfmon activated), then kick ours off.  Otherwise, it'll get
 | 
			
		||||
	 * started when the previous job's flush/render done interrupt occurs.
 | 
			
		||||
	 */
 | 
			
		||||
	if (vc4_first_bin_job(vc4) == exec) {
 | 
			
		||||
	renderjob = vc4_first_render_job(vc4);
 | 
			
		||||
	if (vc4_first_bin_job(vc4) == exec &&
 | 
			
		||||
	    (!renderjob || renderjob->perfmon == exec->perfmon)) {
 | 
			
		||||
		vc4_submit_next_bin_job(dev);
 | 
			
		||||
		vc4_queue_hangcheck(dev);
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -915,6 +935,9 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
 | 
			
		|||
	vc4->bin_alloc_used &= ~exec->bin_slots;
 | 
			
		||||
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 | 
			
		||||
 | 
			
		||||
	/* Release the reference we had on the perf monitor. */
 | 
			
		||||
	vc4_perfmon_put(exec->perfmon);
 | 
			
		||||
 | 
			
		||||
	mutex_lock(&vc4->power_lock);
 | 
			
		||||
	if (--vc4->power_refcount == 0) {
 | 
			
		||||
		pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
 | 
			
		||||
| 
						 | 
				
			
			@ -1067,6 +1090,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 | 
			
		|||
		    struct drm_file *file_priv)
 | 
			
		||||
{
 | 
			
		||||
	struct vc4_dev *vc4 = to_vc4_dev(dev);
 | 
			
		||||
	struct vc4_file *vc4file = file_priv->driver_priv;
 | 
			
		||||
	struct drm_vc4_submit_cl *args = data;
 | 
			
		||||
	struct vc4_exec_info *exec;
 | 
			
		||||
	struct ww_acquire_ctx acquire_ctx;
 | 
			
		||||
| 
						 | 
				
			
			@ -1080,6 +1104,11 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 | 
			
		|||
		return -EINVAL;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (args->pad2 != 0) {
 | 
			
		||||
		DRM_DEBUG("->pad2 must be set to zero\n");
 | 
			
		||||
		return -EINVAL;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
 | 
			
		||||
	if (!exec) {
 | 
			
		||||
		DRM_ERROR("malloc failure on exec struct\n");
 | 
			
		||||
| 
						 | 
				
			
			@ -1105,6 +1134,15 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 | 
			
		|||
	if (ret)
 | 
			
		||||
		goto fail;
 | 
			
		||||
 | 
			
		||||
	if (args->perfmonid) {
 | 
			
		||||
		exec->perfmon = vc4_perfmon_find(vc4file,
 | 
			
		||||
						 args->perfmonid);
 | 
			
		||||
		if (!exec->perfmon) {
 | 
			
		||||
			ret = -ENOENT;
 | 
			
		||||
			goto fail;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (exec->args->bin_cl_size != 0) {
 | 
			
		||||
		ret = vc4_get_bcl(dev, exec);
 | 
			
		||||
		if (ret)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -104,12 +104,19 @@ static void
 | 
			
		|||
vc4_irq_finish_bin_job(struct drm_device *dev)
 | 
			
		||||
{
 | 
			
		||||
	struct vc4_dev *vc4 = to_vc4_dev(dev);
 | 
			
		||||
	struct vc4_exec_info *exec = vc4_first_bin_job(vc4);
 | 
			
		||||
	struct vc4_exec_info *next, *exec = vc4_first_bin_job(vc4);
 | 
			
		||||
 | 
			
		||||
	if (!exec)
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	vc4_move_job_to_render(dev, exec);
 | 
			
		||||
	next = vc4_first_bin_job(vc4);
 | 
			
		||||
 | 
			
		||||
	/* Only submit the next job in the bin list if it matches the perfmon
 | 
			
		||||
	 * attached to the one that just finished (or if both jobs don't have
 | 
			
		||||
	 * perfmon attached to them).
 | 
			
		||||
	 */
 | 
			
		||||
	if (next && next->perfmon == exec->perfmon)
 | 
			
		||||
		vc4_submit_next_bin_job(dev);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -122,6 +129,10 @@ vc4_cancel_bin_job(struct drm_device *dev)
 | 
			
		|||
	if (!exec)
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	/* Stop the perfmon so that the next bin job can be started. */
 | 
			
		||||
	if (exec->perfmon)
 | 
			
		||||
		vc4_perfmon_stop(vc4, exec->perfmon, false);
 | 
			
		||||
 | 
			
		||||
	list_move_tail(&exec->head, &vc4->bin_job_list);
 | 
			
		||||
	vc4_submit_next_bin_job(dev);
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -131,18 +142,41 @@ vc4_irq_finish_render_job(struct drm_device *dev)
 | 
			
		|||
{
 | 
			
		||||
	struct vc4_dev *vc4 = to_vc4_dev(dev);
 | 
			
		||||
	struct vc4_exec_info *exec = vc4_first_render_job(vc4);
 | 
			
		||||
	struct vc4_exec_info *nextbin, *nextrender;
 | 
			
		||||
 | 
			
		||||
	if (!exec)
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	vc4->finished_seqno++;
 | 
			
		||||
	list_move_tail(&exec->head, &vc4->job_done_list);
 | 
			
		||||
 | 
			
		||||
	nextbin = vc4_first_bin_job(vc4);
 | 
			
		||||
	nextrender = vc4_first_render_job(vc4);
 | 
			
		||||
 | 
			
		||||
	/* Only stop the perfmon if following jobs in the queue don't expect it
 | 
			
		||||
	 * to be enabled.
 | 
			
		||||
	 */
 | 
			
		||||
	if (exec->perfmon && !nextrender &&
 | 
			
		||||
	    (!nextbin || nextbin->perfmon != exec->perfmon))
 | 
			
		||||
		vc4_perfmon_stop(vc4, exec->perfmon, true);
 | 
			
		||||
 | 
			
		||||
	/* If there's a render job waiting, start it. If this is not the case
 | 
			
		||||
	 * we may have to unblock the binner if it's been stalled because of
 | 
			
		||||
	 * perfmon (this can be checked by comparing the perfmon attached to
 | 
			
		||||
	 * the finished renderjob to the one attached to the next bin job: if
 | 
			
		||||
	 * they don't match, this means the binner is stalled and should be
 | 
			
		||||
	 * restarted).
 | 
			
		||||
	 */
 | 
			
		||||
	if (nextrender)
 | 
			
		||||
		vc4_submit_next_render_job(dev);
 | 
			
		||||
	else if (nextbin && nextbin->perfmon != exec->perfmon)
 | 
			
		||||
		vc4_submit_next_bin_job(dev);
 | 
			
		||||
 | 
			
		||||
	if (exec->fence) {
 | 
			
		||||
		dma_fence_signal_locked(exec->fence);
 | 
			
		||||
		dma_fence_put(exec->fence);
 | 
			
		||||
		exec->fence = NULL;
 | 
			
		||||
	}
 | 
			
		||||
	vc4_submit_next_render_job(dev);
 | 
			
		||||
 | 
			
		||||
	wake_up_all(&vc4->job_wait_queue);
 | 
			
		||||
	schedule_work(&vc4->job_done_work);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										188
									
								
								drivers/gpu/drm/vc4/vc4_perfmon.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										188
									
								
								drivers/gpu/drm/vc4/vc4_perfmon.c
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,188 @@
 | 
			
		|||
// SPDX-License-Identifier: GPL-2.0
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright (C) 2018 Broadcom
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * DOC: VC4 V3D performance monitor module
 | 
			
		||||
 *
 | 
			
		||||
 * The V3D block provides 16 hardware counters which can count various events.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include "vc4_drv.h"
 | 
			
		||||
#include "vc4_regs.h"
 | 
			
		||||
 | 
			
		||||
#define VC4_PERFMONID_MIN	1
 | 
			
		||||
#define VC4_PERFMONID_MAX	U32_MAX
 | 
			
		||||
 | 
			
		||||
void vc4_perfmon_get(struct vc4_perfmon *perfmon)
 | 
			
		||||
{
 | 
			
		||||
	if (perfmon)
 | 
			
		||||
		refcount_inc(&perfmon->refcnt);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void vc4_perfmon_put(struct vc4_perfmon *perfmon)
 | 
			
		||||
{
 | 
			
		||||
	if (perfmon && refcount_dec_and_test(&perfmon->refcnt))
 | 
			
		||||
		kfree(perfmon);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void vc4_perfmon_start(struct vc4_dev *vc4, struct vc4_perfmon *perfmon)
 | 
			
		||||
{
 | 
			
		||||
	unsigned int i;
 | 
			
		||||
	u32 mask;
 | 
			
		||||
 | 
			
		||||
	if (WARN_ON_ONCE(!perfmon || vc4->active_perfmon))
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	for (i = 0; i < perfmon->ncounters; i++)
 | 
			
		||||
		V3D_WRITE(V3D_PCTRS(i), perfmon->events[i]);
 | 
			
		||||
 | 
			
		||||
	mask = GENMASK(perfmon->ncounters - 1, 0);
 | 
			
		||||
	V3D_WRITE(V3D_PCTRC, mask);
 | 
			
		||||
	V3D_WRITE(V3D_PCTRE, V3D_PCTRE_EN | mask);
 | 
			
		||||
	vc4->active_perfmon = perfmon;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void vc4_perfmon_stop(struct vc4_dev *vc4, struct vc4_perfmon *perfmon,
 | 
			
		||||
		      bool capture)
 | 
			
		||||
{
 | 
			
		||||
	unsigned int i;
 | 
			
		||||
 | 
			
		||||
	if (WARN_ON_ONCE(!vc4->active_perfmon ||
 | 
			
		||||
			 perfmon != vc4->active_perfmon))
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	if (capture) {
 | 
			
		||||
		for (i = 0; i < perfmon->ncounters; i++)
 | 
			
		||||
			perfmon->counters[i] += V3D_READ(V3D_PCTR(i));
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	V3D_WRITE(V3D_PCTRE, 0);
 | 
			
		||||
	vc4->active_perfmon = NULL;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
struct vc4_perfmon *vc4_perfmon_find(struct vc4_file *vc4file, int id)
 | 
			
		||||
{
 | 
			
		||||
	struct vc4_perfmon *perfmon;
 | 
			
		||||
 | 
			
		||||
	mutex_lock(&vc4file->perfmon.lock);
 | 
			
		||||
	perfmon = idr_find(&vc4file->perfmon.idr, id);
 | 
			
		||||
	vc4_perfmon_get(perfmon);
 | 
			
		||||
	mutex_unlock(&vc4file->perfmon.lock);
 | 
			
		||||
 | 
			
		||||
	return perfmon;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void vc4_perfmon_open_file(struct vc4_file *vc4file)
 | 
			
		||||
{
 | 
			
		||||
	mutex_init(&vc4file->perfmon.lock);
 | 
			
		||||
	idr_init(&vc4file->perfmon.idr);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* idr_for_each() callback: drop the reference the ID table holds on the
 * perfmon stored in @elem. @id and @data are unused. Always returns 0.
 */
static int vc4_perfmon_idr_del(int id, void *elem, void *data)
{
	vc4_perfmon_put(elem);

	return 0;
}
 | 
			
		||||
 | 
			
		||||
void vc4_perfmon_close_file(struct vc4_file *vc4file)
 | 
			
		||||
{
 | 
			
		||||
	mutex_lock(&vc4file->perfmon.lock);
 | 
			
		||||
	idr_for_each(&vc4file->perfmon.idr, vc4_perfmon_idr_del, NULL);
 | 
			
		||||
	idr_destroy(&vc4file->perfmon.idr);
 | 
			
		||||
	mutex_unlock(&vc4file->perfmon.lock);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data,
 | 
			
		||||
			     struct drm_file *file_priv)
 | 
			
		||||
{
 | 
			
		||||
	struct vc4_file *vc4file = file_priv->driver_priv;
 | 
			
		||||
	struct drm_vc4_perfmon_create *req = data;
 | 
			
		||||
	struct vc4_perfmon *perfmon;
 | 
			
		||||
	unsigned int i;
 | 
			
		||||
	int ret;
 | 
			
		||||
 | 
			
		||||
	/* Number of monitored counters cannot exceed HW limits. */
 | 
			
		||||
	if (req->ncounters > DRM_VC4_MAX_PERF_COUNTERS ||
 | 
			
		||||
	    !req->ncounters)
 | 
			
		||||
		return -EINVAL;
 | 
			
		||||
 | 
			
		||||
	/* Make sure all events are valid. */
 | 
			
		||||
	for (i = 0; i < req->ncounters; i++) {
 | 
			
		||||
		if (req->events[i] >= VC4_PERFCNT_NUM_EVENTS)
 | 
			
		||||
			return -EINVAL;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	perfmon = kzalloc(sizeof(*perfmon) + (req->ncounters * sizeof(u64)),
 | 
			
		||||
			  GFP_KERNEL);
 | 
			
		||||
	if (!perfmon)
 | 
			
		||||
		return -ENOMEM;
 | 
			
		||||
 | 
			
		||||
	for (i = 0; i < req->ncounters; i++)
 | 
			
		||||
		perfmon->events[i] = req->events[i];
 | 
			
		||||
 | 
			
		||||
	perfmon->ncounters = req->ncounters;
 | 
			
		||||
 | 
			
		||||
	refcount_set(&perfmon->refcnt, 1);
 | 
			
		||||
 | 
			
		||||
	mutex_lock(&vc4file->perfmon.lock);
 | 
			
		||||
	ret = idr_alloc(&vc4file->perfmon.idr, perfmon, VC4_PERFMONID_MIN,
 | 
			
		||||
			VC4_PERFMONID_MAX, GFP_KERNEL);
 | 
			
		||||
	mutex_unlock(&vc4file->perfmon.lock);
 | 
			
		||||
 | 
			
		||||
	if (ret < 0) {
 | 
			
		||||
		kfree(perfmon);
 | 
			
		||||
		return ret;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	req->id = ret;
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int vc4_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
 | 
			
		||||
			      struct drm_file *file_priv)
 | 
			
		||||
{
 | 
			
		||||
	struct vc4_file *vc4file = file_priv->driver_priv;
 | 
			
		||||
	struct drm_vc4_perfmon_destroy *req = data;
 | 
			
		||||
	struct vc4_perfmon *perfmon;
 | 
			
		||||
 | 
			
		||||
	mutex_lock(&vc4file->perfmon.lock);
 | 
			
		||||
	perfmon = idr_remove(&vc4file->perfmon.idr, req->id);
 | 
			
		||||
	mutex_unlock(&vc4file->perfmon.lock);
 | 
			
		||||
 | 
			
		||||
	if (!perfmon)
 | 
			
		||||
		return -EINVAL;
 | 
			
		||||
 | 
			
		||||
	vc4_perfmon_put(perfmon);
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int vc4_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
 | 
			
		||||
				 struct drm_file *file_priv)
 | 
			
		||||
{
 | 
			
		||||
	struct vc4_file *vc4file = file_priv->driver_priv;
 | 
			
		||||
	struct drm_vc4_perfmon_get_values *req = data;
 | 
			
		||||
	struct vc4_perfmon *perfmon;
 | 
			
		||||
	int ret;
 | 
			
		||||
 | 
			
		||||
	mutex_lock(&vc4file->perfmon.lock);
 | 
			
		||||
	perfmon = idr_find(&vc4file->perfmon.idr, req->id);
 | 
			
		||||
	vc4_perfmon_get(perfmon);
 | 
			
		||||
	mutex_unlock(&vc4file->perfmon.lock);
 | 
			
		||||
 | 
			
		||||
	if (!perfmon)
 | 
			
		||||
		return -EINVAL;
 | 
			
		||||
 | 
			
		||||
	if (copy_to_user(u64_to_user_ptr(req->values_ptr), perfmon->counters,
 | 
			
		||||
			 perfmon->ncounters * sizeof(u64)))
 | 
			
		||||
		ret = -EFAULT;
 | 
			
		||||
	else
 | 
			
		||||
		ret = 0;
 | 
			
		||||
 | 
			
		||||
	vc4_perfmon_put(perfmon);
 | 
			
		||||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -122,38 +122,9 @@
 | 
			
		|||
#define V3D_VPMBASE  0x00504
 | 
			
		||||
#define V3D_PCTRC    0x00670
 | 
			
		||||
#define V3D_PCTRE    0x00674
 | 
			
		||||
#define V3D_PCTR0    0x00680
 | 
			
		||||
#define V3D_PCTRS0   0x00684
 | 
			
		||||
#define V3D_PCTR1    0x00688
 | 
			
		||||
#define V3D_PCTRS1   0x0068c
 | 
			
		||||
#define V3D_PCTR2    0x00690
 | 
			
		||||
#define V3D_PCTRS2   0x00694
 | 
			
		||||
#define V3D_PCTR3    0x00698
 | 
			
		||||
#define V3D_PCTRS3   0x0069c
 | 
			
		||||
#define V3D_PCTR4    0x006a0
 | 
			
		||||
#define V3D_PCTRS4   0x006a4
 | 
			
		||||
#define V3D_PCTR5    0x006a8
 | 
			
		||||
#define V3D_PCTRS5   0x006ac
 | 
			
		||||
#define V3D_PCTR6    0x006b0
 | 
			
		||||
#define V3D_PCTRS6   0x006b4
 | 
			
		||||
#define V3D_PCTR7    0x006b8
 | 
			
		||||
#define V3D_PCTRS7   0x006bc
 | 
			
		||||
#define V3D_PCTR8    0x006c0
 | 
			
		||||
#define V3D_PCTRS8   0x006c4
 | 
			
		||||
#define V3D_PCTR9    0x006c8
 | 
			
		||||
#define V3D_PCTRS9   0x006cc
 | 
			
		||||
#define V3D_PCTR10   0x006d0
 | 
			
		||||
#define V3D_PCTRS10  0x006d4
 | 
			
		||||
#define V3D_PCTR11   0x006d8
 | 
			
		||||
#define V3D_PCTRS11  0x006dc
 | 
			
		||||
#define V3D_PCTR12   0x006e0
 | 
			
		||||
#define V3D_PCTRS12  0x006e4
 | 
			
		||||
#define V3D_PCTR13   0x006e8
 | 
			
		||||
#define V3D_PCTRS13  0x006ec
 | 
			
		||||
#define V3D_PCTR14   0x006f0
 | 
			
		||||
#define V3D_PCTRS14  0x006f4
 | 
			
		||||
#define V3D_PCTR15   0x006f8
 | 
			
		||||
#define V3D_PCTRS15  0x006fc
 | 
			
		||||
# define V3D_PCTRE_EN	BIT(31)
 | 
			
		||||
#define V3D_PCTR(x)  (0x00680 + ((x) * 8))
 | 
			
		||||
#define V3D_PCTRS(x) (0x00684 + ((x) * 8))
 | 
			
		||||
#define V3D_DBGE     0x00f00
 | 
			
		||||
#define V3D_FDBGO    0x00f04
 | 
			
		||||
#define V3D_FDBGB    0x00f08
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -68,38 +68,38 @@ static const struct {
 | 
			
		|||
	REGDEF(V3D_VPMBASE),
 | 
			
		||||
	REGDEF(V3D_PCTRC),
 | 
			
		||||
	REGDEF(V3D_PCTRE),
 | 
			
		||||
	REGDEF(V3D_PCTR0),
 | 
			
		||||
	REGDEF(V3D_PCTRS0),
 | 
			
		||||
	REGDEF(V3D_PCTR1),
 | 
			
		||||
	REGDEF(V3D_PCTRS1),
 | 
			
		||||
	REGDEF(V3D_PCTR2),
 | 
			
		||||
	REGDEF(V3D_PCTRS2),
 | 
			
		||||
	REGDEF(V3D_PCTR3),
 | 
			
		||||
	REGDEF(V3D_PCTRS3),
 | 
			
		||||
	REGDEF(V3D_PCTR4),
 | 
			
		||||
	REGDEF(V3D_PCTRS4),
 | 
			
		||||
	REGDEF(V3D_PCTR5),
 | 
			
		||||
	REGDEF(V3D_PCTRS5),
 | 
			
		||||
	REGDEF(V3D_PCTR6),
 | 
			
		||||
	REGDEF(V3D_PCTRS6),
 | 
			
		||||
	REGDEF(V3D_PCTR7),
 | 
			
		||||
	REGDEF(V3D_PCTRS7),
 | 
			
		||||
	REGDEF(V3D_PCTR8),
 | 
			
		||||
	REGDEF(V3D_PCTRS8),
 | 
			
		||||
	REGDEF(V3D_PCTR9),
 | 
			
		||||
	REGDEF(V3D_PCTRS9),
 | 
			
		||||
	REGDEF(V3D_PCTR10),
 | 
			
		||||
	REGDEF(V3D_PCTRS10),
 | 
			
		||||
	REGDEF(V3D_PCTR11),
 | 
			
		||||
	REGDEF(V3D_PCTRS11),
 | 
			
		||||
	REGDEF(V3D_PCTR12),
 | 
			
		||||
	REGDEF(V3D_PCTRS12),
 | 
			
		||||
	REGDEF(V3D_PCTR13),
 | 
			
		||||
	REGDEF(V3D_PCTRS13),
 | 
			
		||||
	REGDEF(V3D_PCTR14),
 | 
			
		||||
	REGDEF(V3D_PCTRS14),
 | 
			
		||||
	REGDEF(V3D_PCTR15),
 | 
			
		||||
	REGDEF(V3D_PCTRS15),
 | 
			
		||||
	REGDEF(V3D_PCTR(0)),
 | 
			
		||||
	REGDEF(V3D_PCTRS(0)),
 | 
			
		||||
	REGDEF(V3D_PCTR(1)),
 | 
			
		||||
	REGDEF(V3D_PCTRS(1)),
 | 
			
		||||
	REGDEF(V3D_PCTR(2)),
 | 
			
		||||
	REGDEF(V3D_PCTRS(2)),
 | 
			
		||||
	REGDEF(V3D_PCTR(3)),
 | 
			
		||||
	REGDEF(V3D_PCTRS(3)),
 | 
			
		||||
	REGDEF(V3D_PCTR(4)),
 | 
			
		||||
	REGDEF(V3D_PCTRS(4)),
 | 
			
		||||
	REGDEF(V3D_PCTR(5)),
 | 
			
		||||
	REGDEF(V3D_PCTRS(5)),
 | 
			
		||||
	REGDEF(V3D_PCTR(6)),
 | 
			
		||||
	REGDEF(V3D_PCTRS(6)),
 | 
			
		||||
	REGDEF(V3D_PCTR(7)),
 | 
			
		||||
	REGDEF(V3D_PCTRS(7)),
 | 
			
		||||
	REGDEF(V3D_PCTR(8)),
 | 
			
		||||
	REGDEF(V3D_PCTRS(8)),
 | 
			
		||||
	REGDEF(V3D_PCTR(9)),
 | 
			
		||||
	REGDEF(V3D_PCTRS(9)),
 | 
			
		||||
	REGDEF(V3D_PCTR(10)),
 | 
			
		||||
	REGDEF(V3D_PCTRS(10)),
 | 
			
		||||
	REGDEF(V3D_PCTR(11)),
 | 
			
		||||
	REGDEF(V3D_PCTRS(11)),
 | 
			
		||||
	REGDEF(V3D_PCTR(12)),
 | 
			
		||||
	REGDEF(V3D_PCTRS(12)),
 | 
			
		||||
	REGDEF(V3D_PCTR(13)),
 | 
			
		||||
	REGDEF(V3D_PCTRS(13)),
 | 
			
		||||
	REGDEF(V3D_PCTR(14)),
 | 
			
		||||
	REGDEF(V3D_PCTRS(14)),
 | 
			
		||||
	REGDEF(V3D_PCTR(15)),
 | 
			
		||||
	REGDEF(V3D_PCTRS(15)),
 | 
			
		||||
	REGDEF(V3D_DBGE),
 | 
			
		||||
	REGDEF(V3D_FDBGO),
 | 
			
		||||
	REGDEF(V3D_FDBGB),
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -42,6 +42,9 @@ extern "C" {
 | 
			
		|||
#define DRM_VC4_GET_TILING                        0x09
 | 
			
		||||
#define DRM_VC4_LABEL_BO                          0x0a
 | 
			
		||||
#define DRM_VC4_GEM_MADVISE                       0x0b
 | 
			
		||||
#define DRM_VC4_PERFMON_CREATE                    0x0c
 | 
			
		||||
#define DRM_VC4_PERFMON_DESTROY                   0x0d
 | 
			
		||||
#define DRM_VC4_PERFMON_GET_VALUES                0x0e
 | 
			
		||||
 | 
			
		||||
#define DRM_IOCTL_VC4_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
 | 
			
		||||
#define DRM_IOCTL_VC4_WAIT_SEQNO          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
 | 
			
		||||
| 
						 | 
				
			
			@ -55,6 +58,9 @@ extern "C" {
 | 
			
		|||
#define DRM_IOCTL_VC4_GET_TILING          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_TILING, struct drm_vc4_get_tiling)
 | 
			
		||||
#define DRM_IOCTL_VC4_LABEL_BO            DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_LABEL_BO, struct drm_vc4_label_bo)
 | 
			
		||||
#define DRM_IOCTL_VC4_GEM_MADVISE         DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GEM_MADVISE, struct drm_vc4_gem_madvise)
 | 
			
		||||
#define DRM_IOCTL_VC4_PERFMON_CREATE      DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_PERFMON_CREATE, struct drm_vc4_perfmon_create)
 | 
			
		||||
#define DRM_IOCTL_VC4_PERFMON_DESTROY     DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_PERFMON_DESTROY, struct drm_vc4_perfmon_destroy)
 | 
			
		||||
#define DRM_IOCTL_VC4_PERFMON_GET_VALUES  DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_PERFMON_GET_VALUES, struct drm_vc4_perfmon_get_values)
 | 
			
		||||
 | 
			
		||||
struct drm_vc4_submit_rcl_surface {
 | 
			
		||||
	__u32 hindex; /* Handle index, or ~0 if not present. */
 | 
			
		||||
| 
						 | 
				
			
			@ -173,6 +179,15 @@ struct drm_vc4_submit_cl {
 | 
			
		|||
	 * wait ioctl).
 | 
			
		||||
	 */
 | 
			
		||||
	__u64 seqno;
 | 
			
		||||
 | 
			
		||||
	/* ID of the perfmon to attach to this job. 0 means no perfmon. */
 | 
			
		||||
	__u32 perfmonid;
 | 
			
		||||
 | 
			
		||||
	/* Unused field to align this struct on 64 bits. Must be set to 0.
 | 
			
		||||
	 * If one ever needs to add an u32 field to this struct, this field
 | 
			
		||||
	 * can be used.
 | 
			
		||||
	 */
 | 
			
		||||
	__u32 pad2;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
| 
						 | 
				
			
			@ -308,6 +323,7 @@ struct drm_vc4_get_hang_state {
 | 
			
		|||
#define DRM_VC4_PARAM_SUPPORTS_THREADED_FS	5
 | 
			
		||||
#define DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER	6
 | 
			
		||||
#define DRM_VC4_PARAM_SUPPORTS_MADVISE		7
 | 
			
		||||
#define DRM_VC4_PARAM_SUPPORTS_PERFMON		8
 | 
			
		||||
 | 
			
		||||
struct drm_vc4_get_param {
 | 
			
		||||
	__u32 param;
 | 
			
		||||
| 
						 | 
				
			
			@ -352,6 +368,66 @@ struct drm_vc4_gem_madvise {
 | 
			
		|||
	__u32 pad;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
enum {
 | 
			
		||||
	VC4_PERFCNT_FEP_VALID_PRIMS_NO_RENDER,
 | 
			
		||||
	VC4_PERFCNT_FEP_VALID_PRIMS_RENDER,
 | 
			
		||||
	VC4_PERFCNT_FEP_CLIPPED_QUADS,
 | 
			
		||||
	VC4_PERFCNT_FEP_VALID_QUADS,
 | 
			
		||||
	VC4_PERFCNT_TLB_QUADS_NOT_PASSING_STENCIL,
 | 
			
		||||
	VC4_PERFCNT_TLB_QUADS_NOT_PASSING_Z_AND_STENCIL,
 | 
			
		||||
	VC4_PERFCNT_TLB_QUADS_PASSING_Z_AND_STENCIL,
 | 
			
		||||
	VC4_PERFCNT_TLB_QUADS_ZERO_COVERAGE,
 | 
			
		||||
	VC4_PERFCNT_TLB_QUADS_NON_ZERO_COVERAGE,
 | 
			
		||||
	VC4_PERFCNT_TLB_QUADS_WRITTEN_TO_COLOR_BUF,
 | 
			
		||||
	VC4_PERFCNT_PLB_PRIMS_OUTSIDE_VIEWPORT,
 | 
			
		||||
	VC4_PERFCNT_PLB_PRIMS_NEED_CLIPPING,
 | 
			
		||||
	VC4_PERFCNT_PSE_PRIMS_REVERSED,
 | 
			
		||||
	VC4_PERFCNT_QPU_TOTAL_IDLE_CYCLES,
 | 
			
		||||
	VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_VERTEX_COORD_SHADING,
 | 
			
		||||
	VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_FRAGMENT_SHADING,
 | 
			
		||||
	VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_EXEC_VALID_INST,
 | 
			
		||||
	VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_WAITING_TMUS,
 | 
			
		||||
	VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_WAITING_SCOREBOARD,
 | 
			
		||||
	VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_WAITING_VARYINGS,
 | 
			
		||||
	VC4_PERFCNT_QPU_TOTAL_INST_CACHE_HIT,
 | 
			
		||||
	VC4_PERFCNT_QPU_TOTAL_INST_CACHE_MISS,
 | 
			
		||||
	VC4_PERFCNT_QPU_TOTAL_UNIFORM_CACHE_HIT,
 | 
			
		||||
	VC4_PERFCNT_QPU_TOTAL_UNIFORM_CACHE_MISS,
 | 
			
		||||
	VC4_PERFCNT_TMU_TOTAL_TEXT_QUADS_PROCESSED,
 | 
			
		||||
	VC4_PERFCNT_TMU_TOTAL_TEXT_CACHE_MISS,
 | 
			
		||||
	VC4_PERFCNT_VPM_TOTAL_CLK_CYCLES_VDW_STALLED,
 | 
			
		||||
	VC4_PERFCNT_VPM_TOTAL_CLK_CYCLES_VCD_STALLED,
 | 
			
		||||
	VC4_PERFCNT_L2C_TOTAL_L2_CACHE_HIT,
 | 
			
		||||
	VC4_PERFCNT_L2C_TOTAL_L2_CACHE_MISS,
 | 
			
		||||
	VC4_PERFCNT_NUM_EVENTS,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#define DRM_VC4_MAX_PERF_COUNTERS	16
 | 
			
		||||
 | 
			
		||||
struct drm_vc4_perfmon_create {
 | 
			
		||||
	__u32 id;
 | 
			
		||||
	__u32 ncounters;
 | 
			
		||||
	__u8 events[DRM_VC4_MAX_PERF_COUNTERS];
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct drm_vc4_perfmon_destroy {
 | 
			
		||||
	__u32 id;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Returns the values of the performance counters tracked by this
 | 
			
		||||
 * perfmon (as an array of ncounters u64 values).
 | 
			
		||||
 *
 | 
			
		||||
 * No implicit synchronization is performed, so the user has to
 | 
			
		||||
 * guarantee that any jobs using this perfmon have already been
 | 
			
		||||
 * completed  (probably by blocking on the seqno returned by the
 | 
			
		||||
 * last exec that used the perfmon).
 | 
			
		||||
 */
 | 
			
		||||
struct drm_vc4_perfmon_get_values {
 | 
			
		||||
	__u32 id;
 | 
			
		||||
	__u64 values_ptr;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#if defined(__cplusplus)
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue