mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	drm/msm: devcoredump iommu fault support
Wire up support to stall the SMMU on iova fault, and collect a devcoredump snapshot for easier debugging of faults. Currently this is a6xx-only, but mostly only because so far it is the only one using adreno-smmu-priv. Signed-off-by: Rob Clark <robdclark@chromium.org> Acked-by: Jordan Crouse <jordan@cosmicpenguin.net> Link: https://lore.kernel.org/r/20210610214431.539029-6-robdclark@gmail.com Signed-off-by: Rob Clark <robdclark@chromium.org>
This commit is contained in:
		
							parent
							
								
									ba6014a4e4
								
							
						
					
					
						commit
						e25e92e08e
					
				
					 11 changed files with 186 additions and 12 deletions
				
			
		| 
						 | 
					@ -1200,6 +1200,15 @@ static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
 | 
				
			||||||
	struct drm_device *dev = gpu->dev;
 | 
						struct drm_device *dev = gpu->dev;
 | 
				
			||||||
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
 | 
						struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * If stalled on SMMU fault, we could trip the GPU's hang detection,
 | 
				
			||||||
 | 
						 * but the fault handler will trigger the devcore dump, and we want
 | 
				
			||||||
 | 
						 * to otherwise resume normally rather than killing the submit, so
 | 
				
			||||||
 | 
						 * just bail.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24))
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
 | 
						DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
 | 
				
			||||||
		ring ? ring->id : -1, ring ? ring->seqno : 0,
 | 
							ring ? ring->id : -1, ring ? ring->seqno : 0,
 | 
				
			||||||
		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
 | 
							gpu_read(gpu, REG_A5XX_RBBM_STATUS),
 | 
				
			||||||
| 
						 | 
					@ -1523,6 +1532,7 @@ static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
 | 
						struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
 | 
				
			||||||
			GFP_KERNEL);
 | 
								GFP_KERNEL);
 | 
				
			||||||
 | 
						bool stalled = !!(gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!a5xx_state)
 | 
						if (!a5xx_state)
 | 
				
			||||||
		return ERR_PTR(-ENOMEM);
 | 
							return ERR_PTR(-ENOMEM);
 | 
				
			||||||
| 
						 | 
					@ -1535,8 +1545,13 @@ static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
 | 
						a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* Get the HLSQ regs with the help of the crashdumper */
 | 
						/*
 | 
				
			||||||
	a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
 | 
						 * Get the HLSQ regs with the help of the crashdumper, but only if
 | 
				
			||||||
 | 
						 * we are not stalled in an iommu fault (in which case the crashdumper
 | 
				
			||||||
 | 
						 * would not have access to memory)
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (!stalled)
 | 
				
			||||||
 | 
							a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	a5xx_set_hwcg(gpu, true);
 | 
						a5xx_set_hwcg(gpu, true);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1193,6 +1193,16 @@ static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *da
 | 
				
			||||||
	struct msm_gpu *gpu = arg;
 | 
						struct msm_gpu *gpu = arg;
 | 
				
			||||||
	struct adreno_smmu_fault_info *info = data;
 | 
						struct adreno_smmu_fault_info *info = data;
 | 
				
			||||||
	const char *type = "UNKNOWN";
 | 
						const char *type = "UNKNOWN";
 | 
				
			||||||
 | 
						const char *block;
 | 
				
			||||||
 | 
						bool do_devcoredump = info && !READ_ONCE(gpu->crashstate);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * If we aren't going to be resuming later from fault_worker, then do
 | 
				
			||||||
 | 
						 * it now.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (!do_devcoredump) {
 | 
				
			||||||
 | 
							gpu->aspace->mmu->funcs->resume_translation(gpu->aspace->mmu);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Print a default message if we couldn't get the data from the
 | 
						 * Print a default message if we couldn't get the data from the
 | 
				
			||||||
| 
						 | 
					@ -1216,15 +1226,30 @@ static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *da
 | 
				
			||||||
	else if (info->fsr & ARM_SMMU_FSR_EF)
 | 
						else if (info->fsr & ARM_SMMU_FSR_EF)
 | 
				
			||||||
		type = "EXTERNAL";
 | 
							type = "EXTERNAL";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						block = a6xx_fault_block(gpu, info->fsynr1 & 0xff);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	pr_warn_ratelimited("*** gpu fault: ttbr0=%.16llx iova=%.16lx dir=%s type=%s source=%s (%u,%u,%u,%u)\n",
 | 
						pr_warn_ratelimited("*** gpu fault: ttbr0=%.16llx iova=%.16lx dir=%s type=%s source=%s (%u,%u,%u,%u)\n",
 | 
				
			||||||
			info->ttbr0, iova,
 | 
								info->ttbr0, iova,
 | 
				
			||||||
			flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ", type,
 | 
								flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ",
 | 
				
			||||||
			a6xx_fault_block(gpu, info->fsynr1 & 0xff),
 | 
								type, block,
 | 
				
			||||||
			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
 | 
								gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
 | 
				
			||||||
			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
 | 
								gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
 | 
				
			||||||
			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
 | 
								gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
 | 
				
			||||||
			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)));
 | 
								gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (do_devcoredump) {
 | 
				
			||||||
 | 
							/* Turn off the hangcheck timer to keep it from bothering us */
 | 
				
			||||||
 | 
							del_timer(&gpu->hangcheck_timer);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							gpu->fault_info.ttbr0 = info->ttbr0;
 | 
				
			||||||
 | 
							gpu->fault_info.iova  = iova;
 | 
				
			||||||
 | 
							gpu->fault_info.flags = flags;
 | 
				
			||||||
 | 
							gpu->fault_info.type  = type;
 | 
				
			||||||
 | 
							gpu->fault_info.block = block;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							kthread_queue_work(gpu->worker, &gpu->fault_work);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1276,6 +1301,15 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
 | 
				
			||||||
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 | 
						struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 | 
				
			||||||
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
 | 
						struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * If stalled on SMMU fault, we could trip the GPU's hang detection,
 | 
				
			||||||
 | 
						 * but the fault handler will trigger the devcore dump, and we want
 | 
				
			||||||
 | 
						 * to otherwise resume normally rather than killing the submit, so
 | 
				
			||||||
 | 
						 * just bail.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT)
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Force the GPU to stay on until after we finish
 | 
						 * Force the GPU to stay on until after we finish
 | 
				
			||||||
	 * collecting information
 | 
						 * collecting information
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -832,6 +832,20 @@ static void a6xx_get_registers(struct msm_gpu *gpu,
 | 
				
			||||||
		a6xx_get_ahb_gpu_registers(gpu,
 | 
							a6xx_get_ahb_gpu_registers(gpu,
 | 
				
			||||||
				a6xx_state, &a6xx_vbif_reglist,
 | 
									a6xx_state, &a6xx_vbif_reglist,
 | 
				
			||||||
				&a6xx_state->registers[index++]);
 | 
									&a6xx_state->registers[index++]);
 | 
				
			||||||
 | 
						if (!dumper) {
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * We can't use the crashdumper when the SMMU is stalled,
 | 
				
			||||||
 | 
							 * because the GPU has no memory access until we resume
 | 
				
			||||||
 | 
							 * translation (but we don't want to do that until after
 | 
				
			||||||
 | 
							 * we have captured as much useful GPU state as possible).
 | 
				
			||||||
 | 
							 * So instead collect registers via the CPU:
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
 | 
				
			||||||
 | 
								a6xx_get_ahb_gpu_registers(gpu,
 | 
				
			||||||
 | 
									a6xx_state, &a6xx_reglist[i],
 | 
				
			||||||
 | 
									&a6xx_state->registers[index++]);
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
 | 
						for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
 | 
				
			||||||
		a6xx_get_crashdumper_registers(gpu,
 | 
							a6xx_get_crashdumper_registers(gpu,
 | 
				
			||||||
| 
						 | 
					@ -905,11 +919,13 @@ static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
 | 
					struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct a6xx_crashdumper dumper = { 0 };
 | 
						struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
 | 
				
			||||||
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 | 
						struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 | 
				
			||||||
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 | 
						struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 | 
				
			||||||
	struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
 | 
						struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
 | 
				
			||||||
		GFP_KERNEL);
 | 
							GFP_KERNEL);
 | 
				
			||||||
 | 
						bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
 | 
				
			||||||
 | 
								A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!a6xx_state)
 | 
						if (!a6xx_state)
 | 
				
			||||||
		return ERR_PTR(-ENOMEM);
 | 
							return ERR_PTR(-ENOMEM);
 | 
				
			||||||
| 
						 | 
					@ -928,14 +944,24 @@ struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
 | 
				
			||||||
	/* Get the banks of indexed registers */
 | 
						/* Get the banks of indexed registers */
 | 
				
			||||||
	a6xx_get_indexed_registers(gpu, a6xx_state);
 | 
						a6xx_get_indexed_registers(gpu, a6xx_state);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* Try to initialize the crashdumper */
 | 
						/*
 | 
				
			||||||
	if (!a6xx_crashdumper_init(gpu, &dumper)) {
 | 
						 * Try to initialize the crashdumper, if we are not dumping state
 | 
				
			||||||
		a6xx_get_registers(gpu, a6xx_state, &dumper);
 | 
						 * with the SMMU stalled.  The crashdumper needs memory access to
 | 
				
			||||||
		a6xx_get_shaders(gpu, a6xx_state, &dumper);
 | 
						 * write out GPU state, so we need to skip this when the SMMU is
 | 
				
			||||||
		a6xx_get_clusters(gpu, a6xx_state, &dumper);
 | 
						 * stalled in response to an iova fault
 | 
				
			||||||
		a6xx_get_dbgahb_clusters(gpu, a6xx_state, &dumper);
 | 
						 */
 | 
				
			||||||
 | 
						if (!stalled && !a6xx_crashdumper_init(gpu, &_dumper)) {
 | 
				
			||||||
 | 
							dumper = &_dumper;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
 | 
						a6xx_get_registers(gpu, a6xx_state, dumper);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (dumper) {
 | 
				
			||||||
 | 
							a6xx_get_shaders(gpu, a6xx_state, dumper);
 | 
				
			||||||
 | 
							a6xx_get_clusters(gpu, a6xx_state, dumper);
 | 
				
			||||||
 | 
							a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							msm_gem_kernel_put(dumper->bo, gpu->aspace, true);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (snapshot_debugbus)
 | 
						if (snapshot_debugbus)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -684,6 +684,21 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
 | 
				
			||||||
			adreno_gpu->info->revn, adreno_gpu->rev.core,
 | 
								adreno_gpu->info->revn, adreno_gpu->rev.core,
 | 
				
			||||||
			adreno_gpu->rev.major, adreno_gpu->rev.minor,
 | 
								adreno_gpu->rev.major, adreno_gpu->rev.minor,
 | 
				
			||||||
			adreno_gpu->rev.patchid);
 | 
								adreno_gpu->rev.patchid);
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * If this is state collected due to iova fault, show fault related info
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * TTBR0 would not be zero, so this is a good way to distinguish
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (state->fault_info.ttbr0) {
 | 
				
			||||||
 | 
							const struct msm_gpu_fault_info *info = &state->fault_info;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							drm_puts(p, "fault-info:\n");
 | 
				
			||||||
 | 
							drm_printf(p, "  - ttbr0=%.16llx\n", info->ttbr0);
 | 
				
			||||||
 | 
							drm_printf(p, "  - iova=%.16lx\n", info->iova);
 | 
				
			||||||
 | 
							drm_printf(p, "  - dir=%s\n", info->flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ");
 | 
				
			||||||
 | 
							drm_printf(p, "  - type=%s\n", info->type);
 | 
				
			||||||
 | 
							drm_printf(p, "  - source=%s\n", info->block);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	drm_printf(p, "rbbm-status: 0x%08x\n", state->rbbm_status);
 | 
						drm_printf(p, "rbbm-status: 0x%08x\n", state->rbbm_status);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -328,6 +328,7 @@ struct msm_gem_submit {
 | 
				
			||||||
	struct dma_fence *fence;
 | 
						struct dma_fence *fence;
 | 
				
			||||||
	struct msm_gpu_submitqueue *queue;
 | 
						struct msm_gpu_submitqueue *queue;
 | 
				
			||||||
	struct pid *pid;    /* submitting process */
 | 
						struct pid *pid;    /* submitting process */
 | 
				
			||||||
 | 
						bool fault_dumped;  /* Limit devcoredump dumping to one per submit */
 | 
				
			||||||
	bool valid;         /* true if no cmdstream patching needed */
 | 
						bool valid;         /* true if no cmdstream patching needed */
 | 
				
			||||||
	bool in_rb;         /* "sudo" mode, copy cmds into RB */
 | 
						bool in_rb;         /* "sudo" mode, copy cmds into RB */
 | 
				
			||||||
	struct msm_ringbuffer *ring;
 | 
						struct msm_ringbuffer *ring;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -50,6 +50,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
 | 
				
			||||||
	submit->cmd = (void *)&submit->bos[nr_bos];
 | 
						submit->cmd = (void *)&submit->bos[nr_bos];
 | 
				
			||||||
	submit->queue = queue;
 | 
						submit->queue = queue;
 | 
				
			||||||
	submit->ring = gpu->rb[queue->prio];
 | 
						submit->ring = gpu->rb[queue->prio];
 | 
				
			||||||
 | 
						submit->fault_dumped = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* initially, until copy_from_user() and bo lookup succeeds: */
 | 
						/* initially, until copy_from_user() and bo lookup succeeds: */
 | 
				
			||||||
	submit->nr_bos = 0;
 | 
						submit->nr_bos = 0;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -387,6 +387,7 @@ static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
 | 
				
			||||||
	/* Fill in the additional crash state information */
 | 
						/* Fill in the additional crash state information */
 | 
				
			||||||
	state->comm = kstrdup(comm, GFP_KERNEL);
 | 
						state->comm = kstrdup(comm, GFP_KERNEL);
 | 
				
			||||||
	state->cmd = kstrdup(cmd, GFP_KERNEL);
 | 
						state->cmd = kstrdup(cmd, GFP_KERNEL);
 | 
				
			||||||
 | 
						state->fault_info = gpu->fault_info;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (submit) {
 | 
						if (submit) {
 | 
				
			||||||
		int i, nr = 0;
 | 
							int i, nr = 0;
 | 
				
			||||||
| 
						 | 
					@ -559,6 +560,52 @@ static void recover_worker(struct kthread_work *work)
 | 
				
			||||||
	msm_gpu_retire(gpu);
 | 
						msm_gpu_retire(gpu);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void fault_worker(struct kthread_work *work)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct msm_gpu *gpu = container_of(work, struct msm_gpu, fault_work);
 | 
				
			||||||
 | 
						struct drm_device *dev = gpu->dev;
 | 
				
			||||||
 | 
						struct msm_gem_submit *submit;
 | 
				
			||||||
 | 
						struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
 | 
				
			||||||
 | 
						char *comm = NULL, *cmd = NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						mutex_lock(&dev->struct_mutex);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
 | 
				
			||||||
 | 
						if (submit && submit->fault_dumped)
 | 
				
			||||||
 | 
							goto resume_smmu;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (submit) {
 | 
				
			||||||
 | 
							struct task_struct *task;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							task = get_pid_task(submit->pid, PIDTYPE_PID);
 | 
				
			||||||
 | 
							if (task) {
 | 
				
			||||||
 | 
								comm = kstrdup(task->comm, GFP_KERNEL);
 | 
				
			||||||
 | 
								cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL);
 | 
				
			||||||
 | 
								put_task_struct(task);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * When we get GPU iova faults, we can get 1000s of them,
 | 
				
			||||||
 | 
							 * but we really only want to log the first one.
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							submit->fault_dumped = true;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Record the crash state */
 | 
				
			||||||
 | 
						pm_runtime_get_sync(&gpu->pdev->dev);
 | 
				
			||||||
 | 
						msm_gpu_crashstate_capture(gpu, submit, comm, cmd);
 | 
				
			||||||
 | 
						pm_runtime_put_sync(&gpu->pdev->dev);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						kfree(cmd);
 | 
				
			||||||
 | 
						kfree(comm);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					resume_smmu:
 | 
				
			||||||
 | 
						memset(&gpu->fault_info, 0, sizeof(gpu->fault_info));
 | 
				
			||||||
 | 
						gpu->aspace->mmu->funcs->resume_translation(gpu->aspace->mmu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						mutex_unlock(&dev->struct_mutex);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void hangcheck_timer_reset(struct msm_gpu *gpu)
 | 
					static void hangcheck_timer_reset(struct msm_gpu *gpu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct msm_drm_private *priv = gpu->dev->dev_private;
 | 
						struct msm_drm_private *priv = gpu->dev->dev_private;
 | 
				
			||||||
| 
						 | 
					@ -923,6 +970,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 | 
				
			||||||
	INIT_LIST_HEAD(&gpu->active_list);
 | 
						INIT_LIST_HEAD(&gpu->active_list);
 | 
				
			||||||
	kthread_init_work(&gpu->retire_work, retire_worker);
 | 
						kthread_init_work(&gpu->retire_work, retire_worker);
 | 
				
			||||||
	kthread_init_work(&gpu->recover_work, recover_worker);
 | 
						kthread_init_work(&gpu->recover_work, recover_worker);
 | 
				
			||||||
 | 
						kthread_init_work(&gpu->fault_work, fault_worker);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	timer_setup(&gpu->hangcheck_timer, hangcheck_handler, 0);
 | 
						timer_setup(&gpu->hangcheck_timer, hangcheck_handler, 0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -71,6 +71,15 @@ struct msm_gpu_funcs {
 | 
				
			||||||
	uint32_t (*get_rptr)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
 | 
						uint32_t (*get_rptr)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Additional state for iommu faults: */
 | 
				
			||||||
 | 
					struct msm_gpu_fault_info {
 | 
				
			||||||
 | 
						u64 ttbr0;
 | 
				
			||||||
 | 
						unsigned long iova;
 | 
				
			||||||
 | 
						int flags;
 | 
				
			||||||
 | 
						const char *type;
 | 
				
			||||||
 | 
						const char *block;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct msm_gpu {
 | 
					struct msm_gpu {
 | 
				
			||||||
	const char *name;
 | 
						const char *name;
 | 
				
			||||||
	struct drm_device *dev;
 | 
						struct drm_device *dev;
 | 
				
			||||||
| 
						 | 
					@ -125,6 +134,12 @@ struct msm_gpu {
 | 
				
			||||||
#define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 500 /* in ms */
 | 
					#define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 500 /* in ms */
 | 
				
			||||||
	struct timer_list hangcheck_timer;
 | 
						struct timer_list hangcheck_timer;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Fault info for most recent iova fault: */
 | 
				
			||||||
 | 
						struct msm_gpu_fault_info fault_info;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* work for handling GPU iova faults: */
 | 
				
			||||||
 | 
						struct kthread_work fault_work;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* work for handling GPU recovery: */
 | 
						/* work for handling GPU recovery: */
 | 
				
			||||||
	struct kthread_work recover_work;
 | 
						struct kthread_work recover_work;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -232,6 +247,8 @@ struct msm_gpu_state {
 | 
				
			||||||
	char *comm;
 | 
						char *comm;
 | 
				
			||||||
	char *cmd;
 | 
						char *cmd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						struct msm_gpu_fault_info fault_info;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	int nr_bos;
 | 
						int nr_bos;
 | 
				
			||||||
	struct msm_gpu_state_bo *bos;
 | 
						struct msm_gpu_state_bo *bos;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -68,6 +68,10 @@ static int msm_gpummu_unmap(struct msm_mmu *mmu, uint64_t iova, size_t len)
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void msm_gpummu_resume_translation(struct msm_mmu *mmu)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void msm_gpummu_destroy(struct msm_mmu *mmu)
 | 
					static void msm_gpummu_destroy(struct msm_mmu *mmu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct msm_gpummu *gpummu = to_msm_gpummu(mmu);
 | 
						struct msm_gpummu *gpummu = to_msm_gpummu(mmu);
 | 
				
			||||||
| 
						 | 
					@ -83,6 +87,7 @@ static const struct msm_mmu_funcs funcs = {
 | 
				
			||||||
		.map = msm_gpummu_map,
 | 
							.map = msm_gpummu_map,
 | 
				
			||||||
		.unmap = msm_gpummu_unmap,
 | 
							.unmap = msm_gpummu_unmap,
 | 
				
			||||||
		.destroy = msm_gpummu_destroy,
 | 
							.destroy = msm_gpummu_destroy,
 | 
				
			||||||
 | 
							.resume_translation = msm_gpummu_resume_translation,
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct msm_mmu *msm_gpummu_new(struct device *dev, struct msm_gpu *gpu)
 | 
					struct msm_mmu *msm_gpummu_new(struct device *dev, struct msm_gpu *gpu)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -184,6 +184,9 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent)
 | 
				
			||||||
	 * the arm-smmu driver as a trigger to set up TTBR0
 | 
						 * the arm-smmu driver as a trigger to set up TTBR0
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	if (atomic_inc_return(&iommu->pagetables) == 1) {
 | 
						if (atomic_inc_return(&iommu->pagetables) == 1) {
 | 
				
			||||||
 | 
							/* Enable stall on iommu fault: */
 | 
				
			||||||
 | 
							adreno_smmu->set_stall(adreno_smmu->cookie, true);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		ret = adreno_smmu->set_ttbr0_cfg(adreno_smmu->cookie, &ttbr0_cfg);
 | 
							ret = adreno_smmu->set_ttbr0_cfg(adreno_smmu->cookie, &ttbr0_cfg);
 | 
				
			||||||
		if (ret) {
 | 
							if (ret) {
 | 
				
			||||||
			free_io_pgtable_ops(pagetable->pgtbl_ops);
 | 
								free_io_pgtable_ops(pagetable->pgtbl_ops);
 | 
				
			||||||
| 
						 | 
					@ -226,6 +229,13 @@ static int msm_fault_handler(struct iommu_domain *domain, struct device *dev,
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void msm_iommu_resume_translation(struct msm_mmu *mmu)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(mmu->dev);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						adreno_smmu->resume_translation(adreno_smmu->cookie, true);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void msm_iommu_detach(struct msm_mmu *mmu)
 | 
					static void msm_iommu_detach(struct msm_mmu *mmu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct msm_iommu *iommu = to_msm_iommu(mmu);
 | 
						struct msm_iommu *iommu = to_msm_iommu(mmu);
 | 
				
			||||||
| 
						 | 
					@ -273,6 +283,7 @@ static const struct msm_mmu_funcs funcs = {
 | 
				
			||||||
		.map = msm_iommu_map,
 | 
							.map = msm_iommu_map,
 | 
				
			||||||
		.unmap = msm_iommu_unmap,
 | 
							.unmap = msm_iommu_unmap,
 | 
				
			||||||
		.destroy = msm_iommu_destroy,
 | 
							.destroy = msm_iommu_destroy,
 | 
				
			||||||
 | 
							.resume_translation = msm_iommu_resume_translation,
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain)
 | 
					struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -15,6 +15,7 @@ struct msm_mmu_funcs {
 | 
				
			||||||
			size_t len, int prot);
 | 
								size_t len, int prot);
 | 
				
			||||||
	int (*unmap)(struct msm_mmu *mmu, uint64_t iova, size_t len);
 | 
						int (*unmap)(struct msm_mmu *mmu, uint64_t iova, size_t len);
 | 
				
			||||||
	void (*destroy)(struct msm_mmu *mmu);
 | 
						void (*destroy)(struct msm_mmu *mmu);
 | 
				
			||||||
 | 
						void (*resume_translation)(struct msm_mmu *mmu);
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
enum msm_mmu_type {
 | 
					enum msm_mmu_type {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue