mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	amd/amdkfd: Trigger segfault for early userptr unmapping
If applications unmap the memory before destroying the userptr, the driver needs to trigger a segfault in VM debug mode to notify user space to correct the free sequence. v2: Send gpu access fault to user space v3: Report gpu address to user space, remove unnecessary params v4: update pr_err into one line, remove userptr log info Signed-off-by: Shane Xiao <shane.xiao@amd.com> Acked-by: Christian König <christian.koenig@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
		
							parent
							
								
									8e320f67d4
								
							
						
					
					
						commit
						2d274bf709
					
				
					 3 changed files with 33 additions and 0 deletions
				
			
		| 
						 | 
					@ -2559,6 +2559,18 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 | 
				
			||||||
			if (ret != -EFAULT)
 | 
								if (ret != -EFAULT)
 | 
				
			||||||
				return ret;
 | 
									return ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								/* If applications unmap memory before destroying the userptr
 | 
				
			||||||
 | 
								 * from the KFD, trigger a segmentation fault in VM debug mode.
 | 
				
			||||||
 | 
								 */
 | 
				
			||||||
 | 
								if (amdgpu_ttm_adev(bo->tbo.bdev)->debug_vm_userptr) {
 | 
				
			||||||
 | 
									pr_err("Pid %d unmapped memory before destroying userptr at GPU addr 0x%llx\n",
 | 
				
			||||||
 | 
													pid_nr(process_info->pid), mem->va);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									// Send GPU VM fault to user space
 | 
				
			||||||
 | 
									kfd_signal_vm_fault_event_with_userptr(kfd_lookup_process_by_pid(process_info->pid),
 | 
				
			||||||
 | 
													mem->va);
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			ret = 0;
 | 
								ret = 0;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1177,6 +1177,25 @@ void kfd_signal_hw_exception_event(u32 pasid)
 | 
				
			||||||
	kfd_unref_process(p);
 | 
						kfd_unref_process(p);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void kfd_signal_vm_fault_event_with_userptr(struct kfd_process *p, uint64_t gpu_va)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct kfd_process_device *pdd;
 | 
				
			||||||
 | 
						struct kfd_hsa_memory_exception_data exception_data;
 | 
				
			||||||
 | 
						int i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						memset(&exception_data, 0, sizeof(exception_data));
 | 
				
			||||||
 | 
						exception_data.va = gpu_va;
 | 
				
			||||||
 | 
						exception_data.failure.NotPresent = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// Send VM seg fault to all kfd process device
 | 
				
			||||||
 | 
						for (i = 0; i < p->n_pdds; i++) {
 | 
				
			||||||
 | 
							pdd = p->pdds[i];
 | 
				
			||||||
 | 
							exception_data.gpu_id = pdd->user_gpu_id;
 | 
				
			||||||
 | 
							kfd_evict_process_device(pdd);
 | 
				
			||||||
 | 
							kfd_signal_vm_fault_event(pdd, NULL, &exception_data);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void kfd_signal_vm_fault_event(struct kfd_process_device *pdd,
 | 
					void kfd_signal_vm_fault_event(struct kfd_process_device *pdd,
 | 
				
			||||||
				struct kfd_vm_fault_info *info,
 | 
									struct kfd_vm_fault_info *info,
 | 
				
			||||||
				struct kfd_hsa_memory_exception_data *data)
 | 
									struct kfd_hsa_memory_exception_data *data)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1507,6 +1507,8 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
 | 
				
			||||||
int kfd_get_num_events(struct kfd_process *p);
 | 
					int kfd_get_num_events(struct kfd_process *p);
 | 
				
			||||||
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
 | 
					int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void kfd_signal_vm_fault_event_with_userptr(struct kfd_process *p, uint64_t gpu_va);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void kfd_signal_vm_fault_event(struct kfd_process_device *pdd,
 | 
					void kfd_signal_vm_fault_event(struct kfd_process_device *pdd,
 | 
				
			||||||
				struct kfd_vm_fault_info *info,
 | 
									struct kfd_vm_fault_info *info,
 | 
				
			||||||
				struct kfd_hsa_memory_exception_data *data);
 | 
									struct kfd_hsa_memory_exception_data *data);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue