Mirror of https://github.com/torvalds/linux.git (synced 2025-11-04 02:30:34 +02:00)
drm/amdgpu: rework TLB flushing

Instead of tracking the VM updates through the dependencies, just use a
sequence counter for page table updates which indicates the need to
flush the TLB. This reduces the need to flush the TLB drastically.

v2: squash in NULL check fix (Christian)

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent e997b82745
commit 5255e146c9

6 changed files with 76 additions and 32 deletions
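The idea in a nutshell: instead of making each submission depend on page-table update fences to learn when a TLB flush is due, the VM carries a monotonically increasing sequence counter that is bumped whenever a page table update completes. A VMID whose recorded value lags the counter needs a flush before it can be used; if the counter has not moved, no flush is needed. A minimal sketch of the scheme in plain C11 (hypothetical names and a deliberately simplified model, not the amdgpu API):

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdint.h>

	/* Simplified model: one counter per VM, one snapshot per VMID. */
	struct vm {
		atomic_uint_fast64_t tlb_seq;	/* bumped on every page table update */
	};

	struct vmid {
		uint64_t flushed_updates;	/* tlb_seq value at the last flush */
	};

	/* A page table update completed: future submissions must flush. */
	static void vm_tables_updated(struct vm *vm)
	{
		atomic_fetch_add(&vm->tlb_seq, 1);
	}

	/* On command submission: flush only if the counter has moved. */
	static bool vmid_needs_flush(struct vmid *id, struct vm *vm)
	{
		uint64_t seq = atomic_load(&vm->tlb_seq);

		if (id->flushed_updates == seq)
			return false;

		id->flushed_updates = seq;	/* record the flush we are about to do */
		return true;
	}

Since most submissions leave the page tables untouched, the counter rarely moves and the flush is usually skipped, which is where the "drastically" reduced flushing in the commit message comes from.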
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

@@ -810,7 +810,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
-	r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
+	r = amdgpu_sync_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
 	if (r)
 		return r;
 
@@ -821,7 +821,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 		if (r)
 			return r;
 
-		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
+		r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
 		if (r)
 			return r;
 	}
@@ -840,7 +840,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 		if (r)
 			return r;
 
-		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
+		r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
 		if (r)
 			return r;
 	}
@@ -853,7 +853,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
-	r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
+	r = amdgpu_sync_fence(&p->job->sync, vm->last_update);
 	if (r)
 		return r;
 
drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c

@@ -277,7 +277,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
 	unsigned vmhub = ring->funcs->vmhub;
 	uint64_t fence_context = adev->fence_context + ring->idx;
 	bool needs_flush = vm->use_cpu_for_update;
-	uint64_t updates = sync->last_vm_update;
+	uint64_t updates = amdgpu_vm_tlb_seq(vm);
 	int r;
 
 	*id = vm->reserved_vmid[vmhub];
@@ -338,7 +338,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
 	unsigned vmhub = ring->funcs->vmhub;
 	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 	uint64_t fence_context = adev->fence_context + ring->idx;
-	uint64_t updates = sync->last_vm_update;
+	uint64_t updates = amdgpu_vm_tlb_seq(vm);
 	int r;
 
 	job->vm_needs_flush = vm->use_cpu_for_update;
@@ -426,7 +426,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 			if (r)
 				goto error;
 
-			id->flushed_updates = sync->last_vm_update;
+			id->flushed_updates = amdgpu_vm_tlb_seq(vm);
 			job->vm_needs_flush = true;
 		}
 
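Taken together, these three hunks change what a VMID's flushed_updates means: it used to be a dma_fence sequence number that amdgpu_sync_vm_fence() had accumulated into sync->last_vm_update, and it is now a snapshot of the VM's TLB sequence counter. The shape of the flush decision stays the same; condensed from the amdgpu_vmid_grab_used() hunk above (locking, fence-context checks and error handling omitted, so this is a sketch rather than compilable code):

	uint64_t updates = amdgpu_vm_tlb_seq(vm);
	bool needs_flush = vm->use_cpu_for_update;

	/* A VMID can be reused without a flush only if it has already
	 * seen the current page table generation. */
	if ((*id)->flushed_updates < updates)
		needs_flush = true;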
drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c

@@ -51,7 +51,6 @@ static struct kmem_cache *amdgpu_sync_slab;
 void amdgpu_sync_create(struct amdgpu_sync *sync)
 {
 	hash_init(sync->fences);
-	sync->last_vm_update = 0;
 }
 
 /**
@@ -171,23 +170,6 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
 	return 0;
 }
 
-/**
- * amdgpu_sync_vm_fence - remember to sync to this VM fence
- *
- * @sync: sync object to add fence to
- * @fence: the VM fence to add
- *
- * Add the fence to the sync object and remember it as VM update.
- */
-int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
-{
-	if (!fence)
-		return 0;
-
-	sync->last_vm_update = max(sync->last_vm_update, fence->seqno);
-	return amdgpu_sync_fence(sync, fence);
-}
-
 /* Determine based on the owner and mode if we should sync to a fence or not */
 static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
 				   enum amdgpu_sync_mode mode,
@@ -376,8 +358,6 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
 		}
 	}
 
-	clone->last_vm_update = source->last_vm_update;
-
 	return 0;
 }
 
drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h

@@ -43,12 +43,10 @@ enum amdgpu_sync_mode {
  */
 struct amdgpu_sync {
 	DECLARE_HASHTABLE(fences, 4);
-	uint64_t	last_vm_update;
 };
 
 void amdgpu_sync_create(struct amdgpu_sync *sync);
 int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f);
-int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence);
 int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 		     struct dma_resv *resv, enum amdgpu_sync_mode mode,
 		     void *owner);
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

@@ -88,6 +88,21 @@ struct amdgpu_prt_cb {
 	struct dma_fence_cb cb;
 };
 
+/**
+ * amdgpu_vm_tlb_seq_cb - Helper to increment the TLB flush sequence
+ */
+struct amdgpu_vm_tlb_seq_cb {
+	/**
+	 * @vm: pointer to the amdgpu_vm structure to set the fence sequence on
+	 */
+	struct amdgpu_vm *vm;
+
+	/**
+	 * @cb: callback
+	 */
+	struct dma_fence_cb cb;
+};
+
 /**
  * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping
  *
@@ -760,6 +775,23 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
 	return r;
 }
 
+/**
+ * amdgpu_vm_tlb_seq_cb - make sure to increment tlb sequence
+ * @fence: unused
+ * @cb: the callback structure
+ *
+ * Increments the tlb sequence to make sure that future CS execute a VM flush.
+ */
+static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
+				 struct dma_fence_cb *cb)
+{
+	struct amdgpu_vm_tlb_seq_cb *tlb_cb;
+
+	tlb_cb = container_of(cb, typeof(*tlb_cb), cb);
+	atomic64_inc(&tlb_cb->vm->tlb_seq);
+	kfree(tlb_cb);
+}
+
 /**
  * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
  *
@@ -795,6 +827,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 				bool *table_freed)
 {
 	struct amdgpu_vm_update_params params;
+	struct amdgpu_vm_tlb_seq_cb *tlb_cb;
 	struct amdgpu_res_cursor cursor;
 	enum amdgpu_sync_mode sync_mode;
 	int r, idx;
@@ -802,6 +835,12 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
 		return -ENODEV;
 
+	tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL);
+	if (!tlb_cb) {
+		r = -ENOMEM;
+		goto error_unlock;
+	}
+
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
 	params.vm = vm;
@@ -820,7 +859,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	amdgpu_vm_eviction_lock(vm);
 	if (vm->evicting) {
 		r = -EBUSY;
-		goto error_unlock;
+		goto error_free;
 	}
 
 	if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
@@ -833,7 +872,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 	r = vm->update_funcs->prepare(&params, resv, sync_mode);
 	if (r)
-		goto error_unlock;
+		goto error_free;
 
 	amdgpu_res_first(pages_addr ? NULL : res, offset,
 			 (last - start + 1) * AMDGPU_GPU_PAGE_SIZE, &cursor);
@@ -882,7 +921,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		tmp = start + num_entries;
 		r = amdgpu_vm_ptes_update(&params, start, tmp, addr, flags);
 		if (r)
-			goto error_unlock;
+			goto error_free;
 
 		amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE);
 		start = tmp;
@@ -890,9 +929,21 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 	r = vm->update_funcs->commit(&params, fence);
 
+	if (!unlocked && (!(flags & AMDGPU_PTE_VALID) || params.table_freed)) {
+		tlb_cb->vm = vm;
+		if (!fence || !*fence ||
+		    dma_fence_add_callback(*fence, &tlb_cb->cb,
+					   amdgpu_vm_tlb_seq_cb))
+			amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
+		tlb_cb = NULL;
+	}
+
 	if (table_freed)
 		*table_freed = *table_freed || params.table_freed;
 
+error_free:
+	kfree(tlb_cb);
+
 error_unlock:
 	amdgpu_vm_eviction_unlock(vm);
 	drm_dev_exit(idx);
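Two details in the amdgpu_vm.c hunks are worth spelling out. First, the callback structure is allocated up front, before the page tables are touched, so the commit path cannot fail on allocation afterwards; any earlier failure releases it through the new error_free label, and kfree(NULL) is a no-op, which keeps the shared cleanup path simple. Second, dma_fence_add_callback() returns an error when the fence has already signaled, so the code invokes amdgpu_vm_tlb_seq_cb() directly in that case, and likewise when there is no fence at all; the !fence || !*fence test is the NULL-check fix referenced by the v2 note in the commit message. Either way the sequence is bumped exactly once. An annotated restatement of that pattern (kernel-style sketch, not compilable standalone):

	if (!fence || !*fence ||
	    dma_fence_add_callback(*fence, &tlb_cb->cb,
				   amdgpu_vm_tlb_seq_cb))
		/* no fence, or it already signaled: bump the sequence now */
		amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);

	/* the callback now owns and frees tlb_cb; clearing the local
	 * pointer makes the kfree() under error_free a no-op */
	tlb_cb = NULL;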
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

@@ -284,6 +284,9 @@ struct amdgpu_vm {
 	struct drm_sched_entity	immediate;
 	struct drm_sched_entity	delayed;
 
+	/* Last finished delayed update */
+	atomic64_t		tlb_seq;
+
 	/* Last unlocked submission to the scheduler entities */
 	struct dma_fence	*last_unlocked;
 
@@ -478,4 +481,16 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
 void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
 #endif
 
+/**
+ * amdgpu_vm_tlb_seq - return tlb flush sequence number
+ * @vm: the amdgpu_vm structure to query
+ *
+ * Returns the tlb flush sequence number which indicates that the VM TLBs needs
+ * to be invalidated whenever the sequence number change.
+ */
+static inline uint64_t amdgpu_vm_tlb_seq(struct amdgpu_vm *vm)
+{
+	return atomic64_read(&vm->tlb_seq);
+}
+
 #endif
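The inline accessor keeps tlb_seq behind the atomic64 API while giving the rest of the driver a plain integer to compare against. A minimal hypothetical consumer (tlb_is_stale and last_flushed_seq are illustrative names, not amdgpu code):

	/* Hypothetical helper: has this VM's page table generation moved
	 * past the value we last flushed at? */
	static inline bool tlb_is_stale(struct amdgpu_vm *vm,
					uint64_t last_flushed_seq)
	{
		return amdgpu_vm_tlb_seq(vm) != last_flushed_seq;
	}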