drm/amdgpu: track ring state associated with a fence

We need to know the wptr and sequence number associated
with a fence so that we can re-emit the unprocessed ring
contents after a ring reset.  Pre-allocate storage for
the ring buffer contents and add helpers to save and
re-emit the unprocessed state after the queue is reset.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Author: Alex Deucher
Date:   2025-05-27 21:35:00 -04:00
Commit: 77cc0da39c (parent bc29c03b28)

6 changed files with 195 additions and 3 deletions
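
For orientation, a minimal usage sketch (not part of this patch): an engine-specific
ring ->reset callback could be built on the helpers added below.  The callback name
and my_engine_reset_hw_queue() are hypothetical placeholders; only the
amdgpu_ring_reset_helper_begin()/_end() calls come from this change.

	/* Hypothetical per-queue reset callback using the new helpers. */
	static int my_ring_reset(struct amdgpu_ring *ring, unsigned int vmid,
				 struct amdgpu_fence *guilty_fence)
	{
		int r;

		/* stop the scheduler and back up the unprocessed, non-guilty commands */
		amdgpu_ring_reset_helper_begin(ring, guilty_fence);

		/* IP-specific hardware queue reset (placeholder) */
		r = my_engine_reset_hw_queue(ring, vmid);
		if (r)
			return r;

		/* test the ring, force-complete the guilty fence, re-emit the
		 * backed-up commands and restart the scheduler
		 */
		return amdgpu_ring_reset_helper_end(ring, guilty_fence);
	}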

drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c

@@ -120,6 +120,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
 		am_fence = kzalloc(sizeof(*am_fence), GFP_KERNEL);
 		if (!am_fence)
 			return -ENOMEM;
+		am_fence->context = 0;
 	} else {
 		am_fence = af;
 	}
@@ -127,6 +128,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
 	am_fence->ring = ring;
 
 	seq = ++ring->fence_drv.sync_seq;
+	am_fence->seq = seq;
 	if (af) {
 		dma_fence_init(fence, &amdgpu_job_fence_ops,
 			       &ring->fence_drv.lock,
@@ -141,6 +143,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
 
 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
 			       seq, flags | AMDGPU_FENCE_FLAG_INT);
+	amdgpu_fence_save_wptr(fence);
 	pm_runtime_get_noresume(adev_to_drm(adev)->dev);
 	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
 	if (unlikely(rcu_dereference_protected(*ptr, 1))) {
@@ -253,6 +256,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
 
 	do {
 		struct dma_fence *fence, **ptr;
+		struct amdgpu_fence *am_fence;
 
 		++last_seq;
 		last_seq &= drv->num_fences_mask;
@@ -265,6 +269,12 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
 		if (!fence)
 			continue;
 
+		/* Save the wptr in the fence driver so we know what the last processed
+		 * wptr was.  This is required for re-emitting the ring state for
+		 * queues that are reset but are not guilty and thus have no guilty fence.
+		 */
+		am_fence = container_of(fence, struct amdgpu_fence, base);
+		drv->signalled_wptr = am_fence->wptr;
 		dma_fence_signal(fence);
 		dma_fence_put(fence);
 		pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
@@ -727,6 +737,86 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring)
 	amdgpu_fence_process(ring);
 }
 
+/**
+ * Kernel queue reset handling
+ *
+ * The driver can reset individual queues for most engines, but those queues
+ * may contain work from multiple contexts.  Resetting the queue will lose
+ * all of that state.  In order to minimize the collateral damage, the driver
+ * will save the ring contents which are not associated with the guilty
+ * context prior to resetting the queue.  After resetting the queue, the queue
+ * contents from the other contexts are re-emitted to the ring so that they
+ * can be processed by the engine.  To handle this, we save the queue's write
+ * pointer (wptr) in the fences associated with each context.  If we get a
+ * queue timeout, we can then use the wptrs from the fences to determine
+ * which data needs to be saved out of the queue's ring buffer.
+ */
+
+/**
+ * amdgpu_fence_driver_guilty_force_completion - force signal of specified sequence
+ *
+ * @fence: fence of the ring to signal
+ *
+ */
+void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence)
+{
+	dma_fence_set_error(&fence->base, -ETIME);
+	amdgpu_fence_write(fence->ring, fence->seq);
+	amdgpu_fence_process(fence->ring);
+}
+
+void amdgpu_fence_save_wptr(struct dma_fence *fence)
+{
+	struct amdgpu_fence *am_fence = container_of(fence, struct amdgpu_fence, base);
+
+	am_fence->wptr = am_fence->ring->wptr;
+}
+
+static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring,
+						   u64 start_wptr, u32 end_wptr)
+{
+	unsigned int first_idx = start_wptr & ring->buf_mask;
+	unsigned int last_idx = end_wptr & ring->buf_mask;
+	unsigned int i;
+
+	/* Backup the contents of the ring buffer. */
+	for (i = first_idx; i != last_idx; ++i, i &= ring->buf_mask)
+		ring->ring_backup[ring->ring_backup_entries_to_copy++] = ring->ring[i];
+}
+
+void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
+					     struct amdgpu_fence *guilty_fence)
+{
+	struct dma_fence *unprocessed;
+	struct dma_fence __rcu **ptr;
+	struct amdgpu_fence *fence;
+	u64 wptr, i, seqno;
+
+	seqno = amdgpu_fence_read(ring);
+	wptr = ring->fence_drv.signalled_wptr;
+	ring->ring_backup_entries_to_copy = 0;
+
+	for (i = seqno + 1; i <= ring->fence_drv.sync_seq; ++i) {
+		ptr = &ring->fence_drv.fences[i & ring->fence_drv.num_fences_mask];
+		rcu_read_lock();
+		unprocessed = rcu_dereference(*ptr);
+
+		if (unprocessed && !dma_fence_is_signaled(unprocessed)) {
+			fence = container_of(unprocessed, struct amdgpu_fence, base);
+
+			/* save everything if the ring is not guilty, otherwise
+			 * just save the content from other contexts.
+			 */
+			if (!guilty_fence || (fence->context != guilty_fence->context))
+				amdgpu_ring_backup_unprocessed_command(ring, wptr,
+								       fence->wptr);
+			wptr = fence->wptr;
+		}
+		rcu_read_unlock();
+	}
+}
+
 /*
  * Common fence implementation
  */
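
An aside (not part of the patch): the masked, wrap-around copy in
amdgpu_ring_backup_unprocessed_command() can be illustrated with the following
self-contained userspace sketch; the ring size and wptr values are invented for
the example.

	#include <stdio.h>

	/* A 16-entry ring (buf_mask = 0xf): copying from wptr 14 to wptr 19
	 * (which has wrapped to index 3) touches entries 14, 15, 0, 1, 2.
	 */
	int main(void)
	{
		unsigned int ring[16], backup[16], entries = 0;
		const unsigned int buf_mask = 0xf;
		unsigned long long start_wptr = 14; /* wptr of the last signalled fence */
		unsigned int end_wptr = 19;         /* wptr saved in the next unprocessed fence */
		unsigned int first_idx = start_wptr & buf_mask;
		unsigned int last_idx = end_wptr & buf_mask;
		unsigned int i;

		for (i = 0; i < 16; i++)
			ring[i] = i;            /* dummy ring contents */

		/* same loop shape as the kernel helper */
		for (i = first_idx; i != last_idx; ++i, i &= buf_mask)
			backup[entries++] = ring[i];

		printf("copied %u dwords\n", entries);  /* prints: copied 5 dwords */
		return 0;
	}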

drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c

@@ -139,7 +139,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 	int vmid = AMDGPU_JOB_GET_VMID(job);
 	bool need_pipe_sync = false;
 	unsigned int cond_exec;
 	unsigned int i;
 	int r = 0;
@@ -156,6 +155,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 		gds_va = job->gds_va;
 		init_shadow = job->init_shadow;
 		af = &job->hw_fence;
+		/* Save the context of the job for reset handling.
+		 * The driver needs this so it can skip the ring
+		 * contents for guilty contexts.
+		 */
+		af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0;
 	} else {
 		vm = NULL;
 		fence_ctx = 0;
@@ -307,8 +311,17 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 	    ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
 		ring->funcs->emit_wave_limit(ring, false);
 
+	/* Save the wptr associated with this fence.
+	 * This must be last for resets to work properly
+	 * as we need to save the wptr associated with this
+	 * fence so we know what ring contents to backup
+	 * after we reset the queue.
+	 */
+	amdgpu_fence_save_wptr(*f);
+
 	amdgpu_ring_ib_end(ring);
 	amdgpu_ring_commit(ring);
+
 	return 0;
 }

drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

@@ -90,8 +90,8 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
 	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
 	struct amdgpu_job *job = to_amdgpu_job(s_job);
 	struct drm_wedge_task_info *info = NULL;
-	struct amdgpu_task_info *ti;
 	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_task_info *ti;
 	int idx, r;
 
 	if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
@@ -134,7 +134,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
 	} else if (amdgpu_gpu_recovery && ring->funcs->reset) {
 		dev_err(adev->dev, "Starting %s ring reset\n",
 			s_job->sched->name);
-		r = amdgpu_ring_reset(ring, job->vmid, NULL);
+		r = amdgpu_ring_reset(ring, job->vmid, &job->hw_fence);
 		if (!r) {
 			atomic_inc(&ring->adev->gpu_reset_counter);
 			dev_err(adev->dev, "Ring %s reset succeeded\n",

drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c

@@ -99,6 +99,29 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
 	return 0;
 }
 
+/**
+ * amdgpu_ring_alloc_reemit - allocate space on the ring buffer for reemit
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @ndw: number of dwords to allocate in the ring buffer
+ *
+ * Allocate @ndw dwords in the ring buffer (all asics).
+ * Doesn't check the max_dw limit as we may be re-emitting
+ * several submissions.
+ */
+static void amdgpu_ring_alloc_reemit(struct amdgpu_ring *ring, unsigned int ndw)
+{
+	/* Align requested size with padding so unlock_commit can
+	 * pad safely */
+	ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
+
+	ring->count_dw = ndw;
+	ring->wptr_old = ring->wptr;
+
+	if (ring->funcs->begin_use)
+		ring->funcs->begin_use(ring);
+}
+
 /**
  * amdgpu_ring_insert_nop - insert NOP packets
  *
 * @ring: amdgpu_ring structure holding ring information
@@ -333,6 +356,12 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 	/* Initialize cached_rptr to 0 */
 	ring->cached_rptr = 0;
 
+	if (!ring->ring_backup) {
+		ring->ring_backup = kvzalloc(ring->ring_size, GFP_KERNEL);
+		if (!ring->ring_backup)
+			return -ENOMEM;
+	}
+
 	/* Allocate ring buffer */
 	if (ring->ring_obj == NULL) {
 		r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE,
@@ -342,6 +371,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 				    (void **)&ring->ring);
 		if (r) {
 			dev_err(adev->dev, "(%d) ring create failed\n", r);
+			kvfree(ring->ring_backup);
 			return r;
 		}
 		amdgpu_ring_clear_ring(ring);
@@ -385,6 +415,8 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 	amdgpu_bo_free_kernel(&ring->ring_obj,
 			      &ring->gpu_addr,
 			      (void **)&ring->ring);
+	kvfree(ring->ring_backup);
+	ring->ring_backup = NULL;
 
 	dma_fence_put(ring->vmid_wait);
 	ring->vmid_wait = NULL;
@@ -753,3 +785,38 @@ bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring)
 
 	return true;
 }
+
+void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring,
+				    struct amdgpu_fence *guilty_fence)
+{
+	/* Stop the scheduler to prevent anybody else from touching the ring buffer. */
+	drm_sched_wqueue_stop(&ring->sched);
+	/* back up the non-guilty commands */
+	amdgpu_ring_backup_unprocessed_commands(ring, guilty_fence);
+}
+
+int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
+				 struct amdgpu_fence *guilty_fence)
+{
+	unsigned int i;
+	int r;
+
+	/* verify that the ring is functional */
+	r = amdgpu_ring_test_ring(ring);
+	if (r)
+		return r;
+
+	/* signal the fence of the bad job */
+	if (guilty_fence)
+		amdgpu_fence_driver_guilty_force_completion(guilty_fence);
+
+	/* Re-emit the non-guilty commands */
+	if (ring->ring_backup_entries_to_copy) {
+		amdgpu_ring_alloc_reemit(ring, ring->ring_backup_entries_to_copy);
+		for (i = 0; i < ring->ring_backup_entries_to_copy; i++)
+			amdgpu_ring_write(ring, ring->ring_backup[i]);
+		amdgpu_ring_commit(ring);
+	}
+
+	/* Start the scheduler again */
+	drm_sched_wqueue_start(&ring->sched);
+	return 0;
+}

drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

@@ -118,6 +118,7 @@ struct amdgpu_fence_driver {
 	/* sync_seq is protected by ring emission lock */
 	uint32_t		sync_seq;
 	atomic_t		last_seq;
+	u64			signalled_wptr;
 	bool			initialized;
 	struct amdgpu_irq_src	*irq_src;
 	unsigned		irq_type;
@@ -141,6 +142,12 @@ struct amdgpu_fence {
 	/* RB, DMA, etc. */
 	struct amdgpu_ring	*ring;
 	ktime_t			start_timestamp;
+
+	/* wptr for the fence for resets */
+	u64			wptr;
+	/* fence context for resets */
+	u64			context;
+	uint32_t		seq;
 };
 
 extern const struct drm_sched_backend_ops amdgpu_sched_ops;
@@ -148,6 +155,8 @@ extern const struct drm_sched_backend_ops amdgpu_sched_ops;
 void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
 void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error);
 void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
+void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence);
+void amdgpu_fence_save_wptr(struct dma_fence *fence);
 
 int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
 int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
@@ -284,6 +293,9 @@ struct amdgpu_ring {
 	struct amdgpu_bo	*ring_obj;
 	uint32_t		*ring;
+	/* backups for resets */
+	uint32_t		*ring_backup;
+	unsigned int		ring_backup_entries_to_copy;
 	unsigned		rptr_offs;
 	u64			rptr_gpu_addr;
 	volatile u32		*rptr_cpu_addr;
@@ -550,4 +562,10 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev);
 void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
 int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
 bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring);
+void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
+					     struct amdgpu_fence *guilty_fence);
+void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring,
+				    struct amdgpu_fence *guilty_fence);
+int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
+				 struct amdgpu_fence *guilty_fence);
 #endif

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

@@ -765,6 +765,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 	bool cleaner_shader_needed = false;
 	bool pasid_mapping_needed = false;
 	struct dma_fence *fence = NULL;
+	struct amdgpu_fence *af;
 	unsigned int patch;
 	int r;
 
@@ -830,6 +831,9 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 		r = amdgpu_fence_emit(ring, &fence, NULL, 0);
 		if (r)
 			return r;
+		/* this is part of the job's context */
+		af = container_of(fence, struct amdgpu_fence, base);
+		af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0;
 	}
 
 	if (vm_flush_needed) {