drm/amdgpu: Generate bad page threshold cper records

Generate a CPER record when the bad page threshold is exceeded and
commit it to the CPER ring.

v2: return -ENOMEM instead of false
v2: check return value of fill section function

Signed-off-by: Xiang Liu <xiang.liu@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Xiang Liu, 2025-02-11 19:45:52 +08:00, committed by Alex Deucher
commit f9d35b945c (parent 4058e7cbfd)
3 changed files with 28 additions and 1 deletion
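
For context, a minimal sketch of how the new helper is intended to be called, assuming a hypothetical wrapper around the point where the driver decides the bad page threshold has been exceeded (in this patch the real caller is amdgpu_dpm_send_rma_reason(), shown in the last hunk). The helper itself allocates a CPER entry, fills its header and bad page threshold section, and writes it to the CPER ring:

/* Illustrative only: report_bp_threshold_exceeded() is a made-up wrapper,
 * not part of the patch; the call and the warning mirror the diff below.
 */
static void report_bp_threshold_exceeded(struct amdgpu_device *adev)
{
	if (amdgpu_cper_generate_bp_threshold_record(adev))
		dev_warn(adev->dev,
			 "fail to generate bad page threshold cper records\n");
}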

drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c

@@ -207,7 +207,7 @@ int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev
 						 NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
 
 	amdgpu_cper_entry_fill_section_desc(adev, section_desc, true, false,
-					    CPER_SEV_FATAL, RUNTIME, NONSTD_SEC_LEN,
+					    CPER_SEV_NUM, RUNTIME, NONSTD_SEC_LEN,
 					    NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
 
 	section->hdr.valid_bits.err_info_cnt = 1;
@@ -308,6 +308,28 @@ int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev,
 	return 0;
 }
 
+int amdgpu_cper_generate_bp_threshold_record(struct amdgpu_device *adev)
+{
+	struct cper_hdr *bp_threshold = NULL;
+	struct amdgpu_ring *ring = &adev->cper.ring_buf;
+	int ret;
+
+	bp_threshold = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_BP_THRESHOLD, 1);
+	if (!bp_threshold) {
+		dev_err(adev->dev, "fail to alloc cper entry for bad page threshold record\n");
+		return -ENOMEM;
+	}
+
+	amdgpu_cper_entry_fill_hdr(adev, bp_threshold, AMDGPU_CPER_TYPE_BP_THRESHOLD, CPER_SEV_NUM);
+	ret = amdgpu_cper_entry_fill_bad_page_threshold_section(adev, bp_threshold, 0);
+	if (ret)
+		return ret;
+
+	amdgpu_cper_ring_write(ring, bp_threshold, bp_threshold->record_length);
+
+	return 0;
+}
+
 static enum cper_error_severity amdgpu_aca_err_type_to_cper_sev(struct amdgpu_device *adev,
 								enum aca_error_type aca_err_type)
 {

drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h

@@ -95,6 +95,8 @@ int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev,
 int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev,
 				    struct aca_banks *banks,
 				    uint16_t bank_count);
+/* Bad page threshold is encoded into separated cper entry */
+int amdgpu_cper_generate_bp_threshold_record(struct amdgpu_device *adev);
 void amdgpu_cper_ring_write(struct amdgpu_ring *ring,
 			    void *src, int count);
 int amdgpu_cper_init(struct amdgpu_device *adev);

drivers/gpu/drm/amd/pm/amdgpu_dpm.c

@@ -716,6 +716,9 @@ int amdgpu_dpm_send_rma_reason(struct amdgpu_device *adev)
 	ret = smu_send_rma_reason(smu);
 	mutex_unlock(&adev->pm.mutex);
 
+	if (amdgpu_cper_generate_bp_threshold_record(adev))
+		dev_warn(adev->dev, "fail to generate bad page threshold cper records\n");
+
 	return ret;
 }
 
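
Design note: in amdgpu_dpm_send_rma_reason(), a failure to generate the CPER record only produces a dev_warn() and does not overwrite ret, so the result of smu_send_rma_reason() is still returned even when no bad page threshold record could be committed to the CPER ring.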