Mirror of https://github.com/torvalds/linux.git, synced 2025-11-04 02:30:34 +02:00
			
		
		
		
	drm/amdgpu: Generate bad page threshold cper records
Generate a CPER record when the bad page threshold is exceeded and commit it to the CPER ring.

v2: return -ENOMEM instead of false
v2: check the return value of the section fill function

Signed-off-by: Xiang Liu <xiang.liu@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
		
							parent
							
								
									4058e7cbfd
								
							
						
					
					
						commit
						f9d35b945c
					
				
					 3 changed files with 28 additions and 1 deletion
				
			
		| 
						 | 
					@ -207,7 +207,7 @@ int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev
 | 
				
			||||||
		   NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
 | 
							   NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	amdgpu_cper_entry_fill_section_desc(adev, section_desc, true, false,
 | 
						amdgpu_cper_entry_fill_section_desc(adev, section_desc, true, false,
 | 
				
			||||||
					    CPER_SEV_FATAL, RUNTIME, NONSTD_SEC_LEN,
 | 
										    CPER_SEV_NUM, RUNTIME, NONSTD_SEC_LEN,
 | 
				
			||||||
					    NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
 | 
										    NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	section->hdr.valid_bits.err_info_cnt = 1;
 | 
						section->hdr.valid_bits.err_info_cnt = 1;
 | 
				
			||||||
| 
						 | 
					@ -308,6 +308,28 @@ int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev,
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int amdgpu_cper_generate_bp_threshold_record(struct amdgpu_device *adev)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct cper_hdr *bp_threshold = NULL;
 | 
				
			||||||
 | 
						struct amdgpu_ring *ring = &adev->cper.ring_buf;
 | 
				
			||||||
 | 
						int ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						bp_threshold = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_BP_THRESHOLD, 1);
 | 
				
			||||||
 | 
						if (!bp_threshold) {
 | 
				
			||||||
 | 
							dev_err(adev->dev, "fail to alloc cper entry for bad page threshold record\n");
 | 
				
			||||||
 | 
							return -ENOMEM;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						amdgpu_cper_entry_fill_hdr(adev, bp_threshold, AMDGPU_CPER_TYPE_BP_THRESHOLD, CPER_SEV_NUM);
 | 
				
			||||||
 | 
						ret = amdgpu_cper_entry_fill_bad_page_threshold_section(adev, bp_threshold, 0);
 | 
				
			||||||
 | 
						if (ret)
 | 
				
			||||||
 | 
							return ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						amdgpu_cper_ring_write(ring, bp_threshold, bp_threshold->record_length);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static enum cper_error_severity amdgpu_aca_err_type_to_cper_sev(struct amdgpu_device *adev,
 | 
					static enum cper_error_severity amdgpu_aca_err_type_to_cper_sev(struct amdgpu_device *adev,
 | 
				
			||||||
								enum aca_error_type aca_err_type)
 | 
													enum aca_error_type aca_err_type)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -95,6 +95,8 @@ int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev,
 | 
				
			||||||
int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev,
 | 
					int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev,
 | 
				
			||||||
				    struct aca_banks *banks,
 | 
									    struct aca_banks *banks,
 | 
				
			||||||
				    uint16_t bank_count);
 | 
									    uint16_t bank_count);
 | 
				
			||||||
 | 
					/* Bad page threshold is encoded into separated cper entry */
 | 
				
			||||||
 | 
					int amdgpu_cper_generate_bp_threshold_record(struct amdgpu_device *adev);
 | 
				
			||||||
void amdgpu_cper_ring_write(struct amdgpu_ring *ring,
 | 
					void amdgpu_cper_ring_write(struct amdgpu_ring *ring,
 | 
				
			||||||
			void *src, int count);
 | 
								void *src, int count);
 | 
				
			||||||
int amdgpu_cper_init(struct amdgpu_device *adev);
 | 
					int amdgpu_cper_init(struct amdgpu_device *adev);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -716,6 +716,9 @@ int amdgpu_dpm_send_rma_reason(struct amdgpu_device *adev)
 | 
				
			||||||
	ret = smu_send_rma_reason(smu);
 | 
						ret = smu_send_rma_reason(smu);
 | 
				
			||||||
	mutex_unlock(&adev->pm.mutex);
 | 
						mutex_unlock(&adev->pm.mutex);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (amdgpu_cper_generate_bp_threshold_record(adev))
 | 
				
			||||||
 | 
							dev_warn(adev->dev, "fail to generate bad page threshold cper records\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return ret;
 | 
						return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue