mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	drm/amdgpu: add ras event id support for ACA
add ras event id support for ACA. Signed-off-by: Yang Wang <kevinyang.wang@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Reviewed-by: Tao Zhou <tao.zhou1@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
		
							parent
							
								
									bd15bf742f
								
							
						
					
					
						commit
						31fd330b97
					
				
					 3 changed files with 23 additions and 19 deletions
				
			
		| 
						 | 
				
			
			@ -116,20 +116,22 @@ static struct aca_regs_dump {
 | 
			
		|||
	{"CONTROL_MASK",	ACA_REG_IDX_CTL_MASK},
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank)
 | 
			
		||||
static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank,
 | 
			
		||||
			      struct ras_query_context *qctx)
 | 
			
		||||
{
 | 
			
		||||
	u64 event_id = qctx ? qctx->event_id : 0ULL;
 | 
			
		||||
	int i;
 | 
			
		||||
 | 
			
		||||
	dev_info(adev->dev, HW_ERR "Accelerator Check Architecture events logged\n");
 | 
			
		||||
	RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n");
 | 
			
		||||
	/* plus 1 for output format, e.g: ACA[08/08]: xxxx */
 | 
			
		||||
	for (i = 0; i < ARRAY_SIZE(aca_regs); i++)
 | 
			
		||||
		dev_info(adev->dev, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n",
 | 
			
		||||
		RAS_EVENT_LOG(adev, event_id, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n",
 | 
			
		||||
			      idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type,
 | 
			
		||||
				       int start, int count,
 | 
			
		||||
				       struct aca_banks *banks)
 | 
			
		||||
				       struct aca_banks *banks, struct ras_query_context *qctx)
 | 
			
		||||
{
 | 
			
		||||
	struct amdgpu_aca *aca = &adev->aca;
 | 
			
		||||
	const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
 | 
			
		||||
| 
						 | 
				
			
			@ -165,7 +167,7 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_
 | 
			
		|||
 | 
			
		||||
		bank.type = type;
 | 
			
		||||
 | 
			
		||||
		aca_smu_bank_dump(adev, i, count, &bank);
 | 
			
		||||
		aca_smu_bank_dump(adev, i, count, &bank, qctx);
 | 
			
		||||
 | 
			
		||||
		ret = aca_banks_add_bank(banks, &bank);
 | 
			
		||||
		if (ret)
 | 
			
		||||
| 
						 | 
				
			
			@ -390,7 +392,7 @@ static bool aca_bank_should_update(struct amdgpu_device *adev, enum aca_smu_type
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type,
 | 
			
		||||
			    bank_handler_t handler, void *data)
 | 
			
		||||
			    bank_handler_t handler, struct ras_query_context *qctx, void *data)
 | 
			
		||||
{
 | 
			
		||||
	struct amdgpu_aca *aca = &adev->aca;
 | 
			
		||||
	struct aca_banks banks;
 | 
			
		||||
| 
						 | 
				
			
			@ -412,7 +414,7 @@ static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type,
 | 
			
		|||
 | 
			
		||||
	aca_banks_init(&banks);
 | 
			
		||||
 | 
			
		||||
	ret = aca_smu_get_valid_aca_banks(adev, type, 0, count, &banks);
 | 
			
		||||
	ret = aca_smu_get_valid_aca_banks(adev, type, 0, count, &banks, qctx);
 | 
			
		||||
	if (ret)
 | 
			
		||||
		goto err_release_banks;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -489,7 +491,7 @@ static int aca_log_aca_error(struct aca_handle *handle, enum aca_error_type type
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, enum aca_error_type type,
 | 
			
		||||
				struct ras_err_data *err_data)
 | 
			
		||||
				struct ras_err_data *err_data, struct ras_query_context *qctx)
 | 
			
		||||
{
 | 
			
		||||
	enum aca_smu_type smu_type;
 | 
			
		||||
	int ret;
 | 
			
		||||
| 
						 | 
				
			
			@ -507,7 +509,7 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h
 | 
			
		|||
	}
 | 
			
		||||
 | 
			
		||||
	/* udpate aca bank to aca source error_cache first */
 | 
			
		||||
	ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, NULL);
 | 
			
		||||
	ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, qctx, NULL);
 | 
			
		||||
	if (ret)
 | 
			
		||||
		return ret;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -523,7 +525,7 @@ static bool aca_handle_is_valid(struct aca_handle *handle)
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle,
 | 
			
		||||
			      enum aca_error_type type, void *data)
 | 
			
		||||
			      enum aca_error_type type, void *data, void *qctx)
 | 
			
		||||
{
 | 
			
		||||
	struct ras_err_data *err_data = (struct ras_err_data *)data;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -536,7 +538,8 @@ int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *han
 | 
			
		|||
	if (!(BIT(type) & handle->mask))
 | 
			
		||||
		return  0;
 | 
			
		||||
 | 
			
		||||
	return __aca_get_error_data(adev, handle, type, err_data);
 | 
			
		||||
	return __aca_get_error_data(adev, handle, type, err_data,
 | 
			
		||||
				    (struct ras_query_context *)qctx);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void aca_error_init(struct aca_error *aerr, enum aca_error_type type)
 | 
			
		||||
| 
						 | 
				
			
			@ -853,7 +856,7 @@ static int aca_dump_show(struct seq_file *m, enum aca_smu_type type)
 | 
			
		|||
		.idx = 0,
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
	return aca_banks_update(adev, type, handler_aca_bank_dump, (void *)&context);
 | 
			
		||||
	return aca_banks_update(adev, type, handler_aca_bank_dump, NULL, (void *)&context);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int aca_dump_ce_show(struct seq_file *m, void *unused)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -198,7 +198,7 @@ int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle,
 | 
			
		|||
			  const char *name, const struct aca_info *aca_info, void *data);
 | 
			
		||||
void amdgpu_aca_remove_handle(struct aca_handle *handle);
 | 
			
		||||
int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle,
 | 
			
		||||
				     enum aca_error_type type, void *data);
 | 
			
		||||
			      enum aca_error_type type, void *data, void *qctx);
 | 
			
		||||
int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en);
 | 
			
		||||
void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root);
 | 
			
		||||
int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1269,7 +1269,8 @@ int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk)
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
 | 
			
		||||
					 enum aca_error_type type, struct ras_err_data *err_data)
 | 
			
		||||
					 enum aca_error_type type, struct ras_err_data *err_data,
 | 
			
		||||
					 struct ras_query_context *qctx)
 | 
			
		||||
{
 | 
			
		||||
	struct ras_manager *obj;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1277,7 +1278,7 @@ static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu
 | 
			
		|||
	if (!obj)
 | 
			
		||||
		return -EINVAL;
 | 
			
		||||
 | 
			
		||||
	return amdgpu_aca_get_error_data(adev, &obj->aca_handle, type, err_data);
 | 
			
		||||
	return amdgpu_aca_get_error_data(adev, &obj->aca_handle, type, err_data, qctx);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr,
 | 
			
		||||
| 
						 | 
				
			
			@ -1334,15 +1335,15 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
 | 
			
		|||
		}
 | 
			
		||||
	} else {
 | 
			
		||||
		if (amdgpu_aca_is_enabled(adev)) {
 | 
			
		||||
			ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_UE, err_data);
 | 
			
		||||
			ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_UE, err_data, qctx);
 | 
			
		||||
			if (ret)
 | 
			
		||||
				return ret;
 | 
			
		||||
 | 
			
		||||
			ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_CE, err_data);
 | 
			
		||||
			ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_CE, err_data, qctx);
 | 
			
		||||
			if (ret)
 | 
			
		||||
				return ret;
 | 
			
		||||
 | 
			
		||||
			ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_DEFERRED, err_data);
 | 
			
		||||
			ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_DEFERRED, err_data, qctx);
 | 
			
		||||
			if (ret)
 | 
			
		||||
				return ret;
 | 
			
		||||
		} else {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue