forked from mirrors/linux
		
	drm/amdgpu: Add a new flag to AMDGPU_CTX_OP_QUERY_STATE2
Add AMDGPU_CTX_QUERY2_FLAGS_RAS_CE and AMDGPU_CTX_QUERY2_FLAGS_RAS_UE, which indicate whether any RAS error was detected between the previous query and this one. Signed-off-by: xinhui pan <xinhui.pan@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
		
							parent
							
								
									791c47694f
								
							
						
					
					
						commit
						ae363a212b
					
				
					 3 changed files with 22 additions and 0 deletions
				
			
		| 
						 | 
				
			
			@ -26,6 +26,7 @@
 | 
			
		|||
#include <drm/drm_auth.h>
 | 
			
		||||
#include "amdgpu.h"
 | 
			
		||||
#include "amdgpu_sched.h"
 | 
			
		||||
#include "amdgpu_ras.h"
 | 
			
		||||
 | 
			
		||||
#define to_amdgpu_ctx_entity(e)	\
 | 
			
		||||
	container_of((e), struct amdgpu_ctx_entity, entity)
 | 
			
		||||
| 
						 | 
				
			
			@ -344,6 +345,7 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
 | 
			
		|||
{
 | 
			
		||||
	struct amdgpu_ctx *ctx;
 | 
			
		||||
	struct amdgpu_ctx_mgr *mgr;
 | 
			
		||||
	uint32_t ras_counter;
 | 
			
		||||
 | 
			
		||||
	if (!fpriv)
 | 
			
		||||
		return -EINVAL;
 | 
			
		||||
| 
						 | 
				
			
			@ -368,6 +370,21 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
 | 
			
		|||
	if (atomic_read(&ctx->guilty))
 | 
			
		||||
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
 | 
			
		||||
 | 
			
		||||
	/*query ue count*/
 | 
			
		||||
	ras_counter = amdgpu_ras_query_error_count(adev, false);
 | 
			
		||||
	/* the RAS counter is monotonically increasing */
 | 
			
		||||
	if (ras_counter != ctx->ras_counter_ue) {
 | 
			
		||||
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
 | 
			
		||||
		ctx->ras_counter_ue = ras_counter;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/*query ce count*/
 | 
			
		||||
	ras_counter = amdgpu_ras_query_error_count(adev, true);
 | 
			
		||||
	if (ras_counter != ctx->ras_counter_ce) {
 | 
			
		||||
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
 | 
			
		||||
		ctx->ras_counter_ce = ras_counter;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	mutex_unlock(&mgr->lock);
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -49,6 +49,8 @@ struct amdgpu_ctx {
 | 
			
		|||
	enum drm_sched_priority		override_priority;
 | 
			
		||||
	struct mutex			lock;
 | 
			
		||||
	atomic_t			guilty;
 | 
			
		||||
	uint32_t			ras_counter_ce;
 | 
			
		||||
	uint32_t			ras_counter_ue;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct amdgpu_ctx_mgr {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -210,6 +210,9 @@ union drm_amdgpu_bo_list {
 | 
			
		|||
#define AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST (1<<1)
 | 
			
		||||
/* indicates that some job from this context has caused a GPU hang */
 | 
			
		||||
#define AMDGPU_CTX_QUERY2_FLAGS_GUILTY   (1<<2)
 | 
			
		||||
/* indicate that RAS has detected errors since the previous query */
 | 
			
		||||
#define AMDGPU_CTX_QUERY2_FLAGS_RAS_CE   (1<<3)
 | 
			
		||||
#define AMDGPU_CTX_QUERY2_FLAGS_RAS_UE   (1<<4)
 | 
			
		||||
 | 
			
		||||
/* Context priority level */
 | 
			
		||||
#define AMDGPU_CTX_PRIORITY_UNSET       -2048
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue