mirror of https://github.com/torvalds/linux.git, synced 2025-11-04 02:30:34 +02:00
Merge tag 'amd-drm-next-5.14-2021-05-19' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

amd-drm-next-5.14-2021-05-19:

amdgpu:
- Aldebaran updates
- More LTTPR display work
- Vangogh updates
- SDMA 5.x GCR fixes
- RAS fixes
- PCIe ASPM support
- Modifier fixes
- Enable TMZ on Renoir
- Buffer object code cleanup
- Display overlay fixes
- Initial support for multiple eDP panels
- Initial SR-IOV support for Aldebaran
- DP link training refactor
- Misc code cleanups and bug fixes
- SMU regression fixes for variable sized arrays
- MAINTAINERS fixes for amdgpu

amdkfd:
- Initial SR-IOV support for Aldebaran
- Topology fixes
- Initial HMM SVM support
- Misc code cleanups and bug fixes

radeon:
- Misc code cleanups and bug fixes
- SMU regression fixes for variable sized arrays
- Flickering fix for Oland with multiple 4K displays

UAPI:
- amdgpu: Drop AMDGPU_GEM_CREATE_SHADOW flag. This was always a kernel
  internal flag and userspace use of it has always been blocked. It's no
  longer needed so remove it.
- amdkgd: HMM SVM support
  Overview: https://patchwork.freedesktop.org/series/85562/
  Proposed userspace: https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/tree/fxkamd/hmm-wip

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210520031258.231896-1-alexander.deucher@amd.com
This commit is contained in: commit c99c4d0ca5
159 changed files with 8889 additions and 1568 deletions
MAINTAINERS
@@ -878,7 +878,7 @@ M:	Harry Wentland <harry.wentland@amd.com>
 M:	Leo Li <sunpeng.li@amd.com>
 L:	amd-gfx@lists.freedesktop.org
 S:	Supported
-T:	git git://people.freedesktop.org/~agd5f/linux
+T:	git https://gitlab.freedesktop.org/agd5f/linux.git
 F:	drivers/gpu/drm/amd/display/
 
 AMD FAM15H PROCESSOR POWER MONITORING DRIVER
@@ -954,7 +954,7 @@ AMD POWERPLAY
 M:	Evan Quan <evan.quan@amd.com>
 L:	amd-gfx@lists.freedesktop.org
 S:	Supported
-T:	git git://people.freedesktop.org/~agd5f/linux
+T:	git https://gitlab.freedesktop.org/agd5f/linux.git
 F:	drivers/gpu/drm/amd/pm/powerplay/
 
 AMD SEATTLE DEVICE TREE SUPPORT
drivers/gpu/drm/amd/amdgpu/Makefile
@@ -56,7 +56,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
 	amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
 	amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
-	amdgpu_fw_attestation.o amdgpu_securedisplay.o
+	amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_hdp.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1075,7 +1075,8 @@ struct amdgpu_device {
 
 	atomic_t			throttling_logging_enabled;
 	struct ratelimit_state		throttling_logging_rs;
-	uint32_t			ras_features;
+	uint32_t                        ras_hw_enabled;
+	uint32_t                        ras_enabled;
 
 	bool                            in_pci_err_recovery;
 	struct pci_saved_state          *pci_state;
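The split of ras_features into ras_hw_enabled and ras_enabled separates what the hardware and firmware report as RAS-capable from the subset of blocks the driver actually turns on. A hedged sketch of the kind of per-block test that now keys off ras_enabled (simplified, not the driver's amdgpu_ras_is_supported(); kernel build context assumed):

/* Sketch only: per-block RAS test against the new ras_enabled mask. */
static bool sketch_ras_block_enabled(struct amdgpu_device *adev,
				     enum amdgpu_ras_block block)
{
	return adev->ras_enabled & BIT(block);
}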
drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -76,7 +76,7 @@ struct amdgpu_atif {
 /**
  * amdgpu_atif_call - call an ATIF method
  *
- * @handle: acpi handle
+ * @atif: acpi handle
  * @function: the ATIF function to execute
  * @params: ATIF function params
  *
@@ -166,7 +166,6 @@ static void amdgpu_atif_parse_functions(struct amdgpu_atif_functions *f, u32 mas
 /**
  * amdgpu_atif_verify_interface - verify ATIF
  *
- * @handle: acpi handle
  * @atif: amdgpu atif struct
  *
  * Execute the ATIF_FUNCTION_VERIFY_INTERFACE ATIF function
@@ -240,8 +239,7 @@ static acpi_handle amdgpu_atif_probe_handle(acpi_handle dhandle)
 /**
  * amdgpu_atif_get_notification_params - determine notify configuration
  *
- * @handle: acpi handle
- * @n: atif notification configuration struct
+ * @atif: acpi handle
  *
  * Execute the ATIF_FUNCTION_GET_SYSTEM_PARAMETERS ATIF function
  * to determine if a notifier is used and if so which one
@@ -304,7 +302,7 @@ static int amdgpu_atif_get_notification_params(struct amdgpu_atif *atif)
 /**
  * amdgpu_atif_query_backlight_caps - get min and max backlight input signal
  *
- * @handle: acpi handle
+ * @atif: acpi handle
  *
  * Execute the QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS ATIF function
  * to determine the acceptable range of backlight values
@@ -363,7 +361,7 @@ static int amdgpu_atif_query_backlight_caps(struct amdgpu_atif *atif)
 /**
  * amdgpu_atif_get_sbios_requests - get requested sbios event
  *
- * @handle: acpi handle
+ * @atif: acpi handle
  * @req: atif sbios request struct
  *
  * Execute the ATIF_FUNCTION_GET_SYSTEM_BIOS_REQUESTS ATIF function
@@ -899,6 +897,8 @@ void amdgpu_acpi_fini(struct amdgpu_device *adev)
 /**
  * amdgpu_acpi_is_s0ix_supported
  *
+ * @adev: amdgpu_device_pointer
+ *
  * returns true if supported, false if not.
  */
 bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev)
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -75,6 +75,7 @@ struct amdgpu_amdkfd_fence {
 	struct mm_struct *mm;
 	spinlock_t lock;
 	char timeline_name[TASK_COMM_LEN];
+	struct svm_range_bo *svm_bo;
 };
 
 struct amdgpu_kfd_dev {
@@ -148,7 +149,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
 					int queue_bit);
 
 struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
-						struct mm_struct *mm);
+				struct mm_struct *mm,
+				struct svm_range_bo *svm_bo);
 #if IS_ENABLED(CONFIG_HSA_AMD)
 bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
 struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
@@ -234,22 +236,27 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s
 	})
 
 /* GPUVM API */
+#define drm_priv_to_vm(drm_priv)					\
+	(&((struct amdgpu_fpriv *)					\
+		((struct drm_file *)(drm_priv))->driver_priv)->vm)
+
 int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
 					struct file *filp, u32 pasid,
-					void **vm, void **process_info,
+					void **process_info,
 					struct dma_fence **ef);
-void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm);
-uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
+void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *drm_priv);
+uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		struct kgd_dev *kgd, uint64_t va, uint64_t size,
-		void *vm, struct kgd_mem **mem,
+		void *drm_priv, struct kgd_mem **mem,
 		uint64_t *offset, uint32_t flags);
 int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
-		struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size);
+		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv,
+		uint64_t *size);
 int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
-		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
+		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
 int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
-		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
+		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
 int amdgpu_amdkfd_gpuvm_sync_memory(
 		struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
 int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
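The drm_priv_to_vm() macro added above is the pivot of this refactor: the KFD interfaces stop passing a bare struct amdgpu_vm * and instead pass the DRM file private (drm_priv), deriving the VM on demand. A minimal standalone sketch of the pointer chain the macro walks, with the structs trimmed to the members involved (the real definitions live in <drm/drm_file.h> and amdgpu.h; this is illustrative, not driver code):

struct amdgpu_vm { unsigned int pasid; };		/* trimmed */
struct amdgpu_fpriv { struct amdgpu_vm vm; };		/* trimmed */
struct drm_file { void *driver_priv; };			/* trimmed */

static struct amdgpu_vm *sketch_drm_priv_to_vm(void *drm_priv)
{
	struct drm_file *file = drm_priv;
	struct amdgpu_fpriv *fpriv = file->driver_priv;

	return &fpriv->vm;	/* what drm_priv_to_vm(drm_priv) expands to */
}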
@@ -260,7 +267,7 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
 					      struct kfd_vm_fault_info *info);
 int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
 				      struct dma_buf *dmabuf,
-				      uint64_t va, void *vm,
+				      uint64_t va, void *drm_priv,
 				      struct kgd_mem **mem, uint64_t *size,
 				      uint64_t *mmap_offset);
 int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
@@ -270,6 +277,7 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
 void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
 				struct amdgpu_vm *vm);
 void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
+void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
 #else
 static inline
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -25,6 +25,7 @@
 #include <linux/firmware.h>
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_arcturus.h"
 #include "sdma0/sdma0_4_2_2_offset.h"
 #include "sdma0/sdma0_4_2_2_sh_mask.h"
 #include "sdma1/sdma1_4_2_2_offset.h"
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -28,6 +28,7 @@
 #include <linux/slab.h>
 #include <linux/sched/mm.h>
 #include "amdgpu_amdkfd.h"
+#include "kfd_svm.h"
 
 static const struct dma_fence_ops amdkfd_fence_ops;
 static atomic_t fence_seq = ATOMIC_INIT(0);
@@ -60,7 +61,8 @@ static atomic_t fence_seq = ATOMIC_INIT(0);
  */
 
 struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
-						       struct mm_struct *mm)
+				struct mm_struct *mm,
+				struct svm_range_bo *svm_bo)
 {
 	struct amdgpu_amdkfd_fence *fence;
 
@@ -73,7 +75,7 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
 	fence->mm = mm;
 	get_task_comm(fence->timeline_name, current);
 	spin_lock_init(&fence->lock);
-
+	fence->svm_bo = svm_bo;
 	dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock,
 		   context, atomic_inc_return(&fence_seq));
 
@@ -111,6 +113,8 @@ static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f)
  *  a KFD BO and schedules a job to move the BO.
  *  If fence is already signaled return true.
  *  If fence is not signaled schedule a evict KFD process work item.
+ *
+ *  @f: dma_fence
  */
 static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
 {
@@ -122,16 +126,20 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
 	if (dma_fence_is_signaled(f))
 		return true;
 
-	if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
-		return true;
-
+	if (!fence->svm_bo) {
+		if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
+			return true;
+	} else {
+		if (!svm_range_schedule_evict_svm_bo(fence))
+			return true;
+	}
 	return false;
 }
 
 /**
  * amdkfd_fence_release - callback that fence can be freed
  *
- * @fence: fence
+ * @f: dma_fence
  *
  * This function is called when the reference count becomes zero.
  * Drops the mm_struct reference and RCU schedules freeing up the fence.
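The third argument added to amdgpu_amdkfd_fence_create() selects which eviction path enable_signaling takes: a NULL svm_bo keeps the old behaviour of scheduling a full KFD process eviction, while a non-NULL svm_range_bo routes the trigger to the SVM range machinery instead. A hedged sketch of the calling convention (hypothetical wrapper names, kernel build context and amdgpu_amdkfd.h assumed):

/* Hypothetical wrappers, only to show the calling convention. */
static struct amdgpu_amdkfd_fence *
sketch_process_eviction_fence(u64 context, struct mm_struct *mm)
{
	/* NULL svm_bo: signaling evicts the whole KFD process */
	return amdgpu_amdkfd_fence_create(context, mm, NULL);
}

static struct amdgpu_amdkfd_fence *
sketch_svm_eviction_fence(u64 context, struct mm_struct *mm,
			  struct svm_range_bo *svm_bo)
{
	/* non-NULL svm_bo: signaling evicts just this SVM range BO */
	return amdgpu_amdkfd_fence_create(context, mm, svm_bo);
}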
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -719,7 +719,7 @@ static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
 }
 
 /**
- * @get_wave_count: Read device registers to get number of waves in flight for
+ * get_wave_count: Read device registers to get number of waves in flight for
  * a particular queue. The method also returns the VMID associated with the
  * queue.
  *
@@ -755,19 +755,19 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
 }
 
 /**
- * @kgd_gfx_v9_get_cu_occupancy: Reads relevant registers associated with each
+ * kgd_gfx_v9_get_cu_occupancy: Reads relevant registers associated with each
  * shader engine and aggregates the number of waves that are in flight for the
  * process whose pasid is provided as a parameter. The process could have ZERO
  * or more queues running and submitting waves to compute units.
  *
 * @kgd: Handle of device from which to get number of waves in flight
 * @pasid: Identifies the process for which this query call is invoked
- * @wave_cnt: Output parameter updated with number of waves in flight that
+ * @pasid_wave_cnt: Output parameter updated with number of waves in flight that
 * belong to process with given pasid
 * @max_waves_per_cu: Output parameter updated with maximum number of waves
 * possible per Compute Unit
 *
- * @note: It's possible that the device has too many queues (oversubscription)
+ * Note: It's possible that the device has too many queues (oversubscription)
 * in which case a VMID could be remapped to a different PASID. This could lead
 * to an inaccurate wave count. Following is a high-level sequence:
 *    Time T1: vmid = getVmid(); vmid is associated with Pasid P1
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -33,9 +33,6 @@
 #include <uapi/linux/kfd_ioctl.h>
 #include "amdgpu_xgmi.h"
 
-/* BO flag to indicate a KFD userptr BO */
-#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
-
 /* Userptr restore delay, just long enough to allow consecutive VM
  * changes to accumulate
  */
@@ -108,6 +105,11 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
 		(kfd_mem_limit.max_ttm_mem_limit >> 20));
 }
 
+void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
+{
+	kfd_mem_limit.system_mem_used += size;
+}
+
 /* Estimate page table size needed to represent a given memory size
  *
  * With 4KB pages, we need one 8 byte PTE for each 4KB of memory
@@ -217,7 +219,7 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
 	u32 domain = bo->preferred_domains;
 	bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
 
-	if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
+	if (bo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO) {
 		domain = AMDGPU_GEM_DOMAIN_CPU;
 		sg = false;
 	}
@@ -967,7 +969,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 
 		info->eviction_fence =
 			amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
-						   current->mm);
+						   current->mm,
+						   NULL);
 		if (!info->eviction_fence) {
 			pr_err("Failed to create eviction fence\n");
 			ret = -ENOMEM;
@@ -1036,15 +1039,19 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 
 int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
 					   struct file *filp, u32 pasid,
-					   void **vm, void **process_info,
+					   void **process_info,
 					   struct dma_fence **ef)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	struct drm_file *drm_priv = filp->private_data;
-	struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv;
-	struct amdgpu_vm *avm = &drv_priv->vm;
+	struct amdgpu_fpriv *drv_priv;
+	struct amdgpu_vm *avm;
 	int ret;
 
+	ret = amdgpu_file_to_fpriv(filp, &drv_priv);
+	if (ret)
+		return ret;
+	avm = &drv_priv->vm;
+
 	/* Already a compute VM? */
 	if (avm->process_info)
 		return -EINVAL;
@@ -1059,7 +1066,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
 	if (ret)
 		return ret;
 
-	*vm = (void *)avm;
+	amdgpu_vm_set_task_info(avm);
 
 	return 0;
 }
@@ -1100,15 +1107,17 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
 	}
 }
 
-void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
+void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *drm_priv)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+	struct amdgpu_vm *avm;
 
-	if (WARN_ON(!kgd || !vm))
+	if (WARN_ON(!kgd || !drm_priv))
 		return;
 
-	pr_debug("Releasing process vm %p\n", vm);
+	avm = drm_priv_to_vm(drm_priv);
+
+	pr_debug("Releasing process vm %p\n", avm);
 
 	/* The original pasid of amdgpu vm has already been
 	 * released during making a amdgpu vm to a compute vm
@@ -1119,9 +1128,9 @@ void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
 	amdgpu_vm_release_compute(adev, avm);
 }
 
-uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
+uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
 {
-	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
 	struct amdgpu_bo *pd = avm->root.base.bo;
 	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
 
@@ -1132,11 +1141,11 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
 
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		struct kgd_dev *kgd, uint64_t va, uint64_t size,
-		void *vm, struct kgd_mem **mem,
+		void *drm_priv, struct kgd_mem **mem,
 		uint64_t *offset, uint32_t flags)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
 	enum ttm_bo_type bo_type = ttm_bo_type_device;
 	struct sg_table *sg = NULL;
 	uint64_t user_addr = 0;
@@ -1216,6 +1225,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 			 domain_string(alloc_domain), ret);
 		goto err_bo_create;
 	}
+	ret = drm_vma_node_allow(&gobj->vma_node, drm_priv);
+	if (ret) {
+		pr_debug("Failed to allow vma node access. ret %d\n", ret);
+		goto err_node_allow;
+	}
 	bo = gem_to_amdgpu_bo(gobj);
 	if (bo_type == ttm_bo_type_sg) {
 		bo->tbo.sg = sg;
@@ -1224,7 +1238,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 	bo->kfd_bo = *mem;
 	(*mem)->bo = bo;
 	if (user_addr)
-		bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;
+		bo->flags |= AMDGPU_AMDKFD_CREATE_USERPTR_BO;
 
 	(*mem)->va = va;
 	(*mem)->domain = domain;
@@ -1245,6 +1259,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
 allocate_init_user_pages_failed:
 	remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
+	drm_vma_node_revoke(&gobj->vma_node, drm_priv);
+err_node_allow:
 	amdgpu_bo_unref(&bo);
 	/* Don't unreserve system mem limit twice */
 	goto err_reserve_limit;
@@ -1262,7 +1278,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 }
 
 int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
-		struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size)
+		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv,
+		uint64_t *size)
 {
 	struct amdkfd_process_info *process_info = mem->process_info;
 	unsigned long bo_size = mem->bo->tbo.base.size;
@@ -1339,6 +1356,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	}
 
 	/* Free the BO*/
+	drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
 	drm_gem_object_put(&mem->bo->tbo.base);
 	mutex_destroy(&mem->lock);
 	kfree(mem);
@@ -1347,10 +1365,10 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 }
 
 int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
-		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
+		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
 	int ret;
 	struct amdgpu_bo *bo;
 	uint32_t domain;
@@ -1391,9 +1409,9 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 	pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n",
 			mem->va,
 			mem->va + bo_size * (1 + mem->aql_queue),
-			vm, domain_string(domain));
+			avm, domain_string(domain));
 
-	ret = reserve_bo_and_vm(mem, vm, &ctx);
+	ret = reserve_bo_and_vm(mem, avm, &ctx);
 	if (unlikely(ret))
 		goto out;
 
@@ -1437,7 +1455,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 	}
 
 	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
-		if (entry->bo_va->base.vm == vm && !entry->is_mapped) {
+		if (entry->bo_va->base.vm == avm && !entry->is_mapped) {
 			pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
 					entry->va, entry->va + bo_size,
 					entry);
@@ -1449,7 +1467,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 				goto map_bo_to_gpuvm_failed;
 			}
 
-			ret = vm_update_pds(vm, ctx.sync);
+			ret = vm_update_pds(avm, ctx.sync);
 			if (ret) {
 				pr_err("Failed to update page directories\n");
 				goto map_bo_to_gpuvm_failed;
@@ -1485,11 +1503,11 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 }
 
 int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
-		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
+		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	struct amdkfd_process_info *process_info =
-		((struct amdgpu_vm *)vm)->process_info;
+	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
+	struct amdkfd_process_info *process_info = avm->process_info;
 	unsigned long bo_size = mem->bo->tbo.base.size;
 	struct kfd_bo_va_list *entry;
 	struct bo_vm_reservation_context ctx;
@@ -1497,7 +1515,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
 
 	mutex_lock(&mem->lock);
 
-	ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx);
+	ret = reserve_bo_and_cond_vms(mem, avm, BO_VM_MAPPED, &ctx);
 	if (unlikely(ret))
 		goto out;
 	/* If no VMs were reserved, it means the BO wasn't actually mapped */
@@ -1506,17 +1524,17 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
 		goto unreserve_out;
 	}
 
-	ret = vm_validate_pt_pd_bos((struct amdgpu_vm *)vm);
+	ret = vm_validate_pt_pd_bos(avm);
 	if (unlikely(ret))
 		goto unreserve_out;
 
 	pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n",
 		mem->va,
 		mem->va + bo_size * (1 + mem->aql_queue),
-		vm);
+		avm);
 
 	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
-		if (entry->bo_va->base.vm == vm && entry->is_mapped) {
+		if (entry->bo_va->base.vm == avm && entry->is_mapped) {
 			pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
 					entry->va,
 					entry->va + bo_size,
@@ -1642,14 +1660,15 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
 
 int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
 				      struct dma_buf *dma_buf,
-				      uint64_t va, void *vm,
+				      uint64_t va, void *drm_priv,
 				      struct kgd_mem **mem, uint64_t *size,
 				      uint64_t *mmap_offset)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
 	struct drm_gem_object *obj;
 	struct amdgpu_bo *bo;
-	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
 	int ret;
 
 	if (dma_buf->ops != &amdgpu_dmabuf_ops)
 		/* Can't handle non-graphics buffers */
@@ -1670,6 +1689,12 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
 	if (!*mem)
 		return -ENOMEM;
 
+	ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
+	if (ret) {
+		kfree(mem);
+		return ret;
+	}
+
 	if (size)
 		*size = amdgpu_bo_size(bo);
 
@@ -2135,7 +2160,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	 */
 	new_fence = amdgpu_amdkfd_fence_create(
 				process_info->eviction_fence->base.context,
-				process_info->eviction_fence->mm);
+				process_info->eviction_fence->mm,
+				NULL);
 	if (!new_fence) {
 		pr_err("Failed to create eviction fence\n");
 		ret = -ENOMEM;
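The drm_vma_node_allow()/drm_vma_node_revoke() calls threaded through the alloc, free, and dmabuf-import paths above gate userspace mmap access to the BO's offset node per DRM file. The rule the hunks follow: every successful allow must be balanced by exactly one revoke with the same tag before the last GEM reference drops. An illustrative helper under that assumption (kernel build context; not a driver function):

#include <drm/drm_gem.h>
#include <drm/drm_vma_manager.h>

/* Sketch of the allow/revoke pairing used above. */
static int sketch_export_bo(struct drm_gem_object *gobj,
			    struct drm_file *drm_priv)
{
	int ret = drm_vma_node_allow(&gobj->vma_node, drm_priv);

	if (ret)
		return ret;	/* nothing to revoke on this path */

	/* ... BO is usable/mmap-able by drm_priv; on teardown: */
	drm_vma_node_revoke(&gobj->vma_node, drm_priv);
	drm_gem_object_put(gobj);
	return 0;
}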
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -672,7 +672,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 }
 
 /**
- * cs_parser_fini() - clean parser states
+ * amdgpu_cs_parser_fini() - clean parser states
 * @parser:	parser structure holding parsing context.
 * @error:	error number
 * @backoff:	indicator to backoff the reservation
@@ -1488,7 +1488,7 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
 }
 
 /**
- * amdgpu_cs_wait_all_fence - wait on all fences to signal
+ * amdgpu_cs_wait_all_fences - wait on all fences to signal
 *
 * @adev: amdgpu device
 * @filp: file private
@@ -1639,7 +1639,7 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
 }
 
 /**
- * amdgpu_cs_find_bo_va - find bo_va for VM address
+ * amdgpu_cs_find_mapping - find bo_va for VM address
 *
 * @parser: command submission parser context
 * @addr: VM address
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2856,7 +2856,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
 		AMD_IP_BLOCK_TYPE_IH,
 	};
 
-	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
+	for (i = 0; i < adev->num_ip_blocks; i++) {
 		int j;
 		struct amdgpu_ip_block *block;
 
@@ -3179,8 +3179,8 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
 	int ret = 0;
 
 	/*
-	 * By default timeout for non compute jobs is 10000.
-	 * And there is no timeout enforced on compute jobs.
+	 * By default timeout for non compute jobs is 10000
+	 * and 60000 for compute jobs.
 	 * In SR-IOV or passthrough mode, timeout for compute
 	 * jobs are 60000 by default.
 	 */
@@ -3189,10 +3189,8 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
 	if (amdgpu_sriov_vf(adev))
 		adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
 					msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
-	else if (amdgpu_passthrough(adev))
-		adev->compute_timeout =  msecs_to_jiffies(60000);
 	else
-		adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
+		adev->compute_timeout =  msecs_to_jiffies(60000);
 
 	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
 		while ((timeout_setting = strsep(&input, ",")) &&
@@ -3741,7 +3739,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 {
 	struct amdgpu_device *adev = drm_to_adev(dev);
-	int r;
 
 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
 		return 0;
@@ -3756,7 +3753,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 
 	amdgpu_ras_suspend(adev);
 
-	r = amdgpu_device_ip_suspend_phase1(adev);
+	amdgpu_device_ip_suspend_phase1(adev);
 
 	if (!adev->in_s0ix)
 		amdgpu_amdkfd_suspend(adev, adev->in_runpm);
@@ -3766,7 +3763,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 
 	amdgpu_fence_driver_suspend(adev);
 
-	r = amdgpu_device_ip_suspend_phase2(adev);
+	amdgpu_device_ip_suspend_phase2(adev);
 	/* evict remaining vram memory
 	 * This second call to evict vram is to evict the gart page table
 	 * using the CPU.
@@ -5124,7 +5121,8 @@ int amdgpu_device_baco_enter(struct drm_device *dev)
 	if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
 		return -ENOTSUPP;
 
-	if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)
+	if (ras && adev->ras_enabled &&
+	    adev->nbio.funcs->enable_doorbell_interrupt)
 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
 
 	return amdgpu_dpm_baco_enter(adev);
@@ -5143,7 +5141,8 @@ int amdgpu_device_baco_exit(struct drm_device *dev)
 	if (ret)
 		return ret;
 
-	if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)
+	if (ras && adev->ras_enabled &&
+	    adev->nbio.funcs->enable_doorbell_interrupt)
 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
 
 	return 0;
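The timeout hunks above change the bare-metal default for compute jobs from an infinite wait (MAX_SCHEDULE_TIMEOUT) to 60 seconds, matching what SR-IOV and passthrough already used. A condensed sketch of the resulting default selection (illustrative; the real logic, including the lockup_timeout override parsing, lives in amdgpu_device_get_job_timeout_settings()):

#include <linux/jiffies.h>

/* Sketch of the new default compute-timeout selection. */
static long sketch_default_compute_timeout(bool sriov, bool pp_one_vf)
{
	if (sriov)
		return pp_one_vf ? msecs_to_jiffies(60000)
				 : msecs_to_jiffies(10000);
	return msecs_to_jiffies(60000);	/* was MAX_SCHEDULE_TIMEOUT */
}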
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -288,9 +288,9 @@ module_param_named(msi, amdgpu_msi, int, 0444);
 *   for SDMA and Video.
 *
 * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video)
- * jobs is 10000. And there is no timeout enforced on compute jobs.
+ * jobs is 10000. The timeout for compute is 60000.
 */
-MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and infinity timeout for compute jobs; "
+MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and 60000 for compute jobs; "
 		"for passthrough or sriov, 10000 for all jobs."
 		" 0: keep default value. negative: infinity timeout), "
 		"format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; "
@@ -641,7 +641,8 @@ module_param_named(mes, amdgpu_mes, int, 0444);
 
 /**
 * DOC: noretry (int)
- * Disable retry faults in the GPU memory controller.
+ * Disable XNACK retry in the SQ by default on GFXv9 hardware. On ASICs that
+ * do not support per-process XNACK this also disables retry page faults.
 * (0 = retry enabled, 1 = retry disabled, -1 auto (default))
 */
 MODULE_PARM_DESC(noretry,
@@ -1186,6 +1187,7 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x7408, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
 	{0x1002, 0x740C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
 	{0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
+	{0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
 
 	{0, 0, 0}
 };
@@ -1598,17 +1600,15 @@ static int amdgpu_pmops_runtime_idle(struct device *dev)
 	if (amdgpu_device_has_dc_support(adev)) {
 		struct drm_crtc *crtc;
 
-		drm_modeset_lock_all(drm_dev);
-
 		drm_for_each_crtc(crtc, drm_dev) {
-			if (crtc->state->active) {
+			drm_modeset_lock(&crtc->mutex, NULL);
+			if (crtc->state->active)
 				ret = -EBUSY;
+			drm_modeset_unlock(&crtc->mutex);
+			if (ret < 0)
 				break;
-			}
 		}
 
-		drm_modeset_unlock_all(drm_dev);
-
 	} else {
 		struct drm_connector *list_connector;
 		struct drm_connector_list_iter iter;
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -434,6 +434,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
 *
 * @ring: ring to init the fence driver on
 * @num_hw_submission: number of entries on the hardware queue
+ * @sched_score: optional score atomic shared with other schedulers
 *
 * Init the fence driver for the requested ring (all asics).
 * Helper function for amdgpu_fence_driver_init().
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -60,7 +60,7 @@
 */
 
 /**
- * amdgpu_dummy_page_init - init dummy page used by the driver
+ * amdgpu_gart_dummy_page_init - init dummy page used by the driver
 *
 * @adev: amdgpu_device pointer
 *
@@ -86,7 +86,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
 }
 
 /**
- * amdgpu_dummy_page_fini - free dummy page used by the driver
+ * amdgpu_gart_dummy_page_fini - free dummy page used by the driver
 *
 * @adev: amdgpu_device pointer
 *
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -332,6 +332,17 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
 			mc->agp_size >> 20, mc->agp_start, mc->agp_end);
 }
 
+/**
+ * amdgpu_gmc_fault_key - get hash key from vm fault address and pasid
+ *
+ * @addr: 48 bit physical address, page aligned (36 significant bits)
+ * @pasid: 16 bit process address space identifier
+ */
+static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid)
+{
+	return addr << 4 | pasid;
+}
+
 /**
 * amdgpu_gmc_filter_faults - filter VM faults
 *
@@ -348,8 +359,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
 			      uint16_t pasid, uint64_t timestamp)
 {
 	struct amdgpu_gmc *gmc = &adev->gmc;
-
-	uint64_t stamp, key = addr << 4 | pasid;
+	uint64_t stamp, key = amdgpu_gmc_fault_key(addr, pasid);
 	struct amdgpu_gmc_fault *fault;
 	uint32_t hash;
 
@@ -365,7 +375,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
 	while (fault->timestamp >= stamp) {
 		uint64_t tmp;
 
-		if (fault->key == key)
+		if (atomic64_read(&fault->key) == key)
 			return true;
 
 		tmp = fault->timestamp;
@@ -378,7 +388,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
 
 	/* Add the fault to the ring */
 	fault = &gmc->fault_ring[gmc->last_fault];
-	fault->key = key;
+	atomic64_set(&fault->key, key);
 	fault->timestamp = timestamp;
 
 	/* And update the hash */
@@ -387,6 +397,36 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
 	return false;
 }
 
+/**
+ * amdgpu_gmc_filter_faults_remove - remove address from VM faults filter
+ *
+ * @adev: amdgpu device structure
+ * @addr: address of the VM fault
+ * @pasid: PASID of the process causing the fault
+ *
+ * Remove the address from fault filter, then future vm fault on this address
+ * will pass to retry fault handler to recover.
+ */
+void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
+				     uint16_t pasid)
+{
+	struct amdgpu_gmc *gmc = &adev->gmc;
+	uint64_t key = amdgpu_gmc_fault_key(addr, pasid);
+	struct amdgpu_gmc_fault *fault;
+	uint32_t hash;
+	uint64_t tmp;
+
+	hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
+	fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
+	do {
+		if (atomic64_cmpxchg(&fault->key, key, 0) == key)
+			break;
+
+		tmp = fault->timestamp;
+		fault = &gmc->fault_ring[fault->next];
+	} while (fault->timestamp < tmp);
+}
+
 int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 {
 	int r;
@@ -415,6 +455,13 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 			return r;
 	}
 
+	if (adev->hdp.ras_funcs &&
+	    adev->hdp.ras_funcs->ras_late_init) {
+		r = adev->hdp.ras_funcs->ras_late_init(adev);
+		if (r)
+			return r;
+	}
+
 	return 0;
 }
 
@@ -426,11 +473,15 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
 
 	if (adev->mmhub.ras_funcs &&
 	    adev->mmhub.ras_funcs->ras_fini)
-		amdgpu_mmhub_ras_fini(adev);
+		adev->mmhub.ras_funcs->ras_fini(adev);
 
 	if (adev->gmc.xgmi.ras_funcs &&
 	    adev->gmc.xgmi.ras_funcs->ras_fini)
 		adev->gmc.xgmi.ras_funcs->ras_fini(adev);
+
+	if (adev->hdp.ras_funcs &&
+	    adev->hdp.ras_funcs->ras_fini)
+		adev->hdp.ras_funcs->ras_fini(adev);
 }
 
 	/*
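The key packing above deserves a quick worked example: a page-aligned address has its low 12 bits clear, so after the left shift by 4 the low 16 bits are zero and the 16-bit PASID can be ORed in without collision; 36 significant address bits plus 16 PASID bits give the 52-bit key the old bitfield stored. A self-contained sketch with illustrative values (the hash order of 8 is an assumption for this sketch; the driver uses AMDGPU_GMC_FAULT_HASH_ORDER):

#include <linux/hash.h>
#include <linux/types.h>

/* Illustrative only: mirrors amdgpu_gmc_fault_key() and the hash lookup. */
static u64 sketch_fault_key(u64 addr, u16 pasid)
{
	/* e.g. addr = 0x123456000 (page aligned), pasid = 0x8001:
	 * addr << 4 = 0x1234560000
	 * key       = 0x1234568001
	 */
	return addr << 4 | pasid;
}

static u32 sketch_fault_hash(u64 key)
{
	return hash_64(key, 8);	/* 8 assumed; AMDGPU_GMC_FAULT_HASH_ORDER in the driver */
}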
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -66,9 +66,9 @@ struct firmware;
 * GMC page fault information
 */
 struct amdgpu_gmc_fault {
-	uint64_t	timestamp;
+	uint64_t	timestamp:48;
 	uint64_t	next:AMDGPU_GMC_FAULT_RING_ORDER;
-	uint64_t	key:52;
+	atomic64_t	key;
 };
 
 /*
@@ -318,6 +318,8 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
 			     struct amdgpu_gmc *mc);
 bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
 			      uint16_t pasid, uint64_t timestamp);
+void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
+				     uint16_t pasid);
 int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
 void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
 int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
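The struct change is driven by the new removal path: a C bitfield cannot be updated atomically, so the 52-bit key moves into a full atomic64_t that amdgpu_gmc_filter_faults_remove() can clear with atomic64_cmpxchg() from another thread, while timestamp shrinks to a 48-bit field. A sketch of the resulting layout (illustrative; the ring-order width of 8 is an assumption here):

#include <linux/types.h>

struct sketch_gmc_fault {
	uint64_t	timestamp : 48;	/* 48 bits still fit the IH timestamps */
	uint64_t	next : 8;	/* AMDGPU_GMC_FAULT_RING_ORDER; 8 assumed */
	atomic64_t	key;		/* full word so cmpxchg can clear it */
};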
drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -24,7 +24,8 @@
 
 #include "amdgpu.h"
 
-static inline struct amdgpu_gtt_mgr *to_gtt_mgr(struct ttm_resource_manager *man)
+static inline struct amdgpu_gtt_mgr *
+to_gtt_mgr(struct ttm_resource_manager *man)
 {
 	return container_of(man, struct amdgpu_gtt_mgr, manager);
 }
@@ -43,12 +44,14 @@ struct amdgpu_gtt_node {
 * the GTT block, in bytes
 */
 static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev,
-		struct device_attribute *attr, char *buf)
+					      struct device_attribute *attr,
+					      char *buf)
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = drm_to_adev(ddev);
-	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
+	struct ttm_resource_manager *man;
 
+	man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
 	return sysfs_emit(buf, "%llu\n", man->size * PAGE_SIZE);
 }
 
@@ -61,12 +64,14 @@ static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev,
 * size of the GTT block, in bytes
 */
 static ssize_t amdgpu_mem_info_gtt_used_show(struct device *dev,
-		struct device_attribute *attr, char *buf)
+					     struct device_attribute *attr,
+					     char *buf)
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = drm_to_adev(ddev);
-	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
+	struct ttm_resource_manager *man;
 
+	man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
 	return sysfs_emit(buf, "%llu\n", amdgpu_gtt_mgr_usage(man));
 }
 
@@ -75,80 +80,6 @@ static DEVICE_ATTR(mem_info_gtt_total, S_IRUGO,
 static DEVICE_ATTR(mem_info_gtt_used, S_IRUGO,
 	           amdgpu_mem_info_gtt_used_show, NULL);
 
-static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func;
-/**
- * amdgpu_gtt_mgr_init - init GTT manager and DRM MM
- *
- * @adev: amdgpu_device pointer
- * @gtt_size: maximum size of GTT
- *
- * Allocate and initialize the GTT manager.
- */
-int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size)
-{
-	struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
-	struct ttm_resource_manager *man = &mgr->manager;
-	uint64_t start, size;
-	int ret;
-
-	man->use_tt = true;
-	man->func = &amdgpu_gtt_mgr_func;
-
-	ttm_resource_manager_init(man, gtt_size >> PAGE_SHIFT);
-
-	start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
-	size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
-	drm_mm_init(&mgr->mm, start, size);
-	spin_lock_init(&mgr->lock);
-	atomic64_set(&mgr->available, gtt_size >> PAGE_SHIFT);
-
-	ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_total);
-	if (ret) {
-		DRM_ERROR("Failed to create device file mem_info_gtt_total\n");
-		return ret;
-	}
-	ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_used);
-	if (ret) {
-		DRM_ERROR("Failed to create device file mem_info_gtt_used\n");
-		return ret;
-	}
-
-	ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager);
-	ttm_resource_manager_set_used(man, true);
-	return 0;
-}
-
-/**
- * amdgpu_gtt_mgr_fini - free and destroy GTT manager
- *
- * @adev: amdgpu_device pointer
- *
- * Destroy and free the GTT manager, returns -EBUSY if ranges are still
- * allocated inside it.
- */
-void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev)
-{
-	struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
-	struct ttm_resource_manager *man = &mgr->manager;
-	int ret;
-
-	ttm_resource_manager_set_used(man, false);
-
-	ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
-	if (ret)
-		return;
-
-	spin_lock(&mgr->lock);
-	drm_mm_takedown(&mgr->mm);
-	spin_unlock(&mgr->lock);
-
-	device_remove_file(adev->dev, &dev_attr_mem_info_gtt_total);
-	device_remove_file(adev->dev, &dev_attr_mem_info_gtt_used);
-
-	ttm_resource_manager_cleanup(man);
-	ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL);
-}
-
 /**
 * amdgpu_gtt_mgr_has_gart_addr - Check if mem has address space
 *
@@ -265,6 +196,13 @@ uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man)
 	return (result > 0 ? result : 0) * PAGE_SIZE;
 }
 
+/**
+ * amdgpu_gtt_mgr_recover - re-init gart
+ *
+ * @man: TTM memory type manager
+ *
+ * Re-init the gart for each known BO in the GTT.
+ */
 int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man)
 {
 	struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
@@ -311,3 +249,76 @@ static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func = {
 	.free = amdgpu_gtt_mgr_del,
 	.debug = amdgpu_gtt_mgr_debug
 };
+
+/**
+ * amdgpu_gtt_mgr_init - init GTT manager and DRM MM
+ *
+ * @adev: amdgpu_device pointer
+ * @gtt_size: maximum size of GTT
+ *
+ * Allocate and initialize the GTT manager.
+ */
+int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size)
+{
+	struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
+	struct ttm_resource_manager *man = &mgr->manager;
+	uint64_t start, size;
+	int ret;
+
+	man->use_tt = true;
+	man->func = &amdgpu_gtt_mgr_func;
+
+	ttm_resource_manager_init(man, gtt_size >> PAGE_SHIFT);
+
+	start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
+	size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
+	drm_mm_init(&mgr->mm, start, size);
+	spin_lock_init(&mgr->lock);
+	atomic64_set(&mgr->available, gtt_size >> PAGE_SHIFT);
+
+	ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_total);
+	if (ret) {
+		DRM_ERROR("Failed to create device file mem_info_gtt_total\n");
+		return ret;
+	}
+	ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_used);
+	if (ret) {
+		DRM_ERROR("Failed to create device file mem_info_gtt_used\n");
+		return ret;
+	}
+
+	ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager);
+	ttm_resource_manager_set_used(man, true);
+	return 0;
+}
+
+/**
+ * amdgpu_gtt_mgr_fini - free and destroy GTT manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Destroy and free the GTT manager, returns -EBUSY if ranges are still
+ * allocated inside it.
+ */
+void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev)
+{
+	struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
+	struct ttm_resource_manager *man = &mgr->manager;
+	int ret;
+
+	ttm_resource_manager_set_used(man, false);
+
+	ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
+	if (ret)
+		return;
+
+	spin_lock(&mgr->lock);
+	drm_mm_takedown(&mgr->mm);
+	spin_unlock(&mgr->lock);
+
+	device_remove_file(adev->dev, &dev_attr_mem_info_gtt_total);
+	device_remove_file(adev->dev, &dev_attr_mem_info_gtt_used);
+
+	ttm_resource_manager_cleanup(man);
+	ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL);
+}
		|||
							
								
								
									
										69
									
								
								drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										69
									
								
								drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,69 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+
+int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev)
+{
+	int r;
+	struct ras_ih_if ih_info = {
+		.cb = NULL,
+	};
+	struct ras_fs_if fs_info = {
+		.sysfs_name = "hdp_err_count",
+	};
+
+	if (!adev->hdp.ras_if) {
+		adev->hdp.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+		if (!adev->hdp.ras_if)
+			return -ENOMEM;
+		adev->hdp.ras_if->block = AMDGPU_RAS_BLOCK__HDP;
+		adev->hdp.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+		adev->hdp.ras_if->sub_block_index = 0;
+		strcpy(adev->hdp.ras_if->name, "hdp");
+	}
+	ih_info.head = fs_info.head = *adev->hdp.ras_if;
+	r = amdgpu_ras_late_init(adev, adev->hdp.ras_if,
+				 &fs_info, &ih_info);
+	if (r || !amdgpu_ras_is_supported(adev, adev->hdp.ras_if->block)) {
+		kfree(adev->hdp.ras_if);
+		adev->hdp.ras_if = NULL;
+	}
+
+	return r;
+}
+
+void amdgpu_hdp_ras_fini(struct amdgpu_device *adev)
+{
+	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP) &&
+	    adev->hdp.ras_if) {
+		struct ras_common_if *ras_if = adev->hdp.ras_if;
+		struct ras_ih_if ih_info = {
+			.cb = NULL,
+		};
+
+		amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+		kfree(ras_if);
+	}
+}

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -23,18 +23,29 @@
 #ifndef __AMDGPU_HDP_H__
 #define __AMDGPU_HDP_H__
 
+struct amdgpu_hdp_ras_funcs {
+	int (*ras_late_init)(struct amdgpu_device *adev);
+	void (*ras_fini)(struct amdgpu_device *adev);
+	void (*query_ras_error_count)(struct amdgpu_device *adev,
+				      void *ras_error_status);
+	void (*reset_ras_error_count)(struct amdgpu_device *adev);
+};
+
 struct amdgpu_hdp_funcs {
 	void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
 	void (*invalidate_hdp)(struct amdgpu_device *adev,
 			       struct amdgpu_ring *ring);
-	void (*reset_ras_error_count)(struct amdgpu_device *adev);
 	void (*update_clock_gating)(struct amdgpu_device *adev, bool enable);
 	void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags);
 	void (*init_registers)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_hdp {
+	struct ras_common_if			*ras_if;
 	const struct amdgpu_hdp_funcs		*funcs;
+	const struct amdgpu_hdp_ras_funcs	*ras_funcs;
 };
 
+int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev);
+void amdgpu_hdp_ras_fini(struct amdgpu_device *adev);
 #endif /* __AMDGPU_HDP_H__ */
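For context, here is a sketch of how an ASIC-specific HDP block might plug into the new ras_funcs hook. This is illustrative only, not part of the commit; the hdp_vX_0_* names are hypothetical and the fragment assumes kernel driver context:

/* Illustrative wiring of a hypothetical HDP implementation into the
 * amdgpu_hdp_ras_funcs interface introduced above. */
static void hdp_vX_0_query_ras_error_count(struct amdgpu_device *adev,
					   void *ras_error_status)
{
	/* read the block's error counters into ras_error_status */
}

static void hdp_vX_0_reset_ras_error_count(struct amdgpu_device *adev)
{
	/* clear the block's error counters */
}

static const struct amdgpu_hdp_ras_funcs hdp_vX_0_ras_funcs = {
	.ras_late_init = amdgpu_hdp_ras_late_init,
	.ras_fini = amdgpu_hdp_ras_fini,
	.query_ras_error_count = hdp_vX_0_query_ras_error_count,
	.reset_ras_error_count = hdp_vX_0_reset_ras_error_count,
};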

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -328,7 +328,7 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
 
 	for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {
 		if (i == AMDGPU_IB_POOL_DIRECT)
-			size = PAGE_SIZE * 2;
+			size = PAGE_SIZE * 6;
 		else
 			size = AMDGPU_IB_POOL_SIZE;
 

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -175,7 +175,9 @@ static bool amdgpu_ih_has_checkpoint_processed(struct amdgpu_device *adev,
 		cur_rptr += ih->ptr_mask + 1;
 	*prev_rptr = cur_rptr;
 
-	return cur_rptr >= checkpoint_wptr;
+	/* check ring is empty to workaround missing wptr overflow flag */
+	return cur_rptr >= checkpoint_wptr ||
+	       (cur_rptr & ih->ptr_mask) == amdgpu_ih_get_wptr(adev, ih);
 }
 
 /**
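The added ring-empty comparison can be sanity-checked in isolation: the read pointer is extended past the wrap, then also compared masked against the hardware write pointer. A standalone sketch with made-up values (the real rptr/wptr come from the IH hardware):

/* Toy illustration of the checkpoint test above. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t ptr_mask = 0xffff;         /* ring_size - 1, power of two */
	uint64_t checkpoint_wptr = 0x10010; /* checkpoint taken after a wrap */
	uint64_t cur_rptr = 0x00010;        /* overflow flag was lost */
	uint32_t hw_wptr = 0x0010;          /* ring currently drained */

	int done = cur_rptr >= checkpoint_wptr ||
		   (cur_rptr & ptr_mask) == hw_wptr;
	printf("checkpoint processed: %s\n", done ? "yes" : "no");
	return 0;
}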

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -986,7 +986,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
 		if (!ras)
 			return -EINVAL;
-		ras_mask = (uint64_t)ras->supported << 32 | ras->features;
+		ras_mask = (uint64_t)adev->ras_enabled << 32 | ras->features;
 
 		return copy_to_user(out, &ras_mask,
 				min_t(u64, size, sizeof(ras_mask))) ?
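After this change the 64-bit value returned by the RAS-enabled-features info query packs the enabled-block mask in the high word and the active-feature mask in the low word. A toy decode; the sample value is invented:

/* Toy decode of the ras_mask layout built above:
 * high 32 bits = adev->ras_enabled, low 32 bits = ras->features. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t ras_mask = ((uint64_t)0xe5 << 32) | 0x21; /* made-up sample */
	uint32_t enabled  = (uint32_t)(ras_mask >> 32);
	uint32_t features = (uint32_t)(ras_mask & 0xffffffff);

	printf("enabled blocks 0x%x, features 0x%x\n", enabled, features);
	return 0;
}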
@@ -1114,7 +1114,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 		dev_warn(adev->dev, "No more PASIDs available!");
 		pasid = 0;
 	}
-	r = amdgpu_vm_init(adev, &fpriv->vm, AMDGPU_VM_CONTEXT_GFX, pasid);
+
+	r = amdgpu_vm_init(adev, &fpriv->vm, pasid);
 	if (r)
 		goto error_pasid;
 

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
@@ -28,6 +28,7 @@ struct amdgpu_mmhub_ras_funcs {
 				      void *ras_error_status);
 	void (*query_ras_error_status)(struct amdgpu_device *adev);
 	void (*reset_ras_error_count)(struct amdgpu_device *adev);
+	void (*reset_ras_error_status)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_mmhub_funcs {

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -155,3 +155,89 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 	mmu_interval_notifier_remove(&bo->notifier);
 	bo->notifier.mm = NULL;
 }
+
+int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
+			       struct mm_struct *mm, struct page **pages,
+			       uint64_t start, uint64_t npages,
+			       struct hmm_range **phmm_range, bool readonly,
+			       bool mmap_locked)
+{
+	struct hmm_range *hmm_range;
+	unsigned long timeout;
+	unsigned long i;
+	unsigned long *pfns;
+	int r = 0;
+
+	hmm_range = kzalloc(sizeof(*hmm_range), GFP_KERNEL);
+	if (unlikely(!hmm_range))
+		return -ENOMEM;
+
+	pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
+	if (unlikely(!pfns)) {
+		r = -ENOMEM;
+		goto out_free_range;
+	}
+
+	hmm_range->notifier = notifier;
+	hmm_range->default_flags = HMM_PFN_REQ_FAULT;
+	if (!readonly)
+		hmm_range->default_flags |= HMM_PFN_REQ_WRITE;
+	hmm_range->hmm_pfns = pfns;
+	hmm_range->start = start;
+	hmm_range->end = start + npages * PAGE_SIZE;
+
+	/* Assuming 512MB takes maxmium 1 second to fault page address */
+	timeout = max(npages >> 17, 1ULL) * HMM_RANGE_DEFAULT_TIMEOUT;
+	timeout = jiffies + msecs_to_jiffies(timeout);
+
+retry:
+	hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
+
+	if (likely(!mmap_locked))
+		mmap_read_lock(mm);
+
+	r = hmm_range_fault(hmm_range);
+
+	if (likely(!mmap_locked))
+		mmap_read_unlock(mm);
+	if (unlikely(r)) {
+		/*
+		 * FIXME: This timeout should encompass the retry from
+		 * mmu_interval_read_retry() as well.
+		 */
+		if (r == -EBUSY && !time_after(jiffies, timeout))
+			goto retry;
+		goto out_free_pfns;
+	}
+
+	/*
+	 * Due to default_flags, all pages are HMM_PFN_VALID or
+	 * hmm_range_fault() fails. FIXME: The pages cannot be touched outside
+	 * the notifier_lock, and mmu_interval_read_retry() must be done first.
+	 */
+	for (i = 0; pages && i < npages; i++)
+		pages[i] = hmm_pfn_to_page(pfns[i]);
+
+	*phmm_range = hmm_range;
+
+	return 0;
+
+out_free_pfns:
+	kvfree(pfns);
+out_free_range:
+	kfree(hmm_range);
+
+	return r;
+}
+
+int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range)
+{
+	int r;
+
+	r = mmu_interval_read_retry(hmm_range->notifier,
+				    hmm_range->notifier_seq);
+	kvfree(hmm_range->hmm_pfns);
+	kfree(hmm_range);
+
+	return r;
+}

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -30,6 +30,13 @@
 #include <linux/workqueue.h>
 #include <linux/interval_tree.h>
 
+int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
+			       struct mm_struct *mm, struct page **pages,
+			       uint64_t start, uint64_t npages,
+			       struct hmm_range **phmm_range, bool readonly,
+			       bool mmap_locked);
+int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);
+
 #if defined(CONFIG_HMM_MIRROR)
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
 void amdgpu_mn_unregister(struct amdgpu_bo *bo);
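A sketch of the intended calling pattern for the two new helpers, modeled on the amdgpu_ttm.c caller later in this diff. Kernel context assumed, not standalone; the example_* name is hypothetical and error handling is trimmed:

/* Illustrative caller of the new HMM helpers. A nonzero return from
 * amdgpu_hmm_range_get_pages_done() means the range was invalidated
 * while in flight and the fault must be retried. */
static int example_fault_user_range(struct amdgpu_bo *bo, struct mm_struct *mm,
				    struct page **pages, uint64_t start,
				    uint64_t npages, bool readonly)
{
	struct hmm_range *range;
	int r;

	r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start,
				       npages, &range, readonly, false);
	if (r)
		return r;

	/* ... consume pages, then validate under the notifier lock ... */

	return amdgpu_hmm_range_get_pages_done(range);
}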

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -491,7 +491,18 @@ bool amdgpu_bo_support_uswc(u64 bo_flags)
 #endif
 }
 
-static int amdgpu_bo_do_create(struct amdgpu_device *adev,
+/**
+ * amdgpu_bo_create - create an &amdgpu_bo buffer object
+ * @adev: amdgpu device object
+ * @bp: parameters to be used for the buffer object
+ * @bo_ptr: pointer to the buffer object pointer
+ *
+ * Creates an &amdgpu_bo buffer object.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_create(struct amdgpu_device *adev,
 			       struct amdgpu_bo_param *bp,
 			       struct amdgpu_bo **bo_ptr)
 {
@@ -601,9 +612,9 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
 	return r;
 }
 
-static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
-				   unsigned long size,
-				   struct amdgpu_bo *bo)
+int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
+			    unsigned long size,
+			    struct amdgpu_bo *bo)
 {
 	struct amdgpu_bo_param bp;
 	int r;
@@ -614,13 +625,12 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
 	memset(&bp, 0, sizeof(bp));
 	bp.size = size;
 	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
-	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC |
-		AMDGPU_GEM_CREATE_SHADOW;
+	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 	bp.type = ttm_bo_type_kernel;
 	bp.resv = bo->tbo.base.resv;
 	bp.bo_ptr_size = sizeof(struct amdgpu_bo);
 
-	r = amdgpu_bo_do_create(adev, &bp, &bo->shadow);
+	r = amdgpu_bo_create(adev, &bp, &bo->shadow);
 	if (!r) {
 		bo->shadow->parent = amdgpu_bo_ref(bo);
 		mutex_lock(&adev->shadow_list_lock);
@@ -631,50 +641,6 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
 	return r;
 }
 
-/**
- * amdgpu_bo_create - create an &amdgpu_bo buffer object
- * @adev: amdgpu device object
- * @bp: parameters to be used for the buffer object
- * @bo_ptr: pointer to the buffer object pointer
- *
- * Creates an &amdgpu_bo buffer object; and if requested, also creates a
- * shadow object.
- * Shadow object is used to backup the original buffer object, and is always
- * in GTT.
- *
- * Returns:
- * 0 for success or a negative error code on failure.
- */
-int amdgpu_bo_create(struct amdgpu_device *adev,
-		     struct amdgpu_bo_param *bp,
-		     struct amdgpu_bo **bo_ptr)
-{
-	u64 flags = bp->flags;
-	int r;
-
-	bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW;
-
-	r = amdgpu_bo_do_create(adev, bp, bo_ptr);
-	if (r)
-		return r;
-
-	if ((flags & AMDGPU_GEM_CREATE_SHADOW) && !(adev->flags & AMD_IS_APU)) {
-		if (!bp->resv)
-			WARN_ON(dma_resv_lock((*bo_ptr)->tbo.base.resv,
-							NULL));
-
-		r = amdgpu_bo_create_shadow(adev, bp->size, *bo_ptr);
-
-		if (!bp->resv)
-			dma_resv_unlock((*bo_ptr)->tbo.base.resv);
-
-		if (r)
-			amdgpu_bo_unref(bo_ptr);
-	}
-
-	return r;
-}
-
 /**
  * amdgpu_bo_create_user - create an &amdgpu_bo_user buffer object
  * @adev: amdgpu device object
@@ -694,9 +660,8 @@ int amdgpu_bo_create_user(struct amdgpu_device *adev,
 	struct amdgpu_bo *bo_ptr;
 	int r;
 
-	bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW;
 	bp->bo_ptr_size = sizeof(struct amdgpu_bo_user);
-	r = amdgpu_bo_do_create(adev, bp, &bo_ptr);
+	r = amdgpu_bo_create(adev, bp, &bo_ptr);
 	if (r)
 		return r;
 
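With the shadow wrapper gone, callers fill an amdgpu_bo_param and call amdgpu_bo_create() directly. A minimal sketch mirroring the field setup in the shadow-BO hunk above; kernel context assumed and the example_* name is hypothetical:

/* Illustrative kernel-internal allocation through the renamed
 * amdgpu_bo_create(); field values mirror the hunks above. */
static int example_create_gtt_bo(struct amdgpu_device *adev,
				 unsigned long size, struct amdgpu_bo **bo)
{
	struct amdgpu_bo_param bp;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	bp.type = ttm_bo_type_kernel;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);

	return amdgpu_bo_create(adev, &bp, bo);
}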
@@ -1595,7 +1560,6 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
 	amdgpu_bo_print_flag(m, bo, NO_CPU_ACCESS);
 	amdgpu_bo_print_flag(m, bo, CPU_GTT_USWC);
 	amdgpu_bo_print_flag(m, bo, VRAM_CLEARED);
-	amdgpu_bo_print_flag(m, bo, SHADOW);
 	amdgpu_bo_print_flag(m, bo, VRAM_CONTIGUOUS);
 	amdgpu_bo_print_flag(m, bo, VM_ALWAYS_VALID);
 	amdgpu_bo_print_flag(m, bo, EXPLICIT_SYNC);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -37,6 +37,10 @@
 #define AMDGPU_BO_INVALID_OFFSET	LONG_MAX
 #define AMDGPU_BO_MAX_PLACEMENTS	3
 
+/* BO flag to indicate a KFD userptr BO */
+#define AMDGPU_AMDKFD_CREATE_USERPTR_BO	(1ULL << 63)
+#define AMDGPU_AMDKFD_CREATE_SVM_BO	(1ULL << 62)
+
 #define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo)
 
 struct amdgpu_bo_param {
@@ -267,6 +271,9 @@ int amdgpu_bo_create_user(struct amdgpu_device *adev,
 			  struct amdgpu_bo_user **ubo_ptr);
 void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
 			   void **cpu_addr);
+int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
+			    unsigned long size,
+			    struct amdgpu_bo *bo);
 int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
 void *amdgpu_bo_kptr(struct amdgpu_bo *bo);
 void amdgpu_bo_kunmap(struct amdgpu_bo *bo);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -417,31 +417,12 @@ static int psp_tmr_init(struct psp_context *psp)
 	return ret;
 }
 
-static int psp_clear_vf_fw(struct psp_context *psp)
-{
-	int ret;
-	struct psp_gfx_cmd_resp *cmd;
-
-	if (!amdgpu_sriov_vf(psp->adev) || psp->adev->asic_type != CHIP_NAVI12)
-		return 0;
-
-	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
-	if (!cmd)
-		return -ENOMEM;
-
-	cmd->cmd_id = GFX_CMD_ID_CLEAR_VF_FW;
-
-	ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
-	kfree(cmd);
-
-	return ret;
-}
-
 static bool psp_skip_tmr(struct psp_context *psp)
 {
 	switch (psp->adev->asic_type) {
 	case CHIP_NAVI12:
 	case CHIP_SIENNA_CICHLID:
 	case CHIP_ALDEBARAN:
 		return true;
 	default:
 		return false;
@@ -1037,6 +1018,13 @@ static int psp_ras_load(struct psp_context *psp)
 	memset(psp->fw_pri_buf, 0, PSP_1_MEG);
 	memcpy(psp->fw_pri_buf, psp->ta_ras_start_addr, psp->ta_ras_ucode_size);
 
+	ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf;
+
+	if (psp->adev->gmc.xgmi.connected_to_cpu)
+		ras_cmd->ras_in_message.init_flags.poison_mode_en = 1;
+	else
+		ras_cmd->ras_in_message.init_flags.dgpu_mode = 1;
+
 	psp_prep_ta_load_cmd_buf(cmd,
 				 psp->fw_pri_mc_addr,
 				 psp->ta_ras_ucode_size,
@@ -1046,8 +1034,6 @@ static int psp_ras_load(struct psp_context *psp)
 	ret = psp_cmd_submit_buf(psp, NULL, cmd,
 			psp->fence_buf_mc_addr);
 
-	ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf;
-
 	if (!ret) {
 		psp->ras.session_id = cmd->resp.session_id;
 
@@ -1128,6 +1114,31 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
 	return ret;
 }
 
+static int psp_ras_status_to_errno(struct amdgpu_device *adev,
+					 enum ta_ras_status ras_status)
+{
+	int ret = -EINVAL;
+
+	switch (ras_status) {
+	case TA_RAS_STATUS__SUCCESS:
+		ret = 0;
+		break;
+	case TA_RAS_STATUS__RESET_NEEDED:
+		ret = -EAGAIN;
+		break;
+	case TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE:
+		dev_warn(adev->dev, "RAS WARN: ras function unavailable\n");
+		break;
+	case TA_RAS_STATUS__ERROR_ASD_READ_WRITE:
+		dev_warn(adev->dev, "RAS WARN: asd read or write failed\n");
+		break;
+	default:
+		dev_err(adev->dev, "RAS ERROR: ras function failed ret 0x%X\n", ret);
+	}
+
+	return ret;
+}
+
 int psp_ras_enable_features(struct psp_context *psp,
 		union ta_ras_cmd_input *info, bool enable)
 {
@@ -1151,7 +1162,7 @@ int psp_ras_enable_features(struct psp_context *psp,
 	if (ret)
 		return -EINVAL;
 
-	return ras_cmd->ras_status;
+	return psp_ras_status_to_errno(psp->adev, ras_cmd->ras_status);
 }
 
 static int psp_ras_terminate(struct psp_context *psp)
@@ -1234,7 +1245,7 @@ int psp_ras_trigger_error(struct psp_context *psp,
 	if (amdgpu_ras_intr_triggered())
 		return 0;
 
-	return ras_cmd->ras_status;
+	return psp_ras_status_to_errno(psp->adev, ras_cmd->ras_status);
 }
 // ras end
 
@@ -1920,12 +1931,6 @@ static int psp_hw_start(struct psp_context *psp)
 		return ret;
 	}
 
-	ret = psp_clear_vf_fw(psp);
-	if (ret) {
-		DRM_ERROR("PSP clear vf fw!\n");
-		return ret;
-	}
-
 	ret = psp_boot_config_set(adev);
 	if (ret) {
 		DRM_WARN("PSP set boot config@\n");
@@ -2166,7 +2171,7 @@ static int psp_load_smu_fw(struct psp_context *psp)
 		return 0;
 
 	if ((amdgpu_in_reset(adev) &&
-	     ras && ras->supported &&
+	     ras && adev->ras_enabled &&
 	     (adev->asic_type == CHIP_ARCTURUS ||
 	      adev->asic_type == CHIP_VEGA20)) ||
 	     (adev->in_runpm &&
@@ -2434,7 +2439,6 @@ static int psp_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct psp_context *psp = &adev->psp;
-	int ret;
 
 	if (psp->adev->psp.ta_fw) {
 		psp_ras_terminate(psp);
@@ -2445,11 +2449,6 @@ static int psp_hw_fini(void *handle)
 	}
 
 	psp_asd_unload(psp);
-	ret = psp_clear_vf_fw(psp);
-	if (ret) {
-		DRM_ERROR("PSP clear vf fw!\n");
-		return ret;
-	}
 
 	psp_tmr_terminate(psp);
 	psp_ring_destroy(psp, PSP_RING_TYPE__KM);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -33,6 +33,7 @@
 #include "amdgpu_atomfirmware.h"
 #include "amdgpu_xgmi.h"
 #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
+#include "atom.h"
 
 static const char *RAS_FS_NAME = "ras";
 
@@ -320,11 +321,14 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
  * "disable" requires only the block.
  * "enable" requires the block and error type.
  * "inject" requires the block, error type, address, and value.
+ *
  * The block is one of: umc, sdma, gfx, etc.
+ *	see ras_block_string[] for details
+ *
  * The error type is one of: ue, ce, where,
  *	ue is multi-uncorrectable
  *	ce is single-correctable
  *
  * The sub-block is a the sub-block index, pass 0 if there is no sub-block.
  * The address and value are hexadecimal numbers, leading 0x is optional.
  *
@@ -531,7 +535,7 @@ static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev,
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 	struct ras_manager *obj;
 
-	if (!adev->ras_features || !con)
+	if (!adev->ras_enabled || !con)
 		return NULL;
 
 	if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
@@ -558,7 +562,7 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
 	struct ras_manager *obj;
 	int i;
 
-	if (!adev->ras_features || !con)
+	if (!adev->ras_enabled || !con)
 		return NULL;
 
 	if (head) {
@@ -585,36 +589,11 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
 }
 /* obj end */
 
-static void amdgpu_ras_parse_status_code(struct amdgpu_device *adev,
-					 const char* invoke_type,
-					 const char* block_name,
-					 enum ta_ras_status ret)
-{
-	switch (ret) {
-	case TA_RAS_STATUS__SUCCESS:
-		return;
-	case TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE:
-		dev_warn(adev->dev,
-			"RAS WARN: %s %s currently unavailable\n",
-			invoke_type,
-			block_name);
-		break;
-	default:
-		dev_err(adev->dev,
-			"RAS ERROR: %s %s error failed ret 0x%X\n",
-			invoke_type,
-			block_name,
-			ret);
-	}
-}
-
 /* feature ctl begin */
 static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev,
-		struct ras_common_if *head)
+					 struct ras_common_if *head)
 {
-	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-
-	return con->hw_supported & BIT(head->block);
+	return adev->ras_hw_enabled & BIT(head->block);
 }
 
 static int amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev,
@@ -658,11 +637,7 @@ static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 		con->features |= BIT(head->block);
 	} else {
 		if (obj && amdgpu_ras_is_feature_enabled(adev, head)) {
-			/* skip clean gfx ras context feature for VEGA20 Gaming.
-			 * will clean later
-			 */
-			if (!(!adev->ras_features && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)))
-				con->features &= ~BIT(head->block);
+			con->features &= ~BIT(head->block);
 			put_obj(obj);
 		}
 	}
@@ -708,15 +683,10 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 	if (!amdgpu_ras_intr_triggered()) {
 		ret = psp_ras_enable_features(&adev->psp, info, enable);
 		if (ret) {
-			amdgpu_ras_parse_status_code(adev,
-						     enable ? "enable":"disable",
-						     ras_block_str(head->block),
-						    (enum ta_ras_status)ret);
-			if (ret == TA_RAS_STATUS__RESET_NEEDED)
-				ret = -EAGAIN;
-			else
-				ret = -EINVAL;
-
+			dev_err(adev->dev, "ras %s %s failed %d\n",
+				enable ? "enable":"disable",
+				ras_block_str(head->block),
+				ret);
 			goto out;
 		}
 	}
@@ -770,6 +740,10 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
 				con->features |= BIT(head->block);
 
 			ret = amdgpu_ras_feature_enable(adev, head, 0);
+
+			/* clean gfx block ras features flag */
+			if (adev->ras_enabled && head->block == AMDGPU_RAS_BLOCK__GFX)
+				con->features &= ~BIT(head->block);
 		}
 	} else
 		ret = amdgpu_ras_feature_enable(adev, head, enable);
@@ -890,6 +864,11 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 		    adev->gmc.xgmi.ras_funcs->query_ras_error_count)
 			adev->gmc.xgmi.ras_funcs->query_ras_error_count(adev, &err_data);
 		break;
+	case AMDGPU_RAS_BLOCK__HDP:
+		if (adev->hdp.ras_funcs &&
+		    adev->hdp.ras_funcs->query_ras_error_count)
+			adev->hdp.ras_funcs->query_ras_error_count(adev, &err_data);
+		break;
 	default:
 		break;
 	}
@@ -901,17 +880,42 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 	info->ce_count = obj->err_data.ce_count;
 
 	if (err_data.ce_count) {
-		dev_info(adev->dev, "%ld correctable hardware errors "
-					"detected in %s block, no user "
-					"action is needed.\n",
-					obj->err_data.ce_count,
-					ras_block_str(info->head.block));
+		if (adev->smuio.funcs &&
+		    adev->smuio.funcs->get_socket_id &&
+		    adev->smuio.funcs->get_die_id) {
+			dev_info(adev->dev, "socket: %d, die: %d "
+					"%ld correctable hardware errors "
+					"detected in %s block, no user "
+					"action is needed.\n",
+					adev->smuio.funcs->get_socket_id(adev),
+					adev->smuio.funcs->get_die_id(adev),
+					obj->err_data.ce_count,
+					ras_block_str(info->head.block));
+		} else {
+			dev_info(adev->dev, "%ld correctable hardware errors "
+					"detected in %s block, no user "
+					"action is needed.\n",
+					obj->err_data.ce_count,
+					ras_block_str(info->head.block));
+		}
 	}
 	if (err_data.ue_count) {
-		dev_info(adev->dev, "%ld uncorrectable hardware errors "
-					"detected in %s block\n",
-					obj->err_data.ue_count,
-					ras_block_str(info->head.block));
+		if (adev->smuio.funcs &&
+		    adev->smuio.funcs->get_socket_id &&
+		    adev->smuio.funcs->get_die_id) {
+			dev_info(adev->dev, "socket: %d, die: %d "
+					"%ld uncorrectable hardware errors "
+					"detected in %s block\n",
+					adev->smuio.funcs->get_socket_id(adev),
+					adev->smuio.funcs->get_die_id(adev),
+					obj->err_data.ue_count,
+					ras_block_str(info->head.block));
+		} else {
+			dev_info(adev->dev, "%ld uncorrectable hardware errors "
+					"detected in %s block\n",
+					obj->err_data.ue_count,
+					ras_block_str(info->head.block));
+		}
 	}
 
 	return 0;
@@ -937,11 +941,20 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
 		if (adev->mmhub.ras_funcs &&
 		    adev->mmhub.ras_funcs->reset_ras_error_count)
 			adev->mmhub.ras_funcs->reset_ras_error_count(adev);
+
+		if (adev->mmhub.ras_funcs &&
+		    adev->mmhub.ras_funcs->reset_ras_error_status)
+			adev->mmhub.ras_funcs->reset_ras_error_status(adev);
 		break;
 	case AMDGPU_RAS_BLOCK__SDMA:
 		if (adev->sdma.funcs->reset_ras_error_count)
 			adev->sdma.funcs->reset_ras_error_count(adev);
 		break;
+	case AMDGPU_RAS_BLOCK__HDP:
+		if (adev->hdp.ras_funcs &&
+		    adev->hdp.ras_funcs->reset_ras_error_count)
+			adev->hdp.ras_funcs->reset_ras_error_count(adev);
+		break;
 	default:
 		break;
 	}
@@ -1022,10 +1035,9 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
 		ret = -EINVAL;
 	}
 
-	amdgpu_ras_parse_status_code(adev,
-				     "inject",
-				     ras_block_str(info->head.block),
-				     (enum ta_ras_status)ret);
+	if (ret)
+		dev_err(adev->dev, "ras inject %s failed %d\n",
+			ras_block_str(info->head.block), ret);
 
 	return ret;
 }
@@ -1038,7 +1050,7 @@ unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
 	struct ras_manager *obj;
 	struct ras_err_data data = {0, 0};
 
-	if (!adev->ras_features || !con)
+	if (!adev->ras_enabled || !con)
 		return 0;
 
 	list_for_each_entry(obj, &con->head, node) {
@@ -1265,8 +1277,8 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
 static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-	struct dentry *dir;
-	struct drm_minor *minor = adev_to_drm(adev)->primary;
+	struct drm_minor  *minor = adev_to_drm(adev)->primary;
+	struct dentry     *dir;
 
 	dir = debugfs_create_dir(RAS_FS_NAME, minor->debugfs_root);
 	debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, dir, adev,
@@ -1275,6 +1287,8 @@ static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *
 			    &amdgpu_ras_debugfs_eeprom_ops);
 	debugfs_create_u32("bad_page_cnt_threshold", 0444, dir,
 			   &con->bad_page_cnt_threshold);
+	debugfs_create_x32("ras_hw_enabled", 0444, dir, &adev->ras_hw_enabled);
+	debugfs_create_x32("ras_enabled", 0444, dir, &adev->ras_enabled);
 
 	/*
	 * After one uncorrectable error happens, usually GPU recovery will
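The two debugfs_create_x32() calls expose the new masks read-only in hex. A hypothetical userspace read; the paths assume debugfs mounted at /sys/kernel/debug and DRM minor 0:

/* Hypothetical reader for the new debugfs nodes; paths are assumptions. */
#include <stdio.h>

static unsigned int read_hex(const char *path)
{
	unsigned int v = 0;
	FILE *f = fopen(path, "r");

	if (f) {
		fscanf(f, "%x", &v);	/* %x accepts the 0x prefix */
		fclose(f);
	}
	return v;
}

int main(void)
{
	unsigned int hw = read_hex("/sys/kernel/debug/dri/0/ras/ras_hw_enabled");
	unsigned int en = read_hex("/sys/kernel/debug/dri/0/ras/ras_enabled");

	printf("ras_hw_enabled 0x%x, ras_enabled 0x%x\n", hw, en);
	return 0;
}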
@@ -1561,7 +1575,7 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 	struct ras_manager *obj;
 
-	if (!adev->ras_features || !con)
+	if (!adev->ras_enabled || !con)
 		return;
 
 	list_for_each_entry(obj, &con->head, node) {
@@ -1611,7 +1625,7 @@ static void amdgpu_ras_query_err_status(struct amdgpu_device *adev)
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 	struct ras_manager *obj;
 
-	if (!adev->ras_features || !con)
+	if (!adev->ras_enabled || !con)
 		return;
 
 	list_for_each_entry(obj, &con->head, node) {
@@ -1925,7 +1939,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
 	bool exc_err_limit = false;
 	int ret;
 
-	if (adev->ras_features && con)
+	if (adev->ras_enabled && con)
 		data = &con->eh_data;
 	else
 		return 0;
@@ -2028,6 +2042,23 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
 		adev->asic_type == CHIP_SIENNA_CICHLID;
 }
 
+/*
+ * this is workaround for vega20 workstation sku,
+ * force enable gfx ras, ignore vbios gfx ras flag
+ * due to GC EDC can not write
+ */
+static void amdgpu_ras_get_quirks(struct amdgpu_device *adev)
+{
+	struct atom_context *ctx = adev->mode_info.atom_context;
+
+	if (!ctx)
+		return;
+
+	if (strnstr(ctx->vbios_version, "D16406",
+		    sizeof(ctx->vbios_version)))
+		adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX);
+}
+
 /*
  * check hardware's ras ability which will be saved in hw_supported.
  * if hardware does not support ras, we can skip some ras initializtion and
@@ -2037,11 +2068,9 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
  * we have to initialize ras as normal. but need check if operation is
  * allowed or not in each function.
  */
-static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
-		uint32_t *hw_supported, uint32_t *supported)
+static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
 {
-	*hw_supported = 0;
-	*supported = 0;
+	adev->ras_hw_enabled = adev->ras_enabled = 0;
 
 	if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||
 	    !amdgpu_ras_asic_supported(adev))
@@ -2050,33 +2079,34 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
 	if (!adev->gmc.xgmi.connected_to_cpu) {
 		if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
 			dev_info(adev->dev, "MEM ECC is active.\n");
-			*hw_supported |= (1 << AMDGPU_RAS_BLOCK__UMC |
-					1 << AMDGPU_RAS_BLOCK__DF);
+			adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC |
+						   1 << AMDGPU_RAS_BLOCK__DF);
 		} else {
 			dev_info(adev->dev, "MEM ECC is not presented.\n");
 		}
 
 		if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
 			dev_info(adev->dev, "SRAM ECC is active.\n");
-			*hw_supported |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
-					1 << AMDGPU_RAS_BLOCK__DF);
+			adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
+						    1 << AMDGPU_RAS_BLOCK__DF);
 		} else {
 			dev_info(adev->dev, "SRAM ECC is not presented.\n");
 		}
 	} else {
 		/* driver only manages a few IP blocks RAS feature
 		 * when GPU is connected cpu through XGMI */
-		*hw_supported |= (1 << AMDGPU_RAS_BLOCK__GFX |
-				1 << AMDGPU_RAS_BLOCK__SDMA |
-				1 << AMDGPU_RAS_BLOCK__MMHUB);
+		adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX |
+					   1 << AMDGPU_RAS_BLOCK__SDMA |
+					   1 << AMDGPU_RAS_BLOCK__MMHUB);
 	}
 
-	/* hw_supported needs to be aligned with RAS block mask. */
-	*hw_supported &= AMDGPU_RAS_BLOCK_MASK;
+	amdgpu_ras_get_quirks(adev);
 
-	*supported = amdgpu_ras_enable == 0 ?
-			0 : *hw_supported & amdgpu_ras_mask;
-	adev->ras_features = *supported;
+	/* hw_supported needs to be aligned with RAS block mask. */
+	adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK;
+
+	adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 :
+		adev->ras_hw_enabled & amdgpu_ras_mask;
 }
 
 int amdgpu_ras_init(struct amdgpu_device *adev)
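The relationship between the two masks computed above is plain bit arithmetic: ras_hw_enabled is what the hardware and vbios report (plus quirks), clipped to the defined block mask, and ras_enabled is its intersection with the amdgpu_ras_mask module parameter. A standalone illustration with invented values:

/* Toy model of the mask plumbing in amdgpu_ras_check_supported(). */
#include <stdio.h>
#include <stdint.h>

#define BLOCK_UMC 0
#define BLOCK_GFX 2
#define BLOCK_MASK 0xffu		/* stand-in for AMDGPU_RAS_BLOCK_MASK */

int main(void)
{
	uint32_t ras_hw_enabled = (1u << BLOCK_UMC) | (1u << BLOCK_GFX);
	uint32_t amdgpu_ras_mask = 0xffffffffu;	/* module default: allow all */

	ras_hw_enabled &= BLOCK_MASK;
	uint32_t ras_enabled = ras_hw_enabled & amdgpu_ras_mask;

	printf("hw 0x%x -> enabled 0x%x (gfx %s)\n", ras_hw_enabled, ras_enabled,
	       (ras_enabled & (1u << BLOCK_GFX)) ? "on" : "off");
	return 0;
}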
@@ -2097,13 +2127,13 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 
 	amdgpu_ras_set_context(adev, con);
 
-	amdgpu_ras_check_supported(adev, &con->hw_supported,
-			&con->supported);
-	if (!con->hw_supported || (adev->asic_type == CHIP_VEGA10)) {
+	amdgpu_ras_check_supported(adev);
+
+	if (!adev->ras_enabled || adev->asic_type == CHIP_VEGA10) {
 		/* set gfx block ras context feature for VEGA20 Gaming
 		 * send ras disable cmd to ras ta during ras late init.
 		 */
-		if (!adev->ras_features && adev->asic_type == CHIP_VEGA20) {
+		if (!adev->ras_enabled && adev->asic_type == CHIP_VEGA20) {
 			con->features |= BIT(AMDGPU_RAS_BLOCK__GFX);
 
 			return 0;
@@ -2153,8 +2183,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 	}
 
 	dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
-			"hardware ability[%x] ras_mask[%x]\n",
-			con->hw_supported, con->supported);
+		 "hardware ability[%x] ras_mask[%x]\n",
+		 adev->ras_hw_enabled, adev->ras_enabled);
+
 	return 0;
 release_con:
 	amdgpu_ras_set_context(adev, NULL);
@@ -2268,7 +2299,7 @@ void amdgpu_ras_resume(struct amdgpu_device *adev)
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 	struct ras_manager *obj, *tmp;
 
-	if (!adev->ras_features || !con) {
+	if (!adev->ras_enabled || !con) {
 		/* clean ras context for VEGA20 Gaming after send ras disable cmd */
 		amdgpu_release_ras_context(adev);
 
@@ -2314,7 +2345,7 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 
-	if (!adev->ras_features || !con)
+	if (!adev->ras_enabled || !con)
 		return;
 
 	amdgpu_ras_disable_all_features(adev, 0);
@@ -2328,7 +2359,7 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 
-	if (!adev->ras_features || !con)
+	if (!adev->ras_enabled || !con)
 		return 0;
 
 	/* Need disable ras on all IPs here before ip [hw/sw]fini */
@@ -2341,7 +2372,7 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 
-	if (!adev->ras_features || !con)
+	if (!adev->ras_enabled || !con)
 		return 0;
 
 	amdgpu_ras_fs_fini(adev);
@@ -2360,10 +2391,8 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
 
 void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
 {
-	uint32_t hw_supported, supported;
-
-	amdgpu_ras_check_supported(adev, &hw_supported, &supported);
-	if (!hw_supported)
+	amdgpu_ras_check_supported(adev);
+	if (!adev->ras_hw_enabled)
 		return;
 
 	if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
@@ -2392,7 +2421,7 @@ void amdgpu_release_ras_context(struct amdgpu_device *adev)
 	if (!con)
 		return;
 
-	if (!adev->ras_features && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)) {
+	if (!adev->ras_enabled && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)) {
 		con->features &= ~BIT(AMDGPU_RAS_BLOCK__GFX);
 		amdgpu_ras_set_context(adev, NULL);
 		kfree(con);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -313,9 +313,6 @@ struct ras_common_if {
 struct amdgpu_ras {
 	/* ras infrastructure */
 	/* for ras itself. */
-	uint32_t hw_supported;
-	/* for IP to check its ras ability. */
-	uint32_t supported;
 	uint32_t features;
 	struct list_head head;
 	/* sysfs */
@@ -478,7 +475,7 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
 
 	if (block >= AMDGPU_RAS_BLOCK_COUNT)
 		return 0;
-	return ras && (ras->supported & (1 << block));
+	return ras && (adev->ras_enabled & (1 << block));
 }
 
 int amdgpu_ras_recovery_init(struct amdgpu_device *adev);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -158,6 +158,7 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
  * @irq_src: interrupt source to use for this ring
  * @irq_type: interrupt type to use for this ring
  * @hw_prio: ring priority (NORMAL/HIGH)
+ * @sched_score: optional score atomic shared with other schedulers
  *
 * Initialize the driver information for the selected ring (all asics).
 * Returns 0 on success, error on failure.

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
@@ -29,6 +29,7 @@ struct amdgpu_smuio_funcs {
 	void (*update_rom_clock_gating)(struct amdgpu_device *adev, bool enable);
 	void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags);
 	u32 (*get_die_id)(struct amdgpu_device *adev);
+	u32 (*get_socket_id)(struct amdgpu_device *adev);
 	bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev);
 };
 

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -32,7 +32,6 @@
 
 #include <linux/dma-mapping.h>
 #include <linux/iommu.h>
-#include <linux/hmm.h>
 #include <linux/pagemap.h>
 #include <linux/sched/task.h>
 #include <linux/sched/mm.h>
@@ -112,6 +111,20 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 	}
 
 	abo = ttm_to_amdgpu_bo(bo);
+	if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO) {
+		struct dma_fence *fence;
+		struct dma_resv *resv = &bo->base._resv;
+
+		rcu_read_lock();
+		fence = rcu_dereference(resv->fence_excl);
+		if (fence && !fence->ops->signaled)
+			dma_fence_enable_sw_signaling(fence);
+
+		placement->num_placement = 0;
+		placement->num_busy_placement = 0;
+		rcu_read_unlock();
+		return;
+	}
 	switch (bo->mem.mem_type) {
 	case AMDGPU_PL_GDS:
 	case AMDGPU_PL_GWS:
@@ -165,13 +178,6 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 {
 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 
-	/*
-	 * Don't verify access for KFD BOs. They don't have a GEM
-	 * object associated with them.
-	 */
-	if (abo->kfd_bo)
-		return 0;
-
 	if (amdgpu_ttm_tt_get_usermm(bo->ttm))
 		return -EPERM;
 	return drm_vma_node_verify_access(&abo->tbo.base.vma_node,
@@ -288,7 +294,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
 }
 
 /**
- * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
+ * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
  * @adev: amdgpu device
 * @src: buffer/address where to read from
 * @dst: buffer/address where to write to
@@ -670,10 +676,8 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	unsigned long start = gtt->userptr;
 	struct vm_area_struct *vma;
-	struct hmm_range *range;
-	unsigned long timeout;
 	struct mm_struct *mm;
-	unsigned long i;
+	bool readonly;
 	int r = 0;
 
 	mm = bo->notifier.mm;
@@ -689,76 +693,26 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
 	if (!mmget_not_zero(mm)) /* Happens during process shutdown */
 		return -ESRCH;
 
-	range = kzalloc(sizeof(*range), GFP_KERNEL);
-	if (unlikely(!range)) {
-		r = -ENOMEM;
-		goto out;
-	}
-	range->notifier = &bo->notifier;
-	range->start = bo->notifier.interval_tree.start;
-	range->end = bo->notifier.interval_tree.last + 1;
-	range->default_flags = HMM_PFN_REQ_FAULT;
-	if (!amdgpu_ttm_tt_is_readonly(ttm))
-		range->default_flags |= HMM_PFN_REQ_WRITE;
-
-	range->hmm_pfns = kvmalloc_array(ttm->num_pages,
-					 sizeof(*range->hmm_pfns), GFP_KERNEL);
-	if (unlikely(!range->hmm_pfns)) {
-		r = -ENOMEM;
-		goto out_free_ranges;
-	}
-
 	mmap_read_lock(mm);
 	vma = find_vma(mm, start);
+	mmap_read_unlock(mm);
 	if (unlikely(!vma || start < vma->vm_start)) {
 		r = -EFAULT;
-		goto out_unlock;
+		goto out_putmm;
 	}
 	if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
 		vma->vm_file)) {
 		r = -EPERM;
-		goto out_unlock;
+		goto out_putmm;
 	}
-	mmap_read_unlock(mm);
-	timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
 
-retry:
-	range->notifier_seq = mmu_interval_read_begin(&bo->notifier);
-
-	mmap_read_lock(mm);
-	r = hmm_range_fault(range);
-	mmap_read_unlock(mm);
-	if (unlikely(r)) {
-		/*
-		 * FIXME: This timeout should encompass the retry from
-		 * mmu_interval_read_retry() as well.
-		 */
-		if (r == -EBUSY && !time_after(jiffies, timeout))
-			goto retry;
-		goto out_free_pfns;
-	}
-
-	/*
-	 * Due to default_flags, all pages are HMM_PFN_VALID or
-	 * hmm_range_fault() fails. FIXME: The pages cannot be touched outside
-	 * the notifier_lock, and mmu_interval_read_retry() must be done first.
-	 */
-	for (i = 0; i < ttm->num_pages; i++)
-		pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]);
-
-	gtt->range = range;
+	readonly = amdgpu_ttm_tt_is_readonly(ttm);
+	r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start,
+				       ttm->num_pages, &gtt->range, readonly,
+				       false);
+out_putmm:
 	mmput(mm);
 
-	return 0;
-
-out_unlock:
-	mmap_read_unlock(mm);
-out_free_pfns:
-	kvfree(range->hmm_pfns);
-out_free_ranges:
-	kfree(range);
-out:
-	mmput(mm);
 	return r;
 }
 
@@ -787,10 +741,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
 		 * FIXME: Must always hold notifier_lock for this, and must
 		 * not ignore the return code.
 		 */
-		r = mmu_interval_read_retry(gtt->range->notifier,
-					 gtt->range->notifier_seq);
-		kvfree(gtt->range->hmm_pfns);
-		kfree(gtt->range);
+		r = amdgpu_hmm_range_get_pages_done(gtt->range);
 		gtt->range = NULL;
 	}
 
| 
						 | 
				
			
			@ -50,9 +50,12 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
 | 
			
		|||
	struct drm_device *ddev = adev_to_drm(adev);
 | 
			
		||||
 | 
			
		||||
	/* enable virtual display */
 | 
			
		||||
	if (adev->mode_info.num_crtc == 0)
 | 
			
		||||
		adev->mode_info.num_crtc = 1;
 | 
			
		||||
	adev->enable_virtual_display = true;
 | 
			
		||||
	if (adev->asic_type != CHIP_ALDEBARAN &&
 | 
			
		||||
	    adev->asic_type != CHIP_ARCTURUS) {
 | 
			
		||||
		if (adev->mode_info.num_crtc == 0)
 | 
			
		||||
			adev->mode_info.num_crtc = 1;
 | 
			
		||||
		adev->enable_virtual_display = true;
 | 
			
		||||
	}
 | 
			
		||||
	ddev->driver_features &= ~DRIVER_ATOMIC;
 | 
			
		||||
	adev->cg_flags = 0;
 | 
			
		||||
	adev->pg_flags = 0;
 | 
			
		||||
| 
						 | 
				
			
			@ -679,6 +682,7 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
 | 
			
		|||
		case CHIP_VEGA10:
 | 
			
		||||
		case CHIP_VEGA20:
 | 
			
		||||
		case CHIP_ARCTURUS:
 | 
			
		||||
		case CHIP_ALDEBARAN:
 | 
			
		||||
			soc15_set_virt_ops(adev);
 | 
			
		||||
			break;
 | 
			
		||||
		case CHIP_NAVI10:
 | 
			
		||||

@@ -38,6 +38,7 @@
#include "amdgpu_gmc.h"
#include "amdgpu_xgmi.h"
#include "amdgpu_dma_buf.h"
#include "kfd_svm.h"

/**
 * DOC: GPUVM

@@ -850,35 +851,60 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
}

/**
 * amdgpu_vm_bo_param - fill in parameters for PD/PT allocation
 * amdgpu_vm_pt_create - create bo for PD/PT
 *
 * @adev: amdgpu_device pointer
 * @vm: requesting vm
 * @level: the page table level
 * @immediate: use a immediate update
 * @bp: resulting BO allocation parameters
 * @bo: pointer to the buffer object pointer
 */
static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
static int amdgpu_vm_pt_create(struct amdgpu_device *adev,
			       struct amdgpu_vm *vm,
			       int level, bool immediate,
			       struct amdgpu_bo_param *bp)
			       struct amdgpu_bo **bo)
{
	memset(bp, 0, sizeof(*bp));
	struct amdgpu_bo_param bp;
	int r;

	bp->size = amdgpu_vm_bo_size(adev, level);
	bp->byte_align = AMDGPU_GPU_PAGE_SIZE;
	bp->domain = AMDGPU_GEM_DOMAIN_VRAM;
	bp->domain = amdgpu_bo_get_preferred_pin_domain(adev, bp->domain);
	bp->flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
	memset(&bp, 0, sizeof(bp));

	bp.size = amdgpu_vm_bo_size(adev, level);
	bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
	bp.domain = amdgpu_bo_get_preferred_pin_domain(adev, bp.domain);
	bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
		AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	bp->bo_ptr_size = sizeof(struct amdgpu_bo);
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);
	if (vm->use_cpu_for_update)
		bp->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	else if (!vm->root.base.bo || vm->root.base.bo->shadow)
		bp->flags |= AMDGPU_GEM_CREATE_SHADOW;
	bp->type = ttm_bo_type_kernel;
	bp->no_wait_gpu = immediate;
		bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;

	bp.type = ttm_bo_type_kernel;
	bp.no_wait_gpu = immediate;
	if (vm->root.base.bo)
		bp->resv = vm->root.base.bo->tbo.base.resv;
		bp.resv = vm->root.base.bo->tbo.base.resv;

	r = amdgpu_bo_create(adev, &bp, bo);
	if (r)
		return r;

	if (vm->is_compute_context && (adev->flags & AMD_IS_APU))
		return 0;

	if (!bp.resv)
		WARN_ON(dma_resv_lock((*bo)->tbo.base.resv,
				      NULL));
	r = amdgpu_bo_create_shadow(adev, bp.size, *bo);

	if (!bp.resv)
		dma_resv_unlock((*bo)->tbo.base.resv);

	if (r) {
		amdgpu_bo_unref(bo);
		return r;
	}

	return 0;
}

/**

@@ -901,7 +927,6 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
			       bool immediate)
{
	struct amdgpu_vm_pt *entry = cursor->entry;
	struct amdgpu_bo_param bp;
	struct amdgpu_bo *pt;
	int r;

@@ -919,9 +944,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
	if (entry->base.bo)
		return 0;

	amdgpu_vm_bo_param(adev, vm, cursor->level, immediate, &bp);

	r = amdgpu_bo_create(adev, &bp, &pt);
	r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
	if (r)
		return r;

@@ -1593,15 +1616,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
 * Returns:
 * 0 for success, -EINVAL for failure.
 */
static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
				       struct amdgpu_device *bo_adev,
				       struct amdgpu_vm *vm, bool immediate,
				       bool unlocked, struct dma_resv *resv,
				       uint64_t start, uint64_t last,
				       uint64_t flags, uint64_t offset,
				       struct drm_mm_node *nodes,
				       dma_addr_t *pages_addr,
				       struct dma_fence **fence)
int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
				struct amdgpu_device *bo_adev,
				struct amdgpu_vm *vm, bool immediate,
				bool unlocked, struct dma_resv *resv,
				uint64_t start, uint64_t last,
				uint64_t flags, uint64_t offset,
				struct drm_mm_node *nodes,
				dma_addr_t *pages_addr,
				struct dma_fence **fence)
{
	struct amdgpu_vm_update_params params;
	enum amdgpu_sync_mode sync_mode;

@@ -2818,7 +2841,6 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @vm_context: Indicates if it GFX or Compute context
 * @pasid: Process address space identifier
 *
 * Init @vm fields.

@@ -2826,10 +2848,8 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
 * Returns:
 * 0 for success, error for failure.
 */
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
		   int vm_context, u32 pasid)
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid)
{
	struct amdgpu_bo_param bp;
	struct amdgpu_bo *root;
	int r, i;

@@ -2861,16 +2881,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
	vm->pte_support_ats = false;
	vm->is_compute_context = false;

	if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
						AMDGPU_VM_USE_CPU_FOR_COMPUTE);
	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
				    AMDGPU_VM_USE_CPU_FOR_GFX);

		if (adev->asic_type == CHIP_RAVEN)
			vm->pte_support_ats = true;
	} else {
		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
						AMDGPU_VM_USE_CPU_FOR_GFX);
	}
	DRM_DEBUG_DRIVER("VM update mode is %s\n",
			 vm->use_cpu_for_update ? "CPU" : "SDMA");
	WARN_ONCE((vm->use_cpu_for_update &&

@@ -2887,10 +2900,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
	mutex_init(&vm->eviction_lock);
	vm->evicting = false;

	amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp);
	if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
		bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW;
	r = amdgpu_bo_create(adev, &bp, &root);
	r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
				false, &root);
	if (r)
		goto error_free_delayed;

@@ -3349,6 +3360,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
			    uint64_t addr)
{
	bool is_compute_context = false;
	struct amdgpu_bo *root;
	uint64_t value, flags;
	struct amdgpu_vm *vm;

@@ -3356,15 +3368,25 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,

	spin_lock(&adev->vm_manager.pasid_lock);
	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
	if (vm)
	if (vm) {
		root = amdgpu_bo_ref(vm->root.base.bo);
	else
		is_compute_context = vm->is_compute_context;
	} else {
		root = NULL;
	}
	spin_unlock(&adev->vm_manager.pasid_lock);

	if (!root)
		return false;

	addr /= AMDGPU_GPU_PAGE_SIZE;

	if (is_compute_context &&
	    !svm_range_restore_pages(adev, pasid, addr)) {
		amdgpu_bo_unref(&root);
		return true;
	}

	r = amdgpu_bo_reserve(root, true);
	if (r)
		goto error_unref;

@@ -3378,18 +3400,16 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
	if (!vm)
		goto error_unlock;

	addr /= AMDGPU_GPU_PAGE_SIZE;
	flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
		AMDGPU_PTE_SYSTEM;

	if (vm->is_compute_context) {
	if (is_compute_context) {
		/* Intentionally setting invalid PTE flag
		 * combination to force a no-retry-fault
		 */
		flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE |
			AMDGPU_PTE_TF;
		value = 0;

	} else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
		/* Redirect the access to the dummy page */
		value = adev->dummy_page_addr;

@@ -121,9 +121,6 @@ struct amdgpu_bo_list_entry;
/* max vmids dedicated for process */
#define AMDGPU_VM_MAX_RESERVED_VMID	1

#define AMDGPU_VM_CONTEXT_GFX 0
#define AMDGPU_VM_CONTEXT_COMPUTE 1

/* See vm_update_mode */
#define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
#define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1)

@@ -367,6 +364,8 @@ struct amdgpu_vm_manager {
	spinlock_t				pasid_lock;
};

struct amdgpu_bo_va_mapping;

#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))

@@ -378,8 +377,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev);
void amdgpu_vm_manager_fini(struct amdgpu_device *adev);

long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
		   int vm_context, u32 pasid);
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid);
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid);
void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);

@@ -398,6 +396,15 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
			  struct dma_fence **fence);
int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
			   struct amdgpu_vm *vm);
int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
				struct amdgpu_device *bo_adev,
				struct amdgpu_vm *vm, bool immediate,
				bool unlocked, struct dma_resv *resv,
				uint64_t start, uint64_t last,
				uint64_t flags, uint64_t offset,
				struct drm_mm_node *nodes,
				dma_addr_t *pages_addr,
				struct dma_fence **fence);
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
			struct amdgpu_bo_va *bo_va,
			bool clear);

@@ -29,12 +29,14 @@
#include "amdgpu_atomfirmware.h"
#include "atom.h"

static inline struct amdgpu_vram_mgr *to_vram_mgr(struct ttm_resource_manager *man)
static inline struct amdgpu_vram_mgr *
to_vram_mgr(struct ttm_resource_manager *man)
{
	return container_of(man, struct amdgpu_vram_mgr, manager);
}

static inline struct amdgpu_device *to_amdgpu_device(struct amdgpu_vram_mgr *mgr)
static inline struct amdgpu_device *
to_amdgpu_device(struct amdgpu_vram_mgr *mgr)
{
	return container_of(mgr, struct amdgpu_device, mman.vram_mgr);
}

@@ -82,12 +84,14 @@ static ssize_t amdgpu_mem_info_vis_vram_total_show(struct device *dev,
 * amount of currently used VRAM in bytes
 */
static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev,
		struct device_attribute *attr, char *buf)
					      struct device_attribute *attr,
					      char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
	struct ttm_resource_manager *man;

	man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
	return sysfs_emit(buf, "%llu\n", amdgpu_vram_mgr_usage(man));
}

@@ -100,18 +104,28 @@ static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev,
 * amount of currently used visible VRAM in bytes
 */
static ssize_t amdgpu_mem_info_vis_vram_used_show(struct device *dev,
		struct device_attribute *attr, char *buf)
						  struct device_attribute *attr,
						  char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
	struct ttm_resource_manager *man;

	man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
	return sysfs_emit(buf, "%llu\n", amdgpu_vram_mgr_vis_usage(man));
}

/**
 * DOC: mem_info_vram_vendor
 *
 * The amdgpu driver provides a sysfs API for reporting the vendor of the
 * installed VRAM
 * The file mem_info_vram_vendor is used for this and returns the name of the
 * vendor.
 */
static ssize_t amdgpu_mem_info_vram_vendor(struct device *dev,
						 struct device_attribute *attr,
						 char *buf)
					   struct device_attribute *attr,
					   char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

@@ -162,78 +176,6 @@ static const struct attribute *amdgpu_vram_mgr_attributes[] = {
	NULL
};

static const struct ttm_resource_manager_func amdgpu_vram_mgr_func;

/**
 * amdgpu_vram_mgr_init - init VRAM manager and DRM MM
 *
 * @adev: amdgpu_device pointer
 *
 * Allocate and initialize the VRAM manager.
 */
int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
{
	struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
	struct ttm_resource_manager *man = &mgr->manager;
	int ret;

	ttm_resource_manager_init(man, adev->gmc.real_vram_size >> PAGE_SHIFT);

	man->func = &amdgpu_vram_mgr_func;

	drm_mm_init(&mgr->mm, 0, man->size);
	spin_lock_init(&mgr->lock);
	INIT_LIST_HEAD(&mgr->reservations_pending);
	INIT_LIST_HEAD(&mgr->reserved_pages);

	/* Add the two VRAM-related sysfs files */
	ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
	if (ret)
		DRM_ERROR("Failed to register sysfs\n");

	ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
	ttm_resource_manager_set_used(man, true);
	return 0;
}

/**
 * amdgpu_vram_mgr_fini - free and destroy VRAM manager
 *
 * @adev: amdgpu_device pointer
 *
 * Destroy and free the VRAM manager, returns -EBUSY if ranges are still
 * allocated inside it.
 */
void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
{
	struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
	struct ttm_resource_manager *man = &mgr->manager;
	int ret;
	struct amdgpu_vram_reservation *rsv, *temp;

	ttm_resource_manager_set_used(man, false);

	ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
	if (ret)
		return;

	spin_lock(&mgr->lock);
	list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node)
		kfree(rsv);

	list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) {
		drm_mm_remove_node(&rsv->mm_node);
		kfree(rsv);
	}
	drm_mm_takedown(&mgr->mm);
	spin_unlock(&mgr->lock);

	sysfs_remove_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);

	ttm_resource_manager_cleanup(man);
	ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL);
}

/**
 * amdgpu_vram_mgr_vis_size - Calculate visible node size
 *

@@ -283,6 +225,7 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
	return usage;
}

/* Commit the reservation of VRAM pages */
static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man)
{
	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);

@@ -415,13 +358,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
			       const struct ttm_place *place,
			       struct ttm_resource *mem)
{
	unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
	struct amdgpu_device *adev = to_amdgpu_device(mgr);
	struct drm_mm *mm = &mgr->mm;
	struct drm_mm_node *nodes;
	enum drm_mm_insert_mode mode;
	unsigned long lpfn, num_nodes, pages_per_node, pages_left;
	uint64_t vis_usage = 0, mem_bytes, max_bytes;
	struct drm_mm *mm = &mgr->mm;
	enum drm_mm_insert_mode mode;
	struct drm_mm_node *nodes;
	unsigned i;
	int r;

@@ -448,10 +391,10 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
		pages_per_node = HPAGE_PMD_NR;
#else
		/* default to 2MB */
		pages_per_node = (2UL << (20UL - PAGE_SHIFT));
		pages_per_node = 2UL << (20UL - PAGE_SHIFT);
#endif
		pages_per_node = max((uint32_t)pages_per_node,
				     tbo->page_alignment);
		pages_per_node = max_t(uint32_t, pages_per_node,
				       tbo->page_alignment);
		num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
	}

@@ -469,42 +412,37 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
	mem->start = 0;
	pages_left = mem->num_pages;

	/* Limit maximum size to 2GB due to SG table limitations */
	pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));

	i = 0;
	spin_lock(&mgr->lock);
	for (i = 0; pages_left >= pages_per_node; ++i) {
		unsigned long pages = rounddown_pow_of_two(pages_left);

		/* Limit maximum size to 2GB due to SG table limitations */
		pages = min(pages, (2UL << (30 - PAGE_SHIFT)));

		r = drm_mm_insert_node_in_range(mm, &nodes[i], pages,
						pages_per_node, 0,
						place->fpfn, lpfn,
						mode);
		if (unlikely(r))
			break;

		vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
		amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
		pages_left -= pages;
	}

	for (; pages_left; ++i) {
		unsigned long pages = min(pages_left, pages_per_node);
	while (pages_left) {
		uint32_t alignment = tbo->page_alignment;

		if (pages == pages_per_node)
		if (pages >= pages_per_node)
			alignment = pages_per_node;

		r = drm_mm_insert_node_in_range(mm, &nodes[i],
						pages, alignment, 0,
						place->fpfn, lpfn,
						mode);
		if (unlikely(r))
		r = drm_mm_insert_node_in_range(mm, &nodes[i], pages, alignment,
						0, place->fpfn, lpfn, mode);
		if (unlikely(r)) {
			if (pages > pages_per_node) {
				if (is_power_of_2(pages))
					pages = pages / 2;
				else
					pages = rounddown_pow_of_two(pages);
				continue;
			}
			goto error;
		}

		vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
		amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
		pages_left -= pages;
		++i;

		if (pages > pages_left)
			pages = pages_left;
	}
	spin_unlock(&mgr->lock);
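The rewritten loop above replaces the old two-pass scheme (a power-of-two pass followed by a remainder pass) with a single loop that shrinks the request on failure. A standalone sketch of that strategy, where try_alloc() is a stand-in for drm_mm_insert_node_in_range() returning nonzero on failure and the 2GB cap mirrors the SG-table limit noted in the code:

/* Sketch only: try_alloc() is a placeholder, not a drm symbol. */
static int alloc_in_chunks(unsigned long pages_left,
			   unsigned long pages_per_node)
{
	/* limit the first attempt to 2GB worth of pages (SG table limit) */
	unsigned long pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));

	while (pages_left) {
		if (try_alloc(pages)) {
			if (pages > pages_per_node) {
				/* shrink to the next power of two and retry */
				if (is_power_of_2(pages))
					pages /= 2;
				else
					pages = rounddown_pow_of_two(pages);
				continue;
			}
			return -ENOSPC;	/* cannot split any further */
		}
		pages_left -= pages;
		if (pages > pages_left)
			pages = pages_left;	/* last chunk is the remainder */
	}
	return 0;
}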

@@ -728,3 +666,73 @@ static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
	.free	= amdgpu_vram_mgr_del,
	.debug	= amdgpu_vram_mgr_debug
};

/**
 * amdgpu_vram_mgr_init - init VRAM manager and DRM MM
 *
 * @adev: amdgpu_device pointer
 *
 * Allocate and initialize the VRAM manager.
 */
int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
{
	struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
	struct ttm_resource_manager *man = &mgr->manager;
	int ret;

	ttm_resource_manager_init(man, adev->gmc.real_vram_size >> PAGE_SHIFT);

	man->func = &amdgpu_vram_mgr_func;

	drm_mm_init(&mgr->mm, 0, man->size);
	spin_lock_init(&mgr->lock);
	INIT_LIST_HEAD(&mgr->reservations_pending);
	INIT_LIST_HEAD(&mgr->reserved_pages);

	/* Add the two VRAM-related sysfs files */
	ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
	if (ret)
		DRM_ERROR("Failed to register sysfs\n");

	ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
	ttm_resource_manager_set_used(man, true);
	return 0;
}

/**
 * amdgpu_vram_mgr_fini - free and destroy VRAM manager
 *
 * @adev: amdgpu_device pointer
 *
 * Destroy and free the VRAM manager, returns -EBUSY if ranges are still
 * allocated inside it.
 */
void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
{
	struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
	struct ttm_resource_manager *man = &mgr->manager;
	int ret;
	struct amdgpu_vram_reservation *rsv, *temp;

	ttm_resource_manager_set_used(man, false);

	ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
	if (ret)
		return;

	spin_lock(&mgr->lock);
	list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node)
		kfree(rsv);

	list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) {
		drm_mm_remove_node(&rsv->mm_node);
		kfree(rsv);
	}
	drm_mm_takedown(&mgr->mm);
	spin_unlock(&mgr->lock);

	sysfs_remove_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);

	ttm_resource_manager_cleanup(man);
	ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL);
}

@@ -98,9 +98,9 @@ union amd_sriov_msg_feature_flags

union amd_sriov_reg_access_flags {
	struct {
		uint32_t vf_reg_access_ih    : 1;
		uint32_t vf_reg_access_mmhub : 1;
		uint32_t vf_reg_access_gc    : 1;
		uint32_t vf_reg_psp_access_ih    : 1;
		uint32_t vf_reg_rlc_access_mmhub : 1;
		uint32_t vf_reg_rlc_access_gc    : 1;
		uint32_t reserved            : 29;
	} flags;
	uint32_t all;
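The renamed bits above encode which component performs the access (PSP for IH, RLC for MMHUB and GC) directly in the flag names. A hedged usage sketch; host_flags is a hypothetical copy of the word the host hands the VF:

union amd_sriov_reg_access_flags access;

access.all = host_flags;	/* assumed: provided by the host/PF */
if (access.flags.vf_reg_rlc_access_gc) {
	/* GC registers must be programmed through the RLC on this VF */
}
if (access.flags.vf_reg_psp_access_ih) {
	/* IH registers must be programmed through the PSP */
}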

@@ -421,6 +421,11 @@ static int dce_virtual_sw_init(void *handle)
static int dce_virtual_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i = 0;

	for (i = 0; i < adev->mode_info.num_crtc; i++)
		if (adev->mode_info.crtcs[i])
			hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer);

	kfree(adev->mode_info.bios_hardcoded_edid);

@@ -480,13 +485,6 @@ static int dce_virtual_hw_init(void *handle)

static int dce_virtual_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i = 0;

	for (i = 0; i<adev->mode_info.num_crtc; i++)
		if (adev->mode_info.crtcs[i])
			hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer);

	return 0;
}

@@ -219,11 +219,11 @@ static void df_v3_6_query_hashes(struct amdgpu_device *adev)
	adev->df.hash_status.hash_2m = false;
	adev->df.hash_status.hash_1g = false;

	if (adev->asic_type != CHIP_ARCTURUS)
		return;

	/* encoding for hash-enabled on Arcturus */
	if (adev->df.funcs->get_fb_channel_number(adev) == 0xe) {
	/* encoding for hash-enabled on Arcturus and Aldebaran */
	if ((adev->asic_type == CHIP_ARCTURUS &&
	     adev->df.funcs->get_fb_channel_number(adev) == 0xe) ||
	     (adev->asic_type == CHIP_ALDEBARAN &&
	      adev->df.funcs->get_fb_channel_number(adev) == 0x1e)) {
		tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl);
		adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp,
						DF_CS_UMC_AON0_DfGlobalCtrl,

@@ -278,7 +278,12 @@ static u32 df_v3_6_get_fb_channel_number(struct amdgpu_device *adev)
	u32 tmp;

	tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
	tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
	if (adev->asic_type == CHIP_ALDEBARAN)
		tmp &=
		ALDEBARAN_DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
	else
		tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;

	tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;

	return tmp;

@@ -3937,7 +3937,8 @@ static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
{
	u32 tmp;

	if (adev->asic_type != CHIP_ARCTURUS)
	if (adev->asic_type != CHIP_ARCTURUS &&
	    adev->asic_type != CHIP_ALDEBARAN)
		return;

	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);

@@ -4559,8 +4560,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
	if (!ring->sched.ready)
		return 0;

	if (adev->asic_type == CHIP_ARCTURUS ||
	    adev->asic_type == CHIP_ALDEBARAN) {
	if (adev->asic_type == CHIP_ARCTURUS) {
		vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
		vgpr_init_regs_ptr = vgpr_init_regs_arcturus;

@@ -4745,7 +4745,11 @@ static int gfx_v9_0_ecc_late_init(void *handle)
	}

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
	if (adev->asic_type == CHIP_ALDEBARAN)
		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
	else
		r = gfx_v9_0_do_edc_gpr_workarounds(adev);

	if (r)
		return r;
						 | 
				
			
			@ -22,6 +22,7 @@
 | 
			
		|||
 */
 | 
			
		||||
#include "amdgpu.h"
 | 
			
		||||
#include "soc15.h"
 | 
			
		||||
#include "soc15d.h"
 | 
			
		||||
 | 
			
		||||
#include "gc/gc_9_4_2_offset.h"
 | 
			
		||||
#include "gc/gc_9_4_2_sh_mask.h"
 | 
			
		||||
| 
						 | 
				
			
			@ -31,6 +32,11 @@
 | 
			
		|||
#include "amdgpu_ras.h"
 | 
			
		||||
#include "amdgpu_gfx.h"
 | 
			
		||||
 | 
			
		||||
#define SE_ID_MAX 8
 | 
			
		||||
#define CU_ID_MAX 16
 | 
			
		||||
#define SIMD_ID_MAX 4
 | 
			
		||||
#define WAVE_ID_MAX 10
 | 
			
		||||
 | 
			
		||||
enum gfx_v9_4_2_utc_type {
 | 
			
		||||
	VML2_MEM,
 | 
			
		||||
	VML2_WALKER_MEM,
 | 
			
		||||
| 
						 | 
				
			
			@ -79,6 +85,634 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_2_alde[] = {
 | 
			
		|||
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCI_CNTL_3, 0xff, 0x20),
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * This shader is used to clear VGPRS and LDS, and also write the input
 | 
			
		||||
 * pattern into the write back buffer, which will be used by driver to
 | 
			
		||||
 * check whether all SIMDs have been covered.
 | 
			
		||||
*/
 | 
			
		||||
static const u32 vgpr_init_compute_shader_aldebaran[] = {
 | 
			
		||||
	0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
 | 
			
		||||
	0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
 | 
			
		||||
	0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xd3d94000,
 | 
			
		||||
	0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080, 0xd3d94003,
 | 
			
		||||
	0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080, 0xd3d94006,
 | 
			
		||||
	0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080, 0xd3d94009,
 | 
			
		||||
	0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080, 0xd3d9400c,
 | 
			
		||||
	0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080, 0xd3d9400f,
 | 
			
		||||
	0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080, 0xd3d94012,
 | 
			
		||||
	0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080, 0xd3d94015,
 | 
			
		||||
	0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080, 0xd3d94018,
 | 
			
		||||
	0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080, 0xd3d9401b,
 | 
			
		||||
	0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080, 0xd3d9401e,
 | 
			
		||||
	0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080, 0xd3d94021,
 | 
			
		||||
	0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080, 0xd3d94024,
 | 
			
		||||
	0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080, 0xd3d94027,
 | 
			
		||||
	0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080, 0xd3d9402a,
 | 
			
		||||
	0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080, 0xd3d9402d,
 | 
			
		||||
	0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080, 0xd3d94030,
 | 
			
		||||
	0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080, 0xd3d94033,
 | 
			
		||||
	0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080, 0xd3d94036,
 | 
			
		||||
	0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080, 0xd3d94039,
 | 
			
		||||
	0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080, 0xd3d9403c,
 | 
			
		||||
	0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080, 0xd3d9403f,
 | 
			
		||||
	0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080, 0xd3d94042,
 | 
			
		||||
	0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080, 0xd3d94045,
 | 
			
		||||
	0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080, 0xd3d94048,
 | 
			
		||||
	0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080, 0xd3d9404b,
 | 
			
		||||
	0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080, 0xd3d9404e,
 | 
			
		||||
	0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080, 0xd3d94051,
 | 
			
		||||
	0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080, 0xd3d94054,
 | 
			
		||||
	0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080, 0xd3d94057,
 | 
			
		||||
	0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080, 0xd3d9405a,
 | 
			
		||||
	0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080, 0xd3d9405d,
 | 
			
		||||
	0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080, 0xd3d94060,
 | 
			
		||||
	0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080, 0xd3d94063,
 | 
			
		||||
	0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080, 0xd3d94066,
 | 
			
		||||
	0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080, 0xd3d94069,
 | 
			
		||||
	0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080, 0xd3d9406c,
 | 
			
		||||
	0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080, 0xd3d9406f,
 | 
			
		||||
	0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080, 0xd3d94072,
 | 
			
		||||
	0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080, 0xd3d94075,
 | 
			
		||||
	0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080, 0xd3d94078,
 | 
			
		||||
	0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080, 0xd3d9407b,
 | 
			
		||||
	0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080, 0xd3d9407e,
 | 
			
		||||
	0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080, 0xd3d94081,
 | 
			
		||||
	0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080, 0xd3d94084,
 | 
			
		||||
	0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080, 0xd3d94087,
 | 
			
		||||
	0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080, 0xd3d9408a,
 | 
			
		||||
	0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080, 0xd3d9408d,
 | 
			
		||||
	0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080, 0xd3d94090,
 | 
			
		||||
	0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080, 0xd3d94093,
 | 
			
		||||
	0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080, 0xd3d94096,
 | 
			
		||||
	0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080, 0xd3d94099,
 | 
			
		||||
	0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080, 0xd3d9409c,
 | 
			
		||||
	0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080, 0xd3d9409f,
 | 
			
		||||
	0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080, 0xd3d940a2,
 | 
			
		||||
	0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080, 0xd3d940a5,
 | 
			
		||||
	0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080, 0xd3d940a8,
 | 
			
		||||
	0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080, 0xd3d940ab,
 | 
			
		||||
	0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080, 0xd3d940ae,
 | 
			
		||||
	0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080, 0xd3d940b1,
 | 
			
		||||
	0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080, 0xd3d940b4,
 | 
			
		||||
	0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080, 0xd3d940b7,
 | 
			
		||||
	0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080, 0xd3d940ba,
 | 
			
		||||
	0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080, 0xd3d940bd,
 | 
			
		||||
	0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080, 0xd3d940c0,
 | 
			
		||||
	0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080, 0xd3d940c3,
 | 
			
		||||
	0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080, 0xd3d940c6,
 | 
			
		||||
	0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080, 0xd3d940c9,
 | 
			
		||||
	0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080, 0xd3d940cc,
 | 
			
		||||
	0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080, 0xd3d940cf,
 | 
			
		||||
	0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080, 0xd3d940d2,
 | 
			
		||||
	0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080, 0xd3d940d5,
 | 
			
		||||
	0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080, 0xd3d940d8,
 | 
			
		||||
	0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080, 0xd3d940db,
 | 
			
		||||
	0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080, 0xd3d940de,
 | 
			
		||||
	0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080, 0xd3d940e1,
 | 
			
		||||
	0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080, 0xd3d940e4,
 | 
			
		||||
	0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080, 0xd3d940e7,
 | 
			
		||||
	0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080, 0xd3d940ea,
 | 
			
		||||
	0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080, 0xd3d940ed,
 | 
			
		||||
	0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080, 0xd3d940f0,
 | 
			
		||||
	0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080, 0xd3d940f3,
 | 
			
		||||
	0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080, 0xd3d940f6,
 | 
			
		||||
	0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080, 0xd3d940f9,
 | 
			
		||||
	0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080, 0xd3d940fc,
 | 
			
		||||
	0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080, 0xd3d940ff,
 | 
			
		||||
	0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a, 0x7e000280,
 | 
			
		||||
	0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280, 0x7e0c0280,
 | 
			
		||||
	0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000, 0xd28c0001,
 | 
			
		||||
	0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xbe8b0004, 0xb78b4000,
 | 
			
		||||
	0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000, 0x00020201,
 | 
			
		||||
	0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a, 0xbf84fff8,
 | 
			
		||||
	0xbf810000,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
const struct soc15_reg_entry vgpr_init_regs_aldebaran[] = {
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 4 },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0xbf },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x400006 },  /* 64KB LDS */
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x3F }, /*  63 - accum-offset = 256 */
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * The below shaders are used to clear SGPRS, and also write the input
 | 
			
		||||
 * pattern into the write back buffer. The first two dispatch should be
 | 
			
		||||
 * scheduled simultaneously which make sure that all SGPRS could be
 | 
			
		||||
 * allocated, so the dispatch 1 need check write back buffer before scheduled,
 | 
			
		||||
 * make sure that waves of dispatch 0 are all dispacthed to all simds
 | 
			
		||||
 * balanced. both dispatch 0 and dispatch 1 should be halted until all waves
 | 
			
		||||
 * are dispatched, and then driver write a pattern to the shared memory to make
 | 
			
		||||
 * all waves continue.
 | 
			
		||||
*/
 | 
			
		||||
static const u32 sgpr112_init_compute_shader_aldebaran[] = {
 | 
			
		||||
	0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
 | 
			
		||||
	0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
 | 
			
		||||
	0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200,
 | 
			
		||||
	0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102,
 | 
			
		||||
	0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080,
 | 
			
		||||
	0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080,
 | 
			
		||||
	0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080,
 | 
			
		||||
	0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080,
 | 
			
		||||
	0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080,
 | 
			
		||||
	0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080,
 | 
			
		||||
	0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080,
 | 
			
		||||
	0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080,
 | 
			
		||||
	0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080,
 | 
			
		||||
	0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080,
 | 
			
		||||
	0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbeba0080, 0xbebb0080,
 | 
			
		||||
	0xbebc0080, 0xbebd0080, 0xbebe0080, 0xbebf0080, 0xbec00080, 0xbec10080,
 | 
			
		||||
	0xbec20080, 0xbec30080, 0xbec40080, 0xbec50080, 0xbec60080, 0xbec70080,
 | 
			
		||||
	0xbec80080, 0xbec90080, 0xbeca0080, 0xbecb0080, 0xbecc0080, 0xbecd0080,
 | 
			
		||||
	0xbece0080, 0xbecf0080, 0xbed00080, 0xbed10080, 0xbed20080, 0xbed30080,
 | 
			
		||||
	0xbed40080, 0xbed50080, 0xbed60080, 0xbed70080, 0xbed80080, 0xbed90080,
 | 
			
		||||
	0xbeda0080, 0xbedb0080, 0xbedc0080, 0xbedd0080, 0xbede0080, 0xbedf0080,
 | 
			
		||||
	0xbee00080, 0xbee10080, 0xbee20080, 0xbee30080, 0xbee40080, 0xbee50080,
 | 
			
		||||
	0xbf810000
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
const struct soc15_reg_entry sgpr112_init_regs_aldebaran[] = {
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 8 },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x340 },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static const u32 sgpr96_init_compute_shader_aldebaran[] = {
 | 
			
		||||
	0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
 | 
			
		||||
	0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
 | 
			
		||||
	0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200,
 | 
			
		||||
	0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102,
 | 
			
		||||
	0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080,
 | 
			
		||||
	0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080,
 | 
			
		||||
	0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080,
 | 
			
		||||
	0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080,
 | 
			
		||||
	0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080,
 | 
			
		||||
	0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080,
 | 
			
		||||
	0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080,
 | 
			
		||||
	0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080,
 | 
			
		||||
	0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080,
 | 
			
		||||
	0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080,
 | 
			
		||||
	0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbeba0080, 0xbebb0080,
 | 
			
		||||
	0xbebc0080, 0xbebd0080, 0xbebe0080, 0xbebf0080, 0xbec00080, 0xbec10080,
 | 
			
		||||
	0xbec20080, 0xbec30080, 0xbec40080, 0xbec50080, 0xbec60080, 0xbec70080,
 | 
			
		||||
	0xbec80080, 0xbec90080, 0xbeca0080, 0xbecb0080, 0xbecc0080, 0xbecd0080,
 | 
			
		||||
	0xbece0080, 0xbecf0080, 0xbed00080, 0xbed10080, 0xbed20080, 0xbed30080,
 | 
			
		||||
	0xbed40080, 0xbed50080, 0xbed60080, 0xbed70080, 0xbed80080, 0xbed90080,
 | 
			
		||||
	0xbf810000,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
const struct soc15_reg_entry sgpr96_init_regs_aldebaran[] = {
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 0xc },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x2c0 },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * This shader is used to clear the uninitiated sgprs after the above
 | 
			
		||||
 * two dispatches, because of hardware feature, dispath 0 couldn't clear
 | 
			
		||||
 * top hole sgprs. Therefore need 4 waves per SIMD to cover these sgprs
 | 
			
		||||
*/
 | 
			
		||||
static const u32 sgpr64_init_compute_shader_aldebaran[] = {
 | 
			
		||||
	0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
 | 
			
		||||
	0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
 | 
			
		||||
	0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200,
 | 
			
		||||
	0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102,
 | 
			
		||||
	0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080,
 | 
			
		||||
	0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080,
 | 
			
		||||
	0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080,
 | 
			
		||||
	0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080,
 | 
			
		||||
	0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080,
 | 
			
		||||
	0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080,
 | 
			
		||||
	0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080,
 | 
			
		||||
	0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080,
 | 
			
		||||
	0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080,
 | 
			
		||||
	0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080,
 | 
			
		||||
	0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbf810000,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
const struct soc15_reg_entry sgpr64_init_regs_aldebaran[] = {
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 0x10 },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x1c0 },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
 | 
			
		||||
	{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static int gfx_v9_4_2_run_shader(struct amdgpu_device *adev,
				 struct amdgpu_ring *ring,
				 struct amdgpu_ib *ib,
				 const u32 *shader_ptr, u32 shader_size,
				 const struct soc15_reg_entry *init_regs, u32 regs_size,
				 u32 compute_dim_x, u64 wb_gpu_addr, u32 pattern,
				 struct dma_fence **fence_ptr)
{
	int r, i;
	uint32_t total_size, shader_offset;
	u64 gpu_addr;

	total_size = (regs_size * 3 + 4 + 5 + 5) * 4;
	total_size = ALIGN(total_size, 256);
	shader_offset = total_size;
	total_size += ALIGN(shader_size, 256);

	/* allocate an indirect buffer to put the commands in */
	memset(ib, 0, sizeof(*ib));
	r = amdgpu_ib_get(adev, NULL, total_size,
					AMDGPU_IB_POOL_DIRECT, ib);
	if (r) {
		dev_err(adev->dev, "failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < shader_size/sizeof(u32); i++)
		ib->ptr[i + (shader_offset / 4)] = shader_ptr[i];

	/* init the ib length to 0 */
	ib->length_dw = 0;

	/* write the register state for the compute dispatch */
	for (i = 0; i < regs_size; i++) {
		ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib->ptr[ib->length_dw++] = SOC15_REG_ENTRY_OFFSET(init_regs[i])
								- PACKET3_SET_SH_REG_START;
		ib->ptr[ib->length_dw++] = init_regs[i].reg_value;
	}

	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib->gpu_addr + (u64)shader_offset) >> 8;
	ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib->ptr[ib->length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_PGM_LO)
							- PACKET3_SET_SH_REG_START;
	ib->ptr[ib->length_dw++] = lower_32_bits(gpu_addr);
	ib->ptr[ib->length_dw++] = upper_32_bits(gpu_addr);

	/* write the wb buffer address */
	ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 3);
	ib->ptr[ib->length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_USER_DATA_0)
							- PACKET3_SET_SH_REG_START;
	ib->ptr[ib->length_dw++] = lower_32_bits(wb_gpu_addr);
	ib->ptr[ib->length_dw++] = upper_32_bits(wb_gpu_addr);
	ib->ptr[ib->length_dw++] = pattern;

	/* write dispatch packet */
	ib->ptr[ib->length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib->ptr[ib->length_dw++] = compute_dim_x; /* x */
	ib->ptr[ib->length_dw++] = 1; /* y */
	ib->ptr[ib->length_dw++] = 1; /* z */
	ib->ptr[ib->length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, ib, NULL, fence_ptr);
	if (r) {
		dev_err(adev->dev, "ib submit failed (%d).\n", r);
		amdgpu_ib_free(adev, ib, NULL);
	}
	return r;
}

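Editor's note on the sizing arithmetic at the top of gfx_v9_4_2_run_shader: the dword budget can be read straight off the packets the function emits. A minimal breakdown, derived from the code above rather than from any external spec:

	/* total_size = (regs_size * 3 + 4 + 5 + 5) * 4 bytes, because:
	 *   regs_size * 3 : one SET_SH_REG per register = header + offset + value
	 *   + 4           : COMPUTE_PGM_LO/HI write     = header + offset + lo + hi
	 *   + 5           : wb address + pattern        = header + offset + lo + hi + pattern
	 *   + 5           : DISPATCH_DIRECT             = header + x + y + z + initiator
	 * then ALIGN(, 256), with the shader code appended at shader_offset.
	 */
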
static void gfx_v9_4_2_log_wave_assignment(struct amdgpu_device *adev, uint32_t *wb_ptr)
{
	uint32_t se, cu, simd, wave;
	uint32_t offset = 0;
	char *str;
	int size;

	str = kmalloc(256, GFP_KERNEL);
	if (!str)
		return;

	dev_dbg(adev->dev, "wave assignment:\n");

	for (se = 0; se < adev->gfx.config.max_shader_engines; se++) {
		for (cu = 0; cu < CU_ID_MAX; cu++) {
			memset(str, 0, 256);
			size = sprintf(str, "SE[%02d]CU[%02d]: ", se, cu);
			for (simd = 0; simd < SIMD_ID_MAX; simd++) {
				size += sprintf(str + size, "[");
				for (wave = 0; wave < WAVE_ID_MAX; wave++) {
					size += sprintf(str + size, "%x", wb_ptr[offset]);
					offset++;
				}
				size += sprintf(str + size, "]  ");
			}
			dev_dbg(adev->dev, "%s\n", str);
		}
	}

	kfree(str);
}

static int gfx_v9_4_2_wait_for_waves_assigned(struct amdgpu_device *adev,
					      uint32_t *wb_ptr, uint32_t mask,
					      uint32_t pattern, uint32_t num_wave, bool wait)
{
	uint32_t se, cu, simd, wave;
	uint32_t loop = 0;
	uint32_t wave_cnt;
	uint32_t offset;

	do {
		wave_cnt = 0;
		offset = 0;

		for (se = 0; se < adev->gfx.config.max_shader_engines; se++)
			for (cu = 0; cu < CU_ID_MAX; cu++)
				for (simd = 0; simd < SIMD_ID_MAX; simd++)
					for (wave = 0; wave < WAVE_ID_MAX; wave++) {
						if (((1 << wave) & mask) &&
						    (wb_ptr[offset] == pattern))
							wave_cnt++;

						offset++;
					}

		if (wave_cnt == num_wave)
			return 0;

		mdelay(1);
	} while (++loop < 2000 && wait);

	dev_err(adev->dev, "actual wave num: %d, expected wave num: %d\n",
		wave_cnt, num_wave);

	gfx_v9_4_2_log_wave_assignment(adev, wb_ptr);

	return -EBADSLT;
}

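The write-back buffer scanned here is a flat array of one dword per wave slot, nested se -> cu -> simd -> wave. A hedged sketch of the slot index implied by the loop order above; wave_slot is a hypothetical helper, not part of the patch:

	static inline uint32_t wave_slot(uint32_t se, uint32_t cu,
					 uint32_t simd, uint32_t wave)
	{
		/* same nesting as the four loops above */
		return ((se * CU_ID_MAX + cu) * SIMD_ID_MAX + simd) * WAVE_ID_MAX + wave;
	}
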
static int gfx_v9_4_2_do_sgprs_init(struct amdgpu_device *adev)
{
	int r;
	int wb_size = adev->gfx.config.max_shader_engines *
			 CU_ID_MAX * SIMD_ID_MAX * WAVE_ID_MAX;
	struct amdgpu_ib wb_ib;
	struct amdgpu_ib disp_ibs[3];
	struct dma_fence *fences[3];
	u32 pattern[3] = { 0x1, 0x5, 0xa };

	/* bail if the compute ring is not ready */
	if (!adev->gfx.compute_ring[0].sched.ready ||
		 !adev->gfx.compute_ring[1].sched.ready)
		return 0;

	/* allocate the write-back buffer from IB */
	memset(&wb_ib, 0, sizeof(wb_ib));
	r = amdgpu_ib_get(adev, NULL, (1 + wb_size) * sizeof(uint32_t),
			  AMDGPU_IB_POOL_DIRECT, &wb_ib);
	if (r) {
		dev_err(adev->dev, "failed to get ib (%d) for wb\n", r);
		return r;
	}
	memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t));

	r = gfx_v9_4_2_run_shader(adev,
			&adev->gfx.compute_ring[0],
			&disp_ibs[0],
			sgpr112_init_compute_shader_aldebaran,
			sizeof(sgpr112_init_compute_shader_aldebaran),
			sgpr112_init_regs_aldebaran,
			ARRAY_SIZE(sgpr112_init_regs_aldebaran),
			adev->gfx.cu_info.number,
			wb_ib.gpu_addr, pattern[0], &fences[0]);
	if (r) {
		dev_err(adev->dev, "failed to clear first 224 sgprs\n");
		goto pro_end;
	}

	r = gfx_v9_4_2_wait_for_waves_assigned(adev,
			&wb_ib.ptr[1], 0b11,
			pattern[0],
			adev->gfx.cu_info.number * SIMD_ID_MAX * 2,
			true);
	if (r) {
		dev_err(adev->dev, "wave coverage failed when clear first 224 sgprs\n");
		wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
		goto disp0_failed;
	}

	r = gfx_v9_4_2_run_shader(adev,
			&adev->gfx.compute_ring[1],
			&disp_ibs[1],
			sgpr96_init_compute_shader_aldebaran,
			sizeof(sgpr96_init_compute_shader_aldebaran),
			sgpr96_init_regs_aldebaran,
			ARRAY_SIZE(sgpr96_init_regs_aldebaran),
			adev->gfx.cu_info.number * 2,
			wb_ib.gpu_addr, pattern[1], &fences[1]);
	if (r) {
		dev_err(adev->dev, "failed to clear next 576 sgprs\n");
		goto disp0_failed;
	}

	r = gfx_v9_4_2_wait_for_waves_assigned(adev,
			&wb_ib.ptr[1], 0b11111100,
			pattern[1], adev->gfx.cu_info.number * SIMD_ID_MAX * 6,
			true);
	if (r) {
		dev_err(adev->dev, "wave coverage failed when clear first 576 sgprs\n");
		wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
		goto disp1_failed;
	}

	wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(fences[0], false);
	if (r) {
		dev_err(adev->dev, "timeout to clear first 224 sgprs\n");
		goto disp1_failed;
	}

	r = dma_fence_wait(fences[1], false);
	if (r) {
		dev_err(adev->dev, "timeout to clear first 576 sgprs\n");
		goto disp1_failed;
	}

	memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t));
	r = gfx_v9_4_2_run_shader(adev,
			&adev->gfx.compute_ring[0],
			&disp_ibs[2],
			sgpr64_init_compute_shader_aldebaran,
			sizeof(sgpr64_init_compute_shader_aldebaran),
			sgpr64_init_regs_aldebaran,
			ARRAY_SIZE(sgpr64_init_regs_aldebaran),
			adev->gfx.cu_info.number,
			wb_ib.gpu_addr, pattern[2], &fences[2]);
	if (r) {
		dev_err(adev->dev, "failed to clear first 256 sgprs\n");
		goto disp1_failed;
	}

	r = gfx_v9_4_2_wait_for_waves_assigned(adev,
			&wb_ib.ptr[1], 0b1111,
			pattern[2],
			adev->gfx.cu_info.number * SIMD_ID_MAX * 4,
			true);
	if (r) {
		dev_err(adev->dev, "wave coverage failed when clear first 256 sgprs\n");
		wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
		goto disp2_failed;
	}

	wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */

	r = dma_fence_wait(fences[2], false);
	if (r) {
		dev_err(adev->dev, "timeout to clear first 256 sgprs\n");
		goto disp2_failed;
	}

disp2_failed:
	amdgpu_ib_free(adev, &disp_ibs[2], NULL);
	dma_fence_put(fences[2]);
disp1_failed:
	amdgpu_ib_free(adev, &disp_ibs[1], NULL);
	dma_fence_put(fences[1]);
disp0_failed:
	amdgpu_ib_free(adev, &disp_ibs[0], NULL);
	dma_fence_put(fences[0]);
pro_end:
	amdgpu_ib_free(adev, &wb_ib, NULL);

	if (r)
		dev_info(adev->dev, "Init SGPRS Failed\n");
	else
		dev_info(adev->dev, "Init SGPRS Successfully\n");

	return r;
}

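For orientation, the masks and patterns in gfx_v9_4_2_do_sgprs_init sketch a three-pass scrub. This reading is inferred from the dispatch arguments above and is not stated elsewhere in the patch:

	/* pass 1: pattern 0x1, mask 0b11       - sgpr112 shader, waves 0-1, ring 0
	 * pass 2: pattern 0x5, mask 0b11111100 - sgpr96 shader, waves 2-7, ring 1
	 * pass 3: pattern 0xa, mask 0b1111     - sgpr64 shader, waves 0-3, ring 0
	 */
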
static int gfx_v9_4_2_do_vgprs_init(struct amdgpu_device *adev)
{
	int r;
	/* CU_ID: 0~15, SIMD_ID: 0~3, WAVE_ID: 0 ~ 9 */
	int wb_size = adev->gfx.config.max_shader_engines *
			 CU_ID_MAX * SIMD_ID_MAX * WAVE_ID_MAX;
	struct amdgpu_ib wb_ib;
	struct amdgpu_ib disp_ib;
	struct dma_fence *fence;
	u32 pattern = 0xa;

	/* bail if the compute ring is not ready */
	if (!adev->gfx.compute_ring[0].sched.ready)
		return 0;

	/* allocate the write-back buffer from IB */
	memset(&wb_ib, 0, sizeof(wb_ib));
	r = amdgpu_ib_get(adev, NULL, (1 + wb_size) * sizeof(uint32_t),
			  AMDGPU_IB_POOL_DIRECT, &wb_ib);
	if (r) {
		dev_err(adev->dev, "failed to get ib (%d) for wb.\n", r);
		return r;
	}
	memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t));

	r = gfx_v9_4_2_run_shader(adev,
			&adev->gfx.compute_ring[0],
			&disp_ib,
			vgpr_init_compute_shader_aldebaran,
			sizeof(vgpr_init_compute_shader_aldebaran),
			vgpr_init_regs_aldebaran,
			ARRAY_SIZE(vgpr_init_regs_aldebaran),
			adev->gfx.cu_info.number,
			wb_ib.gpu_addr, pattern, &fence);
	if (r) {
		dev_err(adev->dev, "failed to clear vgprs\n");
		goto pro_end;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(fence, false);
	if (r) {
		dev_err(adev->dev, "timeout to clear vgprs\n");
		goto disp_failed;
	}

	r = gfx_v9_4_2_wait_for_waves_assigned(adev,
			&wb_ib.ptr[1], 0b1,
			pattern,
			adev->gfx.cu_info.number * SIMD_ID_MAX,
			false);
	if (r) {
		dev_err(adev->dev, "failed to cover all simds when clearing vgprs\n");
		goto disp_failed;
	}

disp_failed:
	amdgpu_ib_free(adev, &disp_ib, NULL);
	dma_fence_put(fence);
pro_end:
	amdgpu_ib_free(adev, &wb_ib, NULL);

	if (r)
		dev_info(adev->dev, "Init VGPRS Failed\n");
	else
		dev_info(adev->dev, "Init VGPRS Successfully\n");

	return r;
}

int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	/* only support when RAS is enabled */
	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
		return 0;

	gfx_v9_4_2_do_sgprs_init(adev);

	gfx_v9_4_2_do_vgprs_init(adev);

	return 0;
}

static void gfx_v9_4_2_query_sq_timeout_status(struct amdgpu_device *adev);
static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev);

@@ -808,8 +1442,9 @@ static struct gfx_v9_4_2_utc_block gfx_v9_4_2_utc_blocks[] = {
	  REG_SET_FIELD(0, ATC_L2_CACHE_4K_DSM_CNTL, WRITE_COUNTERS, 1) },
};

static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs =
	{ SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 };
static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs = {
	SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16
};

static int gfx_v9_4_2_get_reg_error_count(struct amdgpu_device *adev,
					  const struct soc15_reg_entry *reg,

@@ -1039,13 +1674,16 @@ static void gfx_v9_4_2_reset_utc_err_status(struct amdgpu_device *adev)
static void gfx_v9_4_2_reset_ea_err_status(struct amdgpu_device *adev)
{
	uint32_t i, j;
	uint32_t value;

	value = REG_SET_FIELD(0, GCEA_ERR_STATUS, CLEAR_ERROR_STATUS, 0x1);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) {
		for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance;
		     j++) {
			gfx_v9_4_2_select_se_sh(adev, i, 0, j);
			WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), 0x10);
			WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), value);
		}
	}
	gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

@@ -29,6 +29,7 @@ void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev,
void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev,
				      uint32_t die_id);
void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev);
int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev);

extern const struct amdgpu_gfx_ras_funcs gfx_v9_4_2_ras_funcs;

@@ -283,10 +283,14 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
				    PAGE_TABLE_BLOCK_SIZE,
				    block_size);
		/* Send no-retry XNACK on fault to suppress VM fault storm. */
		/* Send no-retry XNACK on fault to suppress VM fault storm.
		 * On Aldebaran, XNACK can be enabled in the SQ per-process.
		 * Retry faults need to be enabled for that to work.
		 */
		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
				    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
				    !adev->gmc.noretry);
				    !adev->gmc.noretry ||
				    adev->asic_type == CHIP_ALDEBARAN);
		WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL,
				    i * hub->ctx_distance, tmp);
		WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,

@@ -53,6 +53,7 @@
#include "mmhub_v1_7.h"
#include "umc_v6_1.h"
#include "umc_v6_0.h"
#include "hdp_v4_0.h"

#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"

@@ -1210,6 +1211,11 @@ static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
	adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
}

static void gmc_v9_0_set_hdp_ras_funcs(struct amdgpu_device *adev)
{
	adev->hdp.ras_funcs = &hdp_v4_0_ras_funcs;
}

static int gmc_v9_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

@@ -1230,6 +1236,7 @@ static int gmc_v9_0_early_init(void *handle)
	gmc_v9_0_set_mmhub_funcs(adev);
	gmc_v9_0_set_mmhub_ras_funcs(adev);
	gmc_v9_0_set_gfxhub_funcs(adev);
	gmc_v9_0_set_hdp_ras_funcs(adev);

	adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
	adev->gmc.shared_aperture_end =

@@ -1255,7 +1262,7 @@ static int gmc_v9_0_late_init(void *handle)
	 * writes, while disables HBM ECC for vega10.
	 */
	if (!amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_VEGA10)) {
		if (!(adev->ras_features & (1 << AMDGPU_RAS_BLOCK__UMC))) {
		if (!(adev->ras_enabled & (1 << AMDGPU_RAS_BLOCK__UMC))) {
			if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
				adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
		}

@@ -1265,6 +1272,10 @@ static int gmc_v9_0_late_init(void *handle)
	    adev->mmhub.ras_funcs->reset_ras_error_count)
		adev->mmhub.ras_funcs->reset_ras_error_count(adev);

	if (adev->hdp.ras_funcs &&
	    adev->hdp.ras_funcs->reset_ras_error_count)
		adev->hdp.ras_funcs->reset_ras_error_count(adev);

	r = amdgpu_gmc_ras_late_init(adev);
	if (r)
		return r;

@@ -59,12 +59,31 @@ static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev,
			HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
}

static void hdp_v4_0_query_ras_error_count(struct amdgpu_device *adev,
					   void *ras_error_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;

	err_data->ue_count = 0;
	err_data->ce_count = 0;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP))
		return;

	/* HDP SRAM errors are uncorrectable ones (i.e. fatal errors) */
	err_data->ue_count += RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);
};

static void hdp_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
{
	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP))
		return;
	/*read back hdp ras counter to reset it to 0 */
	RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);

	if (adev->asic_type >= CHIP_ALDEBARAN)
		WREG32_SOC15(HDP, 0, mmHDP_EDC_CNT, 0);
	else
		/*read back hdp ras counter to reset it to 0 */
		RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);
}

static void hdp_v4_0_update_clock_gating(struct amdgpu_device *adev,

@@ -130,10 +149,16 @@ static void hdp_v4_0_init_registers(struct amdgpu_device *adev)
	WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));
}

const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs = {
	.ras_late_init = amdgpu_hdp_ras_late_init,
	.ras_fini = amdgpu_hdp_ras_fini,
	.query_ras_error_count = hdp_v4_0_query_ras_error_count,
	.reset_ras_error_count = hdp_v4_0_reset_ras_error_count,
};

const struct amdgpu_hdp_funcs hdp_v4_0_funcs = {
	.flush_hdp = hdp_v4_0_flush_hdp,
	.invalidate_hdp = hdp_v4_0_invalidate_hdp,
	.reset_ras_error_count = hdp_v4_0_reset_ras_error_count,
	.update_clock_gating = hdp_v4_0_update_clock_gating,
	.get_clock_gating_state = hdp_v4_0_get_clockgating_state,
	.init_registers = hdp_v4_0_init_registers,

@@ -27,5 +27,6 @@
#include "soc15_common.h"

extern const struct amdgpu_hdp_funcs hdp_v4_0_funcs;
extern const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs;

#endif

@@ -296,10 +296,12 @@ static void mmhub_v1_7_setup_vmid_config(struct amdgpu_device *adev)
		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
				    PAGE_TABLE_BLOCK_SIZE,
				    block_size);
		/* Send no-retry XNACK on fault to suppress VM fault storm. */
		/* On Aldebaran, XNACK can be enabled in the SQ per-process.
		 * Retry faults need to be enabled for that to work.
		 */
		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
				    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
				    !adev->gmc.noretry);
				    1);
		WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL,
				    i * hub->ctx_distance, tmp);
		WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,

@@ -1313,12 +1315,31 @@ static void mmhub_v1_7_query_ras_error_status(struct amdgpu_device *adev)
	}
}

static void mmhub_v1_7_reset_ras_error_status(struct amdgpu_device *adev)
{
	int i;
	uint32_t reg_value;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB))
		return;

	for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_ea_err_status_regs); i++) {
		reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
			mmhub_v1_7_ea_err_status_regs[i]));
		reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS,
					  CLEAR_ERROR_STATUS, 0x01);
		WREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_ea_err_status_regs[i]),
		       reg_value);
	}
}

const struct amdgpu_mmhub_ras_funcs mmhub_v1_7_ras_funcs = {
	.ras_late_init = amdgpu_mmhub_ras_late_init,
	.ras_fini = amdgpu_mmhub_ras_fini,
	.query_ras_error_count = mmhub_v1_7_query_ras_error_count,
	.reset_ras_error_count = mmhub_v1_7_reset_ras_error_count,
	.query_ras_error_status = mmhub_v1_7_query_ras_error_status,
	.reset_ras_error_status = mmhub_v1_7_reset_ras_error_status,
};

const struct amdgpu_mmhub_funcs mmhub_v1_7_funcs = {

@@ -569,9 +569,9 @@ static int mmhub_v2_3_set_clockgating(struct amdgpu_device *adev,
		return 0;

	mmhub_v2_3_update_medium_grain_clock_gating(adev,
			state == AMD_CG_STATE_GATE ? true : false);
				state == AMD_CG_STATE_GATE);
	mmhub_v2_3_update_medium_grain_light_sleep(adev,
			state == AMD_CG_STATE_GATE ? true : false);
				state == AMD_CG_STATE_GATE);

	return 0;
}

@@ -598,7 +598,7 @@ static void nv_pcie_gen3_enable(struct amdgpu_device *adev)

static void nv_program_aspm(struct amdgpu_device *adev)
{
	if (amdgpu_aspm != 1)
	if (!amdgpu_aspm)
		return;

	if (!(adev->flags & AMD_IS_APU) &&

@@ -1068,6 +1068,7 @@ static int nv_common_early_init(void *handle)
	case CHIP_SIENNA_CICHLID:
		adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
			AMD_CG_SUPPORT_GFX_CGCG |
			AMD_CG_SUPPORT_GFX_CGLS |
			AMD_CG_SUPPORT_GFX_3D_CGCG |
			AMD_CG_SUPPORT_MC_MGCG |
			AMD_CG_SUPPORT_VCN_MGCG |

@@ -1091,6 +1092,7 @@ static int nv_common_early_init(void *handle)
	case CHIP_NAVY_FLOUNDER:
		adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
			AMD_CG_SUPPORT_GFX_CGCG |
			AMD_CG_SUPPORT_GFX_CGLS |
			AMD_CG_SUPPORT_GFX_3D_CGCG |
			AMD_CG_SUPPORT_VCN_MGCG |
			AMD_CG_SUPPORT_JPEG_MGCG |

@@ -1121,6 +1123,8 @@ static int nv_common_early_init(void *handle)
			AMD_CG_SUPPORT_MC_LS |
			AMD_CG_SUPPORT_GFX_FGCG |
			AMD_CG_SUPPORT_VCN_MGCG |
			AMD_CG_SUPPORT_SDMA_MGCG |
			AMD_CG_SUPPORT_SDMA_LS |
			AMD_CG_SUPPORT_JPEG_MGCG;
		adev->pg_flags = AMD_PG_SUPPORT_GFX_PG |
			AMD_PG_SUPPORT_VCN |

@@ -1132,6 +1136,7 @@ static int nv_common_early_init(void *handle)
	case CHIP_DIMGREY_CAVEFISH:
		adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
			AMD_CG_SUPPORT_GFX_CGCG |
			AMD_CG_SUPPORT_GFX_CGLS |
			AMD_CG_SUPPORT_GFX_3D_CGCG |
			AMD_CG_SUPPORT_VCN_MGCG |
			AMD_CG_SUPPORT_JPEG_MGCG |

@@ -97,7 +97,6 @@ enum psp_gfx_cmd_id
    GFX_CMD_ID_SETUP_VMR          = 0x00000009,   /* setup VMR region */
    GFX_CMD_ID_DESTROY_VMR        = 0x0000000A,   /* destroy VMR region */
    GFX_CMD_ID_PROG_REG           = 0x0000000B,   /* program regs */
    GFX_CMD_ID_CLEAR_VF_FW        = 0x0000000D,   /* Clear VF FW, to be used on VF shutdown. */
    GFX_CMD_ID_GET_FW_ATTESTATION = 0x0000000F,   /* Query GPUVA of the Fw Attestation DB */
    /* IDs upto 0x1F are reserved for older programs (Raven, Vega 10/12/20) */
    GFX_CMD_ID_LOAD_TOC           = 0x00000020,   /* Load TOC and obtain TMR size */

@@ -1109,6 +1109,8 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
		if (adev->asic_type == CHIP_ARCTURUS &&
		    adev->sdma.instance[i].fw_version >= 14)
			WREG32_SDMA(i, mmSDMA0_PUB_DUMMY_REG2, enable);
		/* Extend page fault timeout to avoid interrupt storm */
		WREG32_SDMA(i, mmSDMA0_UTCL1_TIMEOUT, 0x00800080);
	}

}

@@ -2227,7 +2229,7 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev,
	memset(&task_info, 0, sizeof(struct amdgpu_task_info));
	amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);

	dev_info(adev->dev,
	dev_dbg_ratelimited(adev->dev,
		   "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u "
		   "pasid:%u, for process %s pid %d thread %s pid %d\n",
		   instance, addr, entry->src_id, entry->ring_id, entry->vmid,

@@ -2240,7 +2242,7 @@ static int sdma_v4_0_process_vm_hole_irq(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      struct amdgpu_iv_entry *entry)
{
	dev_err(adev->dev, "MC or SEM address in VM hole\n");
	dev_dbg_ratelimited(adev->dev, "MC or SEM address in VM hole\n");
	sdma_v4_0_print_iv_entry(adev, entry);
	return 0;
}

@@ -2249,7 +2251,7 @@ static int sdma_v4_0_process_doorbell_invalid_irq(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      struct amdgpu_iv_entry *entry)
{
	dev_err(adev->dev, "SDMA received a doorbell from BIF with byte_enable !=0xff\n");
	dev_dbg_ratelimited(adev->dev, "SDMA received a doorbell from BIF with byte_enable !=0xff\n");
	sdma_v4_0_print_iv_entry(adev, entry);
	return 0;
}

@@ -2258,7 +2260,7 @@ static int sdma_v4_0_process_pool_timeout_irq(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      struct amdgpu_iv_entry *entry)
{
	dev_err(adev->dev,
	dev_dbg_ratelimited(adev->dev,
		"Polling register/memory timeout executing POLL_REG/MEM with finite timer\n");
	sdma_v4_0_print_iv_entry(adev, entry);
	return 0;

@@ -2268,7 +2270,7 @@ static int sdma_v4_0_process_srbm_write_irq(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      struct amdgpu_iv_entry *entry)
{
	dev_err(adev->dev,
	dev_dbg_ratelimited(adev->dev,
		"SDMA gets an Register Write SRBM_WRITE command in non-privilege command buffer\n");
	sdma_v4_0_print_iv_entry(adev, entry);
	return 0;

@@ -2597,27 +2599,18 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_srbm_write_irq_funcs = {

static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->sdma.trap_irq.num_types = adev->sdma.num_instances;
	adev->sdma.ecc_irq.num_types = adev->sdma.num_instances;
	/*For Arcturus and Aldebaran, add another 4 irq handler*/
	switch (adev->sdma.num_instances) {
	case 1:
		adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1;
		adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1;
		break;
	case 5:
		adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5;
		adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5;
		break;
	case 8:
		adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
		adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
		adev->sdma.vm_hole_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5;
		adev->sdma.doorbell_invalid_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
		adev->sdma.pool_timeout_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
		adev->sdma.srbm_write_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
		adev->sdma.vm_hole_irq.num_types = adev->sdma.num_instances;
		adev->sdma.doorbell_invalid_irq.num_types = adev->sdma.num_instances;
		adev->sdma.pool_timeout_irq.num_types = adev->sdma.num_instances;
		adev->sdma.srbm_write_irq.num_types = adev->sdma.num_instances;
		break;
	case 2:
	default:
		adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2;
		adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2;
		break;
	}
	adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs;

@@ -405,18 +405,6 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);

	/* Invalidate L2, because if we don't do it, we might get stale cache
	 * lines from previous IBs.
	 */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ));
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (SDMA_GCR_GL2_INV |
				 SDMA_GCR_GL2_WB |
				 SDMA_GCR_GLM_INV |
				 SDMA_GCR_GLM_WB) << 16);
	amdgpu_ring_write(ring, 0xffffff80);
	amdgpu_ring_write(ring, 0xffff);

	/* An IB packet must end on a 8 DW boundary--the next dword
	 * must be on a 8-dword boundary. Our IB packet below is 6
	 * dwords long, thus add x number of NOPs, such that, in

@@ -437,6 +425,33 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
	amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
}

/**
 * sdma_v5_0_ring_emit_mem_sync - flush the IB by graphics cache rinse
 *
 * @ring: amdgpu ring pointer
 *
 * flush the IB by graphics cache rinse.
 */
static void sdma_v5_0_ring_emit_mem_sync(struct amdgpu_ring *ring)
{
	uint32_t gcr_cntl =
		    SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV |
			SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
			SDMA_GCR_GLI_INV(1);

	/* flush entire cache L0/L1/L2, this can be optimized by performance requirement */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) |
			SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) |
			SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) |
			SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0));
}

/**
 * sdma_v5_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
 *

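The GCR request above is a fixed five-dword SDMA packet; a compact layout summary as used by sdma_v5_0_ring_emit_mem_sync, with field placement taken from the payload macros in the code (zero base/limit means a whole-cache flush):

	/* dw0: SDMA_OP_GCR_REQ header
	 * dw1: BASE_VA[31:7]
	 * dw2: GCR_CONTROL[15:0] | BASE_VA[47:32]
	 * dw3: LIMIT_VA[31:7]    | GCR_CONTROL[18:16]
	 * dw4: LIMIT_VA[47:32]   | VMID
	 */
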
@@ -1643,6 +1658,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 5 + 7 + 6, /* sdma_v5_0_ring_emit_ib */
	.emit_ib = sdma_v5_0_ring_emit_ib,
	.emit_mem_sync = sdma_v5_0_ring_emit_mem_sync,
	.emit_fence = sdma_v5_0_ring_emit_fence,
	.emit_pipeline_sync = sdma_v5_0_ring_emit_pipeline_sync,
	.emit_vm_flush = sdma_v5_0_ring_emit_vm_flush,

@@ -1556,6 +1556,10 @@ static void sdma_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *ade
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {

		if (adev->sdma.instance[i].fw_version < 70 && adev->asic_type == CHIP_VANGOGH)
			adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_MGCG;

		if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
			/* Enable sdma clock gating */
			def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL));

@@ -1589,6 +1593,10 @@ static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {

		if (adev->sdma.instance[i].fw_version < 70 && adev->asic_type == CHIP_VANGOGH)
			adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_LS;

		if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
			/* Enable sdma mem light sleep */
			def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL));

@@ -88,6 +88,23 @@ static u32 smuio_v13_0_get_die_id(struct amdgpu_device *adev)
	return die_id;
}

/**
 * smuio_v13_0_get_socket_id - query socket id from FCH
 *
 * @adev: amdgpu device pointer
 *
 * Returns socket id
 */
static u32 smuio_v13_0_get_socket_id(struct amdgpu_device *adev)
{
	u32 data, socket_id;

	data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
	socket_id = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, SOCKET_ID);

	return socket_id;
}

/**
 * smuio_v13_0_supports_host_gpu_xgmi - detect xgmi interface between cpu and gpu/s.
 *

@@ -115,6 +132,7 @@ const struct amdgpu_smuio_funcs smuio_v13_0_funcs = {
	.get_rom_index_offset = smuio_v13_0_get_rom_index_offset,
	.get_rom_data_offset = smuio_v13_0_get_rom_data_offset,
	.get_die_id = smuio_v13_0_get_die_id,
	.get_socket_id = smuio_v13_0_get_socket_id,
	.is_host_gpu_xgmi_supported = smuio_v13_0_is_host_gpu_xgmi_supported,
	.update_rom_clock_gating = smuio_v13_0_update_rom_clock_gating,
	.get_clock_gating_state = smuio_v13_0_get_clock_gating_state,

@@ -655,7 +655,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev)
	int ret = 0;

	/* avoid NBIF got stuck when do RAS recovery in BACO reset */
	if (ras && ras->supported)
	if (ras && adev->ras_enabled)
		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);

	ret = amdgpu_dpm_baco_reset(adev);

@@ -663,7 +663,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev)
		return ret;

	/* re-enable doorbell interrupt after BACO exit */
	if (ras && ras->supported)
	if (ras && adev->ras_enabled)
		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);

	return 0;

@@ -710,7 +710,8 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
		 * 1. PMFW version > 0x284300: all cases use baco
		 * 2. PMFW version <= 0x284300: only sGPU w/o RAS use baco
		 */
		if ((ras && ras->supported) && adev->pm.fw_version <= 0x283400)
		if (ras && adev->ras_enabled &&
		    adev->pm.fw_version <= 0x283400)
			baco_reset = false;
		break;
	case CHIP_ALDEBARAN:

@@ -816,7 +817,7 @@ static void soc15_pcie_gen3_enable(struct amdgpu_device *adev)

static void soc15_program_aspm(struct amdgpu_device *adev)
{
	if (amdgpu_aspm != 1)
	if (!amdgpu_aspm)
		return;

	if (!(adev->flags & AMD_IS_APU) &&

@@ -1522,9 +1523,6 @@ static int soc15_common_late_init(void *handle)
	if (amdgpu_sriov_vf(adev))
		xgpu_ai_mailbox_get_irq(adev);

	if (adev->hdp.funcs->reset_ras_error_count)
		adev->hdp.funcs->reset_ras_error_count(adev);

	if (adev->nbio.ras_funcs &&
	    adev->nbio.ras_funcs->ras_late_init)
		r = adev->nbio.ras_funcs->ras_late_init(adev);

@@ -105,6 +105,12 @@ struct ta_ras_trigger_error_input {
	uint64_t		value;			// method of error injection, i.e. persistent, coherent etc.
};

struct ta_ras_init_flags
{
    uint8_t     poison_mode_en;
    uint8_t     dgpu_mode;
};

struct ta_ras_output_flags
{
	uint8_t    ras_init_success_flag;

@@ -115,6 +121,7 @@ struct ta_ras_output_flags
/* Common input structure for RAS callbacks */
/**********************************************************/
union ta_ras_cmd_input {
	struct ta_ras_init_flags		init_flags;
	struct ta_ras_enable_features_input	enable_features;
	struct ta_ras_disable_features_input	disable_features;
	struct ta_ras_trigger_error_input	trigger_error;

@@ -104,6 +104,7 @@ static int vega10_ih_toggle_ring_interrupts(struct amdgpu_device *adev,

	tmp = RREG32(ih_regs->ih_rb_cntl);
	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_GPU_TS_ENABLE, 1);
	/* enable_intr field is only valid in ring0 */
	if (ih == &adev->irq.ih)
		tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));

@@ -1136,7 +1136,7 @@ static void vi_program_aspm(struct amdgpu_device *adev)
	bool bL1SS = false;
	bool bClkReqSupport = true;

	if (amdgpu_aspm != 1)
	if (!amdgpu_aspm)
		return;

	if (adev->flags & AMD_IS_APU ||

@@ -12,3 +12,16 @@ config HSA_AMD
	select DRM_AMDGPU_USERPTR
	help
	  Enable this if you want to use HSA features on AMD GPU devices.

config HSA_AMD_SVM
	bool "Enable HMM-based shared virtual memory manager"
	depends on HSA_AMD && DEVICE_PRIVATE
	default y
	select HMM_MIRROR
	select MMU_NOTIFIER
	help
	  Enable this to use unified memory and managed memory in HIP. This
	  memory manager supports two modes of operation. One based on
	  preemptions and one based on page faults. To enable page fault
	  based memory management on most GFXv9 GPUs, set the module
	  parameter amdgpu.noretry=0.

@@ -63,3 +63,8 @@ endif
ifneq ($(CONFIG_DEBUG_FS),)
AMDKFD_FILES += $(AMDKFD_PATH)/kfd_debugfs.o
endif

ifneq ($(CONFIG_HSA_AMD_SVM),)
AMDKFD_FILES += $(AMDKFD_PATH)/kfd_svm.o \
		$(AMDKFD_PATH)/kfd_migrate.o
endif

@@ -38,6 +38,7 @@
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "kfd_svm.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"

@@ -1297,7 +1298,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,

	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
		dev->kgd, args->va_addr, args->size,
		pdd->vm, (struct kgd_mem **) &mem, &offset,
		pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
		flags);

	if (err)

@@ -1328,7 +1329,8 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
	return 0;

err_free:
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem,
					       pdd->drm_priv, NULL);
err_unlock:
	mutex_unlock(&p->mutex);
	return err;

@@ -1365,7 +1367,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
	}

	ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
						(struct kgd_mem *)mem, &size);
				(struct kgd_mem *)mem, pdd->drm_priv, &size);

	/* If freeing the buffer failed, leave the handle in place for
	 * clean-up during process tear-down.

@@ -1448,7 +1450,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
			goto get_mem_obj_from_handle_failed;
		}
		err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
		if (err) {
			pr_err("Failed to map to gpu %d/%d\n",
			       i, args->n_devices);

@@ -1555,7 +1557,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
			goto get_mem_obj_from_handle_failed;
		}
		err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
		if (err) {
			pr_err("Failed to unmap from gpu %d/%d\n",
			       i, args->n_devices);

@@ -1701,7 +1703,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
	}

	r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
					      args->va_addr, pdd->vm,
					      args->va_addr, pdd->drm_priv,
					      (struct kgd_mem **)&mem, &size,
					      NULL);
	if (r)

@@ -1721,7 +1723,8 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
	return 0;

err_free:
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem,
					       pdd->drm_priv, NULL);
err_unlock:
	mutex_unlock(&p->mutex);
	dma_buf_put(dmabuf);

@@ -1742,6 +1745,64 @@ static int kfd_ioctl_smi_events(struct file *filep,
	return kfd_smi_event_open(dev, &args->anon_fd);
}

static int kfd_ioctl_set_xnack_mode(struct file *filep,
				    struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_xnack_mode_args *args = data;
	int r = 0;

	mutex_lock(&p->mutex);
	if (args->xnack_enabled >= 0) {
		if (!list_empty(&p->pqm.queues)) {
			pr_debug("Process has user queues running\n");
			mutex_unlock(&p->mutex);
			return -EBUSY;
		}
		if (args->xnack_enabled && !kfd_process_xnack_mode(p, true))
			r = -EPERM;
		else
			p->xnack_enabled = args->xnack_enabled;
	} else {
		args->xnack_enabled = p->xnack_enabled;
	}
	mutex_unlock(&p->mutex);

	return r;
}

#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
{
	struct kfd_ioctl_svm_args *args = data;
	int r = 0;

	if (p->svm_disabled)
		return -EPERM;

	pr_debug("start 0x%llx size 0x%llx op 0x%x nattr 0x%x\n",
		 args->start_addr, args->size, args->op, args->nattr);

	if ((args->start_addr & ~PAGE_MASK) || (args->size & ~PAGE_MASK))
		return -EINVAL;
	if (!args->start_addr || !args->size)
		return -EINVAL;

	mutex_lock(&p->mutex);

	r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr,
		      args->attrs);

	mutex_unlock(&p->mutex);

	return r;
}
#else
static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
{
	return -EPERM;
}
#endif

#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
			    .cmd_drv = 0, .name = #ioctl}

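For context, a hedged userspace sketch of reaching the validation in kfd_ioctl_svm above. The args layout and op value follow the kfd_ioctl.h additions proposed by this series; kfd_fd, base and len are placeholders:

	struct kfd_ioctl_svm_args args = {
		.start_addr = base,	/* must be page-aligned and non-zero */
		.size = len,		/* must be page-aligned and non-zero */
		.op = KFD_IOCTL_SVM_OP_SET_ATTR,
		.nattr = 0,
	};
	if (ioctl(kfd_fd, AMDKFD_IOC_SVM, &args) < 0)
		perror("AMDKFD_IOC_SVM");
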
@@ -1840,6 +1901,11 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
			kfd_ioctl_smi_events, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
			kfd_ioctl_set_xnack_mode, 0),
};

#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)

@@ -55,7 +55,7 @@ struct kfd_gpu_cache_info {
	uint32_t	cache_level;
	uint32_t	flags;
	/* Indicates how many Compute Units share this cache
	 * Value = 1 indicates the cache is not shared
	 * within a SA. Value = 1 indicates the cache is not shared
	 */
	uint32_t	num_cu_shared;
};

@@ -69,7 +69,6 @@ static struct kfd_gpu_cache_info kaveri_cache_info[] = {
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,

	},
	{
		/* Scalar L1 Instruction Cache (in SQC module) per bank */

@@ -126,9 +125,6 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
	/* TODO: Add L2 Cache information */
};

/* NOTE: In future if more information is added to struct kfd_gpu_cache_info
 * the following ASICs may need a separate table.
 */
#define hawaii_cache_info kaveri_cache_info
#define tonga_cache_info carrizo_cache_info
#define fiji_cache_info  carrizo_cache_info

@@ -136,13 +132,562 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
#define polaris11_cache_info carrizo_cache_info
#define polaris12_cache_info carrizo_cache_info
#define vegam_cache_info carrizo_cache_info
/* TODO - check & update Vega10 cache details */
#define vega10_cache_info carrizo_cache_info
#define raven_cache_info carrizo_cache_info
#define renoir_cache_info carrizo_cache_info
/* TODO - check & update Navi10 cache details */
#define navi10_cache_info carrizo_cache_info
#define vangogh_cache_info carrizo_cache_info

/* NOTE: L1 cache information has been updated and L2/L3
 * cache information has been added for Vega10 and
 * newer ASICs. The unit for cache_size is KiB.
 * In future, check & update cache details
 * for every new ASIC is required.
 */

static struct kfd_gpu_cache_info vega10_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 4096,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 16,
	},
};

static struct kfd_gpu_cache_info raven_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 1024,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 11,
	},
};

static struct kfd_gpu_cache_info renoir_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 1024,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
};

static struct kfd_gpu_cache_info vega12_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 2048,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 5,
	},
};

static struct kfd_gpu_cache_info vega20_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 8192,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 16,
	},
};

static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 8192,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 14,
	},
};

static struct kfd_gpu_cache_info navi10_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 4096,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
};

static struct kfd_gpu_cache_info vangogh_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 1024,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
};

static struct kfd_gpu_cache_info navi14_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 12,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 2048,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 12,
	},
};

static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 4096,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
	{
		/* L3 Data Cache per GPU */
		.cache_size = 128*1024,
		.cache_level = 3,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
};

static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 3072,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
	{
		/* L3 Data Cache per GPU */
		.cache_size = 96*1024,
		.cache_level = 3,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
};

static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 2048,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
	{
		/* L3 Data Cache per GPU */
		.cache_size = 32*1024,
		.cache_level = 3,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
};

static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
		struct crat_subtype_computeunit *cu)
@ -544,7 +1089,7 @@ int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
}

/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
static int fill_in_pcache(struct crat_subtype_cache *pcache,
static int fill_in_l1_pcache(struct crat_subtype_cache *pcache,
				struct kfd_gpu_cache_info *pcache_info,
				struct kfd_cu_info *cu_info,
				int mem_available,
@ -597,6 +1142,70 @@ static int fill_in_pcache(struct crat_subtype_cache *pcache,
	return 1;
}

/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache,
				struct kfd_gpu_cache_info *pcache_info,
				struct kfd_cu_info *cu_info,
				int mem_available,
				int cache_type, unsigned int cu_processor_id)
{
	unsigned int cu_sibling_map_mask;
	int first_active_cu;
	int i, j, k;

	/* First check if enough memory is available */
	if (sizeof(struct crat_subtype_cache) > mem_available)
		return -ENOMEM;

	cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
	cu_sibling_map_mask &=
		((1 << pcache_info[cache_type].num_cu_shared) - 1);
	first_active_cu = ffs(cu_sibling_map_mask);

	/* CU could be inactive. In case of shared cache find the first active
	 * CU. and incase of non-shared cache check if the CU is inactive. If
	 * inactive active skip it
	 */
	if (first_active_cu) {
		memset(pcache, 0, sizeof(struct crat_subtype_cache));
		pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
		pcache->length = sizeof(struct crat_subtype_cache);
		pcache->flags = pcache_info[cache_type].flags;
		pcache->processor_id_low = cu_processor_id
					 + (first_active_cu - 1);
		pcache->cache_level = pcache_info[cache_type].cache_level;
		pcache->cache_size = pcache_info[cache_type].cache_size;

		/* Sibling map is w.r.t processor_id_low, so shift out
		 * inactive CU
		 */
		cu_sibling_map_mask =
			cu_sibling_map_mask >> (first_active_cu - 1);
		k = 0;
		for (i = 0; i < cu_info->num_shader_engines; i++) {
			for (j = 0; j < cu_info->num_shader_arrays_per_engine;
				j++) {
				pcache->sibling_map[k] =
				 (uint8_t)(cu_sibling_map_mask & 0xFF);
				pcache->sibling_map[k+1] =
				 (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
				pcache->sibling_map[k+2] =
				 (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
				pcache->sibling_map[k+3] =
				 (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
				k += 4;
				cu_sibling_map_mask =
					cu_info->cu_bitmap[i % 4][j + i / 4];
				cu_sibling_map_mask &= (
				 (1 << pcache_info[cache_type].num_cu_shared)
				 - 1);
			}
		}
		return 0;
	}
	return 1;
}

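Aside: the sibling-map code above packs a 32-bit CU bitmask into the map one byte at a time, after shifting out leading inactive CUs. A standalone sketch of that byte-packing step (simplified to a single mask with no per-SE/SA iteration; the mask value is assumed):

	#include <stdint.h>
	#include <stdio.h>
	#include <strings.h>	/* ffs() */

	int main(void)
	{
		uint32_t cu_mask = 0x00000ff0;	/* assumed active-CU bitmap */
		uint8_t sibling_map[4];

		/* shift out leading inactive CUs, as the kernel code does */
		int first_active = ffs(cu_mask);	/* 1-based bit index, 0 if none */
		if (first_active)
			cu_mask >>= first_active - 1;

		/* store the mask as four little-endian bytes */
		for (int k = 0; k < 4; k++)
			sibling_map[k] = (uint8_t)((cu_mask >> (8 * k)) & 0xFF);

		for (int k = 0; k < 4; k++)
			printf("sibling_map[%d] = 0x%02x\n", k, sibling_map[k]);
		return 0;
	}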
/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info
 * tables
 *
@ -624,6 +1233,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
	int mem_available = available_size;
	unsigned int cu_processor_id;
	int ret;
	unsigned int num_cu_shared;

	switch (kdev->device_info->asic_family) {
	case CHIP_KAVERI:
@ -663,13 +1273,22 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
		num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
		break;
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_ARCTURUS:
	case CHIP_ALDEBARAN:
		pcache_info = vega10_cache_info;
		num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
		break;
	case CHIP_VEGA12:
		pcache_info = vega12_cache_info;
		num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
		break;
	case CHIP_VEGA20:
	case CHIP_ARCTURUS:
		pcache_info = vega20_cache_info;
		num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
		break;
	case CHIP_ALDEBARAN:
		pcache_info = aldebaran_cache_info;
		num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
		break;
	case CHIP_RAVEN:
		pcache_info = raven_cache_info;
		num_of_cache_types = ARRAY_SIZE(raven_cache_info);
@ -680,13 +1299,25 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
		break;
	case CHIP_NAVI10:
	case CHIP_NAVI12:
	case CHIP_NAVI14:
	case CHIP_SIENNA_CICHLID:
	case CHIP_NAVY_FLOUNDER:
	case CHIP_DIMGREY_CAVEFISH:
		pcache_info = navi10_cache_info;
		num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
		break;
	case CHIP_NAVI14:
		pcache_info = navi14_cache_info;
		num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
		break;
	case CHIP_SIENNA_CICHLID:
		pcache_info = sienna_cichlid_cache_info;
		num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
		break;
	case CHIP_NAVY_FLOUNDER:
		pcache_info = navy_flounder_cache_info;
		num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
		break;
	case CHIP_DIMGREY_CAVEFISH:
		pcache_info = dimgrey_cavefish_cache_info;
		num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
		break;
	case CHIP_VANGOGH:
		pcache_info = vangogh_cache_info;
		num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
@ -709,40 +1340,58 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
	 */

	for (ct = 0; ct < num_of_cache_types; ct++) {
		cu_processor_id = gpu_processor_id;
		for (i = 0; i < cu_info->num_shader_engines; i++) {
			for (j = 0; j < cu_info->num_shader_arrays_per_engine;
				j++) {
				for (k = 0; k < cu_info->num_cu_per_sh;
					k += pcache_info[ct].num_cu_shared) {
	  cu_processor_id = gpu_processor_id;
	  if (pcache_info[ct].cache_level == 1) {
	    for (i = 0; i < cu_info->num_shader_engines; i++) {
	      for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
	        for (k = 0; k < cu_info->num_cu_per_sh;
		  k += pcache_info[ct].num_cu_shared) {
		  ret = fill_in_l1_pcache(pcache,
					pcache_info,
					cu_info,
					mem_available,
					cu_info->cu_bitmap[i % 4][j + i / 4],
					ct,
					cu_processor_id,
					k);

					ret = fill_in_pcache(pcache,
						pcache_info,
						cu_info,
						mem_available,
						cu_info->cu_bitmap[i % 4][j + i / 4],
						ct,
						cu_processor_id,
						k);
		  if (ret < 0)
			break;

					if (ret < 0)
						break;
		  if (!ret) {
				pcache++;
				(*num_of_entries)++;
				mem_available -= sizeof(*pcache);
				(*size_filled) += sizeof(*pcache);
		  }

					if (!ret) {
						pcache++;
						(*num_of_entries)++;
						mem_available -=
							sizeof(*pcache);
						(*size_filled) +=
							sizeof(*pcache);
					}

					/* Move to next CU block */
					cu_processor_id +=
						pcache_info[ct].num_cu_shared;
				}
			}
		  /* Move to next CU block */
		  num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
					cu_info->num_cu_per_sh) ?
					pcache_info[ct].num_cu_shared :
					(cu_info->num_cu_per_sh - k);
		  cu_processor_id += num_cu_shared;
		}
	      }
	    }
	  } else {
			ret = fill_in_l2_l3_pcache(pcache,
				pcache_info,
				cu_info,
				mem_available,
				ct,
				cu_processor_id);

			if (ret < 0)
				break;

			if (!ret) {
				pcache++;
				(*num_of_entries)++;
				mem_available -= sizeof(*pcache);
				(*size_filled) += sizeof(*pcache);
			}
	  }
	}

	pr_debug("Added [%d] GPU cache entries\n", *num_of_entries);
@ -1100,6 +1749,92 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
	return 0;
}

#ifdef CONFIG_ACPI_NUMA
static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
{
	struct acpi_table_header *table_header = NULL;
	struct acpi_subtable_header *sub_header = NULL;
	unsigned long table_end, subtable_len;
	u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
			pci_dev_id(kdev->pdev);
	u32 bdf;
	acpi_status status;
	struct acpi_srat_cpu_affinity *cpu;
	struct acpi_srat_generic_affinity *gpu;
	int pxm = 0, max_pxm = 0;
	int numa_node = NUMA_NO_NODE;
	bool found = false;

	/* Fetch the SRAT table from ACPI */
	status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
	if (status == AE_NOT_FOUND) {
		pr_warn("SRAT table not found\n");
		return;
	} else if (ACPI_FAILURE(status)) {
		const char *err = acpi_format_exception(status);
		pr_err("SRAT table error: %s\n", err);
		return;
	}

	table_end = (unsigned long)table_header + table_header->length;

	/* Parse all entries looking for a match. */
	sub_header = (struct acpi_subtable_header *)
			((unsigned long)table_header +
			sizeof(struct acpi_table_srat));
	subtable_len = sub_header->length;

	while (((unsigned long)sub_header) + subtable_len  < table_end) {
		/*
		 * If length is 0, break from this loop to avoid
		 * infinite loop.
		 */
		if (subtable_len == 0) {
			pr_err("SRAT invalid zero length\n");
			break;
		}

		switch (sub_header->type) {
		case ACPI_SRAT_TYPE_CPU_AFFINITY:
			cpu = (struct acpi_srat_cpu_affinity *)sub_header;
			pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
					cpu->proximity_domain_lo;
			if (pxm > max_pxm)
				max_pxm = pxm;
			break;
		case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
			gpu = (struct acpi_srat_generic_affinity *)sub_header;
			bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
					*((u16 *)(&gpu->device_handle[2]));
			if (bdf == pci_id) {
				found = true;
				numa_node = pxm_to_node(gpu->proximity_domain);
			}
			break;
		default:
			break;
		}

		if (found)
			break;

		sub_header = (struct acpi_subtable_header *)
				((unsigned long)sub_header + subtable_len);
		subtable_len = sub_header->length;
	}

	acpi_put_table(table_header);

	/* Workaround bad cpu-gpu binding case */
	if (found && (numa_node < 0 ||
			numa_node > pxm_to_node(max_pxm)))
		numa_node = 0;

	if (numa_node != NUMA_NO_NODE)
		set_dev_node(&kdev->pdev->dev, numa_node);
}
#endif

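Aside: two bit-packing details in kfd_find_numa_node_in_srat above are easy to misread: the proximity domain of a CPU affinity entry is assembled from a low byte plus a high field, and the device id being matched packs the PCI domain into the top 16 bits with bus/device/function below it. A standalone sketch of both compositions (all values assumed for illustration):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		/* proximity domain: lo byte + hi field, as in the SRAT CPU entry */
		uint8_t prox_lo = 0x02;
		uint32_t prox_hi = 0x000001;	/* assumed hi field, already assembled */
		uint32_t pxm = (prox_hi << 8) | prox_lo;

		/* PCI id: domain in the top 16 bits, bus/dev/fn in the low 16 */
		uint16_t domain = 0x0000;
		uint16_t bdf = 0xc300;	/* assumed: bus 0xc3, device 0, function 0 */
		uint32_t pci_id = ((uint32_t)domain << 16) | bdf;

		printf("pxm=%u pci_id=0x%08x\n", pxm, pci_id);
		return 0;
	}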
/* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
 * to its NUMA node
 *	@avail_size: Available size in the memory
@ -1140,11 +1875,17 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
		 */
		sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
		sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
		sub_type_hdr->num_hops_xgmi = 1;
	} else {
		sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
	}

	sub_type_hdr->proximity_domain_from = proximity_domain;

#ifdef CONFIG_ACPI_NUMA
	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
		kfd_find_numa_node_in_srat(kdev);
#endif
#ifdef CONFIG_NUMA
	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
		sub_type_hdr->proximity_domain_to = 0;
@ -26,10 +26,12 @@
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_vi.h"
#include "kfd_pm4_headers_aldebaran.h"
#include "cwsr_trap_handler.h"
#include "kfd_iommu.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
#include "kfd_migrate.h"

#define MQD_SIZE_ALIGNED 768

@ -576,7 +578,7 @@ static const struct kfd_device_info *kfd_supported_devices[][2] = {
	[CHIP_VEGA20] = {&vega20_device_info, NULL},
	[CHIP_RENOIR] = {&renoir_device_info, NULL},
	[CHIP_ARCTURUS] = {&arcturus_device_info, &arcturus_device_info},
	[CHIP_ALDEBARAN] = {&aldebaran_device_info, NULL},
	[CHIP_ALDEBARAN] = {&aldebaran_device_info, &aldebaran_device_info},
	[CHIP_NAVI10] = {&navi10_device_info, NULL},
	[CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info},
	[CHIP_NAVI14] = {&navi14_device_info, NULL},
@ -697,7 +699,9 @@ static int kfd_gws_init(struct kfd_dev *kfd)
			&& kfd->device_info->asic_family <= CHIP_RAVEN
			&& kfd->mec2_fw_version >= 0x1b3)
		|| (kfd->device_info->asic_family == CHIP_ARCTURUS
			&& kfd->mec2_fw_version >= 0x30))
			&& kfd->mec2_fw_version >= 0x30)
		|| (kfd->device_info->asic_family == CHIP_ALDEBARAN
			&& kfd->mec2_fw_version >= 0x28))
		ret = amdgpu_amdkfd_alloc_gws(kfd->kgd,
				amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws);

@ -713,7 +717,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
			 struct drm_device *ddev,
			 const struct kgd2kfd_shared_resources *gpu_resources)
{
	unsigned int size;
	unsigned int size, map_process_packet_size;

	kfd->ddev = ddev;
	kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
@ -748,7 +752,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
	 * calculate max size of runlist packet.
	 * There can be only 2 packets at once
	 */
	size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_mes_map_process) +
	map_process_packet_size =
			kfd->device_info->asic_family == CHIP_ALDEBARAN ?
				sizeof(struct pm4_mes_map_process_aldebaran) :
					sizeof(struct pm4_mes_map_process);
	size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size +
		max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)
		+ sizeof(struct pm4_mes_runlist)) * 2;
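Aside: the sizing change above makes the map-process packet size ASIC-dependent while keeping the same worst-case budget: one map-process packet per process, one map-queues packet per queue, plus the runlist packet, doubled because two packets can be in flight. A standalone sketch of that arithmetic with assumed packet sizes and limits (the real values come from the pm4 headers and device config, not from this patch):

	#include <stddef.h>
	#include <stdio.h>

	int main(void)
	{
		size_t map_process = 80;	/* assumed; the aldebaran variant is larger */
		size_t map_queues = 64;		/* assumed */
		size_t runlist = 24;		/* assumed */
		unsigned int max_procs = 512, max_queues = 1024;	/* assumed limits */

		/* two packets can be in flight at once, hence the * 2 */
		size_t size = (max_procs * map_process +
			       max_queues * map_queues + runlist) * 2;
		printf("runlist ib budget: %zu bytes\n", size);
		return 0;
	}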
@ -814,6 +822,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,

	kfd_cwsr_init(kfd);

	svm_migrate_init((struct amdgpu_device *)kfd->kgd);

	if (kfd_resume(kfd))
		goto kfd_resume_error;

@ -862,6 +872,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
	if (kfd->init_complete) {
		kgd2kfd_suspend(kfd, false);
		svm_migrate_fini((struct amdgpu_device *)kfd->kgd);
		device_queue_manager_uninit(kfd->dqm);
		kfd_interrupt_exit(kfd);
		kfd_topology_remove_device(kfd);
@ -738,7 +738,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */

@ -821,7 +821,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */

@ -873,7 +873,7 @@ static int register_process(struct device_queue_manager *dqm,

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);

	dqm_lock(dqm);
	list_add(&n->list, &dqm->queues);

@ -61,10 +61,19 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
		qpd->sh_mem_config =
				SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
					SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
		if (dqm->dev->noretry &&
		    !dqm->dev->use_iommu_v2)

		if (dqm->dev->device_info->asic_family == CHIP_ALDEBARAN) {
			/* Aldebaran can safely support different XNACK modes
			 * per process
			 */
			if (!pdd->process->xnack_enabled)
				qpd->sh_mem_config |=
					1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
		} else if (dqm->dev->noretry &&
			   !dqm->dev->use_iommu_v2) {
			qpd->sh_mem_config |=
				1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
		}

		qpd->sh_mem_ape1_limit = 0;
		qpd->sh_mem_ape1_base = 0;
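Aside: the per-process XNACK change above reduces to setting one retry-disable bit in SH_MEM_CONFIG when XNACK is off for the process. A standalone sketch of just that bit selection (the shift value and base are assumed for illustration, not taken from this patch or any header):

	#include <stdint.h>
	#include <stdio.h>
	#include <stdbool.h>

	#define RETRY_DISABLE_SHIFT 3	/* assumed; the real shift is ASIC-defined */

	static uint32_t sh_mem_config(bool xnack_enabled, uint32_t base)
	{
		/* retry (XNACK) disabled => set the retry-disable bit */
		return xnack_enabled ? base : base | (1u << RETRY_DISABLE_SHIFT);
	}

	int main(void)
	{
		printf("xnack on:  0x%x\n", sh_mem_config(true, 0x40));
		printf("xnack off: 0x%x\n", sh_mem_config(false, 0x40));
		return 0;
	}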
@ -405,6 +405,10 @@ int kfd_init_apertures(struct kfd_process *process)
			case CHIP_POLARIS12:
			case CHIP_VEGAM:
				kfd_init_apertures_vi(pdd, id);
				/* VI GPUs cannot support SVM with only
				 * 40 bits of virtual address space.
				 */
				process->svm_disabled = true;
				break;
			case CHIP_VEGA10:
			case CHIP_VEGA12:
@ -25,7 +25,6 @@
#include "soc15_int.h"
#include "kfd_device_queue_manager.h"
#include "kfd_smi_events.h"
#include "amdgpu.h"

enum SQ_INTERRUPT_WORD_ENCODING {
	SQ_INTERRUPT_WORD_ENCODING_AUTO = 0x0,

937	drivers/gpu/drm/amd/amdkfd/kfd_migrate.c	Normal file

@ -0,0 +1,937 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2020-2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/types.h>
#include <linux/hmm.h>
#include <linux/dma-direction.h>
#include <linux/dma-mapping.h>
#include "amdgpu_sync.h"
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
#include "amdgpu_mn.h"
#include "kfd_priv.h"
#include "kfd_svm.h"
#include "kfd_migrate.h"

static uint64_t
svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
{
	return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
}

static int
svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
		     dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_job *job;
	unsigned int num_dw, num_bytes;
	struct dma_fence *fence;
	uint64_t src_addr, dst_addr;
	uint64_t pte_flags;
	void *cpu_addr;
	int r;

	/* use gart window 0 */
	*gart_addr = adev->gmc.gart_start;

	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
	num_bytes = npages * 8;

	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
				     AMDGPU_IB_POOL_DELAYED, &job);
	if (r)
		return r;

	src_addr = num_dw * 4;
	src_addr += job->ibs[0].gpu_addr;

	dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
				dst_addr, num_bytes, false);

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	WARN_ON(job->ibs[0].length_dw > num_dw);

	pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
	pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED;
	if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO))
		pte_flags |= AMDGPU_PTE_WRITEABLE;
	pte_flags |= adev->gart.gart_pte_flags;

	cpu_addr = &job->ibs[0].ptr[num_dw];

	r = amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
	if (r)
		goto error_free;

	r = amdgpu_job_submit(job, &adev->mman.entity,
			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
	if (r)
		goto error_free;

	dma_fence_put(fence);

	return r;

error_free:
	amdgpu_job_free(job);
	return r;
}

/**
 * svm_migrate_copy_memory_gart - sdma copy data between ram and vram
 *
 * @adev: amdgpu device the sdma ring running
 * @src: source page address array
 * @dst: destination page address array
 * @npages: number of pages to copy
 * @direction: enum MIGRATION_COPY_DIR
 * @mfence: output, sdma fence to signal after sdma is done
 *
 * ram address uses GART table continuous entries mapping to ram pages,
 * vram address uses direct mapping of vram pages, which must have npages
 * number of continuous pages.
 * GART update and sdma uses same buf copy function ring, sdma is splited to
 * multiple GTT_MAX_PAGES transfer, all sdma operations are serialized, wait for
 * the last sdma finish fence which is returned to check copy memory is done.
 *
 * Context: Process context, takes and releases gtt_window_lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */

static int
svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
			     uint64_t *vram, uint64_t npages,
			     enum MIGRATION_COPY_DIR direction,
			     struct dma_fence **mfence)
{
	const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
	uint64_t gart_s, gart_d;
	struct dma_fence *next;
	uint64_t size;
	int r;

	mutex_lock(&adev->mman.gtt_window_lock);

	while (npages) {
		size = min(GTT_MAX_PAGES, npages);

		if (direction == FROM_VRAM_TO_RAM) {
			gart_s = svm_migrate_direct_mapping_addr(adev, *vram);
			r = svm_migrate_gart_map(ring, size, sys, &gart_d, 0);

		} else if (direction == FROM_RAM_TO_VRAM) {
			r = svm_migrate_gart_map(ring, size, sys, &gart_s,
						 KFD_IOCTL_SVM_FLAG_GPU_RO);
			gart_d = svm_migrate_direct_mapping_addr(adev, *vram);
		}
		if (r) {
			pr_debug("failed %d to create gart mapping\n", r);
			goto out_unlock;
		}

		r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
				       NULL, &next, false, true, false);
		if (r) {
			pr_debug("failed %d to copy memory\n", r);
			goto out_unlock;
		}

		dma_fence_put(*mfence);
		*mfence = next;
		npages -= size;
		if (npages) {
			sys += size;
			vram += size;
		}
	}

out_unlock:
	mutex_unlock(&adev->mman.gtt_window_lock);

	return r;
}

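Aside: as the kernel-doc above notes, svm_migrate_copy_memory_gart caps each SDMA transfer at the GART window size and chains the fences, keeping only the last one. The control flow is a classic bounded-chunk loop; a standalone sketch (window size and page count are placeholders, not the real AMDGPU_GTT_MAX_TRANSFER_SIZE):

	#include <stdint.h>
	#include <stdio.h>

	#define WINDOW_PAGES 512u	/* assumed stand-in for the GART window size */

	int main(void)
	{
		uint64_t npages = 1300;	/* assumed total pages to migrate */
		uint64_t done = 0;

		while (npages) {
			uint64_t size = npages < WINDOW_PAGES ? npages : WINDOW_PAGES;
			/* map `size` pages into the window, then copy that chunk */
			printf("copy chunk of %llu pages\n",
			       (unsigned long long)size);
			npages -= size;
			done += size;
		}
		printf("total %llu pages copied\n", (unsigned long long)done);
		return 0;
	}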
/**
 * svm_migrate_copy_done - wait for memory copy sdma is done
 *
 * @adev: amdgpu device the sdma memory copy is executing on
 * @mfence: migrate fence
 *
 * Wait for dma fence is signaled, if the copy ssplit into multiple sdma
 * operations, this is the last sdma operation fence.
 *
 * Context: called after svm_migrate_copy_memory
 *
 * Return:
 * 0		- success
 * otherwise	- error code from dma fence signal
 */
static int
svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence)
{
	int r = 0;

	if (mfence) {
		r = dma_fence_wait(mfence, false);
		dma_fence_put(mfence);
		pr_debug("sdma copy memory fence done\n");
	}

	return r;
}

static uint64_t
svm_migrate_node_physical_addr(struct amdgpu_device *adev,
			       struct drm_mm_node **mm_node, uint64_t *offset)
{
	struct drm_mm_node *node = *mm_node;
	uint64_t pos = *offset;

	if (node->start == AMDGPU_BO_INVALID_OFFSET) {
		pr_debug("drm node is not validated\n");
		return 0;
	}

	pr_debug("vram node start 0x%llx npages 0x%llx\n", node->start,
		 node->size);

	if (pos >= node->size) {
		do  {
			pos -= node->size;
			node++;
		} while (pos >= node->size);

		*mm_node = node;
		*offset = pos;
	}

	return (node->start + pos) << PAGE_SHIFT;
}

unsigned long
svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr)
{
	return (addr + adev->kfd.dev->pgmap.range.start) >> PAGE_SHIFT;
}

static void
svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
{
	struct page *page;

	page = pfn_to_page(pfn);
	page->zone_device_data = prange;
	get_page(page);
	lock_page(page);
}

static void
svm_migrate_put_vram_page(struct amdgpu_device *adev, unsigned long addr)
{
	struct page *page;

	page = pfn_to_page(svm_migrate_addr_to_pfn(adev, addr));
	unlock_page(page);
	put_page(page);
}

static unsigned long
svm_migrate_addr(struct amdgpu_device *adev, struct page *page)
{
	unsigned long addr;

	addr = page_to_pfn(page) << PAGE_SHIFT;
	return (addr - adev->kfd.dev->pgmap.range.start);
}

static struct page *
svm_migrate_get_sys_page(struct vm_area_struct *vma, unsigned long addr)
{
	struct page *page;

	page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
	if (page)
		lock_page(page);

	return page;
}

static void svm_migrate_put_sys_page(unsigned long addr)
{
	struct page *page;

	page = pfn_to_page(addr >> PAGE_SHIFT);
	unlock_page(page);
	put_page(page);
}

static int
svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
			 struct migrate_vma *migrate, struct dma_fence **mfence,
			 dma_addr_t *scratch)
{
	uint64_t npages = migrate->cpages;
	struct device *dev = adev->dev;
	struct drm_mm_node *node;
	dma_addr_t *src;
	uint64_t *dst;
	uint64_t vram_addr;
	uint64_t offset;
	uint64_t i, j;
	int r;

	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
		 prange->last);

	src = scratch;
	dst = (uint64_t *)(scratch + npages);

	r = svm_range_vram_node_new(adev, prange, true);
	if (r) {
		pr_debug("failed %d get 0x%llx pages from vram\n", r, npages);
		goto out;
	}

	node = prange->ttm_res->mm_node;
	offset = prange->offset;
	vram_addr = svm_migrate_node_physical_addr(adev, &node, &offset);
	if (!vram_addr) {
		WARN_ONCE(1, "vram node address is 0\n");
		r = -ENOMEM;
		goto out;
	}

	for (i = j = 0; i < npages; i++) {
		struct page *spage;

		dst[i] = vram_addr + (j << PAGE_SHIFT);
		migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
		svm_migrate_get_vram_page(prange, migrate->dst[i]);

		migrate->dst[i] = migrate_pfn(migrate->dst[i]);
		migrate->dst[i] |= MIGRATE_PFN_LOCKED;

		if (migrate->src[i] & MIGRATE_PFN_VALID) {
			spage = migrate_pfn_to_page(migrate->src[i]);
			src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
					      DMA_TO_DEVICE);
			r = dma_mapping_error(dev, src[i]);
			if (r) {
				pr_debug("failed %d dma_map_page\n", r);
				goto out_free_vram_pages;
			}
		} else {
			if (j) {
				r = svm_migrate_copy_memory_gart(
						adev, src + i - j,
						dst + i - j, j,
						FROM_RAM_TO_VRAM,
						mfence);
				if (r)
					goto out_free_vram_pages;
				offset += j;
				vram_addr = (node->start + offset) << PAGE_SHIFT;
				j = 0;
			} else {
				offset++;
				vram_addr += PAGE_SIZE;
			}
			if (offset >= node->size) {
				node++;
				pr_debug("next node size 0x%llx\n", node->size);
				vram_addr = node->start << PAGE_SHIFT;
				offset = 0;
			}
			continue;
		}

		pr_debug("dma mapping src to 0x%llx, page_to_pfn 0x%lx\n",
			 src[i] >> PAGE_SHIFT, page_to_pfn(spage));

		if (j + offset >= node->size - 1 && i < npages - 1) {
			r = svm_migrate_copy_memory_gart(adev, src + i - j,
							 dst + i - j, j + 1,
							 FROM_RAM_TO_VRAM,
							 mfence);
			if (r)
				goto out_free_vram_pages;

			node++;
			pr_debug("next node size 0x%llx\n", node->size);
			vram_addr = node->start << PAGE_SHIFT;
			offset = 0;
			j = 0;
		} else {
			j++;
		}
	}

	r = svm_migrate_copy_memory_gart(adev, src + i - j, dst + i - j, j,
					 FROM_RAM_TO_VRAM, mfence);
			
		||||
out_free_vram_pages:
 | 
			
		||||
	if (r) {
 | 
			
		||||
		pr_debug("failed %d to copy memory to vram\n", r);
 | 
			
		||||
		while (i--) {
 | 
			
		||||
			svm_migrate_put_vram_page(adev, dst[i]);
 | 
			
		||||
			migrate->dst[i] = 0;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
out:
 | 
			
		||||
	return r;
 | 
			
		||||
}
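
/*
 * The staging buffer below packs, per page: one migrate.src and one
 * migrate.dst PFN slot (2 * sizeof(*migrate.src)) plus one uint64_t and
 * one dma_addr_t of copy scratch. If migrate_vma_setup() collects fewer
 * pages than requested, the setup is finalized and retried, up to three
 * times, before giving up with -ENOMEM.
 */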
static int
svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
			struct vm_area_struct *vma, uint64_t start,
			uint64_t end)
{
	uint64_t npages = (end - start) >> PAGE_SHIFT;
	struct dma_fence *mfence = NULL;
	struct migrate_vma migrate;
	dma_addr_t *scratch;
	size_t size;
	void *buf;
	int r = -ENOMEM;
	int retry = 0;

	memset(&migrate, 0, sizeof(migrate));
	migrate.vma = vma;
	migrate.start = start;
	migrate.end = end;
	migrate.flags = MIGRATE_VMA_SELECT_SYSTEM;
	migrate.pgmap_owner = adev;

	size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
	size *= npages;
	buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
	if (!buf)
		goto out;

	migrate.src = buf;
	migrate.dst = migrate.src + npages;
	scratch = (dma_addr_t *)(migrate.dst + npages);

retry:
	r = migrate_vma_setup(&migrate);
	if (r) {
		pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n",
			 r, prange->svms, prange->start, prange->last);
		goto out_free;
	}
	if (migrate.cpages != npages) {
		pr_debug("collect 0x%lx/0x%llx pages, retry\n", migrate.cpages,
			 npages);
		migrate_vma_finalize(&migrate);
		if (retry++ >= 3) {
			r = -ENOMEM;
			pr_debug("failed %d migrate svms 0x%p [0x%lx 0x%lx]\n",
				 r, prange->svms, prange->start, prange->last);
			goto out_free;
		}

		goto retry;
	}

	if (migrate.cpages) {
		r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence,
					     scratch);
		migrate_vma_pages(&migrate);
		svm_migrate_copy_done(adev, mfence);
		migrate_vma_finalize(&migrate);
	}

	svm_range_dma_unmap(adev->dev, scratch, 0, npages);
	svm_range_free_dma_mappings(prange);

out_free:
	kvfree(buf);
out:
	return r;
}

/**
 * svm_migrate_ram_to_vram - migrate svm range from system to device
 * @prange: range structure
 * @best_loc: the device to migrate to
 * @mm: the process mm structure
 *
 * Context: Process context, caller holds mmap read lock, svms lock, prange lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
static int
svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
			struct mm_struct *mm)
{
	unsigned long addr, start, end;
	struct vm_area_struct *vma;
	struct amdgpu_device *adev;
	int r = 0;

	if (prange->actual_loc == best_loc) {
		pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
			 prange->svms, prange->start, prange->last, best_loc);
		return 0;
	}

	adev = svm_range_get_adev_by_id(prange, best_loc);
	if (!adev) {
		pr_debug("failed to get device by id 0x%x\n", best_loc);
		return -ENODEV;
	}

	pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
		 prange->start, prange->last, best_loc);

	/* FIXME: workaround for page locking bug with invalid pages */
	svm_range_prefault(prange, mm);

	start = prange->start << PAGE_SHIFT;
	end = (prange->last + 1) << PAGE_SHIFT;

	for (addr = start; addr < end;) {
		unsigned long next;

		vma = find_vma(mm, addr);
		if (!vma || addr < vma->vm_start)
			break;

		next = min(vma->vm_end, end);
		r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next);
		if (r) {
			pr_debug("failed to migrate\n");
			break;
		}
		addr = next;
	}

	if (!r)
		prange->actual_loc = best_loc;

	return r;
}

static void svm_migrate_page_free(struct page *page)
{
	/* Keep this function to avoid warning */
}
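
/*
 * Reverse direction of svm_migrate_copy_to_vram(): here scratch holds
 * dst[] (dma_addr_t of newly allocated system pages) first and src[]
 * (VRAM addresses) second. Contiguous VRAM runs are accumulated and
 * flushed through svm_migrate_copy_memory_gart() whenever the next
 * source page is not adjacent to the previous one.
 */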
static int
svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
			struct migrate_vma *migrate, struct dma_fence **mfence,
			dma_addr_t *scratch)
{
	uint64_t npages = migrate->cpages;
	struct device *dev = adev->dev;
	uint64_t *src;
	dma_addr_t *dst;
	struct page *dpage;
	uint64_t i = 0, j;
	uint64_t addr;
	int r = 0;

	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
		 prange->last);

	addr = prange->start << PAGE_SHIFT;

	src = (uint64_t *)(scratch + npages);
	dst = scratch;

	for (i = 0, j = 0; i < npages; i++, j++, addr += PAGE_SIZE) {
		struct page *spage;

		spage = migrate_pfn_to_page(migrate->src[i]);
		if (!spage) {
			pr_debug("failed get spage svms 0x%p [0x%lx 0x%lx]\n",
				 prange->svms, prange->start, prange->last);
			r = -ENOMEM;
			goto out_oom;
		}
		src[i] = svm_migrate_addr(adev, spage);
		if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
			r = svm_migrate_copy_memory_gart(adev, dst + i - j,
							 src + i - j, j,
							 FROM_VRAM_TO_RAM,
							 mfence);
			if (r)
				goto out_oom;
			j = 0;
		}

		dpage = svm_migrate_get_sys_page(migrate->vma, addr);
		if (!dpage) {
			pr_debug("failed get page svms 0x%p [0x%lx 0x%lx]\n",
				 prange->svms, prange->start, prange->last);
			r = -ENOMEM;
			goto out_oom;
		}

		dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE);
		r = dma_mapping_error(dev, dst[i]);
		if (r) {
			pr_debug("failed %d dma_map_page\n", r);
			goto out_oom;
		}

		pr_debug("dma mapping dst to 0x%llx, page_to_pfn 0x%lx\n",
			 dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));

		migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
		migrate->dst[i] |= MIGRATE_PFN_LOCKED;
	}

	r = svm_migrate_copy_memory_gart(adev, dst + i - j, src + i - j, j,
					 FROM_VRAM_TO_RAM, mfence);

out_oom:
	if (r) {
		pr_debug("failed %d copy to ram\n", r);
		while (i--) {
			svm_migrate_put_sys_page(dst[i]);
			migrate->dst[i] = 0;
		}
	}

	return r;
}

static int
svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
		       struct vm_area_struct *vma, uint64_t start, uint64_t end)
{
	uint64_t npages = (end - start) >> PAGE_SHIFT;
	struct dma_fence *mfence = NULL;
	struct migrate_vma migrate;
	dma_addr_t *scratch;
	size_t size;
	void *buf;
	int r = -ENOMEM;

	memset(&migrate, 0, sizeof(migrate));
	migrate.vma = vma;
	migrate.start = start;
	migrate.end = end;
	migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
	migrate.pgmap_owner = adev;

	size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
	size *= npages;
	buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
	if (!buf)
		goto out;

	migrate.src = buf;
	migrate.dst = migrate.src + npages;
	scratch = (dma_addr_t *)(migrate.dst + npages);

	r = migrate_vma_setup(&migrate);
	if (r) {
		pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n",
			 r, prange->svms, prange->start, prange->last);
		goto out_free;
	}

	pr_debug("cpages %ld\n", migrate.cpages);

	if (migrate.cpages) {
		r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
					    scratch);
		migrate_vma_pages(&migrate);
		svm_migrate_copy_done(adev, mfence);
		migrate_vma_finalize(&migrate);
	} else {
		pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n",
			 prange->start, prange->last);
	}

	svm_range_dma_unmap(adev->dev, scratch, 0, npages);

out_free:
	kvfree(buf);
out:
	return r;
}

/**
 * svm_migrate_vram_to_ram - migrate svm range from device to system
 * @prange: range structure
 * @mm: process mm, use current->mm if NULL
 *
 * Context: Process context, caller holds mmap read lock, svms lock, prange lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
{
	struct amdgpu_device *adev;
	struct vm_area_struct *vma;
	unsigned long addr;
	unsigned long start;
	unsigned long end;
	int r = 0;

	if (!prange->actual_loc) {
		pr_debug("[0x%lx 0x%lx] already migrated to ram\n",
			 prange->start, prange->last);
		return 0;
	}

	adev = svm_range_get_adev_by_id(prange, prange->actual_loc);
	if (!adev) {
		pr_debug("failed to get device by id 0x%x\n",
			 prange->actual_loc);
		return -ENODEV;
	}

	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n",
		 prange->svms, prange, prange->start, prange->last,
		 prange->actual_loc);

	start = prange->start << PAGE_SHIFT;
	end = (prange->last + 1) << PAGE_SHIFT;

	for (addr = start; addr < end;) {
		unsigned long next;

		vma = find_vma(mm, addr);
		if (!vma || addr < vma->vm_start)
			break;

		next = min(vma->vm_end, end);
		r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next);
		if (r) {
			pr_debug("failed %d to migrate\n", r);
			break;
		}
		addr = next;
	}

	if (!r) {
		svm_range_vram_node_free(prange);
		prange->actual_loc = 0;
	}
	return r;
}

/**
 * svm_migrate_vram_to_vram - migrate svm range from device to device
 * @prange: range structure
 * @best_loc: the device to migrate to
 * @mm: process mm, use current->mm if NULL
 *
 * Context: Process context, caller holds mmap read lock, svms lock, prange lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
static int
svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
			 struct mm_struct *mm)
{
	int r;

	/*
	 * TODO: when both devices have a large PCIe BAR or are on the same
	 * XGMI hive, skip using system memory as the migration bridge
	 */

	pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);

	r = svm_migrate_vram_to_ram(prange, mm);
	if (r)
		return r;

	return svm_migrate_ram_to_vram(prange, best_loc, mm);
}

int
svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
		    struct mm_struct *mm)
{
	if (!prange->actual_loc)
		return svm_migrate_ram_to_vram(prange, best_loc, mm);
	else
		return svm_migrate_vram_to_vram(prange, best_loc, mm);
}

/**
 * svm_migrate_to_ram - CPU page fault handler
 * @vmf: CPU vm fault vma, address
 *
 * Context: vm fault handler, caller holds the mmap read lock
 *
 * Return:
 * 0 - OK
 * VM_FAULT_SIGBUS - notify the application of a SIGBUS page fault
 */
static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
{
	unsigned long addr = vmf->address;
	struct vm_area_struct *vma;
	enum svm_work_list_ops op;
	struct svm_range *parent;
	struct svm_range *prange;
	struct kfd_process *p;
	struct mm_struct *mm;
	int r = 0;

	vma = vmf->vma;
	mm = vma->vm_mm;

	p = kfd_lookup_process_by_mm(vma->vm_mm);
	if (!p) {
		pr_debug("failed find process at fault address 0x%lx\n", addr);
		return VM_FAULT_SIGBUS;
	}
	addr >>= PAGE_SHIFT;
	pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr);

	mutex_lock(&p->svms.lock);

	prange = svm_range_from_addr(&p->svms, addr, &parent);
	if (!prange) {
		pr_debug("cannot find svm range at 0x%lx\n", addr);
		r = -EFAULT;
		goto out;
	}

	mutex_lock(&parent->migrate_mutex);
	if (prange != parent)
		mutex_lock_nested(&prange->migrate_mutex, 1);

	if (!prange->actual_loc)
		goto out_unlock_prange;

	svm_range_lock(parent);
	if (prange != parent)
		mutex_lock_nested(&prange->lock, 1);
	r = svm_range_split_by_granularity(p, mm, addr, parent, prange);
	if (prange != parent)
		mutex_unlock(&prange->lock);
	svm_range_unlock(parent);
	if (r) {
		pr_debug("failed %d to split range by granularity\n", r);
		goto out_unlock_prange;
	}

	r = svm_migrate_vram_to_ram(prange, mm);
	if (r)
		pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r,
			 prange, prange->start, prange->last);

	/* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
	if (p->xnack_enabled && parent == prange)
		op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP;
	else
		op = SVM_OP_UPDATE_RANGE_NOTIFIER;
	svm_range_add_list_work(&p->svms, parent, mm, op);
	schedule_deferred_list_work(&p->svms);

out_unlock_prange:
	if (prange != parent)
		mutex_unlock(&prange->migrate_mutex);
	mutex_unlock(&parent->migrate_mutex);
out:
	mutex_unlock(&p->svms.lock);
	kfd_unref_process(p);

	pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr);

	return r ? VM_FAULT_SIGBUS : 0;
}
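
/*
 * HMM callbacks for the device-private pgmap registered in
 * svm_migrate_init(): page_free runs when the last reference to a
 * device page is dropped, and migrate_to_ram handles CPU faults on
 * device-private PTEs by migrating the range back to system memory.
 */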
static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
	.page_free		= svm_migrate_page_free,
	.migrate_to_ram		= svm_migrate_to_ram,
};

/* Each VRAM page uses sizeof(struct page) on system memory */
#define SVM_HMM_PAGE_STRUCT_SIZE(size) ((size)/PAGE_SIZE * sizeof(struct page))
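
/*
 * Worked example: with 4 KiB pages and a 64-byte struct page (a typical
 * x86-64 value, assumed here for illustration), registering 16 GiB of
 * VRAM costs 16 GiB / 4 KiB * 64 B = 256 MiB of system memory, which is
 * why it is accounted via amdgpu_amdkfd_reserve_system_mem() below.
 */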

int svm_migrate_init(struct amdgpu_device *adev)
{
	struct kfd_dev *kfddev = adev->kfd.dev;
	struct dev_pagemap *pgmap;
	struct resource *res;
	unsigned long size;
	void *r;

	/* Page migration works on Vega10 or newer */
	if (kfddev->device_info->asic_family < CHIP_VEGA10)
		return -EINVAL;

	pgmap = &kfddev->pgmap;
	memset(pgmap, 0, sizeof(*pgmap));

	/* TODO: registers all VRAM with HMM for now;
	 * the reserved size should be excluded
	 */
	size = ALIGN(adev->gmc.real_vram_size, 2ULL << 20);
	res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
	if (IS_ERR(res))
		return -ENOMEM;

	pgmap->type = MEMORY_DEVICE_PRIVATE;
	pgmap->nr_range = 1;
	pgmap->range.start = res->start;
	pgmap->range.end = res->end;
	pgmap->ops = &svm_migrate_pgmap_ops;
	pgmap->owner = adev;
	pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
	r = devm_memremap_pages(adev->dev, pgmap);
	if (IS_ERR(r)) {
		pr_err("failed to register HMM device memory\n");
		devm_release_mem_region(adev->dev, res->start,
					res->end - res->start + 1);
		return PTR_ERR(r);
	}

	pr_debug("reserve %ldMB system memory for VRAM pages struct\n",
		 SVM_HMM_PAGE_STRUCT_SIZE(size) >> 20);

	amdgpu_amdkfd_reserve_system_mem(SVM_HMM_PAGE_STRUCT_SIZE(size));

	pr_info("HMM registered %ldMB device memory\n", size >> 20);

	return 0;
}

void svm_migrate_fini(struct amdgpu_device *adev)
{
	struct dev_pagemap *pgmap = &adev->kfd.dev->pgmap;

	devm_memunmap_pages(adev->dev, pgmap);
	devm_release_mem_region(adev->dev, pgmap->range.start,
				pgmap->range.end - pgmap->range.start + 1);
}

65	drivers/gpu/drm/amd/amdkfd/kfd_migrate.h	Normal file

@ -0,0 +1,65 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
 * Copyright 2020-2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef KFD_MIGRATE_H_
#define KFD_MIGRATE_H_

#if IS_ENABLED(CONFIG_HSA_AMD_SVM)

#include <linux/rwsem.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/sched/mm.h>
#include <linux/hmm.h>
#include "kfd_priv.h"
#include "kfd_svm.h"

enum MIGRATION_COPY_DIR {
	FROM_RAM_TO_VRAM = 0,
	FROM_VRAM_TO_RAM
};

int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
			struct mm_struct *mm);
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm);
unsigned long
svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);

int svm_migrate_init(struct amdgpu_device *adev);
void svm_migrate_fini(struct amdgpu_device *adev);

#else

static inline int svm_migrate_init(struct amdgpu_device *adev)
{
	return 0;
}
static inline void svm_migrate_fini(struct amdgpu_device *adev)
{
	/* empty */
}

#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */

#endif /* KFD_MIGRATE_H_ */

drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c:

@ -124,14 +124,14 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
{
	unsigned int alloc_size_bytes;
	unsigned int *rl_buffer, rl_wptr, i;
	int retval, proccesses_mapped;
	int retval, processes_mapped;
	struct device_process_node *cur;
	struct qcm_process_device *qpd;
	struct queue *q;
	struct kernel_queue *kq;
	bool is_over_subscription;

	rl_wptr = retval = proccesses_mapped = 0;
	rl_wptr = retval = processes_mapped = 0;

	retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
				&alloc_size_bytes, &is_over_subscription);

@ -148,7 +148,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
	list_for_each_entry(cur, queues, list) {
		qpd = cur->qpd;
		/* build map process packet */
		if (proccesses_mapped >= pm->dqm->processes_count) {
		if (processes_mapped >= pm->dqm->processes_count) {
			pr_debug("Not enough space left in runlist IB\n");
			pm_release_ib(pm);
			return -ENOMEM;

@ -158,7 +158,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
		if (retval)
			return retval;

		proccesses_mapped++;
		processes_mapped++;
		inc_wptr(&rl_wptr, pm->pmf->map_process_size,
				alloc_size_bytes);

@ -242,7 +242,6 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
	case CHIP_RAVEN:
	case CHIP_RENOIR:
	case CHIP_ARCTURUS:
	case CHIP_ALDEBARAN:
	case CHIP_NAVI10:
	case CHIP_NAVI12:
	case CHIP_NAVI14:

@ -252,6 +251,9 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
	case CHIP_DIMGREY_CAVEFISH:
		pm->pmf = &kfd_v9_pm_funcs;
		break;
	case CHIP_ALDEBARAN:
		pm->pmf = &kfd_aldebaran_pm_funcs;
		break;
	default:
		WARN(1, "Unexpected ASIC family %u",
		     dqm->dev->device_info->asic_family);

drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c:

@ -24,6 +24,7 @@
#include "kfd_kernel_queue.h"
 | 
			
		||||
#include "kfd_device_queue_manager.h"
 | 
			
		||||
#include "kfd_pm4_headers_ai.h"
 | 
			
		||||
#include "kfd_pm4_headers_aldebaran.h"
 | 
			
		||||
#include "kfd_pm4_opcodes.h"
 | 
			
		||||
#include "gc/gc_10_1_0_sh_mask.h"
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -35,7 +36,6 @@ static int pm_map_process_v9(struct packet_manager *pm,
 | 
			
		|||
	packet = (struct pm4_mes_map_process *)buffer;
	memset(buffer, 0, sizeof(struct pm4_mes_map_process));

	packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
					sizeof(struct pm4_mes_map_process));
	packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;

@ -73,6 +73,45 @@ static int pm_map_process_v9(struct packet_manager *pm,
	return 0;
}
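
/*
 * Aldebaran variant of the MAP_PROCESS packet. Note the field packing:
 * gds_size is split across two bitfields (low 6 bits in gds_size, upper
 * 4 bits in gds_size_hi), and the trap handler TBA/TMA addresses are
 * programmed shifted right by 8, i.e. in 256-byte units.
 */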
static int pm_map_process_aldebaran(struct packet_manager *pm,
		uint32_t *buffer, struct qcm_process_device *qpd)
{
	struct pm4_mes_map_process_aldebaran *packet;
	uint64_t vm_page_table_base_addr = qpd->page_table_base;

	packet = (struct pm4_mes_map_process_aldebaran *)buffer;
	memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran));
	packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
			sizeof(struct pm4_mes_map_process_aldebaran));
	packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
	packet->bitfields2.process_quantum = 10;
	packet->bitfields2.pasid = qpd->pqm->process->pasid;
	packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
	packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
	packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0;
	packet->bitfields14.num_oac = qpd->num_oac;
	packet->bitfields14.sdma_enable = 1;
	packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;

	packet->sh_mem_config = qpd->sh_mem_config;
	packet->sh_mem_bases = qpd->sh_mem_bases;
	if (qpd->tba_addr) {
		packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
		packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
		packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
	}

	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);

	packet->vm_context_page_table_base_addr_lo32 =
			lower_32_bits(vm_page_table_base_addr);
	packet->vm_context_page_table_base_addr_hi32 =
			upper_32_bits(vm_page_table_base_addr);

	return 0;
}

static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
			uint64_t ib, size_t ib_size_in_dwords, bool chain)
{

@ -324,3 +363,20 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
	.query_status_size	= sizeof(struct pm4_mes_query_status),
	.release_mem_size	= 0,
};

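/* The Aldebaran table below reuses the v9 handlers for everything except
 * MAP_PROCESS, which needs the larger pm4_mes_map_process_aldebaran
 * packet built by pm_map_process_aldebaran() above.
 */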
const struct packet_manager_funcs kfd_aldebaran_pm_funcs = {
	.map_process		= pm_map_process_aldebaran,
	.runlist		= pm_runlist_v9,
	.set_resources		= pm_set_resources_v9,
	.map_queues		= pm_map_queues_v9,
	.unmap_queues		= pm_unmap_queues_v9,
	.query_status		= pm_query_status_v9,
	.release_mem		= NULL,
	.map_process_size	= sizeof(struct pm4_mes_map_process_aldebaran),
	.runlist_size		= sizeof(struct pm4_mes_runlist),
	.set_resources_size	= sizeof(struct pm4_mes_set_resources),
	.map_queues_size	= sizeof(struct pm4_mes_map_queues),
	.unmap_queues_size	= sizeof(struct pm4_mes_unmap_queues),
	.query_status_size	= sizeof(struct pm4_mes_query_status),
	.release_mem_size	= 0,
};

93	drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h	Normal file

@ -0,0 +1,93 @@
/*
 * Copyright 2020 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

/*--------------------MES_MAP_PROCESS (PER DEBUG VMID)--------------------*/

#ifndef PM4_MES_MAP_PROCESS_PER_DEBUG_VMID_DEFINED
#define PM4_MES_MAP_PROCESS_PER_DEBUG_VMID_DEFINED

struct pm4_mes_map_process_aldebaran {
	union {
		union PM4_MES_TYPE_3_HEADER header;	/* header */
		uint32_t ordinal1;
	};

	union {
		struct {
			uint32_t pasid:16;	    /* 0 - 15  */
			uint32_t single_memops:1;   /* 16      */
			uint32_t reserved1:1;	    /* 17      */
			uint32_t debug_vmid:4;	    /* 18 - 21 */
			uint32_t new_debug:1;	    /* 22      */
			uint32_t tmz:1;		    /* 23      */
			uint32_t diq_enable:1;      /* 24      */
			uint32_t process_quantum:7; /* 25 - 31 */
		} bitfields2;
		uint32_t ordinal2;
	};

	uint32_t vm_context_page_table_base_addr_lo32;

	uint32_t vm_context_page_table_base_addr_hi32;

	uint32_t sh_mem_bases;

	uint32_t sh_mem_config;

	uint32_t sq_shader_tba_lo;

	uint32_t sq_shader_tba_hi;

	uint32_t sq_shader_tma_lo;

	uint32_t sq_shader_tma_hi;

	uint32_t reserved6;

	uint32_t gds_addr_lo;

	uint32_t gds_addr_hi;

	union {
		struct {
			uint32_t num_gws:7;
			uint32_t sdma_enable:1;
			uint32_t num_oac:4;
			uint32_t gds_size_hi:4;
			uint32_t gds_size:6;
			uint32_t num_queues:10;
		} bitfields14;
		uint32_t ordinal14;
	};

	uint32_t spi_gdbg_per_vmid_cntl;

	uint32_t tcp_watch_cntl[4];

	uint32_t completion_signal_lo;

	uint32_t completion_signal_hi;

};

#endif

drivers/gpu/drm/amd/amdkfd/kfd_priv.h:

@ -322,6 +322,9 @@ struct kfd_dev {
	unsigned int max_doorbell_slices;

	int noretry;

	/* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
	struct dev_pagemap pgmap;
};

enum kfd_mempool {

@ -669,7 +672,7 @@ struct kfd_process_device {

	/* VM context for GPUVM allocations */
	struct file *drm_file;
	void *vm;
	void *drm_priv;

	/* GPUVM allocations storage */
	struct idr alloc_idr;

@ -731,6 +734,17 @@ struct kfd_process_device {

#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)

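/*
 * Per-process SVM state: an interval tree (objects) of svm_range
 * structs, plus bookkeeping for work that cannot run in MMU notifier
 * context, which is queued on deferred_range_list and handled by
 * deferred_list_work; evicted_ranges and restore_work track ranges
 * waiting to be revalidated and remapped.
 */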
struct svm_range_list {
	struct mutex			lock;
	struct rb_root_cached		objects;
	struct list_head		list;
	struct work_struct		deferred_list_work;
	struct list_head		deferred_range_list;
	spinlock_t			deferred_list_lock;
	atomic_t			evicted_ranges;
	struct delayed_work		restore_work;
};

/* Process data */
struct kfd_process {
	/*

@ -809,6 +823,12 @@ struct kfd_process {
	struct kobject *kobj;
	struct kobject *kobj_queues;
	struct attribute attr_pasid;

	/* shared virtual memory registered by this process */
	struct svm_range_list svms;
	bool svm_disabled;

	bool xnack_enabled;
};

#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */

@ -842,6 +862,20 @@ struct kfd_process *kfd_create_process(struct file *filep);
struct kfd_process *kfd_get_process(const struct task_struct *);
struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);

int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id);
int kfd_process_gpuid_from_kgd(struct kfd_process *p,
			       struct amdgpu_device *adev, uint32_t *gpuid,
			       uint32_t *gpuidx);
static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p,
				uint32_t gpuidx, uint32_t *gpuid) {
	return gpuidx < p->n_pdds ? p->pdds[gpuidx]->dev->id : -EINVAL;
}
static inline struct kfd_process_device *kfd_process_device_from_gpuidx(
				struct kfd_process *p, uint32_t gpuidx) {
	return gpuidx < p->n_pdds ? p->pdds[gpuidx] : NULL;
}

void kfd_unref_process(struct kfd_process *p);
int kfd_process_evict_queues(struct kfd_process *p);
int kfd_process_restore_queues(struct kfd_process *p);

@ -857,6 +891,8 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p);

bool kfd_process_xnack_mode(struct kfd_process *p, bool supported);

int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
			  struct vm_area_struct *vma);

@ -1052,6 +1088,7 @@ struct packet_manager_funcs {

extern const struct packet_manager_funcs kfd_vi_pm_funcs;
extern const struct packet_manager_funcs kfd_v9_pm_funcs;
extern const struct packet_manager_funcs kfd_aldebaran_pm_funcs;

int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
void pm_uninit(struct packet_manager *pm, bool hanging);

drivers/gpu/drm/amd/amdkfd/kfd_process.c:

@ -35,6 +35,7 @@
#include <linux/pm_runtime.h>
#include "amdgpu_amdkfd.h"
#include "amdgpu.h"
#include "kfd_svm.h"

struct mm_struct;

@ -42,6 +43,7 @@ struct mm_struct;
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "kfd_iommu.h"
#include "kfd_svm.h"

/*
 * List of struct kfd_process (field kfd_process).

@ -250,7 +252,7 @@ static void kfd_sdma_activity_worker(struct work_struct *work)
}

/**
 * @kfd_get_cu_occupancy() - Collect number of waves in-flight on this device
 * @kfd_get_cu_occupancy - Collect number of waves in-flight on this device
 * by current process. Translates acquired wave count into number of compute units
 * that are occupied.
 *
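
/* The hunks below replace the opaque pdd->vm cookie with pdd->drm_priv,
 * i.e. the DRM file's private data (see kfd_process_device_init_vm), so
 * the amdgpu_amdkfd GPUVM calls operate on the VM owned by the process's
 * DRM render-node file descriptor.
 */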
| 
						 | 
				
			
			@ -647,8 +649,9 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem,
 | 
			
		|||
{
 | 
			
		||||
	struct kfd_dev *dev = pdd->dev;
 | 
			
		||||
 | 
			
		||||
	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm);
 | 
			
		||||
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, NULL);
 | 
			
		||||
	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->drm_priv);
 | 
			
		||||
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, pdd->drm_priv,
 | 
			
		||||
					       NULL);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
 | 
			
		||||
| 
						 | 
				
			
			@ -667,11 +670,11 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
 | 
			
		|||
	int err;
 | 
			
		||||
 | 
			
		||||
	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
 | 
			
		||||
						 pdd->vm, &mem, NULL, flags);
 | 
			
		||||
						 pdd->drm_priv, &mem, NULL, flags);
 | 
			
		||||
	if (err)
 | 
			
		||||
		goto err_alloc_mem;
 | 
			
		||||
 | 
			
		||||
	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
 | 
			
		||||
	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->drm_priv);
 | 
			
		||||
	if (err)
 | 
			
		||||
		goto err_map_mem;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -712,7 +715,8 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
 | 
			
		|||
	return err;
 | 
			
		||||
 | 
			
		||||
err_map_mem:
 | 
			
		||||
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, NULL);
 | 
			
		||||
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, pdd->drm_priv,
 | 
			
		||||
					       NULL);
 | 
			
		||||
err_alloc_mem:
 | 
			
		||||
	*kptr = NULL;
 | 
			
		||||
	return err;
 | 
			
		||||
| 
						 | 
				
			
			@ -901,13 +905,14 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
 | 
			
		|||
		for (i = 0; i < p->n_pdds; i++) {
 | 
			
		||||
			struct kfd_process_device *peer_pdd = p->pdds[i];
 | 
			
		||||
 | 
			
		||||
			if (!peer_pdd->vm)
 | 
			
		||||
			if (!peer_pdd->drm_priv)
 | 
			
		||||
				continue;
 | 
			
		||||
			amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
 | 
			
		||||
				peer_pdd->dev->kgd, mem, peer_pdd->vm);
 | 
			
		||||
				peer_pdd->dev->kgd, mem, peer_pdd->drm_priv);
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, NULL);
 | 
			
		||||
		amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem,
 | 
			
		||||
						       pdd->drm_priv, NULL);
 | 
			
		||||
		kfd_process_device_remove_obj_handle(pdd, id);
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -932,7 +937,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
 | 
			
		|||
 | 
			
		||||
		if (pdd->drm_file) {
 | 
			
		||||
			amdgpu_amdkfd_gpuvm_release_process_vm(
 | 
			
		||||
					pdd->dev->kgd, pdd->vm);
 | 
			
		||||
					pdd->dev->kgd, pdd->drm_priv);
 | 
			
		||||
			fput(pdd->drm_file);
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1000,6 +1005,7 @@ static void kfd_process_wq_release(struct work_struct *work)
 | 
			
		|||
	kfd_iommu_unbind_process(p);
 | 
			
		||||
 | 
			
		||||
	kfd_process_free_outstanding_kfd_bos(p);
 | 
			
		||||
	svm_range_list_fini(p);
 | 
			
		||||
 | 
			
		||||
	kfd_process_destroy_pdds(p);
 | 
			
		||||
	dma_fence_put(p->ef);
 | 
			
		||||
| 
						 | 
				
			
			@ -1058,6 +1064,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
 | 
			
		|||
 | 
			
		||||
	cancel_delayed_work_sync(&p->eviction_work);
 | 
			
		||||
	cancel_delayed_work_sync(&p->restore_work);
 | 
			
		||||
	cancel_delayed_work_sync(&p->svms.restore_work);
 | 
			
		||||
 | 
			
		||||
	mutex_lock(&p->mutex);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1186,6 +1193,56 @@ void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
 | 
			
		|||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
 | 
			
		||||
{
 | 
			
		||||
	int i;
 | 
			
		||||
 | 
			
		||||
	/* On most GFXv9 GPUs, the retry mode in the SQ must match the
 | 
			
		||||
	 * boot time retry setting. Mixing processes with different
 | 
			
		||||
	 * XNACK/retry settings can hang the GPU.
 | 
			
		||||
	 *
 | 
			
		||||
	 * Different GPUs can have different noretry settings depending
 | 
			
		||||
	 * on HW bugs or limitations. We need to find at least one
 | 
			
		||||
	 * XNACK mode for this process that's compatible with all GPUs.
 | 
			
		||||
	 * Fortunately GPUs with retry enabled (noretry=0) can run code
 | 
			
		||||
	 * built for XNACK-off. On GFXv9 it may perform slower.
 | 
			
		||||
	 *
 | 
			
		||||
	 * Therefore applications built for XNACK-off can always be
 | 
			
		||||
	 * supported and will be our fallback if any GPU does not
 | 
			
		||||
	 * support retry.
 | 
			
		||||
	 */
 | 
			
		||||
	for (i = 0; i < p->n_pdds; i++) {
 | 
			
		||||
		struct kfd_dev *dev = p->pdds[i]->dev;
 | 
			
		||||
 | 
			
		||||
		/* Only consider GFXv9 and higher GPUs. Older GPUs don't
 | 
			
		||||
		 * support the SVM APIs and don't need to be considered
 | 
			
		||||
		 * for the XNACK mode selection.
 | 
			
		||||
		 */
 | 
			
		||||
		if (dev->device_info->asic_family < CHIP_VEGA10)
 | 
			
		||||
			continue;
 | 
			
		||||
		/* Aldebaran can always support XNACK because it can support
 | 
			
		||||
		 * per-process XNACK mode selection. But let the dev->noretry
 | 
			
		||||
		 * setting still influence the default XNACK mode.
 | 
			
		||||
		 */
 | 
			
		||||
		if (supported &&
 | 
			
		||||
		    dev->device_info->asic_family == CHIP_ALDEBARAN)
 | 
			
		||||
			continue;
 | 
			
		||||
 | 
			
		||||
		/* GFXv10 and later GPUs do not support shader preemption
 | 
			
		||||
		 * during page faults. This can lead to poor QoS for queue
 | 
			
		||||
		 * management and memory-manager-related preemptions or
 | 
			
		||||
		 * even deadlocks.
 | 
			
		||||
		 */
 | 
			
		||||
		if (dev->device_info->asic_family >= CHIP_NAVI10)
 | 
			
		||||
			return false;
 | 
			
		||||
 | 
			
		||||
		if (dev->noretry)
 | 
			
		||||
			return false;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * On return the kfd_process is fully operational and will be freed when the
 | 
			
		||||
 * mm is released
 | 
			
		||||
| 
						 | 
				
			
			@ -1205,6 +1262,7 @@ static struct kfd_process *create_process(const struct task_struct *thread)
 | 
			
		|||
	process->mm = thread->mm;
 | 
			
		||||
	process->lead_thread = thread->group_leader;
 | 
			
		||||
	process->n_pdds = 0;
 | 
			
		||||
	process->svm_disabled = false;
 | 
			
		||||
	INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
 | 
			
		||||
	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
 | 
			
		||||
	process->last_restore_timestamp = get_jiffies_64();
 | 
			
		||||
| 
						 | 
				
			
			@ -1224,6 +1282,13 @@ static struct kfd_process *create_process(const struct task_struct *thread)
 | 
			
		|||
	if (err != 0)
 | 
			
		||||
		goto err_init_apertures;
 | 
			
		||||
 | 
			
		||||
	/* Check XNACK support after PDDs are created in kfd_init_apertures */
 | 
			
		||||
	process->xnack_enabled = kfd_process_xnack_mode(process, false);
 | 
			
		||||
 | 
			
		||||
	err = svm_range_list_init(process);
 | 
			
		||||
	if (err)
 | 
			
		||||
		goto err_init_svm_range_list;

	/* alloc_notifier needs to find the process in the hash table */
	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
			(uintptr_t)process->mm);
@ -1246,6 +1311,8 @@ static struct kfd_process *create_process(const struct task_struct *thread)

err_register_notifier:
	hash_del_rcu(&process->kfd_processes);
	svm_range_list_fini(process);
err_init_svm_range_list:
	kfd_process_free_outstanding_kfd_bos(process);
	kfd_process_destroy_pdds(process);
err_init_apertures:
@ -1375,7 +1442,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
	if (!drm_file)
		return -EINVAL;

	if (pdd->vm)
	if (pdd->drm_priv)
		return -EBUSY;

	p = pdd->process;
@ -1383,13 +1450,12 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,

	ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
		dev->kgd, drm_file, p->pasid,
		&pdd->vm, &p->kgd_process_info, &p->ef);
		&p->kgd_process_info, &p->ef);
	if (ret) {
		pr_err("Failed to create process VM object\n");
		return ret;
	}

	amdgpu_vm_set_task_info(pdd->vm);
	pdd->drm_priv = drm_file->private_data;

	ret = kfd_process_device_reserve_ib_mem(pdd);
	if (ret)
@ -1405,7 +1471,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
err_init_cwsr:
err_reserve_ib_mem:
	kfd_process_device_free_bos(pdd);
	pdd->vm = NULL;
	pdd->drm_priv = NULL;

	return ret;
}
@ -1429,7 +1495,7 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
		return ERR_PTR(-ENOMEM);
	}

	if (!pdd->vm)
	if (!pdd->drm_priv)
		return ERR_PTR(-ENODEV);

	/*
@ -1600,6 +1666,32 @@ int kfd_process_restore_queues(struct kfd_process *p)
	return ret;
}

int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id)
{
	int i;

	for (i = 0; i < p->n_pdds; i++)
		if (p->pdds[i] && gpu_id == p->pdds[i]->dev->id)
			return i;
	return -EINVAL;
}

int
kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev,
			   uint32_t *gpuid, uint32_t *gpuidx)
{
	struct kgd_dev *kgd = (struct kgd_dev *)adev;
	int i;

	for (i = 0; i < p->n_pdds; i++)
		if (p->pdds[i] && p->pdds[i]->dev->kgd == kgd) {
			*gpuid = p->pdds[i]->dev->id;
			*gpuidx = i;
			return 0;
		}
	return -EINVAL;
}
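The two helpers added in the hunk above translate between a user-visible gpu_id and an index into p->pdds[]. A hedged usage sketch; the wrapper name is illustrative, not from the patch:

/* Sketch: resolve a user-supplied gpu_id to per-process device data,
 * the way SVM ioctl paths need to. lookup_pdd() is a hypothetical name. */
static struct kfd_process_device *lookup_pdd(struct kfd_process *p,
					     uint32_t gpu_id)
{
	int gpuidx = kfd_process_gpuidx_from_gpuid(p, gpu_id);

	return gpuidx < 0 ? NULL : p->pdds[gpuidx];
}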

static void evict_process_worker(struct work_struct *work)
{
	int ret;
drivers/gpu/drm/amd/amdkfd/kfd_svm.c (new file, 3085 lines; diff suppressed because it is too large)
drivers/gpu/drm/amd/amdkfd/kfd_svm.h (new file, 206 lines)
@ -0,0 +1,206 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
 * Copyright 2020-2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef KFD_SVM_H_
#define KFD_SVM_H_

#if IS_ENABLED(CONFIG_HSA_AMD_SVM)

#include <linux/rwsem.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/sched/mm.h>
#include <linux/hmm.h>
#include "amdgpu.h"
#include "kfd_priv.h"

struct svm_range_bo {
	struct amdgpu_bo		*bo;
	struct kref			kref;
	struct list_head		range_list; /* all svm ranges sharing this bo */
	spinlock_t			list_lock;
	struct amdgpu_amdkfd_fence	*eviction_fence;
	struct work_struct		eviction_work;
	struct svm_range_list		*svms;
	uint32_t			evicting;
};

enum svm_work_list_ops {
	SVM_OP_NULL,
	SVM_OP_UNMAP_RANGE,
	SVM_OP_UPDATE_RANGE_NOTIFIER,
	SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP,
	SVM_OP_ADD_RANGE,
	SVM_OP_ADD_RANGE_AND_MAP
};

struct svm_work_list_item {
	enum svm_work_list_ops op;
	struct mm_struct *mm;
};

/**
 * struct svm_range - shared virtual memory range
 *
 * @svms:       list of svm ranges, structure defined in kfd_process
 * @migrate_mutex: to serialize range migration, validation and mapping update
 * @start:      range start address in pages
 * @last:       range last address in pages
 * @it_node:    node [start, last] stored in interval tree, start, last are page
 *              aligned, page size is (last - start + 1)
 * @list:       link list node, used to scan all ranges of svms
 * @update_list:link list node used to add to update_list
 * @remove_list:link list node used to add to remove list
 * @insert_list:link list node used to add to insert list
 * @mapping:    bo_va mapping structure to create and update GPU page table
 * @npages:     number of pages
 * @dma_addr:   dma mapping address on each GPU for system memory physical page
 * @ttm_res:    vram ttm resource map
 * @offset:     range start offset within mm_nodes
 * @svm_bo:     struct to manage split amdgpu_bo
 * @svm_bo_list:link list node, to scan all ranges which share same svm_bo
 * @lock:       protect prange start, last, child_list, svm_bo_list
 * @saved_flags:save/restore current PF_MEMALLOC flags
 * @flags:      flags defined as KFD_IOCTL_SVM_FLAG_*
 * @preferred_loc: preferred location, 0 for CPU, or GPU id
 * @prefetch_loc: last prefetch location, 0 for CPU, or GPU id
 * @actual_loc: the actual location, 0 for CPU, or GPU id
 * @granularity:migration granularity, log2 num pages
 * @invalid:    not 0 means cpu page table is invalidated
 * @validate_timestamp: system timestamp when range is validated
 * @notifier:   register mmu interval notifier
 * @work_item:  deferred work item information
 * @deferred_list: list header used to add range to deferred list
 * @child_list: list header for split ranges which are not added to svms yet
 * @bitmap_access: index bitmap of GPUs which can access the range
 * @bitmap_aip: index bitmap of GPUs which can access the range in place
 *
 * Data structure for virtual memory range shared by CPU and GPUs, it can be
 * allocated from system memory ram or device vram, and migrate from ram to vram
 * or from vram to ram.
 */
struct svm_range {
	struct svm_range_list		*svms;
	struct mutex			migrate_mutex;
	unsigned long			start;
	unsigned long			last;
	struct interval_tree_node	it_node;
	struct list_head		list;
	struct list_head		update_list;
	struct list_head		remove_list;
	struct list_head		insert_list;
	struct amdgpu_bo_va_mapping	mapping;
	uint64_t			npages;
	dma_addr_t			*dma_addr[MAX_GPU_INSTANCE];
	struct ttm_resource		*ttm_res;
	uint64_t			offset;
	struct svm_range_bo		*svm_bo;
	struct list_head		svm_bo_list;
	struct mutex                    lock;
	unsigned int                    saved_flags;
	uint32_t			flags;
	uint32_t			preferred_loc;
	uint32_t			prefetch_loc;
	uint32_t			actual_loc;
	uint8_t				granularity;
	atomic_t			invalid;
	uint64_t			validate_timestamp;
	struct mmu_interval_notifier	notifier;
	struct svm_work_list_item	work_item;
	struct list_head		deferred_list;
	struct list_head		child_list;
	DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
	DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
	bool				validated_once;
};

static inline void svm_range_lock(struct svm_range *prange)
{
	mutex_lock(&prange->lock);
	prange->saved_flags = memalloc_noreclaim_save();
}

static inline void svm_range_unlock(struct svm_range *prange)
{
	memalloc_noreclaim_restore(prange->saved_flags);
	mutex_unlock(&prange->lock);
}

int svm_range_list_init(struct kfd_process *p);
void svm_range_list_fini(struct kfd_process *p);
int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
	      uint64_t size, uint32_t nattrs,
	      struct kfd_ioctl_svm_attribute *attrs);
struct svm_range *svm_range_from_addr(struct svm_range_list *svms,
				      unsigned long addr,
				      struct svm_range **parent);
struct amdgpu_device *svm_range_get_adev_by_id(struct svm_range *prange,
					       uint32_t id);
int svm_range_vram_node_new(struct amdgpu_device *adev,
			    struct svm_range *prange, bool clear);
void svm_range_vram_node_free(struct svm_range *prange);
int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
			       unsigned long addr, struct svm_range *parent,
			       struct svm_range *prange);
int svm_range_restore_pages(struct amdgpu_device *adev,
			    unsigned int pasid, uint64_t addr);
int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence);
void svm_range_add_list_work(struct svm_range_list *svms,
			     struct svm_range *prange, struct mm_struct *mm,
			     enum svm_work_list_ops op);
void schedule_deferred_list_work(struct svm_range_list *svms);
void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
			 unsigned long offset, unsigned long npages);
void svm_range_free_dma_mappings(struct svm_range *prange);
void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm);

#else

struct kfd_process;

static inline int svm_range_list_init(struct kfd_process *p)
{
	return 0;
}
static inline void svm_range_list_fini(struct kfd_process *p)
{
	/* empty */
}

static inline int svm_range_restore_pages(struct amdgpu_device *adev,
					  unsigned int pasid, uint64_t addr)
{
	return -EFAULT;
}

static inline int svm_range_schedule_evict_svm_bo(
		struct amdgpu_amdkfd_fence *fence)
{
	WARN_ONCE(1, "SVM eviction fence triggered, but SVM is disabled");
	return -EINVAL;
}

#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */

#endif /* KFD_SVM_H_ */
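svm_range_lock() above pairs the per-range mutex with memalloc_noreclaim_save(), which sets PF_MEMALLOC so allocations made while the range is locked cannot recurse into direct reclaim (reclaim may need to invalidate the very range being held). A minimal standalone sketch of the same save/restore discipline, assuming only a local mutex; the demo_* names are illustrative:

#include <linux/mutex.h>
#include <linux/sched/mm.h>

/* Sketch: a lock that may be taken from the reclaim path must forbid
 * reclaim while held. */
static DEFINE_MUTEX(demo_lock);
static unsigned int demo_saved_flags;

static void demo_lock_noreclaim(void)
{
	mutex_lock(&demo_lock);
	demo_saved_flags = memalloc_noreclaim_save(); /* sets PF_MEMALLOC */
}

static void demo_unlock_noreclaim(void)
{
	memalloc_noreclaim_restore(demo_saved_flags);
	mutex_unlock(&demo_lock);
}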
@ -1192,40 +1192,60 @@ static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev)
		mem->mem_clk_max = local_mem_info.mem_clk_max;
}

static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev,
					struct kfd_topology_device *target_gpu_dev,
					struct kfd_iolink_properties *link)
{
	/* xgmi always supports atomics between links. */
	if (link->iolink_type == CRAT_IOLINK_TYPE_XGMI)
		return;

	/* check pcie support to set cpu(dev) flags for target_gpu_dev link. */
	if (target_gpu_dev) {
		uint32_t cap;

		pcie_capability_read_dword(target_gpu_dev->gpu->pdev,
				PCI_EXP_DEVCAP2, &cap);

		if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
			     PCI_EXP_DEVCAP2_ATOMIC_COMP64)))
			link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
				CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
	/* set gpu (dev) flags. */
	} else {
		if (!dev->gpu->pci_atomic_requested ||
				dev->gpu->device_info->asic_family ==
							CHIP_HAWAII)
			link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
				CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
	}
}

static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
{
	struct kfd_iolink_properties *link, *cpu_link;
	struct kfd_topology_device *cpu_dev;
	uint32_t cap;
	uint32_t cpu_flag = CRAT_IOLINK_FLAGS_ENABLED;
	uint32_t flag = CRAT_IOLINK_FLAGS_ENABLED;
	struct kfd_iolink_properties *link, *inbound_link;
	struct kfd_topology_device *peer_dev;

	if (!dev || !dev->gpu)
		return;

	pcie_capability_read_dword(dev->gpu->pdev,
			PCI_EXP_DEVCAP2, &cap);

	if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
		     PCI_EXP_DEVCAP2_ATOMIC_COMP64)))
		cpu_flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
			CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;

	if (!dev->gpu->pci_atomic_requested ||
	    dev->gpu->device_info->asic_family == CHIP_HAWAII)
		flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
			CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;

	/* GPU only creates direct links so apply flags setting to all */
	list_for_each_entry(link, &dev->io_link_props, list) {
		link->flags = flag;
		cpu_dev = kfd_topology_device_by_proximity_domain(
		link->flags = CRAT_IOLINK_FLAGS_ENABLED;
		kfd_set_iolink_no_atomics(dev, NULL, link);
		peer_dev = kfd_topology_device_by_proximity_domain(
				link->node_to);
		if (cpu_dev) {
			list_for_each_entry(cpu_link,
					    &cpu_dev->io_link_props, list)
				if (cpu_link->node_to == link->node_from)
					cpu_link->flags = cpu_flag;

		if (!peer_dev)
			continue;

		list_for_each_entry(inbound_link, &peer_dev->io_link_props,
									list) {
			if (inbound_link->node_to != link->node_from)
				continue;

			inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED;
			kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link);
		}
	}
}
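kfd_set_iolink_no_atomics() keys everything off PCI_EXP_DEVCAP2: unless the device advertises 32- and 64-bit atomic-op completion, the NO_ATOMICS flags are set on the link. The capability probe in isolation, as a sketch; the helper name is hypothetical:

#include <linux/pci.h>

/* Sketch: does this device advertise PCIe atomic-op completion?
 * Mirrors the DEVCAP2 test used above. */
static bool pcie_supports_atomic_comp(struct pci_dev *pdev)
{
	u32 cap = 0;

	pcie_capability_read_dword(pdev, PCI_EXP_DEVCAP2, &cap);
	return (cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
		       PCI_EXP_DEVCAP2_ATOMIC_COMP64)) != 0;
}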
@ -1410,15 +1430,21 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
	adev = (struct amdgpu_device *)(dev->gpu->kgd);
	/* kfd only concerns sram ecc on GFX and HBM ecc on UMC */
	dev->node_props.capability |=
		((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
		((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
		HSA_CAP_SRAM_EDCSUPPORTED : 0;
	dev->node_props.capability |= ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
	dev->node_props.capability |= ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
		HSA_CAP_MEM_EDCSUPPORTED : 0;

	if (adev->asic_type != CHIP_VEGA10)
		dev->node_props.capability |= (adev->ras_features != 0) ?
		dev->node_props.capability |= (adev->ras_enabled != 0) ?
			HSA_CAP_RASEVENTNOTIFY : 0;

	/* SVM API and HMM page migration work together, device memory type
	 * is initialized to a non-zero value when page migration registers
	 * device memory.
	 */
	if (adev->kfd.dev->pgmap.type != 0)
		dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED;

	kfd_debug_print_topology();

	if (!res)
@ -53,8 +53,9 @@
#define HSA_CAP_ASIC_REVISION_MASK		0x03c00000
#define HSA_CAP_ASIC_REVISION_SHIFT		22
#define HSA_CAP_SRAM_EDCSUPPORTED		0x04000000
#define HSA_CAP_SVMAPI_SUPPORTED		0x08000000

#define HSA_CAP_RESERVED			0xf80f8000
#define HSA_CAP_RESERVED			0xf00f8000

struct kfd_node_properties {
	uint64_t hive_id;
@ -98,9 +99,10 @@ struct kfd_node_properties
#define HSA_MEM_HEAP_TYPE_GPU_LDS	4
#define HSA_MEM_HEAP_TYPE_GPU_SCRATCH	5

#define HSA_MEM_FLAGS_HOT_PLUGGABLE	0x00000001
#define HSA_MEM_FLAGS_NON_VOLATILE	0x00000002
#define HSA_MEM_FLAGS_RESERVED		0xfffffffc
#define HSA_MEM_FLAGS_HOT_PLUGGABLE		0x00000001
#define HSA_MEM_FLAGS_NON_VOLATILE		0x00000002
#define HSA_MEM_FLAGS_COHERENTHOSTACCESS	0x00000004
#define HSA_MEM_FLAGS_RESERVED			0xfffffff8

struct kfd_mem_properties {
	struct list_head	list;
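Consumers see these capability and memory-flag words through the KFD topology and decode them with plain mask tests. A standalone sketch (sample values only; the mask constants are copied from this header):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define HSA_CAP_SVMAPI_SUPPORTED		0x08000000
#define HSA_MEM_FLAGS_COHERENTHOSTACCESS	0x00000004

/* Sketch: decoding capability/flag words as a topology consumer would. */
int main(void)
{
	uint32_t capability = 0x08000000;	/* sample, not real hardware */
	uint32_t mem_flags = 0x00000004;

	bool svm = capability & HSA_CAP_SVMAPI_SUPPORTED;
	bool coherent = mem_flags & HSA_MEM_FLAGS_COHERENTHOSTACCESS;

	printf("svm api: %d, coherent host access: %d\n", svm, coherent);
	return 0;
}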
@ -35,6 +35,7 @@
#include "dc/inc/hw/abm.h"
#include "dc/dc_dmub_srv.h"
#include "dc/dc_edid_parser.h"
#include "dc/dc_stat.h"
#include "amdgpu_dm_trace.h"

#include "vid.h"
@ -59,6 +60,7 @@

#include "ivsrcid/ivsrcid_vislands30.h"

#include "i2caux_interface.h"
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
@ -618,6 +620,58 @@ static void dm_dcn_vertical_interrupt0_high_irq(void *interrupt_params)
	amdgpu_dm_crtc_handle_crc_window_irq(&acrtc->base);
}
#endif

/**
 * dm_dmub_outbox1_low_irq() - Handles Outbox interrupt
 * @interrupt_params: used for determining the Outbox instance
 *
 * Handles the Outbox interrupt event.
 */
#define DMUB_TRACE_MAX_READ 64
static void dm_dmub_outbox1_low_irq(void *interrupt_params)
{
	struct dmub_notification notify;
	struct common_irq_params *irq_params = interrupt_params;
	struct amdgpu_device *adev = irq_params->adev;
	struct amdgpu_display_manager *dm = &adev->dm;
	struct dmcub_trace_buf_entry entry = { 0 };
	uint32_t count = 0;

	if (dc_enable_dmub_notifications(adev->dm.dc)) {
		if (irq_params->irq_src == DC_IRQ_SOURCE_DMCUB_OUTBOX) {
			do {
				dc_stat_get_dmub_notification(adev->dm.dc, &notify);
			} while (notify.pending_notification);

			if (adev->dm.dmub_notify)
				memcpy(adev->dm.dmub_notify, &notify, sizeof(struct dmub_notification));
			if (notify.type == DMUB_NOTIFICATION_AUX_REPLY)
				complete(&adev->dm.dmub_aux_transfer_done);
			// TODO : HPD Implementation

		} else {
			DRM_ERROR("DM: Failed to receive correct outbox IRQ !");
		}
	}

	do {
		if (dc_dmub_srv_get_dmub_outbox0_msg(dm->dc, &entry)) {
			trace_amdgpu_dmub_trace_high_irq(entry.trace_code, entry.tick_count,
							entry.param0, entry.param1);

			DRM_DEBUG_DRIVER("trace_code:%u, tick_count:%u, param0:%u, param1:%u\n",
				 entry.trace_code, entry.tick_count, entry.param0, entry.param1);
		} else
			break;

		count++;

	} while (count <= DMUB_TRACE_MAX_READ);

	ASSERT(count <= DMUB_TRACE_MAX_READ);
}
#endif
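dm_dmub_outbox1_low_irq() above drains two firmware mailboxes with the same bounded-loop shape: pull entries until a fetch fails, but never more than DMUB_TRACE_MAX_READ per interrupt, so a misbehaving firmware cannot pin the handler. The shape in isolation, runnable as plain C with a stub mailbox:

#include <stdio.h>

#define MAX_READ 64	/* stands in for DMUB_TRACE_MAX_READ */

/* Stub mailbox: pretends five entries are pending. */
static int fetch_entry(int *out)
{
	static int pending = 5;

	if (pending == 0)
		return 0;
	*out = pending--;
	return 1;
}

/* Sketch: drain until empty, with a hard per-invocation bound. */
int main(void)
{
	int entry, count = 0;

	while (count <= MAX_READ && fetch_entry(&entry)) {
		printf("consumed entry %d\n", entry);
		count++;
	}
	return 0;
}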

static int dm_set_clockgating_state(void *handle,
@ -938,32 +992,6 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
}

#if defined(CONFIG_DRM_AMD_DC_DCN)
#define DMUB_TRACE_MAX_READ 64
static void dm_dmub_trace_high_irq(void *interrupt_params)
{
	struct common_irq_params *irq_params = interrupt_params;
	struct amdgpu_device *adev = irq_params->adev;
	struct amdgpu_display_manager *dm = &adev->dm;
	struct dmcub_trace_buf_entry entry = { 0 };
	uint32_t count = 0;

	do {
		if (dc_dmub_srv_get_dmub_outbox0_msg(dm->dc, &entry)) {
			trace_amdgpu_dmub_trace_high_irq(entry.trace_code, entry.tick_count,
							entry.param0, entry.param1);

			DRM_DEBUG_DRIVER("trace_code:%u, tick_count:%u, param0:%u, param1:%u\n",
				 entry.trace_code, entry.tick_count, entry.param0, entry.param1);
		} else
			break;

		count++;

	} while (count <= DMUB_TRACE_MAX_READ);

	ASSERT(count <= DMUB_TRACE_MAX_READ);
}

static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_addr_space_config *pa_config)
{
	uint64_t pt_base;
@ -1220,6 +1248,16 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
	adev->dm.crc_rd_wrk = amdgpu_dm_crtc_secure_display_create_work();
#endif
	if (dc_enable_dmub_notifications(adev->dm.dc)) {
		init_completion(&adev->dm.dmub_aux_transfer_done);
		adev->dm.dmub_notify = kzalloc(sizeof(struct dmub_notification), GFP_KERNEL);
		if (!adev->dm.dmub_notify) {
			DRM_INFO("amdgpu: fail to allocate adev->dm.dmub_notify");
			goto error;
		}
		amdgpu_dm_outbox_init(adev);
	}

	if (amdgpu_dm_initialize_drm_device(adev)) {
		DRM_ERROR(
		"amdgpu: failed to initialize sw for display support.\n");
@ -1293,6 +1331,11 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
		adev->dm.dc->ctx->dmub_srv = NULL;
	}

	if (dc_enable_dmub_notifications(adev->dm.dc)) {
		kfree(adev->dm.dmub_notify);
		adev->dm.dmub_notify = NULL;
	}

	if (adev->dm.dmub_bo)
		amdgpu_bo_free_kernel(&adev->dm.dmub_bo,
				      &adev->dm.dmub_bo_gpu_addr,
@ -2708,8 +2751,7 @@ static void handle_hpd_rx_irq(void *param)
	 * conflict, after implement i2c helper, this mutex should be
	 * retired.
	 */
	if (dc_link->type != dc_connection_mst_branch)
		mutex_lock(&aconnector->hpd_lock);
	mutex_lock(&aconnector->hpd_lock);

	read_hpd_rx_irq_data(dc_link, &hpd_irq_data);

@ -2726,13 +2768,15 @@ static void handle_hpd_rx_irq(void *param)
		}
	}

	mutex_lock(&adev->dm.dc_lock);
	if (!amdgpu_in_reset(adev)) {
		mutex_lock(&adev->dm.dc_lock);
#ifdef CONFIG_DRM_AMD_DC_HDCP
	result = dc_link_handle_hpd_rx_irq(dc_link, &hpd_irq_data, NULL);
#else
	result = dc_link_handle_hpd_rx_irq(dc_link, NULL, NULL);
#endif
	mutex_unlock(&adev->dm.dc_lock);
		mutex_unlock(&adev->dm.dc_lock);
	}

out:
	if (result && !is_mst_root_connector) {
@ -2776,10 +2820,10 @@ static void handle_hpd_rx_irq(void *param)
	}
#endif

	if (dc_link->type != dc_connection_mst_branch) {
	if (dc_link->type != dc_connection_mst_branch)
		drm_dp_cec_irq(&aconnector->dm_dp_aux.aux);
		mutex_unlock(&aconnector->hpd_lock);
	}

	mutex_unlock(&aconnector->hpd_lock);
}

static void register_hpd_handlers(struct amdgpu_device *adev)
@ -3151,28 +3195,6 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)

	}

	if (dc->ctx->dmub_srv) {
		i = DCN_1_0__SRCID__DMCUB_OUTBOX_HIGH_PRIORITY_READY_INT;
		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->dmub_trace_irq);

		if (r) {
			DRM_ERROR("Failed to add dmub trace irq id!\n");
			return r;
		}

		int_params.int_context = INTERRUPT_HIGH_IRQ_CONTEXT;
		int_params.irq_source =
			dc_interrupt_to_irq_source(dc, i, 0);

		c_irq_params = &adev->dm.dmub_trace_params[0];

		c_irq_params->adev = adev;
		c_irq_params->irq_src = int_params.irq_source;

		amdgpu_dm_irq_register_interrupt(adev, &int_params,
				dm_dmub_trace_high_irq, c_irq_params);
	}

	/* HPD */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, DCN_1_0__SRCID__DC_HPD1_INT,
			&adev->hpd_irq);
@ -3185,6 +3207,41 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)

	return 0;
}
/* Register Outbox IRQ sources and initialize IRQ callbacks */
static int register_outbox_irq_handlers(struct amdgpu_device *adev)
{
	struct dc *dc = adev->dm.dc;
	struct common_irq_params *c_irq_params;
	struct dc_interrupt_params int_params = {0};
	int r, i;

	int_params.requested_polarity = INTERRUPT_POLARITY_DEFAULT;
	int_params.current_polarity = INTERRUPT_POLARITY_DEFAULT;

	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, DCN_1_0__SRCID__DMCUB_OUTBOX_LOW_PRIORITY_READY_INT,
			&adev->dmub_outbox_irq);
	if (r) {
		DRM_ERROR("Failed to add outbox irq id!\n");
		return r;
	}

	if (dc->ctx->dmub_srv) {
		i = DCN_1_0__SRCID__DMCUB_OUTBOX_LOW_PRIORITY_READY_INT;
		int_params.int_context = INTERRUPT_LOW_IRQ_CONTEXT;
		int_params.irq_source =
		dc_interrupt_to_irq_source(dc, i, 0);

		c_irq_params = &adev->dm.dmub_outbox_params[0];

		c_irq_params->adev = adev;
		c_irq_params->irq_src = int_params.irq_source;

		amdgpu_dm_irq_register_interrupt(adev, &int_params,
				dm_dmub_outbox1_low_irq, c_irq_params);
	}

	return 0;
}
#endif
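register_outbox_irq_handlers() follows DM's usual registration pattern: a common_irq_params slot is stored alongside the handler and handed back verbatim as interrupt_params at dispatch time. A standalone sketch of that context-cookie idea (every name below is illustrative):

#include <stdio.h>

struct irq_ctx { int irq_src; const char *name; };

typedef void (*irq_handler_t)(void *interrupt_params);

static irq_handler_t registered_handler;
static void *registered_ctx;

/* Sketch: the registry keeps (handler, context) together. */
static void register_handler(irq_handler_t h, void *ctx)
{
	registered_handler = h;
	registered_ctx = ctx;
}

static void outbox_handler(void *interrupt_params)
{
	struct irq_ctx *ctx = interrupt_params; /* same pointer as registered */

	printf("irq %d (%s)\n", ctx->irq_src, ctx->name);
}

int main(void)
{
	static struct irq_ctx outbox = { .irq_src = 42, .name = "dmub_outbox" };

	register_handler(outbox_handler, &outbox);
	registered_handler(registered_ctx);	/* simulate one dispatch */
	return 0;
}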

/*
@ -3414,22 +3471,37 @@ static int amdgpu_dm_backlight_update_status(struct backlight_device *bd)
{
	struct amdgpu_display_manager *dm = bl_get_data(bd);
	struct amdgpu_dm_backlight_caps caps;
	struct dc_link *link = NULL;
	struct dc_link *link[AMDGPU_DM_MAX_NUM_EDP];
	u32 brightness;
	bool rc;
	int i;

	amdgpu_dm_update_backlight_caps(dm);
	caps = dm->backlight_caps;

	link = (struct dc_link *)dm->backlight_link;
	for (i = 0; i < dm->num_of_edps; i++)
		link[i] = (struct dc_link *)dm->backlight_link[i];

	brightness = convert_brightness_from_user(&caps, bd->props.brightness);
	// Change brightness based on AUX property
	if (caps.aux_support)
		rc = dc_link_set_backlight_level_nits(link, true, brightness,
						      AUX_BL_DEFAULT_TRANSITION_TIME_MS);
	else
		rc = dc_link_set_backlight_level(dm->backlight_link, brightness, 0);
	if (caps.aux_support) {
		for (i = 0; i < dm->num_of_edps; i++) {
			rc = dc_link_set_backlight_level_nits(link[i], true, brightness,
				AUX_BL_DEFAULT_TRANSITION_TIME_MS);
			if (!rc) {
				DRM_ERROR("DM: Failed to update backlight via AUX on eDP[%d]\n", i);
				break;
			}
		}
	} else {
		for (i = 0; i < dm->num_of_edps; i++) {
			rc = dc_link_set_backlight_level(dm->backlight_link[i], brightness, 0);
			if (!rc) {
				DRM_ERROR("DM: Failed to update backlight on eDP[%d]\n", i);
				break;
			}
		}
	}

	return rc ? 0 : 1;
}
@ -3443,7 +3515,7 @@ static int amdgpu_dm_backlight_get_brightness(struct backlight_device *bd)
	caps = dm->backlight_caps;

	if (caps.aux_support) {
		struct dc_link *link = (struct dc_link *)dm->backlight_link;
		struct dc_link *link = (struct dc_link *)dm->backlight_link[0];
		u32 avg, peak;
		bool rc;

@ -3452,7 +3524,7 @@ static int amdgpu_dm_backlight_get_brightness(struct backlight_device *bd)
			return bd->props.brightness;
		return convert_brightness_to_user(&caps, avg);
	} else {
		int ret = dc_link_get_backlight_level(dm->backlight_link);
		int ret = dc_link_get_backlight_level(dm->backlight_link[0]);

		if (ret == DC_ERROR_UNEXPECTED)
			return bd->props.brightness;
@ -3549,10 +3621,13 @@ static void register_backlight_device(struct amdgpu_display_manager *dm,
		 * DM initialization because not having a backlight control
		 * is better than a black screen.
		 */
		amdgpu_dm_register_backlight_device(dm);
		if (!dm->backlight_dev)
			amdgpu_dm_register_backlight_device(dm);

		if (dm->backlight_dev)
			dm->backlight_link = link;
		if (dm->backlight_dev) {
			dm->backlight_link[dm->num_of_edps] = link;
			dm->num_of_edps++;
		}
	}
#endif
}
@ -3643,6 +3718,22 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
			goto fail;
		}

#if defined(CONFIG_DRM_AMD_DC_DCN)
	/* Use Outbox interrupt */
	switch (adev->asic_type) {
	case CHIP_SIENNA_CICHLID:
	case CHIP_NAVY_FLOUNDER:
	case CHIP_RENOIR:
		if (register_outbox_irq_handlers(dm->adev)) {
			DRM_ERROR("DM: Failed to initialize IRQ\n");
			goto fail;
		}
		break;
	default:
		DRM_DEBUG_KMS("Unsupported ASIC type for outbox: 0x%X\n", adev->asic_type);
	}
#endif

	/* loops over all connectors on the board */
	for (i = 0; i < link_cnt; i++) {
		struct dc_link *link = NULL;
@ -6560,13 +6651,13 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
{
	struct dc_stream_state *stream = NULL;
	struct drm_connector *connector;
	struct drm_connector_state *new_con_state, *old_con_state;
	struct drm_connector_state *new_con_state;
	struct amdgpu_dm_connector *aconnector;
	struct dm_connector_state *dm_conn_state;
	int i, j, clock, bpp;
	int vcpi, pbn_div, pbn = 0;

	for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) {
	for_each_new_connector_in_state(state, connector, new_con_state, i) {

		aconnector = to_amdgpu_dm_connector(connector);

@ -8164,15 +8255,14 @@ static void amdgpu_dm_handle_vrr_transition(struct dm_crtc_state *old_state,
static void amdgpu_dm_commit_cursors(struct drm_atomic_state *state)
{
	struct drm_plane *plane;
	struct drm_plane_state *old_plane_state, *new_plane_state;
	struct drm_plane_state *old_plane_state;
	int i;

	/*
	 * TODO: Make this per-stream so we don't issue redundant updates for
	 * commits with multiple streams.
	 */
	for_each_oldnew_plane_in_state(state, plane, old_plane_state,
				       new_plane_state, i)
	for_each_old_plane_in_state(state, plane, old_plane_state, i)
		if (plane->type == DRM_PLANE_TYPE_CURSOR)
			handle_cursor_update(plane, old_plane_state);
}
@ -10668,3 +10758,30 @@ uint32_t dm_read_reg_func(const struct dc_context *ctx, uint32_t address,

	return value;
}

int amdgpu_dm_process_dmub_aux_transfer_sync(struct dc_context *ctx, unsigned int linkIndex,
				struct aux_payload *payload, enum aux_return_code_type *operation_result)
{
	struct amdgpu_device *adev = ctx->driver_context;
	int ret = 0;

	dc_process_dmub_aux_transfer_async(ctx->dc, linkIndex, payload);
	ret = wait_for_completion_interruptible_timeout(&adev->dm.dmub_aux_transfer_done, 10*HZ);
	if (ret == 0) {
		*operation_result = AUX_RET_ERROR_TIMEOUT;
		return -1;
	}
	*operation_result = (enum aux_return_code_type)adev->dm.dmub_notify->result;

	if (adev->dm.dmub_notify->result == AUX_RET_SUCCESS) {
		(*payload->reply) = adev->dm.dmub_notify->aux_reply.command;

		// For read case, Copy data to payload
		if (!payload->write && adev->dm.dmub_notify->aux_reply.length &&
		(*payload->reply == AUX_TRANSACTION_REPLY_AUX_ACK))
			memcpy(payload->data, adev->dm.dmub_notify->aux_reply.data,
			adev->dm.dmub_notify->aux_reply.length);
	}

	return adev->dm.dmub_notify->aux_reply.length;
}
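The synchronous path above fires the request and then parks on dmub_aux_transfer_done until dm_dmub_outbox1_low_irq() calls complete(). Note that wait_for_completion_interruptible_timeout() has three outcomes (negative when interrupted by a signal, zero on timeout, positive on completion); a hedged sketch that names all three, with an illustrative 10-second timeout:

#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/jiffies.h>

/* Sketch: three-way handling of an interruptible, timed wait.
 * 'done' would be the completion signalled from the outbox IRQ. */
static int wait_for_reply(struct completion *done)
{
	long ret = wait_for_completion_interruptible_timeout(done, 10 * HZ);

	if (ret < 0)
		return ret;		/* -ERESTARTSYS: interrupted by a signal */
	if (ret == 0)
		return -ETIMEDOUT;	/* firmware never answered */
	return 0;			/* completed; reply data is valid */
}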
@ -46,6 +46,7 @@

#define AMDGPU_DM_MAX_CRTC 6

#define AMDGPU_DM_MAX_NUM_EDP 2
/*
#include "include/amdgpu_dal_power_if.h"
#include "amdgpu_dm_irq.h"
@ -54,6 +55,8 @@
#include "irq_types.h"
#include "signal_types.h"
#include "amdgpu_dm_crc.h"
struct aux_payload;
enum aux_return_code_type;

/* Forward declarations */
struct amdgpu_device;
@ -62,6 +65,7 @@ struct dc;
struct amdgpu_bo;
struct dmub_srv;
struct dc_plane_state;
struct dmub_notification;

struct common_irq_params {
	struct amdgpu_device *adev;
@ -135,6 +139,10 @@ struct amdgpu_dm_backlight_caps {

/**
 * struct dal_allocation - Tracks mapped FB memory for SMU communication
 * @list: list of dal allocations
 * @bo: GPU buffer object
 * @cpu_ptr: CPU virtual address of the GPU buffer object
 * @gpu_addr: GPU virtual address of the GPU buffer object
 */
struct dal_allocation {
	struct list_head list;
@ -164,6 +172,7 @@ struct dal_allocation {
 * @compressor: Frame buffer compression buffer. See &struct dm_compressor_info
 * @force_timing_sync: set via debugfs. When set, indicates that all connected
 *		       displays will be forced to synchronize.
 * @dmcub_trace_event_en: enable dmcub trace events
 */
struct amdgpu_display_manager {

@ -178,6 +187,8 @@ struct amdgpu_display_manager {
	 */
	struct dmub_srv *dmub_srv;

	struct dmub_notification *dmub_notify;

	/**
	 * @dmub_fb_info:
	 *
@ -349,11 +360,17 @@ struct amdgpu_display_manager {
	struct common_irq_params
	dmub_trace_params[1];

	struct common_irq_params
	dmub_outbox_params[1];

	spinlock_t irq_handler_list_table_lock;

	struct backlight_device *backlight_dev;

	const struct dc_link *backlight_link;
	const struct dc_link *backlight_link[AMDGPU_DM_MAX_NUM_EDP];

	uint8_t num_of_edps;

	struct amdgpu_dm_backlight_caps backlight_caps;

	struct mod_freesync *freesync_module;
@ -418,6 +435,7 @@ struct amdgpu_display_manager {
	 * DAL fb memory allocation list, for communication with SMU.
	 */
	struct list_head da_list;
	struct completion dmub_aux_transfer_done;
};

enum dsc_clock_force_state {
@ -600,4 +618,6 @@ void amdgpu_dm_update_connector_after_detect(

extern const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs;

int amdgpu_dm_process_dmub_aux_transfer_sync(struct dc_context *ctx, unsigned int linkIndex,
					struct aux_payload *payload, enum aux_return_code_type *operation_result);
#endif /* __AMDGPU_DM_H__ */
@ -925,6 +925,22 @@ static int hdcp_sink_capability_show(struct seq_file *m, void *data)
	return 0;
}
#endif

/*
 * Returns whether the connected display is internal and not hotpluggable.
 * Example usage: cat /sys/kernel/debug/dri/0/DP-1/internal_display
 */
static int internal_display_show(struct seq_file *m, void *data)
{
	struct drm_connector *connector = m->private;
	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
	struct dc_link *link = aconnector->dc_link;

	seq_printf(m, "Internal: %u\n", link->is_internal_display);

	return 0;
}

/* function description
 *
 * generic SDP message access for testing
@ -2361,6 +2377,44 @@ static ssize_t dp_max_bpc_write(struct file *f, const char __user *buf,
	return size;
}

/*
 * Backlight at this moment.  Read only.
 * As written to display, taking ABM and backlight lut into account.
 * Ranges from 0x0 to 0x10000 (= 100% PWM)
 *
 * Example usage: cat /sys/kernel/debug/dri/0/eDP-1/current_backlight
 */
static int current_backlight_show(struct seq_file *m, void *unused)
{
	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(m->private);
	struct dc_link *link = aconnector->dc_link;
	unsigned int backlight;

	backlight = dc_link_get_backlight_level(link);
	seq_printf(m, "0x%x\n", backlight);

	return 0;
}

/*
 * Backlight value that is being approached.  Read only.
 * As written to display, taking ABM and backlight lut into account.
 * Ranges from 0x0 to 0x10000 (= 100% PWM)
 *
 * Example usage: cat /sys/kernel/debug/dri/0/eDP-1/target_backlight
 */
static int target_backlight_show(struct seq_file *m, void *unused)
{
	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(m->private);
	struct dc_link *link = aconnector->dc_link;
	unsigned int backlight;

	backlight = dc_link_get_target_backlight_pwm(link);
	seq_printf(m, "0x%x\n", backlight);

	return 0;
}

DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support);
DEFINE_SHOW_ATTRIBUTE(dmub_fw_state);
DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer);
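The connector attributes above all use the stock seq_file pattern: a *_show(struct seq_file *, void *) function, wrapped by DEFINE_SHOW_ATTRIBUTE() into *_fops and handed to debugfs_create_file() with the object of interest as m->private. The skeleton of that pattern, sketched with a hypothetical attribute name:

#include <linux/debugfs.h>
#include <linux/seq_file.h>

/* Sketch: minimal read-only debugfs attribute; 'example' is a
 * hypothetical name, m->private is whatever was passed at creation. */
static int example_show(struct seq_file *m, void *data)
{
	seq_printf(m, "value: %d\n", *(int *)m->private);
	return 0;
}
DEFINE_SHOW_ATTRIBUTE(example);		/* generates example_fops */

static int demo_value = 42;

static void example_debugfs_init(struct dentry *dir)
{
	debugfs_create_file("example", 0444, dir, &demo_value, &example_fops);
}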
@ -2369,6 +2423,7 @@ DEFINE_SHOW_ATTRIBUTE(dp_lttpr_status);
#ifdef CONFIG_DRM_AMD_DC_HDCP
DEFINE_SHOW_ATTRIBUTE(hdcp_sink_capability);
#endif
DEFINE_SHOW_ATTRIBUTE(internal_display);

static const struct file_operations dp_dsc_clock_en_debugfs_fops = {
	.owner = THIS_MODULE,
@ -2594,13 +2649,17 @@ DEFINE_DEBUGFS_ATTRIBUTE(dmcub_trace_event_state_fops, dmcub_trace_event_state_g

DEFINE_DEBUGFS_ATTRIBUTE(psr_fops, psr_get, NULL, "%llu\n");

DEFINE_SHOW_ATTRIBUTE(current_backlight);
DEFINE_SHOW_ATTRIBUTE(target_backlight);

static const struct {
	char *name;
	const struct file_operations *fops;
} connector_debugfs_entries[] = {
		{"force_yuv420_output", &force_yuv420_output_fops},
		{"output_bpc", &output_bpc_fops},
		{"trigger_hotplug", &trigger_hotplug_debugfs_fops}
		{"trigger_hotplug", &trigger_hotplug_debugfs_fops},
		{"internal_display", &internal_display_fops}
};

void connector_debugfs_init(struct amdgpu_dm_connector *connector)
@ -2616,8 +2675,13 @@ void connector_debugfs_init(struct amdgpu_dm_connector *connector)
					    dp_debugfs_entries[i].fops);
		}
	}
	if (connector->base.connector_type == DRM_MODE_CONNECTOR_eDP)
	if (connector->base.connector_type == DRM_MODE_CONNECTOR_eDP) {
		debugfs_create_file_unsafe("psr_state", 0444, dir, connector, &psr_fops);
		debugfs_create_file("amdgpu_current_backlight_pwm", 0444, dir, connector,
				    &current_backlight_fops);
		debugfs_create_file("amdgpu_target_backlight_pwm", 0444, dir, connector,
				    &target_backlight_fops);
	}

	for (i = 0; i < ARRAY_SIZE(connector_debugfs_entries); i++) {
		debugfs_create_file(connector_debugfs_entries[i].name,
@ -2920,38 +2984,6 @@ static ssize_t dtn_log_write(
	return size;
}

/*
 * Backlight at this moment.  Read only.
 * As written to display, taking ABM and backlight lut into account.
 * Ranges from 0x0 to 0x10000 (= 100% PWM)
 */
static int current_backlight_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
	struct amdgpu_display_manager *dm = &adev->dm;

	unsigned int backlight = dc_link_get_backlight_level(dm->backlight_link);

	seq_printf(m, "0x%x\n", backlight);
	return 0;
}

/*
 * Backlight value that is being approached.  Read only.
 * As written to display, taking ABM and backlight lut into account.
 * Ranges from 0x0 to 0x10000 (= 100% PWM)
 */
static int target_backlight_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
	struct amdgpu_display_manager *dm = &adev->dm;

	unsigned int backlight = dc_link_get_target_backlight_pwm(dm->backlight_link);

	seq_printf(m, "0x%x\n", backlight);
	return 0;
}

static int mst_topo_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
@ -3134,8 +3166,6 @@ static int visual_confirm_get(void *data, u64 *val)
	return 0;
}

DEFINE_SHOW_ATTRIBUTE(current_backlight);
DEFINE_SHOW_ATTRIBUTE(target_backlight);
DEFINE_SHOW_ATTRIBUTE(mst_topo);
DEFINE_DEBUGFS_ATTRIBUTE(visual_confirm_fops, visual_confirm_get,
			 visual_confirm_set, "%llu\n");
@ -3215,10 +3245,6 @@ void dtn_debugfs_init(struct amdgpu_device *adev)
	struct drm_minor *minor = adev_to_drm(adev)->primary;
	struct dentry *root = minor->debugfs_root;

	debugfs_create_file("amdgpu_current_backlight_pwm", 0444,
			    root, adev, &current_backlight_fops);
	debugfs_create_file("amdgpu_target_backlight_pwm", 0444,
			    root, adev, &target_backlight_fops);
	debugfs_create_file("amdgpu_mst_topology", 0444, root,
			    adev, &mst_topo_fops);
	debugfs_create_file("amdgpu_dm_dtn_log", 0644, root, adev,
@ -544,8 +544,10 @@ bool dm_helpers_dp_write_dsc_enable(
		ret = drm_dp_dpcd_write(aconnector->dsc_aux, DP_DSC_ENABLE, &enable_dsc, 1);
	}

	if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT)
		return dm_helpers_dp_write_dpcd(ctx, stream->link, DP_DSC_ENABLE, &enable_dsc, 1);
	if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT) {
		ret = dm_helpers_dp_write_dpcd(ctx, stream->link, DP_DSC_ENABLE, &enable_dsc, 1);
		DC_LOG_DC("Send DSC %s to sst display\n", enable_dsc ? "enable" : "disable");
	}

	return (ret > 0);
}
@ -640,7 +642,14 @@ enum dc_edid_status dm_helpers_read_local_edid(

	return edid_status;
}

int dm_helper_dmub_aux_transfer_sync(
		struct dc_context *ctx,
		const struct dc_link *link,
		struct aux_payload *payload,
		enum aux_return_code_type *operation_result)
{
	return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload, operation_result);
}
void dm_set_dcn_clocks(struct dc_context *ctx, struct dc_clocks *clks)
{
	/* TODO: something */
@ -698,12 +707,12 @@ void dm_helpers_free_gpu_mem(
	}
}

bool dm_helpers_dmub_outbox0_interrupt_control(struct dc_context *ctx, bool enable)
bool dm_helpers_dmub_outbox_interrupt_control(struct dc_context *ctx, bool enable)
{
	enum dc_irq_source irq_source;
	bool ret;

	irq_source = DC_IRQ_SOURCE_DMCUB_OUTBOX0;
	irq_source = DC_IRQ_SOURCE_DMCUB_OUTBOX;

	ret = dc_interrupt_set(ctx->dc, irq_source, enable);

@ -769,6 +769,18 @@ static int amdgpu_dm_set_vline0_irq_state(struct amdgpu_device *adev,
		__func__);
}

static int amdgpu_dm_set_dmub_outbox_irq_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned int crtc_id,
					enum amdgpu_interrupt_state state)
{
	enum dc_irq_source irq_source = DC_IRQ_SOURCE_DMCUB_OUTBOX;
	bool st = (state == AMDGPU_IRQ_STATE_ENABLE);

	dc_interrupt_set(adev->dm.dc, irq_source, st);
	return 0;
}

static int amdgpu_dm_set_vupdate_irq_state(struct amdgpu_device *adev,
					   struct amdgpu_irq_src *source,
					   unsigned int crtc_id,
@ -805,6 +817,11 @@ static const struct amdgpu_irq_src_funcs dm_vline0_irq_funcs = {
	.process = amdgpu_dm_irq_handler,
};

static const struct amdgpu_irq_src_funcs dm_dmub_outbox_irq_funcs = {
	.set = amdgpu_dm_set_dmub_outbox_irq_state,
	.process = amdgpu_dm_irq_handler,
};

static const struct amdgpu_irq_src_funcs dm_vupdate_irq_funcs = {
	.set = amdgpu_dm_set_vupdate_irq_state,
	.process = amdgpu_dm_irq_handler,
@ -827,13 +844,15 @@ static const struct amdgpu_irq_src_funcs dm_hpd_irq_funcs = {

void amdgpu_dm_set_irq_funcs(struct amdgpu_device *adev)
{

	adev->crtc_irq.num_types = adev->mode_info.num_crtc;
	adev->crtc_irq.funcs = &dm_crtc_irq_funcs;

	adev->vline0_irq.num_types = adev->mode_info.num_crtc;
	adev->vline0_irq.funcs = &dm_vline0_irq_funcs;

	adev->dmub_outbox_irq.num_types = 1;
	adev->dmub_outbox_irq.funcs = &dm_dmub_outbox_irq_funcs;

	adev->vupdate_irq.num_types = adev->mode_info.num_crtc;
	adev->vupdate_irq.funcs = &dm_vupdate_irq_funcs;

@ -846,6 +865,12 @@ void amdgpu_dm_set_irq_funcs(struct amdgpu_device *adev)
	adev->hpd_irq.num_types = adev->mode_info.num_hpd;
	adev->hpd_irq.funcs = &dm_hpd_irq_funcs;
}
void amdgpu_dm_outbox_init(struct amdgpu_device *adev)
{
	dc_interrupt_set(adev->dm.dc,
		DC_IRQ_SOURCE_DMCUB_OUTBOX,
		true);
}

/**
 * amdgpu_dm_hpd_init - hpd setup callback.
@ -82,6 +82,7 @@ void amdgpu_dm_irq_unregister_interrupt(struct amdgpu_device *adev,

void amdgpu_dm_set_irq_funcs(struct amdgpu_device *adev);

void amdgpu_dm_outbox_init(struct amdgpu_device *adev);
void amdgpu_dm_hpd_init(struct amdgpu_device *adev);
void amdgpu_dm_hpd_fini(struct amdgpu_device *adev);

@ -278,6 +278,9 @@ dm_dp_mst_detect(struct drm_connector *connector,
	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
	struct amdgpu_dm_connector *master = aconnector->mst_port;

	if (drm_connector_is_unregistered(connector))
		return connector_status_disconnected;

	return drm_dp_mst_detect_port(connector, ctx, &master->mst_mgr,
				      aconnector->port);
}
@ -54,7 +54,7 @@ AMD_DC = $(addsuffix /Makefile, $(addprefix $(FULL_AMD_DISPLAY_PATH)/dc/,$(DC_LI

include $(AMD_DC)

DISPLAY_CORE = dc.o dc_link.o dc_resource.o dc_hw_sequencer.o dc_sink.o \
DISPLAY_CORE = dc.o  dc_stat.o dc_link.o dc_resource.o dc_hw_sequencer.o dc_sink.o \
dc_surface.o dc_link_hwss.o dc_link_dp.o dc_link_ddc.o dc_debug.o dc_stream.o \
dc_link_enc_cfg.o
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
@@ -836,8 +836,10 @@ static enum bp_result bios_parser_get_spread_spectrum_info(
			return get_ss_info_v4_1(bp, signal, index, ss_info);
		case 2:
+		case 3:
+		case 4:
			return get_ss_info_v4_2(bp, signal, index, ss_info);
		default:
			ASSERT(0);
			break;
		}
		break;
@@ -106,10 +106,10 @@ static void rn_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr,
	for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) {
		int dpp_inst, dppclk_khz, prev_dppclk_khz;

-		/* Loop index will match dpp->inst if resource exists,
-		 * and we want to avoid dependency on dpp object
+		/* Loop index may not match dpp->inst if some pipes disabled,
+		 * so select correct inst from res_pool
		 */
-		dpp_inst = i;
+		dpp_inst = clk_mgr->base.ctx->dc->res_pool->dpps[i]->inst;
		dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz;

		prev_dppclk_khz = clk_mgr->dccg->pipe_dppclk_khz[i];
@@ -128,7 +128,7 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base,
	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
	struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
	struct dc *dc = clk_mgr_base->ctx->dc;
-	int display_count, i;
+	int display_count;
	bool update_dppclk = false;
	bool update_dispclk = false;
	bool dpp_clock_lowered = false;
@@ -210,14 +210,6 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base,
				clk_mgr_base->clks.dppclk_khz,
				safe_to_lower);

-		for (i = 0; i < context->stream_count; i++) {
-			if (context->streams[i]->signal == SIGNAL_TYPE_EDP &&
-				context->streams[i]->apply_seamless_boot_optimization) {
-				dc_wait_for_vblank(dc, context->streams[i]);
-				break;
-			}
-		}
-
		clk_mgr_base->clks.actual_dppclk_khz =
				rn_vbios_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz);

@@ -769,43 +761,6 @@ static struct wm_table ddr4_wm_table_rn = {
	}
};

-static struct wm_table ddr4_1R_wm_table_rn = {
-	.entries = {
-		{
-			.wm_inst = WM_A,
-			.wm_type = WM_TYPE_PSTATE_CHG,
-			.pstate_latency_us = 11.72,
-			.sr_exit_time_us = 13.90,
-			.sr_enter_plus_exit_time_us = 14.80,
-			.valid = true,
-		},
-		{
-			.wm_inst = WM_B,
-			.wm_type = WM_TYPE_PSTATE_CHG,
-			.pstate_latency_us = 11.72,
-			.sr_exit_time_us = 13.90,
-			.sr_enter_plus_exit_time_us = 14.80,
-			.valid = true,
-		},
-		{
-			.wm_inst = WM_C,
-			.wm_type = WM_TYPE_PSTATE_CHG,
-			.pstate_latency_us = 11.72,
-			.sr_exit_time_us = 13.90,
-			.sr_enter_plus_exit_time_us = 14.80,
-			.valid = true,
-		},
-		{
-			.wm_inst = WM_D,
-			.wm_type = WM_TYPE_PSTATE_CHG,
-			.pstate_latency_us = 11.72,
-			.sr_exit_time_us = 13.90,
-			.sr_enter_plus_exit_time_us = 14.80,
-			.valid = true,
-		},
-	}
-};
-
static struct wm_table lpddr4_wm_table_rn = {
	.entries = {
		{
@@ -842,46 +797,67 @@ static struct wm_table lpddr4_wm_table_rn = {
		},
	}
};
-static unsigned int find_socclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage)
+
+static unsigned int find_max_fclk_for_voltage(struct dpm_clocks *clock_table,
+		unsigned int voltage)
+{
+	int i;
+	uint32_t max_clk = 0;
+
+	for (i = 0; i < PP_SMU_NUM_FCLK_DPM_LEVELS; i++) {
+		if (clock_table->FClocks[i].Vol <= voltage) {
+			max_clk = clock_table->FClocks[i].Freq > max_clk ?
+				clock_table->FClocks[i].Freq : max_clk;
+		}
+	}
+
+	return max_clk;
+}
+
+static unsigned int find_max_memclk_for_voltage(struct dpm_clocks *clock_table,
+		unsigned int voltage)
+{
+	int i;
+	uint32_t max_clk = 0;
+
+	for (i = 0; i < PP_SMU_NUM_MEMCLK_DPM_LEVELS; i++) {
+		if (clock_table->MemClocks[i].Vol <= voltage) {
+			max_clk = clock_table->MemClocks[i].Freq > max_clk ?
+				clock_table->MemClocks[i].Freq : max_clk;
+		}
+	}
+
+	return max_clk;
+}
+
+static unsigned int find_max_socclk_for_voltage(struct dpm_clocks *clock_table,
+		unsigned int voltage)
{
	int i;
+	uint32_t max_clk = 0;

	for (i = 0; i < PP_SMU_NUM_SOCCLK_DPM_LEVELS; i++) {
-		if (clock_table->SocClocks[i].Vol == voltage)
-			return clock_table->SocClocks[i].Freq;
+		if (clock_table->SocClocks[i].Vol <= voltage) {
+			max_clk = clock_table->SocClocks[i].Freq > max_clk ?
+				clock_table->SocClocks[i].Freq : max_clk;
+		}
	}

-	ASSERT(0);
-	return 0;
-}
-static unsigned int find_dcfclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage)
-{
-	int i;
-
-	for (i = 0; i < PP_SMU_NUM_DCFCLK_DPM_LEVELS; i++) {
-		if (clock_table->DcfClocks[i].Vol == voltage)
-			return clock_table->DcfClocks[i].Freq;
-	}
-
-	ASSERT(0);
-	return 0;
+	return max_clk;
}

static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params, struct dpm_clocks *clock_table, struct integrated_info *bios_info)
{
	int i, j = 0;
+	unsigned int volt;

	j = -1;

	ASSERT(PP_SMU_NUM_FCLK_DPM_LEVELS <= MAX_NUM_DPM_LVL);

-	/* Find lowest DPM, FCLK is filled in reverse order*/
-
-	for (i = PP_SMU_NUM_FCLK_DPM_LEVELS - 1; i >= 0; i--) {
-		if (clock_table->FClocks[i].Freq != 0 && clock_table->FClocks[i].Vol != 0) {
+	/* Find max DPM */
+	for (i = 0; i < PP_SMU_NUM_DCFCLK_DPM_LEVELS; ++i) {
+		if (clock_table->DcfClocks[i].Freq != 0 &&
+				clock_table->DcfClocks[i].Vol != 0)
			j = i;
-			break;
-		}
	}

	if (j == -1) {
@@ -892,13 +868,18 @@ static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params

	bw_params->clk_table.num_entries = j + 1;

-	for (i = 0; i < bw_params->clk_table.num_entries; i++, j--) {
-		bw_params->clk_table.entries[i].fclk_mhz = clock_table->FClocks[j].Freq;
-		bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemClocks[j].Freq;
-		bw_params->clk_table.entries[i].voltage = clock_table->FClocks[j].Vol;
-		bw_params->clk_table.entries[i].dcfclk_mhz = find_dcfclk_for_voltage(clock_table, clock_table->FClocks[j].Vol);
-		bw_params->clk_table.entries[i].socclk_mhz = find_socclk_for_voltage(clock_table,
-									bw_params->clk_table.entries[i].voltage);
+	for (i = 0; i < bw_params->clk_table.num_entries; i++) {
+		volt = clock_table->DcfClocks[i].Vol;
+
+		bw_params->clk_table.entries[i].voltage = volt;
+		bw_params->clk_table.entries[i].dcfclk_mhz =
+			clock_table->DcfClocks[i].Freq;
+		bw_params->clk_table.entries[i].fclk_mhz =
+			find_max_fclk_for_voltage(clock_table, volt);
+		bw_params->clk_table.entries[i].memclk_mhz =
+			find_max_memclk_for_voltage(clock_table, volt);
+		bw_params->clk_table.entries[i].socclk_mhz =
+			find_max_socclk_for_voltage(clock_table, volt);
	}

	bw_params->vram_type = bios_info->memory_type;
@@ -990,12 +971,8 @@ void rn_clk_mgr_construct(
		} else {
			if (is_green_sardine)
				rn_bw_params.wm_table = ddr4_wm_table_gs;
-			else {
-				if (ctx->dc->config.is_single_rank_dimm)
-					rn_bw_params.wm_table = ddr4_1R_wm_table_rn;
-				else
-					rn_bw_params.wm_table = ddr4_wm_table_rn;
-			}
+			else
+				rn_bw_params.wm_table = ddr4_wm_table_rn;
		}
		/* Saved clocks configured at boot for debug purposes */
		rn_dump_clk_registers(&clk_mgr->base.boot_snapshot, &clk_mgr->base, &log_info);
@@ -1013,9 +990,6 @@ void rn_clk_mgr_construct(
		if (status == PP_SMU_RESULT_OK &&
		    ctx->dc_bios && ctx->dc_bios->integrated_info) {
			rn_clk_mgr_helper_populate_bw_params (clk_mgr->base.bw_params, &clock_table, ctx->dc_bios->integrated_info);
-			/* treat memory config as single channel if memory is asymmetrics. */
-			if (ctx->dc->config.is_asymmetric_memory)
-				clk_mgr->base.bw_params->num_channels = 1;
		}
	}

@@ -59,7 +59,6 @@
#include "dc_link_ddc.h"
#include "dm_helpers.h"
#include "mem_input.h"
#include "hubp.h"

#include "dc_link_dp.h"
#include "dc_dmub_srv.h"
@@ -3219,19 +3218,6 @@ void dc_link_remove_remote_sink(struct dc_link *link, struct dc_sink *sink)
	}
}

-void dc_wait_for_vblank(struct dc *dc, struct dc_stream_state *stream)
-{
-	int i;
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++)
-		if (dc->current_state->res_ctx.pipe_ctx[i].stream == stream) {
-			struct timing_generator *tg =
-				dc->current_state->res_ctx.pipe_ctx[i].stream_res.tg;
-			tg->funcs->wait_for_state(tg, CRTC_STATE_VBLANK);
-			break;
-		}
-}
-
void get_clock_requirements_for_state(struct dc_state *state, struct AsicStateEx *info)
{
	info->displayClock				= (unsigned int)state->bw_ctx.bw.dcn.clk.dispclk_khz;
@@ -3287,7 +3273,7 @@ void dc_allow_idle_optimizations(struct dc *dc, bool allow)
	if (dc->debug.disable_idle_power_optimizations)
		return;

-	if (dc->clk_mgr->funcs->is_smu_present)
+	if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->is_smu_present)
		if (!dc->clk_mgr->funcs->is_smu_present(dc->clk_mgr))
			return;

@@ -48,6 +48,7 @@
#include "dce/dmub_psr.h"
#include "dmub/dmub_srv.h"
#include "inc/hw/panel_cntl.h"
+#include "inc/link_enc_cfg.h"

#define DC_LOGGER_INIT(logger)

@@ -247,6 +248,16 @@ bool dc_link_detect_sink(struct dc_link *link, enum dc_connection_type *type)
		link->dc->hwss.edp_wait_for_hpd_ready(link, true);
	}

+	/* Link may not have physical HPD pin. */
+	if (link->ep_type != DISPLAY_ENDPOINT_PHY) {
+		if (link->hpd_status)
+			*type = dc_connection_single;
+		else
+			*type = dc_connection_none;
+
+		return true;
+	}
+
	/* todo: may need to lock gpio access */
	hpd_pin = get_hpd_gpio(link->ctx->dc_bios, link->link_id,
			       link->ctx->gpio_service);
@@ -432,8 +443,18 @@ bool dc_link_is_dp_sink_present(struct dc_link *link)
static enum signal_type link_detect_sink(struct dc_link *link,
					 enum dc_detect_reason reason)
{
-	enum signal_type result = get_basic_signal_type(link->link_enc->id,
-							link->link_id);
+	enum signal_type result;
+	struct graphics_object_id enc_id;
+
+	if (link->is_dig_mapping_flexible)
+		enc_id = (struct graphics_object_id){.id = ENCODER_ID_UNKNOWN};
+	else
+		enc_id = link->link_enc->id;
+	result = get_basic_signal_type(enc_id, link->link_id);
+
+	/* Use basic signal type for link without physical connector. */
+	if (link->ep_type != DISPLAY_ENDPOINT_PHY)
+		return result;

	/* Internal digital encoder will detect only dongles
	 * that require digital signal
@@ -762,19 +783,20 @@ static bool detect_dp(struct dc_link *link,
		}

		if (link->type != dc_connection_mst_branch &&
-		    is_dp_active_dongle(link)) {
-			/* DP active dongles */
-			link->type = dc_connection_active_dongle;
+		    is_dp_branch_device(link)) {
+			/* DP SST branch */
+			link->type = dc_connection_sst_branch;
			if (!link->dpcd_caps.sink_count.bits.SINK_COUNT) {
				/*
-				 * active dongle unplug processing for short irq
+				 * SST branch unplug processing for short irq
				 */
				link_disconnect_sink(link);
				return true;
			}

-			if (link->dpcd_caps.dongle_type !=
-			    DISPLAY_DONGLE_DP_HDMI_CONVERTER)
+			if (is_dp_active_dongle(link) &&
+				(link->dpcd_caps.dongle_type !=
+					DISPLAY_DONGLE_DP_HDMI_CONVERTER))
				*converter_disable_audio = true;
		}
	} else {
@@ -954,7 +976,8 @@ static bool dc_link_detect_helper(struct dc_link *link,

		case SIGNAL_TYPE_DISPLAY_PORT: {
			/* wa HPD high coming too early*/
-			if (link->link_enc->features.flags.bits.DP_IS_USB_C == 1) {
+			if (link->ep_type == DISPLAY_ENDPOINT_PHY &&
+			    link->link_enc->features.flags.bits.DP_IS_USB_C == 1) {
				/* if alt mode times out, return false */
				if (!wait_for_entering_dp_alt_mode(link))
					return false;
 | 
			
		|||
					   sizeof(struct dpcd_caps)))
 | 
			
		||||
					same_dpcd = false;
 | 
			
		||||
			}
 | 
			
		||||
			/* Active dongle downstream unplug*/
 | 
			
		||||
			if (link->type == dc_connection_active_dongle &&
 | 
			
		||||
			/* Active SST downstream branch device unplug*/
 | 
			
		||||
			if (link->type == dc_connection_sst_branch &&
 | 
			
		||||
			    link->dpcd_caps.sink_count.bits.SINK_COUNT == 0) {
 | 
			
		||||
				if (prev_sink)
 | 
			
		||||
					/* Downstream unplug */
 | 
			
		||||
| 
						 | 
				
			
			@ -1206,14 +1229,25 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
 | 
			
		|||
{
 | 
			
		||||
	const struct dc *dc = link->dc;
 | 
			
		||||
	bool ret;
 | 
			
		||||
	bool can_apply_seamless_boot = false;
 | 
			
		||||
	int i;
 | 
			
		||||
 | 
			
		||||
	for (i = 0; i < dc->current_state->stream_count; i++) {
 | 
			
		||||
		if (dc->current_state->streams[i]->apply_seamless_boot_optimization) {
 | 
			
		||||
			can_apply_seamless_boot = true;
 | 
			
		||||
			break;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/* get out of low power state */
 | 
			
		||||
	clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr);
 | 
			
		||||
	if (!can_apply_seamless_boot && reason != DETECT_REASON_BOOT)
 | 
			
		||||
		clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr);
 | 
			
		||||
 | 
			
		||||
	ret = dc_link_detect_helper(link, reason);
 | 
			
		||||
 | 
			
		||||
	/* Go back to power optimized state */
 | 
			
		||||
	clk_mgr_optimize_pwr_state(dc, dc->clk_mgr);
 | 
			
		||||
	if (!can_apply_seamless_boot && reason != DETECT_REASON_BOOT)
 | 
			
		||||
		clk_mgr_optimize_pwr_state(dc, dc->clk_mgr);
 | 
			
		||||
 | 
			
		||||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
@@ -1716,6 +1750,8 @@ static enum dc_status enable_link_dp(struct dc_state *state,
	bool apply_seamless_boot_optimization = false;
	uint32_t bl_oled_enable_delay = 50; // in ms
	const uint32_t post_oui_delay = 30; // 30ms
+	/* Reduce link bandwidth between failed link training attempts. */
+	bool do_fallback = false;

	// check for seamless boot
	for (i = 0; i < state->stream_count; i++) {
@@ -1754,7 +1790,8 @@ static enum dc_status enable_link_dp(struct dc_state *state,
					       skip_video_pattern,
					       LINK_TRAINING_ATTEMPTS,
					       pipe_ctx,
-					       pipe_ctx->stream->signal)) {
+					       pipe_ctx->stream->signal,
+					       do_fallback)) {
		link->cur_link_settings = link_settings;
		status = DC_OK;
	} else {
@@ -3475,9 +3512,11 @@ uint32_t dc_bandwidth_in_kbps_from_timing(
	uint32_t kbps;

#if defined(CONFIG_DRM_AMD_DC_DCN)
-	if (timing->flags.DSC) {
-		return dc_dsc_stream_bandwidth_in_kbps(timing->pix_clk_100hz, timing->dsc_cfg.bits_per_pixel);
-	}
+	if (timing->flags.DSC)
+		return dc_dsc_stream_bandwidth_in_kbps(timing,
+				timing->dsc_cfg.bits_per_pixel,
+				timing->dsc_cfg.num_slices_h,
+				timing->dsc_cfg.is_dp);
#endif

	switch (timing->display_color_depth) {
@@ -3539,19 +3578,6 @@ void dc_link_set_drive_settings(struct dc *dc,
	dc_link_dp_set_drive_settings(dc->links[i], lt_settings);
}

-void dc_link_perform_link_training(struct dc *dc,
-				   struct dc_link_settings *link_setting,
-				   bool skip_video_pattern)
-{
-	int i;
-
-	for (i = 0; i < dc->link_count; i++)
-		dc_link_dp_perform_link_training(
-			dc->links[i],
-			link_setting,
-			skip_video_pattern);
-}
-
void dc_link_set_preferred_link_settings(struct dc *dc,
					 struct dc_link_settings *link_setting,
					 struct dc_link *link)
@@ -3702,8 +3728,22 @@ void dc_link_overwrite_extended_receiver_cap(

bool dc_link_is_fec_supported(const struct dc_link *link)
{
+	struct link_encoder *link_enc = NULL;
+
+	/* Links supporting dynamically assigned link encoder will be assigned next
+	 * available encoder if one not already assigned.
+	 */
+	if (link->is_dig_mapping_flexible &&
+			link->dc->res_pool->funcs->link_encs_assign) {
+		link_enc = link_enc_cfg_get_link_enc_used_by_link(link->dc->current_state, link);
+		if (link_enc == NULL)
+			link_enc = link_enc_cfg_get_next_avail_link_enc(link->dc, link->dc->current_state);
+	} else
+		link_enc = link->link_enc;
+	ASSERT(link_enc);
+
	return (dc_is_dp_signal(link->connector_signal) &&
-			link->link_enc->features.fec_supported &&
+			link_enc->features.fec_supported &&
			link->dpcd_caps.fec_cap.bits.FEC_CAPABLE &&
			!IS_FPGA_MAXIMUS_DC(link->ctx->dce_environment));
}
@@ -658,7 +658,10 @@ int dc_link_aux_transfer_raw(struct ddc_service *ddc,
		struct aux_payload *payload,
		enum aux_return_code_type *operation_result)
{
-	return dce_aux_transfer_raw(ddc, payload, operation_result);
+	if (dc_enable_dmub_notifications(ddc->ctx->dc))
+		return dce_aux_transfer_dmub_raw(ddc, payload, operation_result);
+	else
+		return dce_aux_transfer_raw(ddc, payload, operation_result);
}

/* dc_link_aux_transfer_with_retries() - Attempt to submit an
@@ -682,6 +685,10 @@ bool dc_link_aux_try_to_configure_timeout(struct ddc_service *ddc,
	bool result = false;
	struct ddc *ddc_pin = ddc->ddc_pin;

+	/* Do not try to access nonexistent DDC pin. */
+	if (ddc->link->ep_type != DISPLAY_ENDPOINT_PHY)
+		return true;
+
	if (ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en]->funcs->configure_timeout) {
		ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en]->funcs->configure_timeout(ddc, timeout);
		result = true;
@@ -14,6 +14,7 @@
#include "dpcd_defs.h"
#include "dc_dmub_srv.h"
#include "dce/dmub_hw_lock_mgr.h"
+#include "inc/link_enc_cfg.h"

/*Travis*/
static const uint8_t DP_VGA_LVDS_CONVERTER_ID_2[] = "sivarT";
@@ -107,10 +108,50 @@ static void wait_for_training_aux_rd_interval(
		wait_in_micro_secs);
}

+static enum dpcd_training_patterns
+	dc_dp_training_pattern_to_dpcd_training_pattern(
+	struct dc_link *link,
+	enum dc_dp_training_pattern pattern)
+{
+	enum dpcd_training_patterns dpcd_tr_pattern =
+	DPCD_TRAINING_PATTERN_VIDEOIDLE;
+
+	switch (pattern) {
+	case DP_TRAINING_PATTERN_SEQUENCE_1:
+		dpcd_tr_pattern = DPCD_TRAINING_PATTERN_1;
+		break;
+	case DP_TRAINING_PATTERN_SEQUENCE_2:
+		dpcd_tr_pattern = DPCD_TRAINING_PATTERN_2;
+		break;
+	case DP_TRAINING_PATTERN_SEQUENCE_3:
+		dpcd_tr_pattern = DPCD_TRAINING_PATTERN_3;
+		break;
+	case DP_TRAINING_PATTERN_SEQUENCE_4:
+		dpcd_tr_pattern = DPCD_TRAINING_PATTERN_4;
+		break;
+	case DP_TRAINING_PATTERN_VIDEOIDLE:
+		dpcd_tr_pattern = DPCD_TRAINING_PATTERN_VIDEOIDLE;
+		break;
+	default:
+		ASSERT(0);
+		DC_LOG_HW_LINK_TRAINING("%s: Invalid HW Training pattern: %d\n",
+			__func__, pattern);
+		break;
+	}
+
+	return dpcd_tr_pattern;
+}
+
static void dpcd_set_training_pattern(
	struct dc_link *link,
-	union dpcd_training_pattern dpcd_pattern)
+	enum dc_dp_training_pattern training_pattern)
{
+	union dpcd_training_pattern dpcd_pattern = { {0} };
+
+	dpcd_pattern.v1_4.TRAINING_PATTERN_SET =
+			dc_dp_training_pattern_to_dpcd_training_pattern(
+					link, training_pattern);
+
	core_link_write_dpcd(
		link,
		DP_TRAINING_PATTERN_SET,
@@ -132,10 +173,22 @@ static enum dc_dp_training_pattern decide_cr_training_pattern(
static enum dc_dp_training_pattern decide_eq_training_pattern(struct dc_link *link,
		const struct dc_link_settings *link_settings)
{
+	struct link_encoder *link_enc;
	enum dc_dp_training_pattern highest_tp = DP_TRAINING_PATTERN_SEQUENCE_2;
-	struct encoder_feature_support *features = &link->link_enc->features;
+	struct encoder_feature_support *features;
	struct dpcd_caps *dpcd_caps = &link->dpcd_caps;

+	/* Access link encoder capability based on whether it is statically
+	 * or dynamically assigned to a link.
+	 */
+	if (link->is_dig_mapping_flexible &&
+			link->dc->res_pool->funcs->link_encs_assign)
+		link_enc = link_enc_cfg_get_link_enc_used_by_link(link->dc->current_state, link);
+	else
+		link_enc = link->link_enc;
+	ASSERT(link_enc);
+	features = &link_enc->features;
+
	if (features->flags.bits.IS_TPS3_CAPABLE)
		highest_tp = DP_TRAINING_PATTERN_SEQUENCE_3;

@@ -227,37 +280,6 @@ static void dpcd_set_link_settings(
	}
}

-static enum dpcd_training_patterns
-	dc_dp_training_pattern_to_dpcd_training_pattern(
-	struct dc_link *link,
-	enum dc_dp_training_pattern pattern)
-{
-	enum dpcd_training_patterns dpcd_tr_pattern =
-	DPCD_TRAINING_PATTERN_VIDEOIDLE;
-
-	switch (pattern) {
-	case DP_TRAINING_PATTERN_SEQUENCE_1:
-		dpcd_tr_pattern = DPCD_TRAINING_PATTERN_1;
-		break;
-	case DP_TRAINING_PATTERN_SEQUENCE_2:
-		dpcd_tr_pattern = DPCD_TRAINING_PATTERN_2;
-		break;
-	case DP_TRAINING_PATTERN_SEQUENCE_3:
-		dpcd_tr_pattern = DPCD_TRAINING_PATTERN_3;
-		break;
-	case DP_TRAINING_PATTERN_SEQUENCE_4:
-		dpcd_tr_pattern = DPCD_TRAINING_PATTERN_4;
-		break;
-	default:
-		ASSERT(0);
-		DC_LOG_HW_LINK_TRAINING("%s: Invalid HW Training pattern: %d\n",
-			__func__, pattern);
-		break;
-	}
-
-	return dpcd_tr_pattern;
-}
-
static uint8_t dc_dp_initialize_scrambling_data_symbols(
	struct dc_link *link,
	enum dc_dp_training_pattern pattern)
@@ -420,20 +442,30 @@ static bool is_cr_done(enum dc_lane_count ln_count,
}

static bool is_ch_eq_done(enum dc_lane_count ln_count,
-	union lane_status *dpcd_lane_status,
-	union lane_align_status_updated *lane_status_updated)
+		union lane_status *dpcd_lane_status)
{
+	bool done = true;
	uint32_t lane;
-	if (!lane_status_updated->bits.INTERLANE_ALIGN_DONE)
-		return false;
-	else {
-		for (lane = 0; lane < (uint32_t)(ln_count); lane++) {
-			if (!dpcd_lane_status[lane].bits.SYMBOL_LOCKED_0 ||
-				!dpcd_lane_status[lane].bits.CHANNEL_EQ_DONE_0)
-				return false;
-		}
-	}
-	return true;
+	for (lane = 0; lane < (uint32_t)(ln_count); lane++)
+		if (!dpcd_lane_status[lane].bits.CHANNEL_EQ_DONE_0)
+			done = false;
+	return done;
+}
+
+static bool is_symbol_locked(enum dc_lane_count ln_count,
+		union lane_status *dpcd_lane_status)
+{
+	bool locked = true;
+	uint32_t lane;
+	for (lane = 0; lane < (uint32_t)(ln_count); lane++)
+		if (!dpcd_lane_status[lane].bits.SYMBOL_LOCKED_0)
+			locked = false;
+	return locked;
+}
+
+static inline bool is_interlane_aligned(union lane_align_status_updated align_status)
+{
+	return align_status.bits.INTERLANE_ALIGN_DONE == 1;
+}

static void update_drive_settings(
			
			@ -835,10 +867,9 @@ static bool perform_post_lt_adj_req_sequence(
 | 
			
		|||
			if (!is_cr_done(lane_count, dpcd_lane_status))
 | 
			
		||||
				return false;
 | 
			
		||||
 | 
			
		||||
			if (!is_ch_eq_done(
 | 
			
		||||
				lane_count,
 | 
			
		||||
				dpcd_lane_status,
 | 
			
		||||
				&dpcd_lane_status_updated))
 | 
			
		||||
			if (!is_ch_eq_done(lane_count, dpcd_lane_status) ||
 | 
			
		||||
					!is_symbol_locked(lane_count, dpcd_lane_status) ||
 | 
			
		||||
					!is_interlane_aligned(dpcd_lane_status_updated))
 | 
			
		||||
				return false;
 | 
			
		||||
 | 
			
		||||
			for (lane = 0; lane < (uint32_t)(lane_count); lane++) {
 | 
			
		||||
| 
						 | 
				
			
@@ -992,9 +1023,9 @@ static enum link_training_result perform_channel_equalization_sequence(
			return LINK_TRAINING_EQ_FAIL_CR;

		/* 6. check CHEQ done*/
-		if (is_ch_eq_done(lane_count,
-			dpcd_lane_status,
-			&dpcd_lane_status_updated))
+		if (is_ch_eq_done(lane_count, dpcd_lane_status) &&
+				is_symbol_locked(lane_count, dpcd_lane_status) &&
+				is_interlane_aligned(dpcd_lane_status_updated))
			return LINK_TRAINING_SUCCESS;

		/* 7. update VS/PE/PC2 in lt_settings*/
@@ -1162,7 +1193,7 @@ static inline enum link_training_result perform_link_training_int(
	return status;
}

-static enum link_training_result check_link_loss_status(
+enum link_training_result dp_check_link_loss_status(
	struct dc_link *link,
	const struct link_training_settings *link_training_setting)
{
@@ -1296,7 +1327,7 @@ static void initialize_training_settings(
		lt_settings->enhanced_framing = 1;
}

-static uint8_t convert_to_count(uint8_t lttpr_repeater_count)
+uint8_t dp_convert_to_count(uint8_t lttpr_repeater_count)
{
	switch (lttpr_repeater_count) {
	case 0x80: // 1 lttpr repeater
@@ -1365,7 +1396,8 @@ static void configure_lttpr_mode_non_transparent(struct dc_link *link)
			link->dpcd_caps.lttpr_caps.mode = repeater_mode;
		}

-		repeater_cnt = convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
+		repeater_cnt = dp_convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
+
		for (repeater_id = repeater_cnt; repeater_id > 0; repeater_id--) {
			aux_interval_address = DP_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER1 +
						((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (repeater_id - 1));
@@ -1555,7 +1587,6 @@ enum link_training_result dc_link_dp_perform_link_training(
{
	enum link_training_result status = LINK_TRAINING_SUCCESS;
	struct link_training_settings lt_settings;
-	union dpcd_training_pattern dpcd_pattern = { { 0 } };

	bool fec_enable;
	uint8_t repeater_cnt;
@@ -1591,7 +1622,7 @@ enum link_training_result dc_link_dp_perform_link_training(
		/* 2. perform link training (set link training done
		 *  to false is done as well)
		 */
-		repeater_cnt = convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
+		repeater_cnt = dp_convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);

		for (repeater_id = repeater_cnt; (repeater_id > 0 && status == LINK_TRAINING_SUCCESS);
				repeater_id--) {
@@ -1621,8 +1652,7 @@ enum link_training_result dc_link_dp_perform_link_training(
	}

	/* 3. set training not in progress*/
-	dpcd_pattern.v1_4.TRAINING_PATTERN_SET = DPCD_TRAINING_PATTERN_VIDEOIDLE;
-	dpcd_set_training_pattern(link, dpcd_pattern);
+	dpcd_set_training_pattern(link, DP_TRAINING_PATTERN_VIDEOIDLE);
	if ((status == LINK_TRAINING_SUCCESS) || !skip_video_pattern) {
		status = perform_link_training_int(link,
				&lt_settings,
@@ -1634,7 +1664,7 @@ enum link_training_result dc_link_dp_perform_link_training(
	 */
	if (link->connector_signal != SIGNAL_TYPE_EDP && status == LINK_TRAINING_SUCCESS) {
		msleep(5);
-		status = check_link_loss_status(link, &lt_settings);
+		status = dp_check_link_loss_status(link, &lt_settings);
	}

	/* 6. print status message*/
@@ -1687,18 +1717,31 @@ bool perform_link_training_with_retries(
	bool skip_video_pattern,
	int attempts,
	struct pipe_ctx *pipe_ctx,
-	enum signal_type signal)
+	enum signal_type signal,
+	bool do_fallback)
{
	uint8_t j;
	uint8_t delay_between_attempts = LINK_TRAINING_RETRY_DELAY;
	struct dc_stream_state *stream = pipe_ctx->stream;
	struct dc_link *link = stream->link;
	enum dp_panel_mode panel_mode;
+	struct link_encoder *link_enc;
+	enum link_training_result status = LINK_TRAINING_CR_FAIL_LANE0;
+	struct dc_link_settings currnet_setting = *link_setting;
+
+	/* Dynamically assigned link encoders associated with stream rather than
+	 * link.
+	 */
+	if (link->dc->res_pool->funcs->link_encs_assign)
+		link_enc = stream->link_enc;
+	else
+		link_enc = link->link_enc;
+	ASSERT(link_enc);

	/* We need to do this before the link training to ensure the idle pattern in SST
	 * mode will be sent right after the link training
	 */
-	link->link_enc->funcs->connect_dig_be_to_fe(link->link_enc,
+	link_enc->funcs->connect_dig_be_to_fe(link_enc,
							pipe_ctx->stream_res.stream_enc->id, true);

	for (j = 0; j < attempts; ++j) {
@@ -1710,7 +1753,7 @@ bool perform_link_training_with_retries(
			link,
			signal,
			pipe_ctx->clock_source->id,
-			link_setting);
+			&currnet_setting);

		if (stream->sink_patches.dppowerup_delay > 0) {
			int delay_dp_power_up_in_ms = stream->sink_patches.dppowerup_delay;
@@ -1725,14 +1768,12 @@ bool perform_link_training_with_retries(
			 panel_mode != DP_PANEL_MODE_DEFAULT);

		if (link->aux_access_disabled) {
-			dc_link_dp_perform_link_training_skip_aux(link, link_setting);
+			dc_link_dp_perform_link_training_skip_aux(link, &currnet_setting);
			return true;
		} else {
-			enum link_training_result status = LINK_TRAINING_CR_FAIL_LANE0;
-
				status = dc_link_dp_perform_link_training(
										link,
-										link_setting,
+										&currnet_setting,
										skip_video_pattern);
			if (status == LINK_TRAINING_SUCCESS)
				return true;
@@ -1740,7 +1781,7 @@ bool perform_link_training_with_retries(

		/* latest link training still fail, skip delay and keep PHY on
		 */
-		if (j == (attempts - 1))
+		if (j == (attempts - 1) && link->ep_type == DISPLAY_ENDPOINT_PHY)
			break;

		DC_LOG_WARNING("%s: Link training attempt %u of %d failed\n",
@@ -1748,6 +1789,19 @@ bool perform_link_training_with_retries(

		dp_disable_link_phy(link, signal);

+		/* Abort link training if failure due to sink being unplugged. */
+		if (status == LINK_TRAINING_ABORT)
+			break;
+		else if (do_fallback) {
+			decide_fallback_link_setting(*link_setting, &currnet_setting, status);
+			/* Fail link training if reduced link bandwidth no longer meets
+			 * stream requirements.
+			 */
+			if (dc_bandwidth_in_kbps_from_timing(&stream->timing) <
+					dc_link_bandwidth_kbps(link, &currnet_setting))
+				break;
+		}
+
		msleep(delay_between_attempts);

		delay_between_attempts += LINK_TRAINING_RETRY_DELAY;
@@ -2429,6 +2483,12 @@ bool dp_validate_mode_timing(

	const struct dc_link_settings *link_setting;

+	/* According to spec, VSC SDP should be used if pixel format is YCbCr420 */
+	if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420 &&
+			!link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED &&
+			dal_graphics_object_id_get_connector_id(link->link_id) != CONNECTOR_ID_VIRTUAL)
+		return false;
+
	/*always DP fail safe mode*/
	if ((timing->pix_clk_100hz / 10) == (uint32_t) 25175 &&
		timing->h_addressable == (uint32_t) 640 &&
@@ -2611,13 +2671,11 @@ static bool allow_hpd_rx_irq(const struct dc_link *link)
	/*
	 * Don't handle RX IRQ unless one of following is met:
	 * 1) The link is established (cur_link_settings != unknown)
-	 * 2) We kicked off MST detection
-	 * 3) We know we're dealing with an active dongle
+	 * 2) We know we're dealing with a branch device, SST or MST
	 */

	if ((link->cur_link_settings.lane_count != LANE_COUNT_UNKNOWN) ||
		(link->type == dc_connection_mst_branch) ||
-		is_dp_active_dongle(link))
+		is_dp_branch_device(link))
		return true;

	return false;
@@ -2917,6 +2975,22 @@ static void dp_test_send_link_test_pattern(struct dc_link *link)
		break;
	}

+	switch (dpcd_test_params.bits.CLR_FORMAT) {
+	case 0:
+		pipe_ctx->stream->timing.pixel_encoding = PIXEL_ENCODING_RGB;
+		break;
+	case 1:
+		pipe_ctx->stream->timing.pixel_encoding = PIXEL_ENCODING_YCBCR422;
+		break;
+	case 2:
+		pipe_ctx->stream->timing.pixel_encoding = PIXEL_ENCODING_YCBCR444;
+		break;
+	default:
+		pipe_ctx->stream->timing.pixel_encoding = PIXEL_ENCODING_RGB;
+		break;
+	}
+
+
	if (requestColorDepth != COLOR_DEPTH_UNDEFINED
			&& pipe_ctx->stream->timing.display_color_depth != requestColorDepth) {
		DC_LOG_DEBUG("%s: original bpc %d, changing to %d\n",
@@ -2924,9 +2998,10 @@ static void dp_test_send_link_test_pattern(struct dc_link *link)
				pipe_ctx->stream->timing.display_color_depth,
				requestColorDepth);
		pipe_ctx->stream->timing.display_color_depth = requestColorDepth;
-		dp_update_dsc_config(pipe_ctx);
	}

+	dp_update_dsc_config(pipe_ctx);
+
	dc_link_dp_set_test_pattern(
			link,
			test_pattern,
@@ -3182,7 +3257,7 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd
			*out_link_loss = true;
	}

-	if (link->type == dc_connection_active_dongle &&
+	if (link->type == dc_connection_sst_branch &&
		hpd_irq_dpcd_data.bytes.sink_cnt.bits.SINK_COUNT
			!= link->dpcd_sink_count)
		status = true;
@@ -3232,6 +3307,12 @@ bool is_mst_supported(struct dc_link *link)
}

bool is_dp_active_dongle(const struct dc_link *link)
{
+	return (link->dpcd_caps.dongle_type >= DISPLAY_DONGLE_DP_VGA_CONVERTER) &&
+				(link->dpcd_caps.dongle_type <= DISPLAY_DONGLE_DP_HDMI_CONVERTER);
+}
+
+bool is_dp_branch_device(const struct dc_link *link)
+{
	return link->dpcd_caps.is_branch_dev;
}
@@ -3593,7 +3674,9 @@ static bool retrieve_link_cap(struct dc_link *link)
				lttpr_dpcd_data[DP_PHY_REPEATER_EXTENDED_WAIT_TIMEOUT -
								DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];

+		/* Attempt to train in LTTPR transparent mode if repeater count exceeds 8. */
		is_lttpr_present = (link->dpcd_caps.lttpr_caps.phy_repeater_cnt > 0 &&
+				link->dpcd_caps.lttpr_caps.phy_repeater_cnt < 0xff &&
				link->dpcd_caps.lttpr_caps.max_lane_count > 0 &&
				link->dpcd_caps.lttpr_caps.max_lane_count <= 4 &&
				link->dpcd_caps.lttpr_caps.revision.raw >= 0x14);
@@ -112,8 +112,8 @@ static void update_link_enc_assignment(

/* Return first available DIG link encoder. */
static enum engine_id find_first_avail_link_enc(
-		struct dc_context *ctx,
-		struct dc_state *state)
+		const struct dc_context *ctx,
+		const struct dc_state *state)
{
	enum engine_id eng_id = ENGINE_ID_UNKNOWN;
	int i;
@@ -270,7 +270,7 @@ struct dc_link *link_enc_cfg_get_link_using_link_enc(

struct link_encoder *link_enc_cfg_get_link_enc_used_by_link(
		struct dc_state *state,
-		struct dc_link *link)
+		const struct dc_link *link)
{
	struct link_encoder *link_enc = NULL;
	struct display_endpoint_id ep_id;
@@ -296,8 +296,20 @@ struct link_encoder *link_enc_cfg_get_link_enc_used_by_link(

	if (stream_idx != -1)
		link_enc = state->streams[stream_idx]->link_enc;
-	else
-		dm_output_to_console("%s: No link encoder used by link(%d).\n", __func__, link->link_index);

	return link_enc;
}

+struct link_encoder *link_enc_cfg_get_next_avail_link_enc(
+	const struct dc *dc,
+	const struct dc_state *state)
+{
+	struct link_encoder *link_enc = NULL;
+	enum engine_id eng_id = ENGINE_ID_UNKNOWN;
+
+	eng_id = find_first_avail_link_enc(dc->ctx, state);
+	if (eng_id != ENGINE_ID_UNKNOWN)
+		link_enc = dc->res_pool->link_encoders[eng_id - ENGINE_ID_DIGA];
+
+	return link_enc;
+}
@@ -384,7 +384,8 @@ void dp_retrain_link_dp_test(struct dc_link *link,
					skip_video_pattern,
					LINK_TRAINING_ATTEMPTS,
					&pipes[i],
-					SIGNAL_TYPE_DISPLAY_PORT);
+					SIGNAL_TYPE_DISPLAY_PORT,
+					false);

			link->dc->hwss.enable_stream(&pipes[i]);
@@ -1706,12 +1706,6 @@ static bool is_timing_changed(struct dc_stream_state *cur_stream,
	if (cur_stream == NULL)
		return true;

-	/* If sink pointer changed, it means this is a hotplug, we should do
-	 * full hw setting.
-	 */
-	if (cur_stream->sink != new_stream->sink)
-		return true;
-
	/* If output color space is changed, need to reprogram info frames */
	if (cur_stream->output_color_space != new_stream->output_color_space)
		return true;
@@ -2679,6 +2673,7 @@ void dc_resource_state_destruct(struct dc_state *context)
		dc_stream_release(context->streams[i]);
		context->streams[i] = NULL;
	}
+	context->stream_count = 0;
}

void dc_resource_state_copy_construct(
@@ -45,7 +45,7 @@
/* forward declaration */
struct aux_payload;

-#define DC_VER "3.2.132"
+#define DC_VER "3.2.135.1"

#define MAX_SURFACES 3
#define MAX_PLANES 6
@@ -308,8 +308,6 @@ struct dc_config {
#endif
	uint64_t vblank_alignment_dto_params;
	uint8_t  vblank_alignment_max_frame_time_diff;
-	bool is_asymmetric_memory;
-	bool is_single_rank_dimm;
};

enum visual_confirm {
@@ -600,7 +598,6 @@
	int min_dcfclk_mhz;
};

struct dc_state;
struct resource_pool;
struct dce_hwseq;
struct gpu_info_soc_bounding_box_v1_0;
@@ -719,7 +716,6 @@ void dc_init_callbacks(struct dc *dc,
void dc_deinit_callbacks(struct dc *dc);
void dc_destroy(struct dc **dc);

-void dc_wait_for_vblank(struct dc *dc, struct dc_stream_state *stream);
/*******************************************************************************
 * Surface Interfaces
 ******************************************************************************/
@@ -180,5 +180,5 @@ bool dc_dmub_srv_get_dmub_outbox0_msg(const struct dc *dc, struct dmcub_trace_bu

void dc_dmub_trace_event_control(struct dc *dc, bool enable)
{
-	dm_helpers_dmub_outbox0_interrupt_control(dc->ctx, enable);
+	dm_helpers_dmub_outbox_interrupt_control(dc->ctx, enable);
}
@@ -95,6 +95,7 @@ enum dc_dp_training_pattern {
	DP_TRAINING_PATTERN_SEQUENCE_2,
	DP_TRAINING_PATTERN_SEQUENCE_3,
	DP_TRAINING_PATTERN_SEQUENCE_4,
+	DP_TRAINING_PATTERN_VIDEOIDLE,
};

struct dc_link_settings {
@@ -78,7 +78,8 @@ bool dc_dsc_compute_config(
		const struct dc_crtc_timing *timing,
		struct dc_dsc_config *dsc_cfg);

-uint32_t dc_dsc_stream_bandwidth_in_kbps(uint32_t pix_clk_100hz, uint32_t bpp_x16);
+uint32_t dc_dsc_stream_bandwidth_in_kbps(const struct dc_crtc_timing *timing,
+		uint32_t bpp_x16, uint32_t num_slices_h, bool is_dp);

void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing,
		uint32_t max_target_bpp_limit_override_x16,
@@ -88,6 +89,6 @@ void dc_dsc_policy_set_max_target_bpp_limit(uint32_t limit);

void dc_dsc_policy_set_enable_dsc_when_not_needed(bool enable);

-uint32_t dc_dsc_stream_bandwidth_in_kbps(uint32_t pix_clk_100hz, uint32_t bpp_x16);
+void dc_dsc_policy_set_disable_dsc_stream_overhead(bool disable);

#endif
Some files were not shown because too many files have changed in this diff.