mirror of https://github.com/torvalds/linux.git
Merge tag 'misc-habanalabs-next-2020-07-24' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next

Oded writes:

This tag contains the following changes for kernel 5.9-rc1:

- Remove rate limiters from GAUDI configuration (no longer needed).
- Set the maximum amount of in-flight CS per ASIC type and increase the
  maximum amount for GAUDI.
- Refactor signal/wait command submissions code.
- Calculate trace frequency from PLLs to show accurate profiling data.
- Rephrase error messages to make them clearer to the common user.
- Add statistics of dropped CS (a counter per possible reason for a drop).
- Get ECC information from firmware.
- Remove support for partial SoC reset in GAUDI.
- Halt device CPU only when reset is certain to happen. Sometimes we abort
  the reset procedure, and in that case we can't leave the device CPU in
  halt mode.
- Set each CQ to its own work queue to prevent a race between completions
  on different CQs.
- Use queue pi/ci to determine queue occupancy. This is done to make the
  code reusable between current and future ASICs.
- Add more validations for user inputs.
- Refactor PCIe controller configuration to make the code reusable between
  current and future ASICs.
- Update firmware interface headers to the latest version.
- Move all common code to a dedicated common sub-folder.

* tag 'misc-habanalabs-next-2020-07-24' of git://people.freedesktop.org/~gabbayo/linux: (28 commits)
  habanalabs: Fix memory leak in error flow of context initialization
  habanalabs: use no flags on MMU cache invalidation
  habanalabs: enable device before hw_init()
  habanalabs: create internal CB pool
  habanalabs: update hl_boot_if.h from firmware
  habanalabs: create common folder
  habanalabs: check for DMA errors when clearing memory
  habanalabs: verify queue can contain all cs jobs
  habanalabs: Assign each CQ with its own work queue
  habanalabs: halt device CPU only upon certain reset
  habanalabs: remove unused hash
  habanalabs: use queue pi/ci in order to determine queue occupancy
  habanalabs: configure maximum queues per asic
  habanalabs: remove soft-reset support from GAUDI
  habanalabs: PCIe iATU refactoring
  habanalabs: Extract ECC information from FW
  habanalabs: Add dropped cs statistics info struct
  habanalabs: extract cpu boot status lookup
  habanalabs: rephrase error messages
  habanalabs: Increase queues depth
  ...
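One recurring idea in the "in-flight CS" items above is worth pinning down before the diffs: the per-context array of pending CS fences is sized to a power of two (validated by the new IS_MAX_PENDING_CS_VALID() macro in habanalabs.h), so a 64-bit sequence number maps to a slot with a cheap mask instead of a modulo. A minimal, self-contained sketch of that scheme; the names and the value 64 are illustrative, not taken from a specific ASIC:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_PENDING_CS	64	/* must be a power of 2 and larger than 1 */

/* Same test as the IS_POWER_OF_2()/IS_MAX_PENDING_CS_VALID() macros below. */
static bool max_pending_cs_valid(uint32_t n)
{
	return n != 0 && (n & (n - 1)) == 0 && n > 1;
}

/*
 * For a power-of-2 length, "seq & (len - 1)" equals "seq % len", so
 * consecutive sequence numbers walk the fence array as a ring buffer,
 * exactly like ctx->cs_pending[seq & (max_pending_cs - 1)] in the diff.
 */
static unsigned int pending_slot(uint64_t seq)
{
	return (unsigned int)(seq & (MAX_PENDING_CS - 1));
}

int main(void)
{
	uint64_t seq;

	if (!max_pending_cs_valid(MAX_PENDING_CS))
		return 1;

	for (seq = 62; seq < 67; seq++)	/* wraps from slot 63 back to 0 */
		printf("seq %llu -> slot %u\n",
		       (unsigned long long)seq, pending_slot(seq));
	return 0;
}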
This commit is contained in:
commit 860e73b49c
34 changed files with 1260 additions and 992 deletions
drivers/misc/habanalabs/Makefile
@@ -3,16 +3,15 @@
 # Makefile for HabanaLabs AI accelerators driver
 #
 
-obj-m	:= habanalabs.o
+obj-$(CONFIG_HABANA_AI) := habanalabs.o
 
-habanalabs-y := habanalabs_drv.o device.o context.o asid.o habanalabs_ioctl.o \
-		command_buffer.o hw_queue.o irq.o sysfs.o hwmon.o memory.o \
-		command_submission.o mmu.o firmware_if.o pci.o
-
-habanalabs-$(CONFIG_DEBUG_FS) += debugfs.o
+include $(src)/common/Makefile
+habanalabs-y += $(HL_COMMON_FILES)
 
 include $(src)/goya/Makefile
 habanalabs-y += $(HL_GOYA_FILES)
 
 include $(src)/gaudi/Makefile
 habanalabs-y += $(HL_GAUDI_FILES)
+
+habanalabs-$(CONFIG_DEBUG_FS) += common/debugfs.o
drivers/misc/habanalabs/common/Makefile (new file, 9 lines)
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
+subdir-ccflags-y += -I$(src)/common
+
+HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
+		common/asid.o common/habanalabs_ioctl.o \
+		common/command_buffer.o common/hw_queue.o common/irq.o \
+		common/sysfs.o common/hwmon.o common/memory.o \
+		common/command_submission.o common/mmu.o common/firmware_if.o \
+		common/pci.o
drivers/misc/habanalabs/common/command_buffer.c
@@ -10,12 +10,18 @@
 
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <linux/genalloc.h>
 
 static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
 {
-	hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size,
-			(void *) (uintptr_t) cb->kernel_address,
-			cb->bus_address);
+	if (cb->is_internal)
+		gen_pool_free(hdev->internal_cb_pool,
+				cb->kernel_address, cb->size);
+	else
+		hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size,
+				(void *) (uintptr_t) cb->kernel_address,
+				cb->bus_address);
 
 	kfree(cb);
 }
@@ -44,9 +50,10 @@ static void cb_release(struct kref *ref)
 }
 
 static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
-					int ctx_id)
+					int ctx_id, bool internal_cb)
 {
 	struct hl_cb *cb;
+	u32 cb_offset;
 	void *p;
 
 	/*
@@ -65,13 +72,25 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
 	if (!cb)
 		return NULL;
 
-	if (ctx_id == HL_KERNEL_ASID_ID)
+	if (internal_cb) {
+		p = (void *) gen_pool_alloc(hdev->internal_cb_pool, cb_size);
+		if (!p) {
+			kfree(cb);
+			return NULL;
+		}
+
+		cb_offset = p - hdev->internal_cb_pool_virt_addr;
+		cb->is_internal = true;
+		cb->bus_address =  hdev->internal_cb_va_base + cb_offset;
+	} else if (ctx_id == HL_KERNEL_ASID_ID) {
 		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
 						&cb->bus_address, GFP_ATOMIC);
-	else
+	} else {
 		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
 						&cb->bus_address,
 						GFP_USER | __GFP_ZERO);
+	}
 
 	if (!p) {
 		dev_err(hdev->dev,
 			"failed to allocate %d of dma memory for CB\n",
@@ -87,7 +106,7 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
 }
 
 int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
-			u32 cb_size, u64 *handle, int ctx_id)
+			u32 cb_size, u64 *handle, int ctx_id, bool internal_cb)
 {
 	struct hl_cb *cb;
 	bool alloc_new_cb = true;
@@ -112,28 +131,30 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
 		goto out_err;
 	}
 
-	/* Minimum allocation must be PAGE SIZE */
-	if (cb_size < PAGE_SIZE)
-		cb_size = PAGE_SIZE;
+	if (!internal_cb) {
+		/* Minimum allocation must be PAGE SIZE */
+		if (cb_size < PAGE_SIZE)
+			cb_size = PAGE_SIZE;
 
-	if (ctx_id == HL_KERNEL_ASID_ID &&
-			cb_size <= hdev->asic_prop.cb_pool_cb_size) {
+		if (ctx_id == HL_KERNEL_ASID_ID &&
+				cb_size <= hdev->asic_prop.cb_pool_cb_size) {
 
-		spin_lock(&hdev->cb_pool_lock);
-		if (!list_empty(&hdev->cb_pool)) {
-			cb = list_first_entry(&hdev->cb_pool, typeof(*cb),
-					pool_list);
-			list_del(&cb->pool_list);
-			spin_unlock(&hdev->cb_pool_lock);
-			alloc_new_cb = false;
-		} else {
-			spin_unlock(&hdev->cb_pool_lock);
-			dev_dbg(hdev->dev, "CB pool is empty\n");
+			spin_lock(&hdev->cb_pool_lock);
+			if (!list_empty(&hdev->cb_pool)) {
+				cb = list_first_entry(&hdev->cb_pool,
+						typeof(*cb), pool_list);
+				list_del(&cb->pool_list);
+				spin_unlock(&hdev->cb_pool_lock);
+				alloc_new_cb = false;
+			} else {
+				spin_unlock(&hdev->cb_pool_lock);
+				dev_dbg(hdev->dev, "CB pool is empty\n");
+			}
 		}
 	}
 
 	if (alloc_new_cb) {
-		cb = hl_cb_alloc(hdev, cb_size, ctx_id);
+		cb = hl_cb_alloc(hdev, cb_size, ctx_id, internal_cb);
 		if (!cb) {
 			rc = -ENOMEM;
 			goto out_err;
@@ -229,8 +250,8 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
 			rc = -EINVAL;
 		} else {
 			rc = hl_cb_create(hdev, &hpriv->cb_mgr,
-						args->in.cb_size, &handle,
-						hpriv->ctx->asid);
+					args->in.cb_size, &handle,
+					hpriv->ctx->asid, false);
 		}
 
 		memset(args, 0, sizeof(*args));
@@ -398,14 +419,15 @@ void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr)
 	idr_destroy(&mgr->cb_handles);
 }
 
-struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size)
+struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
+					bool internal_cb)
 {
 	u64 cb_handle;
 	struct hl_cb *cb;
 	int rc;
 
 	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle,
-			HL_KERNEL_ASID_ID);
+			HL_KERNEL_ASID_ID, internal_cb);
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to allocate CB for the kernel driver %d\n", rc);
@@ -437,7 +459,7 @@ int hl_cb_pool_init(struct hl_device *hdev)
 
 	for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) {
 		cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size,
-				HL_KERNEL_ASID_ID);
+				HL_KERNEL_ASID_ID, false);
 		if (cb) {
 			cb->is_pool = true;
 			list_add(&cb->pool_list, &hdev->cb_pool);
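The internal-CB hunks above draw small command buffers from a genalloc pool instead of issuing a coherent DMA allocation per CB. A hedged sketch of that pattern, reduced to the genalloc calls involved; the order, size, and the pre-allocated backing chunk are placeholders, not the driver's actual values:

#include <linux/errno.h>
#include <linux/genalloc.h>
#include <linux/sizes.h>

#define CB_POOL_ORDER	5	/* allocation granularity: 2^5 = 32 bytes */

static struct gen_pool *cb_pool;

/* 'chunk' is one big coherent buffer of SZ_64K, allocated elsewhere. */
static int cb_pool_init(void *chunk)
{
	cb_pool = gen_pool_create(CB_POOL_ORDER, -1);
	if (!cb_pool)
		return -ENOMEM;

	/* Hand the pre-allocated chunk to the allocator. */
	if (gen_pool_add(cb_pool, (unsigned long)chunk, SZ_64K, -1)) {
		gen_pool_destroy(cb_pool);
		return -ENOMEM;
	}

	return 0;
}

/* Mirrors the alloc/free pairing of hl_cb_alloc()/cb_fini() above. */
static void *cb_alloc(size_t size)
{
	return (void *)gen_pool_alloc(cb_pool, size);
}

static void cb_free(void *p, size_t size)
{
	gen_pool_free(cb_pool, (unsigned long)p, size);
}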
drivers/misc/habanalabs/common/command_submission.c
@@ -246,6 +246,18 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
 	kfree(job);
 }
 
+static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx)
+{
+	hdev->aggregated_cs_counters.device_in_reset_drop_cnt +=
+			ctx->cs_counters.device_in_reset_drop_cnt;
+	hdev->aggregated_cs_counters.out_of_mem_drop_cnt +=
+			ctx->cs_counters.out_of_mem_drop_cnt;
+	hdev->aggregated_cs_counters.parsing_drop_cnt +=
+			ctx->cs_counters.parsing_drop_cnt;
+	hdev->aggregated_cs_counters.queue_full_drop_cnt +=
+			ctx->cs_counters.queue_full_drop_cnt;
+}
+
 static void cs_do_release(struct kref *ref)
 {
 	struct hl_cs *cs = container_of(ref, struct hl_cs,
@@ -349,6 +361,9 @@ static void cs_do_release(struct kref *ref)
 	dma_fence_signal(cs->fence);
 	dma_fence_put(cs->fence);
 
+	cs_counters_aggregate(hdev, cs->ctx);
+
+	kfree(cs->jobs_in_queue_cnt);
 	kfree(cs);
 }
 
@@ -373,9 +388,9 @@ static void cs_timedout(struct work_struct *work)
 	hdev = cs->ctx->hdev;
-	ctx_asid = cs->ctx->asid;
 
 	/* TODO: add information about last signaled seq and last emitted seq */
-	dev_err(hdev->dev, "User %d command submission %llu got stuck!\n",
-		ctx_asid, cs->sequence);
+	dev_err(hdev->dev,
+		"Command submission %llu has not finished in time!\n",
+		cs->sequence);
 
 	cs_put(cs);
 
@@ -418,21 +433,29 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 	spin_lock(&ctx->cs_lock);
 
 	cs_cmpl->cs_seq = ctx->cs_sequence;
-	other = ctx->cs_pending[cs_cmpl->cs_seq & (HL_MAX_PENDING_CS - 1)];
+	other = ctx->cs_pending[cs_cmpl->cs_seq &
+				(hdev->asic_prop.max_pending_cs - 1)];
 	if ((other) && (!dma_fence_is_signaled(other))) {
 		spin_unlock(&ctx->cs_lock);
 		dev_dbg(hdev->dev,
 			"Rejecting CS because of too many in-flights CS\n");
 		rc = -EAGAIN;
 		goto free_fence;
 	}
 
+	cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
+			sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
+	if (!cs->jobs_in_queue_cnt) {
+		rc = -ENOMEM;
+		goto free_fence;
+	}
+
 	dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock,
 			ctx->asid, ctx->cs_sequence);
 
 	cs->sequence = cs_cmpl->cs_seq;
 
-	ctx->cs_pending[cs_cmpl->cs_seq & (HL_MAX_PENDING_CS - 1)] =
+	ctx->cs_pending[cs_cmpl->cs_seq &
+			(hdev->asic_prop.max_pending_cs - 1)] =
 							&cs_cmpl->base_fence;
 	ctx->cs_sequence++;
 
@@ -447,6 +470,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 	return 0;
 
 free_fence:
+	spin_unlock(&ctx->cs_lock);
 	kfree(cs_cmpl);
 free_cs:
 	kfree(cs);
@@ -463,10 +487,12 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
 
 void hl_cs_rollback_all(struct hl_device *hdev)
 {
+	int i;
 	struct hl_cs *cs, *tmp;
 
 	/* flush all completions */
-	flush_workqueue(hdev->cq_wq);
+	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+		flush_workqueue(hdev->cq_wq[i]);
 
 	/* Make sure we don't have leftovers in the H/W queues mirror list */
 	list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list,
@@ -499,10 +525,18 @@ static int validate_queue_index(struct hl_device *hdev,
 	struct asic_fixed_properties *asic = &hdev->asic_prop;
 	struct hw_queue_properties *hw_queue_prop;
 
+	/* This must be checked here to prevent out-of-bounds access to
+	 * hw_queues_props array
+	 */
+	if (chunk->queue_index >= asic->max_queues) {
+		dev_err(hdev->dev, "Queue index %d is invalid\n",
+			chunk->queue_index);
+		return -EINVAL;
+	}
+
 	hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
 
-	if ((chunk->queue_index >= HL_MAX_QUEUES) ||
-			(hw_queue_prop->type == QUEUE_TYPE_NA)) {
+	if (hw_queue_prop->type == QUEUE_TYPE_NA) {
 		dev_err(hdev->dev, "Queue index %d is invalid\n",
 			chunk->queue_index);
 		return -EINVAL;
@@ -630,12 +664,15 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 
 		rc = validate_queue_index(hdev, chunk, &queue_type,
 						&is_kernel_allocated_cb);
-		if (rc)
+		if (rc) {
+			hpriv->ctx->cs_counters.parsing_drop_cnt++;
 			goto free_cs_object;
+		}
 
 		if (is_kernel_allocated_cb) {
 			cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
 			if (!cb) {
+				hpriv->ctx->cs_counters.parsing_drop_cnt++;
 				rc = -EINVAL;
 				goto free_cs_object;
 			}
@@ -649,6 +686,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 		job = hl_cs_allocate_job(hdev, queue_type,
 						is_kernel_allocated_cb);
 		if (!job) {
+			hpriv->ctx->cs_counters.out_of_mem_drop_cnt++;
 			dev_err(hdev->dev, "Failed to allocate a new job\n");
 			rc = -ENOMEM;
 			if (is_kernel_allocated_cb)
@@ -681,6 +719,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 
 		rc = cs_parser(hpriv, job);
 		if (rc) {
+			hpriv->ctx->cs_counters.parsing_drop_cnt++;
 			dev_err(hdev->dev,
 				"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
 				cs->ctx->asid, cs->sequence, job->id, rc);
@@ -689,6 +728,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 	}
 
 	if (int_queues_only) {
+		hpriv->ctx->cs_counters.parsing_drop_cnt++;
 		dev_err(hdev->dev,
 			"Reject CS %d.%llu because only internal queues jobs are present\n",
 			cs->ctx->asid, cs->sequence);
@@ -738,6 +778,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	struct hl_cs_job *job;
 	struct hl_cs *cs;
 	struct hl_cb *cb;
+	enum hl_queue_type q_type;
 	u64 *signal_seq_arr = NULL, signal_seq;
 	u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size;
 	int rc;
@@ -770,9 +811,10 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	chunk = &cs_chunk_array[0];
 	q_idx = chunk->queue_index;
 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
+	q_type = hw_queue_prop->type;
 
-	if ((q_idx >= HL_MAX_QUEUES) ||
-			(hw_queue_prop->type != QUEUE_TYPE_EXT)) {
+	if ((q_idx >= hdev->asic_prop.max_queues) ||
+			(!hw_queue_prop->supports_sync_stream)) {
 		dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx);
 		rc = -EINVAL;
 		goto free_cs_chunk_array;
@@ -869,25 +911,28 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 
 	*cs_seq = cs->sequence;
 
-	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
+	job = hl_cs_allocate_job(hdev, q_type, true);
 	if (!job) {
+		ctx->cs_counters.out_of_mem_drop_cnt++;
 		dev_err(hdev->dev, "Failed to allocate a new job\n");
 		rc = -ENOMEM;
 		goto put_cs;
 	}
 
-	cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
-	if (!cb) {
-		kfree(job);
-		rc = -EFAULT;
-		goto put_cs;
-	}
-
 	if (cs->type == CS_TYPE_WAIT)
 		cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
 	else
 		cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
 
+	cb = hl_cb_kernel_create(hdev, cb_size,
+				q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
+	if (!cb) {
+		ctx->cs_counters.out_of_mem_drop_cnt++;
+		kfree(job);
+		rc = -EFAULT;
+		goto put_cs;
+	}
+
 	job->id = 0;
 	job->cs = cs;
 	job->user_cb = cb;
@@ -1126,7 +1171,7 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev,
 		rc = PTR_ERR(fence);
 		if (rc == -EINVAL)
 			dev_notice_ratelimited(hdev->dev,
-				"Can't wait on seq %llu because current CS is at seq %llu\n",
+				"Can't wait on CS %llu because current CS is at seq %llu\n",
 				seq, ctx->cs_sequence);
 	} else if (fence) {
 		rc = dma_fence_wait_timeout(fence, true, timeout);
@@ -1159,15 +1204,21 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 	memset(args, 0, sizeof(*args));
 
 	if (rc < 0) {
-		dev_err_ratelimited(hdev->dev,
-				"Error %ld on waiting for CS handle %llu\n",
-				rc, seq);
 		if (rc == -ERESTARTSYS) {
+			dev_err_ratelimited(hdev->dev,
+				"user process got signal while waiting for CS handle %llu\n",
+				seq);
 			args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
 			rc = -EINTR;
 		} else if (rc == -ETIMEDOUT) {
+			dev_err_ratelimited(hdev->dev,
+				"CS %llu has timed-out while user process is waiting for it\n",
+				seq);
 			args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
 		} else if (rc == -EIO) {
+			dev_err_ratelimited(hdev->dev,
				"CS %llu has been aborted while user process is waiting for it\n",
+				seq);
 			args->out.status = HL_WAIT_CS_STATUS_ABORTED;
 		}
 		return rc;
drivers/misc/habanalabs/common/context.c
@@ -22,9 +22,11 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
 	 * to this function unless the ref count is 0
 	 */
 
-	for (i = 0 ; i < HL_MAX_PENDING_CS ; i++)
+	for (i = 0 ; i < hdev->asic_prop.max_pending_cs ; i++)
 		dma_fence_put(ctx->cs_pending[i]);
 
+	kfree(ctx->cs_pending);
+
 	if (ctx->asid != HL_KERNEL_ASID_ID) {
 		/* The engines are stopped as there is no executing CS, but the
 		 * Coresight might be still working by accessing addresses
@@ -110,8 +112,7 @@ void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
 		return;
 
 	dev_warn(hdev->dev,
-		"Context %d closed or terminated but its CS are executing\n",
-		ctx->asid);
+		"user process released device but its command submissions are still executing\n");
 }
 
 int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
@@ -126,34 +127,49 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 	spin_lock_init(&ctx->cs_lock);
 	atomic_set(&ctx->thread_ctx_switch_token, 1);
 	ctx->thread_ctx_switch_wait_token = 0;
+	ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
+				sizeof(struct dma_fence *),
+				GFP_KERNEL);
+	if (!ctx->cs_pending)
+		return -ENOMEM;
 
 	if (is_kernel_ctx) {
 		ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
 		rc = hl_mmu_ctx_init(ctx);
 		if (rc) {
 			dev_err(hdev->dev, "Failed to init mmu ctx module\n");
-			goto mem_ctx_err;
+			goto err_free_cs_pending;
 		}
 	} else {
 		ctx->asid = hl_asid_alloc(hdev);
 		if (!ctx->asid) {
 			dev_err(hdev->dev, "No free ASID, failed to create context\n");
-			return -ENOMEM;
+			rc = -ENOMEM;
+			goto err_free_cs_pending;
 		}
 
 		rc = hl_vm_ctx_init(ctx);
 		if (rc) {
 			dev_err(hdev->dev, "Failed to init mem ctx module\n");
 			rc = -ENOMEM;
-			goto mem_ctx_err;
+			goto err_asid_free;
 		}
+
+		rc = hdev->asic_funcs->ctx_init(ctx);
+		if (rc) {
+			dev_err(hdev->dev, "ctx_init failed\n");
+			goto err_vm_ctx_fini;
+		}
 	}
 
 	return 0;
 
-mem_ctx_err:
-	if (ctx->asid != HL_KERNEL_ASID_ID)
-		hl_asid_free(hdev, ctx->asid);
+err_vm_ctx_fini:
+	hl_vm_ctx_fini(ctx);
+err_asid_free:
	hl_asid_free(hdev, ctx->asid);
+err_free_cs_pending:
+	kfree(ctx->cs_pending);
 
 	return rc;
 }
@@ -170,6 +186,7 @@ int hl_ctx_put(struct hl_ctx *ctx)
 
 struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
 {
+	struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop;
 	struct dma_fence *fence;
 
 	spin_lock(&ctx->cs_lock);
@@ -179,13 +196,13 @@ struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
 		return ERR_PTR(-EINVAL);
 	}
 
-	if (seq + HL_MAX_PENDING_CS < ctx->cs_sequence) {
+	if (seq + asic_prop->max_pending_cs < ctx->cs_sequence) {
 		spin_unlock(&ctx->cs_lock);
 		return NULL;
 	}
 
 	fence = dma_fence_get(
-			ctx->cs_pending[seq & (HL_MAX_PENDING_CS - 1)]);
+			ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]);
 	spin_unlock(&ctx->cs_lock);
 
 	return fence;
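The hl_ctx_init() rework above follows the standard kernel unwind idiom: every acquisition gets its own error label, and the labels run in reverse order of acquisition, so a failure at step N releases exactly the resources of steps 1 to N-1 and nothing else. In miniature (alloc_a/alloc_b/alloc_c and the frees are placeholders, not driver functions):

#include <errno.h>

static int alloc_a(void) { return 0; }		/* placeholder acquisitions */
static int alloc_b(void) { return 0; }
static int alloc_c(void) { return -ENOMEM; }	/* pretend step 3 fails */
static void free_a(void) { }
static void free_b(void) { }

static int setup(void)
{
	int rc;

	rc = alloc_a();			/* step 1 */
	if (rc)
		return rc;		/* nothing to unwind yet */

	rc = alloc_b();			/* step 2 */
	if (rc)
		goto err_free_a;

	rc = alloc_c();			/* step 3 */
	if (rc)
		goto err_free_b;	/* unwinds b, then falls into a */

	return 0;

err_free_b:
	free_b();
err_free_a:
	free_a();
	return rc;
}

int main(void)
{
	return setup() ? 1 : 0;
}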
drivers/misc/habanalabs/common/device.c
@@ -249,7 +249,8 @@ static void device_cdev_sysfs_del(struct hl_device *hdev)
  */
 static int device_early_init(struct hl_device *hdev)
 {
-	int rc;
+	int i, rc;
+	char workq_name[32];
 
 	switch (hdev->asic_type) {
 	case ASIC_GOYA:
@@ -274,11 +275,24 @@ static int device_early_init(struct hl_device *hdev)
 	if (rc)
 		goto early_fini;
 
-	hdev->cq_wq = alloc_workqueue("hl-free-jobs", WQ_UNBOUND, 0);
-	if (hdev->cq_wq == NULL) {
-		dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
-		rc = -ENOMEM;
-		goto asid_fini;
+	if (hdev->asic_prop.completion_queues_count) {
+		hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
+				sizeof(*hdev->cq_wq),
+				GFP_ATOMIC);
+		if (!hdev->cq_wq) {
+			rc = -ENOMEM;
+			goto asid_fini;
+		}
+	}
+
+	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
+		snprintf(workq_name, 32, "hl-free-jobs-%u", i);
+		hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
+		if (hdev->cq_wq == NULL) {
+			dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
+			rc = -ENOMEM;
+			goto free_cq_wq;
+		}
 	}
 
 	hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
@@ -321,7 +335,10 @@ static int device_early_init(struct hl_device *hdev)
 free_eq_wq:
 	destroy_workqueue(hdev->eq_wq);
 free_cq_wq:
-	destroy_workqueue(hdev->cq_wq);
+	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+		if (hdev->cq_wq[i])
+			destroy_workqueue(hdev->cq_wq[i]);
+	kfree(hdev->cq_wq);
 asid_fini:
 	hl_asid_fini(hdev);
 early_fini:
@@ -339,6 +356,8 @@ static int device_early_init(struct hl_device *hdev)
  */
 static void device_early_fini(struct hl_device *hdev)
 {
+	int i;
+
 	mutex_destroy(&hdev->mmu_cache_lock);
 	mutex_destroy(&hdev->debug_lock);
 	mutex_destroy(&hdev->send_cpu_message_lock);
@@ -351,7 +370,10 @@ static void device_early_fini(struct hl_device *hdev)
 	kfree(hdev->hl_chip_info);
 
 	destroy_workqueue(hdev->eq_wq);
-	destroy_workqueue(hdev->cq_wq);
+
+	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+		destroy_workqueue(hdev->cq_wq[i]);
+	kfree(hdev->cq_wq);
 
 	hl_asid_fini(hdev);
 
@@ -838,6 +860,22 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
 		if (rc)
 			return 0;
 
+		if (hard_reset) {
+			/* Disable PCI access from device F/W so he won't send
+			 * us additional interrupts. We disable MSI/MSI-X at
+			 * the halt_engines function and we can't have the F/W
+			 * sending us interrupts after that. We need to disable
+			 * the access here because if the device is marked
+			 * disable, the message won't be send. Also, in case
+			 * of heartbeat, the device CPU is marked as disable
+			 * so this message won't be sent
+			 */
+			if (hl_fw_send_pci_access_msg(hdev,
+					ARMCP_PACKET_DISABLE_PCI_ACCESS))
+				dev_warn(hdev->dev,
+					"Failed to disable PCI access by F/W\n");
+		}
+
 		/* This also blocks future CS/VM/JOB completion operations */
 		hdev->disabled = true;
 
@@ -995,6 +1033,12 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
 		}
 	}
 
+	/* Device is now enabled as part of the initialization requires
+	 * communication with the device firmware to get information that
+	 * is required for the initialization itself
+	 */
+	hdev->disabled = false;
+
 	rc = hdev->asic_funcs->hw_init(hdev);
 	if (rc) {
 		dev_err(hdev->dev,
@@ -1002,8 +1046,6 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
 		goto out_err;
 	}
 
-	hdev->disabled = false;
-
 	/* Check that the communication with the device is working */
 	rc = hdev->asic_funcs->test_queues(hdev);
 	if (rc) {
@@ -1144,14 +1186,17 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 	 * because there the addresses of the completion queues are being
 	 * passed as arguments to request_irq
 	 */
-	hdev->completion_queue = kcalloc(cq_cnt,
-						sizeof(*hdev->completion_queue),
-						GFP_KERNEL);
+	if (cq_cnt) {
+		hdev->completion_queue = kcalloc(cq_cnt,
+				sizeof(*hdev->completion_queue),
+				GFP_KERNEL);
 
-	if (!hdev->completion_queue) {
-		dev_err(hdev->dev, "failed to allocate completion queues\n");
-		rc = -ENOMEM;
-		goto hw_queues_destroy;
+		if (!hdev->completion_queue) {
+			dev_err(hdev->dev,
+				"failed to allocate completion queues\n");
+			rc = -ENOMEM;
+			goto hw_queues_destroy;
+		}
 	}
 
 	for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
@@ -1162,6 +1207,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 				"failed to initialize completion queue\n");
 			goto cq_fini;
 		}
+		hdev->completion_queue[i].cq_idx = i;
 	}
 
 	/*
@@ -1219,6 +1265,12 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 	 */
 	add_cdev_sysfs_on_err = true;
 
+	/* Device is now enabled as part of the initialization requires
+	 * communication with the device firmware to get information that
+	 * is required for the initialization itself
+	 */
+	hdev->disabled = false;
+
 	rc = hdev->asic_funcs->hw_init(hdev);
 	if (rc) {
 		dev_err(hdev->dev, "failed to initialize the H/W\n");
@@ -1226,8 +1278,6 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 		goto out_disabled;
 	}
 
-	hdev->disabled = false;
-
 	/* Check that the communication with the device is working */
 	rc = hdev->asic_funcs->test_queues(hdev);
 	if (rc) {
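The per-CQ workqueue change above pairs with the new cq_idx field added to struct hl_cq later in this diff: each completion queue gets its own single-threaded workqueue, so work items of different CQs can run concurrently without racing inside one shared queue, while items of the same CQ stay ordered. A sketch of the dispatch side, assuming the IRQ handler (in irq.c, which is not part of this excerpt) posts roughly like this; post_completion and its call site are illustrative:

/* Hypothetical completion dispatch, modelled on the hunks above. */
static void post_completion(struct hl_device *hdev, struct hl_cq *cq,
			    struct work_struct *finish_work)
{
	/*
	 * cq->cq_idx selects the CQ's private single-threaded workqueue
	 * that device_early_init() created as "hl-free-jobs-%u".
	 */
	queue_work(hdev->cq_wq[cq->cq_idx], finish_work);
}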
drivers/misc/habanalabs/common/firmware_if.c
@@ -6,7 +6,7 @@
  */
 
 #include "habanalabs.h"
-#include "include/hl_boot_if.h"
+#include "include/common/hl_boot_if.h"
 
 #include <linux/firmware.h>
 #include <linux/genalloc.h>
@@ -289,7 +289,7 @@ int hl_fw_armcp_info_get(struct hl_device *hdev)
 					HL_ARMCP_INFO_TIMEOUT_USEC, &result);
 	if (rc) {
 		dev_err(hdev->dev,
-			"Failed to send ArmCP info pkt, error %d\n", rc);
+			"Failed to handle ArmCP info pkt, error %d\n", rc);
 		goto out;
 	}
 
@@ -340,7 +340,7 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
 
 	if (rc) {
 		dev_err(hdev->dev,
-			"Failed to send ArmCP EEPROM packet, error %d\n", rc);
+			"Failed to handle ArmCP EEPROM packet, error %d\n", rc);
 		goto out;
 	}
 
@@ -393,6 +393,53 @@ static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg)
 			"Device boot error - NIC F/W initialization failed\n");
 }
 
+static void hl_detect_cpu_boot_status(struct hl_device *hdev, u32 status)
+{
+	switch (status) {
+	case CPU_BOOT_STATUS_NA:
+		dev_err(hdev->dev,
+			"Device boot error - BTL did NOT run\n");
+		break;
+	case CPU_BOOT_STATUS_IN_WFE:
+		dev_err(hdev->dev,
+			"Device boot error - Stuck inside WFE loop\n");
+		break;
+	case CPU_BOOT_STATUS_IN_BTL:
+		dev_err(hdev->dev,
+			"Device boot error - Stuck in BTL\n");
+		break;
+	case CPU_BOOT_STATUS_IN_PREBOOT:
+		dev_err(hdev->dev,
+			"Device boot error - Stuck in Preboot\n");
+		break;
+	case CPU_BOOT_STATUS_IN_SPL:
+		dev_err(hdev->dev,
+			"Device boot error - Stuck in SPL\n");
+		break;
+	case CPU_BOOT_STATUS_IN_UBOOT:
+		dev_err(hdev->dev,
+			"Device boot error - Stuck in u-boot\n");
+		break;
+	case CPU_BOOT_STATUS_DRAM_INIT_FAIL:
+		dev_err(hdev->dev,
+			"Device boot error - DRAM initialization failed\n");
+		break;
+	case CPU_BOOT_STATUS_UBOOT_NOT_READY:
+		dev_err(hdev->dev,
+			"Device boot error - u-boot stopped by user\n");
+		break;
+	case CPU_BOOT_STATUS_TS_INIT_FAIL:
+		dev_err(hdev->dev,
+			"Device boot error - Thermal Sensor initialization failed\n");
+		break;
+	default:
+		dev_err(hdev->dev,
+			"Device boot error - Invalid status code %d\n",
+			status);
+		break;
+	}
+}
+
 int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 			u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
 			u32 boot_err0_reg, bool skip_bmc,
@@ -466,50 +513,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 	 * versions but we keep them here for backward compatibility
 	 */
 	if (rc) {
-		switch (status) {
-		case CPU_BOOT_STATUS_NA:
-			dev_err(hdev->dev,
-				"Device boot error - BTL did NOT run\n");
-			break;
-		case CPU_BOOT_STATUS_IN_WFE:
-			dev_err(hdev->dev,
-				"Device boot error - Stuck inside WFE loop\n");
-			break;
-		case CPU_BOOT_STATUS_IN_BTL:
-			dev_err(hdev->dev,
-				"Device boot error - Stuck in BTL\n");
-			break;
-		case CPU_BOOT_STATUS_IN_PREBOOT:
-			dev_err(hdev->dev,
-				"Device boot error - Stuck in Preboot\n");
-			break;
-		case CPU_BOOT_STATUS_IN_SPL:
-			dev_err(hdev->dev,
-				"Device boot error - Stuck in SPL\n");
-			break;
-		case CPU_BOOT_STATUS_IN_UBOOT:
-			dev_err(hdev->dev,
-				"Device boot error - Stuck in u-boot\n");
-			break;
-		case CPU_BOOT_STATUS_DRAM_INIT_FAIL:
-			dev_err(hdev->dev,
-				"Device boot error - DRAM initialization failed\n");
-			break;
-		case CPU_BOOT_STATUS_UBOOT_NOT_READY:
-			dev_err(hdev->dev,
-				"Device boot error - u-boot stopped by user\n");
-			break;
-		case CPU_BOOT_STATUS_TS_INIT_FAIL:
-			dev_err(hdev->dev,
-				"Device boot error - Thermal Sensor initialization failed\n");
-			break;
-		default:
-			dev_err(hdev->dev,
-				"Device boot error - Invalid status code %d\n",
-				status);
-			break;
-		}
-
+		hl_detect_cpu_boot_status(hdev, status);
 		rc = -EIO;
 		goto out;
 	}
@@ -569,7 +573,8 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 				"Device reports FIT image is corrupted\n");
 		else
 			dev_err(hdev->dev,
-				"Device failed to load, %d\n", status);
+				"Failed to load firmware to device, %d\n",
+				status);
 
 		rc = -EIO;
 		goto out;
drivers/misc/habanalabs/common/habanalabs.h
@@ -8,8 +8,9 @@
 #ifndef HABANALABSP_H_
 #define HABANALABSP_H_
 
-#include "include/armcp_if.h"
-#include "include/qman_if.h"
+#include "include/common/armcp_if.h"
+#include "include/common/qman_if.h"
 #include <uapi/misc/habanalabs.h>
 
 #include <linux/cdev.h>
 #include <linux/iopoll.h>
@@ -40,11 +41,6 @@
 
 #define HL_SIM_MAX_TIMEOUT_US		10000000 /* 10s */
 
-#define HL_MAX_QUEUES			128
-
-/* MUST BE POWER OF 2 and larger than 1 */
-#define HL_MAX_PENDING_CS		64
-
 #define HL_IDLE_BUSY_TS_ARR_SIZE	4096
 
 /* Memory */
@@ -53,6 +49,10 @@
 /* MMU */
 #define MMU_HASH_TABLE_BITS		7 /* 1 << 7 buckets */
 
+/*
+ * HL_RSVD_SOBS 'sync stream' reserved sync objects per QMAN stream
+ * HL_RSVD_MONS 'sync stream' reserved monitors per QMAN stream
+ */
 #define HL_RSVD_SOBS			4
 #define HL_RSVD_MONS			2
 
@@ -61,6 +61,11 @@
 
 #define HL_MAX_SOB_VAL			(1 << 15)
 
+#define IS_POWER_OF_2(n)		(n != 0 && ((n & (n - 1)) == 0))
+#define IS_MAX_PENDING_CS_VALID(n)	(IS_POWER_OF_2(n) && (n > 1))
+
+#define HL_PCI_NUM_BARS			6
+
 /**
  * struct pgt_info - MMU hop page info.
  * @node: hash linked-list node for the pgts shadow hash of pgts.
@@ -85,6 +90,16 @@ struct pgt_info {
 struct hl_device;
 struct hl_fpriv;
 
+/**
+ * enum hl_pci_match_mode - pci match mode per region
+ * @PCI_ADDRESS_MATCH_MODE: address match mode
+ * @PCI_BAR_MATCH_MODE: bar match mode
+ */
+enum hl_pci_match_mode {
+	PCI_ADDRESS_MATCH_MODE,
+	PCI_BAR_MATCH_MODE
+};
+
 /**
  * enum hl_fw_component - F/W components to read version through registers.
  * @FW_COMP_UBOOT: u-boot.
@@ -120,6 +135,32 @@ enum hl_cs_type {
 	CS_TYPE_WAIT
 };
 
+/*
+ * struct hl_inbound_pci_region - inbound region descriptor
+ * @mode: pci match mode for this region
+ * @addr: region target address
+ * @size: region size in bytes
+ * @offset_in_bar: offset within bar (address match mode)
+ * @bar: bar id
+ */
+struct hl_inbound_pci_region {
+	enum hl_pci_match_mode	mode;
+	u64			addr;
+	u64			size;
+	u64			offset_in_bar;
+	u8			bar;
+};
+
+/*
+ * struct hl_outbound_pci_region - outbound region descriptor
+ * @addr: region target address
+ * @size: region size in bytes
+ */
+struct hl_outbound_pci_region {
+	u64	addr;
+	u64	size;
+};
+
 /*
  * struct hl_hw_sob - H/W SOB info.
  * @hdev: habanalabs device structure.
@@ -141,11 +182,13 @@ struct hl_hw_sob {
  *               false otherwise.
  * @requires_kernel_cb: true if a CB handle must be provided for jobs on this
  *                      queue, false otherwise (a CB address must be provided).
+ * @supports_sync_stream: True if queue supports sync stream
  */
 struct hw_queue_properties {
 	enum hl_queue_type	type;
 	u8			driver_only;
 	u8			requires_kernel_cb;
+	u8			supports_sync_stream;
 };
 
 /**
@@ -241,14 +284,19 @@ struct hl_mmu_properties {
  * @psoc_pci_pll_nf: PCI PLL NF value.
  * @psoc_pci_pll_od: PCI PLL OD value.
  * @psoc_pci_pll_div_factor: PCI PLL DIV FACTOR 1 value.
+ * @psoc_timestamp_frequency: frequency of the psoc timestamp clock.
  * @high_pll: high PLL frequency used by the device.
 * @cb_pool_cb_cnt: number of CBs in the CB pool.
 * @cb_pool_cb_size: size of each CB in the CB pool.
+ * @max_pending_cs: maximum of concurrent pending command submissions
+ * @max_queues: maximum amount of queues in the system
+ * @sync_stream_first_sob: first sync object available for sync stream use
+ * @sync_stream_first_mon: first monitor available for sync stream use
 * @tpc_enabled_mask: which TPCs are enabled.
 * @completion_queues_count: number of completion queues.
 */
 struct asic_fixed_properties {
-	struct hw_queue_properties	hw_queues_props[HL_MAX_QUEUES];
+	struct hw_queue_properties	*hw_queues_props;
 	struct armcp_info		armcp_info;
 	char				uboot_ver[VERSION_MAX_LEN];
 	char				preboot_ver[VERSION_MAX_LEN];
@@ -282,9 +330,14 @@ struct asic_fixed_properties {
 	u32				psoc_pci_pll_nf;
 	u32				psoc_pci_pll_od;
 	u32				psoc_pci_pll_div_factor;
+	u32				psoc_timestamp_frequency;
 	u32				high_pll;
 	u32				cb_pool_cb_cnt;
 	u32				cb_pool_cb_size;
+	u32				max_pending_cs;
+	u32				max_queues;
+	u16				sync_stream_first_sob;
+	u16				sync_stream_first_mon;
 	u8				tpc_enabled_mask;
 	u8				completion_queues_count;
 };
@@ -339,6 +392,7 @@ struct hl_cb_mgr {
 * @ctx_id: holds the ID of the owner's context.
 * @mmap: true if the CB is currently mmaped to user.
 * @is_pool: true if CB was acquired from the pool, false otherwise.
+ * @is_internal: internaly allocated
 */
 struct hl_cb {
 	struct kref		refcount;
@@ -355,6 +409,7 @@ struct hl_cb {
 	u32			ctx_id;
 	u8			mmap;
 	u8			is_pool;
+	u8			is_internal;
 };
 
 
@@ -364,38 +419,19 @@ struct hl_cb {
 
 struct hl_cs_job;
 
-/*
- * Currently, there are two limitations on the maximum length of a queue:
- *
- * 1. The memory footprint of the queue. The current allocated space for the
- *    queue is PAGE_SIZE. Because each entry in the queue is HL_BD_SIZE,
- *    the maximum length of the queue can be PAGE_SIZE / HL_BD_SIZE,
- *    which currently is 4096/16 = 256 entries.
- *
- *    To increase that, we need either to decrease the size of the
- *    BD (difficult), or allocate more than a single page (easier).
- *
- * 2. Because the size of the JOB handle field in the BD CTL / completion queue
- *    is 10-bit, we can have up to 1024 open jobs per hardware queue.
- *    Therefore, each queue can hold up to 1024 entries.
- *
- * HL_QUEUE_LENGTH is in units of struct hl_bd.
- * HL_QUEUE_LENGTH * sizeof(struct hl_bd) should be <= HL_PAGE_SIZE
- */
-
-#define HL_PAGE_SIZE			4096 /* minimum page size */
-/* Must be power of 2 (HL_PAGE_SIZE / HL_BD_SIZE) */
-#define HL_QUEUE_LENGTH			256
+/* Queue length of external and HW queues */
+#define HL_QUEUE_LENGTH			4096
 #define HL_QUEUE_SIZE_IN_BYTES		(HL_QUEUE_LENGTH * HL_BD_SIZE)
 
-/*
- * HL_CQ_LENGTH is in units of struct hl_cq_entry.
- * HL_CQ_LENGTH should be <= HL_PAGE_SIZE
- */
+#if (HL_MAX_JOBS_PER_CS > HL_QUEUE_LENGTH)
+#error "HL_QUEUE_LENGTH must be greater than HL_MAX_JOBS_PER_CS"
+#endif
+
+/* HL_CQ_LENGTH is in units of struct hl_cq_entry */
 #define HL_CQ_LENGTH			HL_QUEUE_LENGTH
 #define HL_CQ_SIZE_IN_BYTES		(HL_CQ_LENGTH * HL_CQ_ENTRY_SIZE)
 
-/* Must be power of 2 (HL_PAGE_SIZE / HL_EQ_ENTRY_SIZE) */
+/* Must be power of 2 */
 #define HL_EQ_LENGTH			64
 #define HL_EQ_SIZE_IN_BYTES		(HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)
 
@@ -422,6 +458,7 @@ struct hl_cs_job;
 *         exist).
 * @curr_sob_offset: the id offset to the currently used SOB from the
 *                   HL_RSVD_SOBS that are being used by this queue.
+ * @supports_sync_stream: True if queue supports sync stream
 */
 struct hl_hw_queue {
 	struct hl_hw_sob	hw_sob[HL_RSVD_SOBS];
@@ -430,7 +467,7 @@ struct hl_hw_queue {
 	u64			kernel_address;
 	dma_addr_t		bus_address;
 	u32			pi;
-	u32			ci;
+	atomic_t		ci;
 	u32			hw_queue_id;
 	u32			cq_id;
 	u32			msi_vec;
@@ -440,6 +477,7 @@ struct hl_hw_queue {
 	u16			base_mon_id;
 	u8			valid;
 	u8			curr_sob_offset;
+	u8			supports_sync_stream;
 };
 
 /**
@@ -447,6 +485,7 @@ struct hl_hw_queue {
 * @hdev: pointer to the device structure
 * @kernel_address: holds the queue's kernel virtual address
 * @bus_address: holds the queue's DMA address
+ * @cq_idx: completion queue index in array
 * @hw_queue_id: the id of the matching H/W queue
 * @ci: ci inside the queue
 * @pi: pi inside the queue
@@ -456,6 +495,7 @@ struct hl_cq {
 	struct hl_device	*hdev;
 	u64			kernel_address;
 	dma_addr_t		bus_address;
+	u32			cq_idx;
 	u32			hw_queue_id;
 	u32			ci;
 	u32			pi;
@@ -519,6 +559,15 @@ enum hl_pll_frequency {
 	PLL_LAST
 };
 
+#define PLL_REF_CLK 50
+
+enum div_select_defs {
+	DIV_SEL_REF_CLK = 0,
+	DIV_SEL_PLL_CLK = 1,
+	DIV_SEL_DIVIDED_REF = 2,
+	DIV_SEL_DIVIDED_PLL = 3,
+};
+
 /**
  * struct hl_asic_funcs - ASIC specific functions that are can be called from
  *                        common code.
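PLL_REF_CLK and div_select_defs above back the "calculate trace frequency from PLLs" item in the changelog: the new psoc_timestamp_frequency property is derived from the 50 MHz reference clock and the PLL's NR/NF/OD divider registers instead of being hard-coded. A hedged sketch of that derivation using the standard integer-PLL relation; the ASIC-side code that reads the registers is not part of this excerpt, so treat the exact rounding as an assumption:

/* freq = ref * (NF+1) / ((NR+1) * (OD+1)), optionally post-divided. MHz. */
static u32 psoc_timestamp_freq(u32 div_sel, u32 nr, u32 nf, u32 od,
			       u32 div_fctr)
{
	u32 pll_clk;

	switch (div_sel) {
	case DIV_SEL_REF_CLK:
		return PLL_REF_CLK;
	case DIV_SEL_DIVIDED_REF:
		return PLL_REF_CLK / (div_fctr + 1);
	case DIV_SEL_PLL_CLK:
	case DIV_SEL_DIVIDED_PLL:
		pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
		if (div_sel == DIV_SEL_PLL_CLK)
			return pll_clk;
		return pll_clk / (div_fctr + 1);
	default:
		return 0;	/* invalid selector; caller should warn */
	}
}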
						 | 
				
			
@@ -596,14 +645,13 @@ enum hl_pll_frequency {
  * @rreg: Read a register. Needed for simulator support.
  * @wreg: Write a register. Needed for simulator support.
  * @halt_coresight: stop the ETF and ETR traces.
  * @ctx_init: context dependent initialization.
  * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz
+ * @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index.
  * @read_device_fw_version: read the device's firmware versions that are
  *                          contained in registers
  * @load_firmware_to_device: load the firmware to the device's memory
  * @load_boot_fit_to_device: load boot fit to device's memory
- * @ext_queue_init: Initialize the given external queue.
- * @ext_queue_reset: Reset the given external queue.
  * @get_signal_cb_size: Get signal CB size.
  * @get_wait_cb_size: Get wait CB size.
  * @gen_signal_cb: Generate a signal CB.
@@ -700,14 +748,13 @@ struct hl_asic_funcs {
 	u32 (*rreg)(struct hl_device *hdev, u32 reg);
 	void (*wreg)(struct hl_device *hdev, u32 reg, u32 val);
 	void (*halt_coresight)(struct hl_device *hdev);
 	int (*ctx_init)(struct hl_ctx *ctx);
 	int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
+	u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);
 	void (*read_device_fw_version)(struct hl_device *hdev,
 					enum hl_fw_component fwc);
 	int (*load_firmware_to_device)(struct hl_device *hdev);
 	int (*load_boot_fit_to_device)(struct hl_device *hdev);
-	void (*ext_queue_init)(struct hl_device *hdev, u32 hw_queue_id);
-	void (*ext_queue_reset)(struct hl_device *hdev, u32 hw_queue_id);
 	u32 (*get_signal_cb_size)(struct hl_device *hdev);
 	u32 (*get_wait_cb_size)(struct hl_device *hdev);
 	void (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id);
@@ -743,7 +790,6 @@ struct hl_va_range {
  * struct hl_ctx - user/kernel context.
  * @mem_hash: holds mapping from virtual address to virtual memory area
  *		descriptor (hl_vm_phys_pg_list or hl_userptr).
- * @mmu_phys_hash: holds a mapping from physical address to pgt_info structure.
  * @mmu_shadow_hash: holds a mapping from shadow address to pgt_info structure.
  * @hpriv: pointer to the private (Kernel Driver) data of the process (fd).
  * @hdev: pointer to the device structure.
@@ -777,18 +823,18 @@ struct hl_va_range {
  */
 struct hl_ctx {
 	DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS);
-	DECLARE_HASHTABLE(mmu_phys_hash, MMU_HASH_TABLE_BITS);
 	DECLARE_HASHTABLE(mmu_shadow_hash, MMU_HASH_TABLE_BITS);
 	struct hl_fpriv		*hpriv;
 	struct hl_device	*hdev;
 	struct kref		refcount;
-	struct dma_fence	*cs_pending[HL_MAX_PENDING_CS];
+	struct dma_fence	**cs_pending;
 	struct hl_va_range	*host_va_range;
 	struct hl_va_range	*host_huge_va_range;
 	struct hl_va_range	*dram_va_range;
 	struct mutex		mem_hash_lock;
 	struct mutex		mmu_lock;
 	struct list_head	debugfs_list;
+	struct hl_cs_counters	cs_counters;
 	u64			cs_sequence;
 	u64			*dram_default_hops;
 	spinlock_t		cs_lock;
@@ -863,7 +909,7 @@ struct hl_userptr {
  * @aborted: true if CS was aborted due to some device error.
  */
 struct hl_cs {
-	u16			jobs_in_queue_cnt[HL_MAX_QUEUES];
+	u16			*jobs_in_queue_cnt;
 	struct hl_ctx		*ctx;
 	struct list_head	job_list;
 	spinlock_t		job_lock;
@@ -1347,7 +1393,9 @@ struct hl_device_idle_busy_ts {
 /**
  * struct hl_device - habanalabs device structure.
  * @pdev: pointer to PCI device, can be NULL in case of simulator device.
- * @pcie_bar: array of available PCIe bars.
+ * @pcie_bar_phys: array of available PCIe bars physical addresses.
+ *		   (required only for PCI address match mode)
+ * @pcie_bar: array of available PCIe bars virtual addresses.
  * @rmmio: configuration area address on SRAM.
  * @cdev: related char device.
  * @cdev_ctrl: char device for control operations only (INFO IOCTL)
@@ -1358,7 +1406,8 @@ struct hl_device_idle_busy_ts {
  * @asic_name: ASIC specific nmae.
  * @asic_type: ASIC specific type.
  * @completion_queue: array of hl_cq.
- * @cq_wq: work queue of completion queues for executing work in process context
+ * @cq_wq: work queues of completion queues for executing work in process
+ *         context.
  * @eq_wq: work queue of event queue for executing work in process context.
  * @kernel_ctx: Kernel driver context structure.
  * @kernel_queues: array of hl_hw_queue.
@@ -1387,12 +1436,17 @@ struct hl_device_idle_busy_ts {
  * @hl_debugfs: device's debugfs manager.
  * @cb_pool: list of preallocated CBs.
  * @cb_pool_lock: protects the CB pool.
+ * @internal_cb_pool_virt_addr: internal command buffer pool virtual address.
+ * @internal_cb_pool_dma_addr: internal command buffer pool dma address.
+ * @internal_cb_pool: internal command buffer memory pool.
+ * @internal_cb_va_base: internal cb pool mmu virtual address base
  * @fpriv_list: list of file private data structures. Each structure is created
  *              when a user opens the device
  * @fpriv_list_lock: protects the fpriv_list
  * @compute_ctx: current compute context executing.
  * @idle_busy_ts_arr: array to hold time stamps of transitions from idle to busy
  *                    and vice-versa
+ * @aggregated_cs_counters: aggregated cs counters among all contexts
  * @dram_used_mem: current DRAM memory consumption.
  * @timeout_jiffies: device CS timeout value.
  * @max_power: the max power of the device, as configured by the sysadmin. This
@@ -1435,12 +1489,14 @@ struct hl_device_idle_busy_ts {
  * @cdev_sysfs_created: were char devices and sysfs nodes created.
  * @stop_on_err: true if engines should stop on error.
  * @supports_sync_stream: is sync stream supported.
+ * @sync_stream_queue_idx: helper index for sync stream queues initialization.
  * @supports_coresight: is CoreSight supported.
  * @supports_soft_reset: is soft reset supported.
  */
 struct hl_device {
 	struct pci_dev			*pdev;
-	void __iomem			*pcie_bar[6];
+	u64				pcie_bar_phys[HL_PCI_NUM_BARS];
+	void __iomem			*pcie_bar[HL_PCI_NUM_BARS];
 	void __iomem			*rmmio;
 	struct cdev			cdev;
 	struct cdev			cdev_ctrl;
@@ -1451,7 +1507,7 @@ struct hl_device {
 	char				asic_name[16];
 	enum hl_asic_type		asic_type;
 	struct hl_cq			*completion_queue;
-	struct workqueue_struct		*cq_wq;
+	struct workqueue_struct		**cq_wq;
 	struct workqueue_struct		*eq_wq;
 	struct hl_ctx			*kernel_ctx;
 	struct hl_hw_queue		*kernel_queues;
@@ -1483,6 +1539,11 @@ struct hl_device {
 	struct list_head		cb_pool;
 	spinlock_t			cb_pool_lock;
 
+	void				*internal_cb_pool_virt_addr;
+	dma_addr_t			internal_cb_pool_dma_addr;
+	struct gen_pool			*internal_cb_pool;
+	u64				internal_cb_va_base;
+
 	struct list_head		fpriv_list;
 	struct mutex			fpriv_list_lock;
 
@@ -1490,6 +1551,8 @@ struct hl_device {
 
 	struct hl_device_idle_busy_ts	*idle_busy_ts_arr;
 
+	struct hl_cs_counters		aggregated_cs_counters;
+
 	atomic64_t			dram_used_mem;
 	u64				timeout_jiffies;
 	u64				max_power;
@@ -1522,6 +1585,7 @@ struct hl_device {
 	u8				cdev_sysfs_created;
 	u8				stop_on_err;
 	u8				supports_sync_stream;
+	u8				sync_stream_queue_idx;
 	u8				supports_coresight;
 	u8				supports_soft_reset;
 
@@ -1690,7 +1754,7 @@ int hl_hwmon_init(struct hl_device *hdev);
 void hl_hwmon_fini(struct hl_device *hdev);
 
 int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, u32 cb_size,
-		u64 *handle, int ctx_id);
+		u64 *handle, int ctx_id, bool internal_cb);
 int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);
 int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
 struct hl_cb *hl_cb_get(struct hl_device *hdev,	struct hl_cb_mgr *mgr,
@@ -1698,7 +1762,8 @@ struct hl_cb *hl_cb_get(struct hl_device *hdev,	struct hl_cb_mgr *mgr,
 void hl_cb_put(struct hl_cb *cb);
 void hl_cb_mgr_init(struct hl_cb_mgr *mgr);
 void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr);
-struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size);
+struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
+					bool internal_cb);
 int hl_cb_pool_init(struct hl_device *hdev);
 int hl_cb_pool_fini(struct hl_device *hdev);
 
@@ -1762,9 +1827,10 @@ int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
 int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data);
-int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
-				u64 addr);
-int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
-			u64 dram_base_address, u64 host_phys_base_address,
-			u64 host_phys_size);
+int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
+		struct hl_inbound_pci_region *pci_region);
+int hl_pci_set_outbound_region(struct hl_device *hdev,
+		struct hl_outbound_pci_region *pci_region);
 int hl_pci_init(struct hl_device *hdev);
 void hl_pci_fini(struct hl_device *hdev);
 
@@ -238,7 +238,6 @@ static void set_driver_behavior_per_device(struct hl_device *hdev)
 	hdev->axi_drain = 0;
 	hdev->sram_scrambler_enable = 1;
 	hdev->dram_scrambler_enable = 1;
-	hdev->rl_enable = 1;
 	hdev->bmc_enable = 1;
 	hdev->hard_reset_on_fw_events = 1;
 }
@@ -276,6 +276,27 @@ static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args)
 		min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0;
 }
 
+static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+	struct hl_device *hdev = hpriv->hdev;
+	struct hl_info_cs_counters cs_counters = {0};
+	u32 max_size = args->return_size;
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	memcpy(&cs_counters.cs_counters, &hdev->aggregated_cs_counters,
+			sizeof(struct hl_cs_counters));
+
+	if (hpriv->ctx)
+		memcpy(&cs_counters.ctx_cs_counters, &hpriv->ctx->cs_counters,
+				sizeof(struct hl_cs_counters));
+
+	return copy_to_user(out, &cs_counters,
+		min((size_t) max_size, sizeof(cs_counters))) ? -EFAULT : 0;
+}
+
 static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 				struct device *dev)
 {
@@ -336,6 +357,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	case HL_INFO_TIME_SYNC:
 		return time_sync_info(hdev, args);
 
+	case HL_INFO_CS_COUNTERS:
+		return cs_counters_info(hpriv, args);
+
 	default:
 		dev_err(dev, "Invalid request %d\n", args->op);
 		rc = -ENOTTY;
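For reference, a minimal user-space sketch of how the new HL_INFO_CS_COUNTERS opcode might be queried. The opcode, HL_IOCTL_INFO and the structures come from the uapi header; the helper name and the omitted error handling are illustrative only, not part of this series:

#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>

#include <misc/habanalabs.h>	/* uapi definitions */

static int read_cs_counters(int fd, struct hl_info_cs_counters *counters)
{
	struct hl_info_args args;

	memset(&args, 0, sizeof(args));
	args.op = HL_INFO_CS_COUNTERS;
	args.return_pointer = (uint64_t) (uintptr_t) counters;
	args.return_size = sizeof(*counters);

	/* fills in both the aggregated and the per-context (open fd) counters */
	return ioctl(fd, HL_IOCTL_INFO, &args);
}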
@@ -23,10 +23,14 @@ inline u32 hl_hw_queue_add_ptr(u32 ptr, u16 val)
 	ptr &= ((HL_QUEUE_LENGTH << 1) - 1);
 	return ptr;
 }
+static inline int queue_ci_get(atomic_t *ci, u32 queue_len)
+{
+	return atomic_read(ci) & ((queue_len << 1) - 1);
+}
 
 static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
 {
-	int delta = (q->pi - q->ci);
+	int delta = (q->pi - queue_ci_get(&q->ci, queue_len));
 
 	if (delta >= 0)
 		return (queue_len - delta);
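The occupancy math above relies on pi and ci wrapping at twice the queue length, which keeps a full queue (pi - ci == queue_len) distinguishable from an empty one (pi - ci == 0). A standalone sketch of the same computation, illustrative rather than driver code:

/*
 * Example: queue_len = 256, pi = 300, ci = 44 -> delta = 256 -> 0 free
 * slots (queue full). The negative branch mirrors the wrapped case.
 */
static int example_free_slots(u32 pi, u32 ci, u32 queue_len)
{
	int delta = pi - (ci & ((queue_len << 1) - 1));

	if (delta >= 0)
		return queue_len - delta;
	return -delta;	/* pi wrapped past 2 * queue_len before ci did */
}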
@@ -40,21 +44,14 @@ void hl_int_hw_queue_update_ci(struct hl_cs *cs)
 	struct hl_hw_queue *q;
 	int i;
 
-	hdev->asic_funcs->hw_queues_lock(hdev);
-
 	if (hdev->disabled)
-		goto out;
+		return;
 
 	q = &hdev->kernel_queues[0];
-	for (i = 0 ; i < HL_MAX_QUEUES ; i++, q++) {
-		if (q->queue_type == QUEUE_TYPE_INT) {
-			q->ci += cs->jobs_in_queue_cnt[i];
-			q->ci &= ((q->int_queue_len << 1) - 1);
-		}
+	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
+		if (q->queue_type == QUEUE_TYPE_INT)
+			atomic_add(cs->jobs_in_queue_cnt[i], &q->ci);
 	}
-
-out:
-	hdev->asic_funcs->hw_queues_unlock(hdev);
 }
 
 /*
@@ -161,6 +158,13 @@ static int int_queue_sanity_checks(struct hl_device *hdev,
 {
 	int free_slots_cnt;
 
+	if (num_of_entries > q->int_queue_len) {
+		dev_err(hdev->dev,
+			"Cannot populate queue %u with %u jobs\n",
+			q->hw_queue_id, num_of_entries);
+		return -ENOMEM;
+	}
+
 	/* Check we have enough space in the queue */
 	free_slots_cnt = queue_free_slots(q, q->int_queue_len);
 
@@ -174,38 +178,26 @@ static int int_queue_sanity_checks(struct hl_device *hdev,
 }
 
 /*
- * hw_queue_sanity_checks() - Perform some sanity checks on a H/W queue.
+ * hw_queue_sanity_checks() - Make sure we have enough space in the h/w queue
  * @hdev: Pointer to hl_device structure.
 * @q: Pointer to hl_hw_queue structure.
 * @num_of_entries: How many entries to check for space.
 *
- * Perform the following:
- * - Make sure we have enough space in the completion queue.
- *   This check also ensures that there is enough space in the h/w queue, as
- *   both queues are of the same size.
- * - Reserve space in the completion queue (needs to be reversed if there
- *   is a failure down the road before the actual submission of work).
+ * Notice: We do not reserve queue entries so this function mustn't be called
+ *         more than once per CS for the same queue
 *
- * Both operations are done using the "free_slots_cnt" field of the completion
- * queue. The CI counters of the queue and the completion queue are not
- * needed/used for the H/W queue type.
 */
 static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q,
 					int num_of_entries)
 {
-	atomic_t *free_slots =
-			&hdev->completion_queue[q->cq_id].free_slots_cnt;
+	int free_slots_cnt;
 
-	/*
-	 * Check we have enough space in the completion queue.
-	 * Add -1 to counter (decrement) unless counter was already 0.
-	 * In that case, CQ is full so we can't submit a new CB.
-	 * atomic_add_unless will return 0 if counter was already 0.
-	 */
-	if (atomic_add_negative(num_of_entries * -1, free_slots)) {
-		dev_dbg(hdev->dev, "No space for %d entries on CQ %d\n",
-			num_of_entries, q->hw_queue_id);
-		atomic_add(num_of_entries, free_slots);
+	/* Check we have enough space in the queue */
+	free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);
+
+	if (free_slots_cnt < num_of_entries) {
+		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
+			q->hw_queue_id, num_of_entries);
 		return -EAGAIN;
 	}
 
@@ -366,7 +358,6 @@ static void hw_queue_schedule_job(struct hl_cs_job *job)
 {
 	struct hl_device *hdev = job->cs->ctx->hdev;
 	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
-	struct hl_cq *cq;
 	u64 ptr;
 	u32 offset, ctl, len;
 
@@ -376,7 +367,7 @@ static void hw_queue_schedule_job(struct hl_cs_job *job)
 	 * write address offset in the SM block (QMAN LBW message).
 	 * The write address offset is calculated as "COMP_OFFSET << 2".
 	 */
-	offset = job->cs->sequence & (HL_MAX_PENDING_CS - 1);
+	offset = job->cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
 	ctl = ((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) |
 		((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK);
 
@@ -395,17 +386,6 @@ static void hw_queue_schedule_job(struct hl_cs_job *job)
 	else
 		ptr = (u64) (uintptr_t) job->user_cb;
 
-	/*
-	 * No need to protect pi_offset because scheduling to the
-	 * H/W queues is done under the scheduler mutex
-	 *
-	 * No need to check if CQ is full because it was already
-	 * checked in hw_queue_sanity_checks
-	 */
-	cq = &hdev->completion_queue[q->cq_id];
-
-	cq->pi = hl_cq_inc_ptr(cq->pi);
-
 	ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
 }
 
@@ -509,19 +489,23 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 	struct hl_device *hdev = ctx->hdev;
 	struct hl_cs_job *job, *tmp;
 	struct hl_hw_queue *q;
+	u32 max_queues;
 	int rc = 0, i, cq_cnt;
 
 	hdev->asic_funcs->hw_queues_lock(hdev);
 
 	if (hl_device_disabled_or_in_reset(hdev)) {
+		ctx->cs_counters.device_in_reset_drop_cnt++;
 		dev_err(hdev->dev,
 			"device is disabled or in reset, CS rejected!\n");
 		rc = -EPERM;
 		goto out;
 	}
 
+	max_queues = hdev->asic_prop.max_queues;
+
 	q = &hdev->kernel_queues[0];
-	for (i = 0, cq_cnt = 0 ; i < HL_MAX_QUEUES ; i++, q++) {
+	for (i = 0, cq_cnt = 0 ; i < max_queues ; i++, q++) {
 		if (cs->jobs_in_queue_cnt[i]) {
 			switch (q->queue_type) {
 			case QUEUE_TYPE_EXT:
@@ -543,11 +527,12 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 				break;
 			}
 
-			if (rc)
+			if (rc) {
+				ctx->cs_counters.queue_full_drop_cnt++;
 				goto unroll_cq_resv;
+			}
 
-			if (q->queue_type == QUEUE_TYPE_EXT ||
-					q->queue_type == QUEUE_TYPE_HW)
+			if (q->queue_type == QUEUE_TYPE_EXT)
 				cq_cnt++;
 		}
 	}
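The counters incremented above are fields of struct hl_cs_counters, kept once per context and once aggregated per device (see the hl_ctx and hl_device changes earlier in this series). A hedged sketch of that structure as implied by the increments in this series; the authoritative field list lives in habanalabs.h and may differ:

struct hl_cs_counters {
	u64 out_of_mem_drop_cnt;	/* CS dropped: memory allocation failed */
	u64 parsing_drop_cnt;		/* CS dropped: CB parsing failed */
	u64 queue_full_drop_cnt;	/* CS dropped: target queue was full */
	u64 device_in_reset_drop_cnt;	/* CS dropped: device disabled/in reset */
};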
@@ -598,10 +583,9 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 
 unroll_cq_resv:
 	q = &hdev->kernel_queues[0];
-	for (i = 0 ; (i < HL_MAX_QUEUES) && (cq_cnt > 0) ; i++, q++) {
-		if ((q->queue_type == QUEUE_TYPE_EXT ||
-				q->queue_type == QUEUE_TYPE_HW) &&
-				cs->jobs_in_queue_cnt[i]) {
+	for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) {
+		if ((q->queue_type == QUEUE_TYPE_EXT) &&
+						(cs->jobs_in_queue_cnt[i])) {
 			atomic_t *free_slots =
 				&hdev->completion_queue[i].free_slots_cnt;
 			atomic_add(cs->jobs_in_queue_cnt[i], free_slots);
@@ -625,7 +609,7 @@ void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id)
 {
 	struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
 
-	q->ci = hl_queue_inc_ptr(q->ci);
+	atomic_inc(&q->ci);
 }
 
 static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
@@ -660,12 +644,9 @@ static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
 	}
 
 	/* Make sure read/write pointers are initialized to start of queue */
-	q->ci = 0;
+	atomic_set(&q->ci, 0);
 	q->pi = 0;
 
-	if (!is_cpu_queue)
-		hdev->asic_funcs->ext_queue_init(hdev, q->hw_queue_id);
-
 	return 0;
 
 free_queue:
@@ -697,7 +678,7 @@ static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
 
 	q->kernel_address = (u64) (uintptr_t) p;
 	q->pi = 0;
-	q->ci = 0;
+	atomic_set(&q->ci, 0);
 
 	return 0;
 }
@@ -726,12 +707,48 @@ static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
 	q->kernel_address = (u64) (uintptr_t) p;
 
 	/* Make sure read/write pointers are initialized to start of queue */
-	q->ci = 0;
+	atomic_set(&q->ci, 0);
 	q->pi = 0;
 
 	return 0;
 }
 
+static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
+{
+	struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct hl_hw_sob *hw_sob;
+	int sob, queue_idx = hdev->sync_stream_queue_idx++;
+
+	hw_queue->base_sob_id =
+		prop->sync_stream_first_sob + queue_idx * HL_RSVD_SOBS;
+	hw_queue->base_mon_id =
+		prop->sync_stream_first_mon + queue_idx * HL_RSVD_MONS;
+	hw_queue->next_sob_val = 1;
+	hw_queue->curr_sob_offset = 0;
+
+	for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
+		hw_sob = &hw_queue->hw_sob[sob];
+		hw_sob->hdev = hdev;
+		hw_sob->sob_id = hw_queue->base_sob_id + sob;
+		hw_sob->q_idx = q_idx;
+		kref_init(&hw_sob->kref);
+	}
+}
+
+static void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx)
+{
+	struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
+
+	/*
+	 * In case we got here due to a stuck CS, the refcnt might be bigger
+	 * than 1 and therefore we reset it.
+	 */
+	kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref);
+	hw_queue->curr_sob_offset = 0;
+	hw_queue->next_sob_val = 1;
+}
+
 /*
  * queue_init - main initialization function for H/W queue object
  *
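The reservation arithmetic in sync_stream_queue_init() hands each sync-stream queue a disjoint window of HL_RSVD_SOBS sync objects (and HL_RSVD_MONS monitors). A small illustrative helper; the values 4 and 32 below are made up for the example and are not taken from a real ASIC:

/*
 * With HL_RSVD_SOBS == 4 and sync_stream_first_sob == 32 (illustrative),
 * queue_idx 0 owns SOBs 32..35, queue_idx 1 owns SOBs 36..39, and so on,
 * so no two sync-stream queues ever share a SOB.
 */
static inline int example_base_sob_id(int first_sob, int queue_idx,
					int rsvd_sobs)
{
	return first_sob + queue_idx * rsvd_sobs;	/* 32 + 1 * 4 = 36 */
}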
@@ -747,8 +764,6 @@ static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
 {
 	int rc;
 
-	BUILD_BUG_ON(HL_QUEUE_SIZE_IN_BYTES > HL_PAGE_SIZE);
-
 	q->hw_queue_id = hw_queue_id;
 
 	switch (q->queue_type) {
@@ -774,6 +789,9 @@ static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
 		break;
 	}
 
+	if (q->supports_sync_stream)
+		sync_stream_queue_init(hdev, q->hw_queue_id);
+
 	if (rc)
 		return rc;
 
@@ -835,7 +853,7 @@ int hl_hw_queues_create(struct hl_device *hdev)
 	struct hl_hw_queue *q;
 	int i, rc, q_ready_cnt;
 
-	hdev->kernel_queues = kcalloc(HL_MAX_QUEUES,
+	hdev->kernel_queues = kcalloc(asic->max_queues,
 				sizeof(*hdev->kernel_queues), GFP_KERNEL);
 
 	if (!hdev->kernel_queues) {
@@ -845,9 +863,11 @@ int hl_hw_queues_create(struct hl_device *hdev)
 
 	/* Initialize the H/W queues */
 	for (i = 0, q_ready_cnt = 0, q = hdev->kernel_queues;
-			i < HL_MAX_QUEUES ; i++, q_ready_cnt++, q++) {
+			i < asic->max_queues ; i++, q_ready_cnt++, q++) {
 
 		q->queue_type = asic->hw_queues_props[i].type;
+		q->supports_sync_stream =
+				asic->hw_queues_props[i].supports_sync_stream;
 		rc = queue_init(hdev, q, i);
 		if (rc) {
 			dev_err(hdev->dev,
@@ -870,9 +890,10 @@ int hl_hw_queues_create(struct hl_device *hdev)
 void hl_hw_queues_destroy(struct hl_device *hdev)
 {
 	struct hl_hw_queue *q;
+	u32 max_queues = hdev->asic_prop.max_queues;
 	int i;
 
-	for (i = 0, q = hdev->kernel_queues ; i < HL_MAX_QUEUES ; i++, q++)
+	for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++)
 		queue_fini(hdev, q);
 
 	kfree(hdev->kernel_queues);
@@ -881,15 +902,17 @@ void hl_hw_queues_destroy(struct hl_device *hdev)
 void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset)
 {
 	struct hl_hw_queue *q;
+	u32 max_queues = hdev->asic_prop.max_queues;
 	int i;
 
-	for (i = 0, q = hdev->kernel_queues ; i < HL_MAX_QUEUES ; i++, q++) {
+	for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) {
 		if ((!q->valid) ||
 			((!hard_reset) && (q->queue_type == QUEUE_TYPE_CPU)))
 			continue;
-		q->pi = q->ci = 0;
+		q->pi = 0;
+		atomic_set(&q->ci, 0);
 
-		if (q->queue_type == QUEUE_TYPE_EXT)
-			hdev->asic_funcs->ext_queue_reset(hdev, q->hw_queue_id);
+		if (q->supports_sync_stream)
+			sync_stream_queue_reset(hdev, q->hw_queue_id);
 	}
 }
@@ -119,15 +119,10 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
 
 		if ((shadow_index_valid) && (!hdev->disabled)) {
 			job = queue->shadow_queue[hl_pi_2_offset(shadow_index)];
-			queue_work(hdev->cq_wq, &job->finish_work);
+			queue_work(hdev->cq_wq[cq->cq_idx], &job->finish_work);
 		}
 
-		/* Update ci of the context's queue. There is no
-		 * need to protect it with spinlock because this update is
-		 * done only inside IRQ and there is a different IRQ per
-		 * queue
-		 */
-		queue->ci = hl_queue_inc_ptr(queue->ci);
+		atomic_inc(&queue->ci);
 
 		/* Clear CQ entry ready bit */
 		cq_entry->data = cpu_to_le32(le32_to_cpu(cq_entry->data) &
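The allocation side of the per-CQ work queues is not part of this excerpt (it lives in the common device-init code). A hedged sketch of what it plausibly looks like: one single-threaded work queue per completion queue, so completions on different CQs cannot race each other's job-freeing work. The helper name, the name format and the omitted unwind path are illustrative assumptions:

static int example_alloc_cq_wqs(struct hl_device *hdev, int cq_cnt)
{
	char name[32];
	int i;

	hdev->cq_wq = kcalloc(cq_cnt, sizeof(*hdev->cq_wq), GFP_KERNEL);
	if (!hdev->cq_wq)
		return -ENOMEM;

	for (i = 0 ; i < cq_cnt ; i++) {
		snprintf(name, sizeof(name), "hl-free-jobs-%d", i);
		hdev->cq_wq[i] = create_singlethread_workqueue(name);
		if (!hdev->cq_wq[i])
			return -ENOMEM;	/* cleanup of earlier wqs omitted */
	}

	return 0;
}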
@@ -220,8 +215,6 @@ int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id)
 {
 	void *p;
 
-	BUILD_BUG_ON(HL_CQ_SIZE_IN_BYTES > HL_PAGE_SIZE);
-
 	p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
 				&q->bus_address, GFP_KERNEL | __GFP_ZERO);
 	if (!p)
@@ -282,8 +275,6 @@ int hl_eq_init(struct hl_device *hdev, struct hl_eq *q)
 {
 	void *p;
 
-	BUILD_BUG_ON(HL_EQ_SIZE_IN_BYTES > HL_PAGE_SIZE);
-
 	p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
 							HL_EQ_SIZE_IN_BYTES,
 							&q->bus_address);
@@ -1730,8 +1730,7 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
 	 */
 	if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash))
 		dev_notice(hdev->dev,
-				"ctx %d is freed while it has va in use\n",
-				ctx->asid);
+			"user released device without removing its memory mappings\n");
 
 	hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) {
 		dev_dbg(hdev->dev,
@@ -502,7 +502,6 @@ int hl_mmu_ctx_init(struct hl_ctx *ctx)
 		return 0;
 
 	mutex_init(&ctx->mmu_lock);
-	hash_init(ctx->mmu_phys_hash);
 	hash_init(ctx->mmu_shadow_hash);
 
 	return dram_default_mapping_init(ctx);
@@ -9,9 +9,15 @@
 #include "include/hw_ip/pci/pci_general.h"
 
 #include <linux/pci.h>
+#include <linux/bitfield.h>
 
 #define HL_PLDM_PCI_ELBI_TIMEOUT_MSEC	(HL_PCI_ELBI_TIMEOUT_MSEC * 10)
 
+#define IATU_REGION_CTRL_REGION_EN_MASK		BIT(31)
+#define IATU_REGION_CTRL_MATCH_MODE_MASK	BIT(30)
+#define IATU_REGION_CTRL_NUM_MATCH_EN_MASK	BIT(19)
+#define IATU_REGION_CTRL_BAR_NUM_MASK		GENMASK(10, 8)
+
 /**
  * hl_pci_bars_map() - Map PCI BARs.
  * @hdev: Pointer to hl_device structure.
@@ -187,110 +193,94 @@ static void hl_pci_reset_link_through_bridge(struct hl_device *hdev)
 }
 
 /**
- * hl_pci_set_dram_bar_base() - Set DDR BAR to map specific device address.
+ * hl_pci_set_inbound_region() - Configure inbound region
  * @hdev: Pointer to hl_device structure.
- * @inbound_region: Inbound region number.
- * @bar: PCI BAR number.
- * @addr: Address in DRAM. Must be aligned to DRAM bar size.
+ * @region: Inbound region number.
+ * @pci_region: Inbound region parameters.
 *
- * Configure the iATU so that the DRAM bar will start at the specified address.
+ * Configure the iATU inbound region.
 *
 * Return: 0 on success, negative value for failure.
 */
-int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
-				u64 addr)
+int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
+		struct hl_inbound_pci_region *pci_region)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	u32 offset;
-	int rc;
+	u64 bar_phys_base, region_base, region_end_address;
+	u32 offset, ctrl_reg_val;
+	int rc = 0;
 
-	switch (inbound_region) {
-	case 0:
-		offset = 0x100;
-		break;
-	case 1:
-		offset = 0x300;
-		break;
-	case 2:
-		offset = 0x500;
-		break;
-	default:
-		dev_err(hdev->dev, "Invalid inbound region %d\n",
-			inbound_region);
-		return -EINVAL;
-	}
+	/* region offset */
+	offset = (0x200 * region) + 0x100;
 
-	if (bar != 0 && bar != 2 && bar != 4) {
-		dev_err(hdev->dev, "Invalid PCI BAR %d\n", bar);
-		return -EINVAL;
+	if (pci_region->mode == PCI_ADDRESS_MATCH_MODE) {
+		bar_phys_base = hdev->pcie_bar_phys[pci_region->bar];
+		region_base = bar_phys_base + pci_region->offset_in_bar;
+		region_end_address = region_base + pci_region->size - 1;
+
+		rc |= hl_pci_iatu_write(hdev, offset + 0x8,
+				lower_32_bits(region_base));
+		rc |= hl_pci_iatu_write(hdev, offset + 0xC,
+				upper_32_bits(region_base));
+		rc |= hl_pci_iatu_write(hdev, offset + 0x10,
+				lower_32_bits(region_end_address));
 	}
 
 	/* Point to the specified address */
-	rc = hl_pci_iatu_write(hdev, offset + 0x14, lower_32_bits(addr));
-	rc |= hl_pci_iatu_write(hdev, offset + 0x18, upper_32_bits(addr));
+	rc = hl_pci_iatu_write(hdev, offset + 0x14,
+			lower_32_bits(pci_region->addr));
+	rc |= hl_pci_iatu_write(hdev, offset + 0x18,
+			upper_32_bits(pci_region->addr));
 	rc |= hl_pci_iatu_write(hdev, offset + 0x0, 0);
-	/* Enable + BAR match + match enable + BAR number */
-	rc |= hl_pci_iatu_write(hdev, offset + 0x4, 0xC0080000 | (bar << 8));
 
-	/* Return the DBI window to the default location */
+	/* Enable + bar/address match + match enable + bar number */
+	ctrl_reg_val = FIELD_PREP(IATU_REGION_CTRL_REGION_EN_MASK, 1);
+	ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_MATCH_MODE_MASK,
+			pci_region->mode);
+	ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_NUM_MATCH_EN_MASK, 1);
+
+	if (pci_region->mode == PCI_BAR_MATCH_MODE)
+		ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_BAR_NUM_MASK,
+				pci_region->bar);
+
+	rc |= hl_pci_iatu_write(hdev, offset + 0x4, ctrl_reg_val);
+
+	/* Return the DBI window to the default location */
 	rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
 	rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
 
 	if (rc)
-		dev_err(hdev->dev, "failed to map DRAM bar to 0x%08llx\n",
-			addr);
+		dev_err(hdev->dev, "failed to map bar %u to 0x%08llx\n",
+				pci_region->bar, pci_region->addr);
 
 	return rc;
 }
 
 /**
- * hl_pci_init_iatu() - Initialize the iATU unit inside the PCI controller.
+ * hl_pci_set_outbound_region() - Configure outbound region 0
  * @hdev: Pointer to hl_device structure.
- * @sram_base_address: SRAM base address.
- * @dram_base_address: DRAM base address.
- * @host_phys_base_address: Base physical address of host memory for device
- *                          transactions.
- * @host_phys_size: Size of host memory for device transactions.
+ * @pci_region: Outbound region parameters.
 *
- * This is needed in case the firmware doesn't initialize the iATU.
+ * Configure the iATU outbound region 0.
 *
 * Return: 0 on success, negative value for failure.
 */
-int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
-			u64 dram_base_address, u64 host_phys_base_address,
-			u64 host_phys_size)
+int hl_pci_set_outbound_region(struct hl_device *hdev,
+		struct hl_outbound_pci_region *pci_region)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	u64 host_phys_end_addr;
+	u64 outbound_region_end_address;
 	int rc = 0;
 
-	/* Inbound Region 0 - Bar 0 - Point to SRAM base address */
-	rc  = hl_pci_iatu_write(hdev, 0x114, lower_32_bits(sram_base_address));
-	rc |= hl_pci_iatu_write(hdev, 0x118, upper_32_bits(sram_base_address));
-	rc |= hl_pci_iatu_write(hdev, 0x100, 0);
-	/* Enable + Bar match + match enable */
-	rc |= hl_pci_iatu_write(hdev, 0x104, 0xC0080000);
-
-	/* Return the DBI window to the default location */
-	rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
-	rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
-
-	hdev->asic_funcs->set_dma_mask_from_fw(hdev);
-
-	/* Point to DRAM */
-	if (!hdev->asic_funcs->set_dram_bar_base)
-		return -EINVAL;
-	if (hdev->asic_funcs->set_dram_bar_base(hdev, dram_base_address) ==
-								U64_MAX)
-		return -EIO;
-
-	/* Outbound Region 0 - Point to Host */
-	host_phys_end_addr = host_phys_base_address + host_phys_size - 1;
+	/* Outbound Region 0 */
+	outbound_region_end_address =
+			pci_region->addr + pci_region->size - 1;
 	rc |= hl_pci_iatu_write(hdev, 0x008,
-				lower_32_bits(host_phys_base_address));
+				lower_32_bits(pci_region->addr));
 	rc |= hl_pci_iatu_write(hdev, 0x00C,
-				upper_32_bits(host_phys_base_address));
-	rc |= hl_pci_iatu_write(hdev, 0x010, lower_32_bits(host_phys_end_addr));
+				upper_32_bits(pci_region->addr));
+	rc |= hl_pci_iatu_write(hdev, 0x010,
+				lower_32_bits(outbound_region_end_address));
 	rc |= hl_pci_iatu_write(hdev, 0x014, 0);
 
 	if ((hdev->power9_64bit_dma_enable) && (hdev->dma_mask == 64))
@@ -298,7 +288,8 @@ int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
 	else
 		rc |= hl_pci_iatu_write(hdev, 0x018, 0);
 
-	rc |= hl_pci_iatu_write(hdev, 0x020, upper_32_bits(host_phys_end_addr));
+	rc |= hl_pci_iatu_write(hdev, 0x020,
+				upper_32_bits(outbound_region_end_address));
 	/* Increase region size */
 	rc |= hl_pci_iatu_write(hdev, 0x000, 0x00002000);
 	/* Enable */
@@ -308,10 +299,7 @@ int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
 	rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
 	rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
 
-	if (rc)
-		return -EIO;
-
-	return 0;
+	return rc;
 }
 
 /**
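The parameter structures taken by the two refactored helpers above are not shown in this excerpt. The following is a sketch inferred from their usage in this series; the authoritative definitions live in the driver headers and may name or order the fields differently:

enum hl_pci_match_mode {
	PCI_ADDRESS_MATCH_MODE,
	PCI_BAR_MATCH_MODE
};

struct hl_inbound_pci_region {
	enum hl_pci_match_mode	mode;
	u64			addr;		/* device address to expose */
	u64			size;		/* region size (address match mode) */
	u64			offset_in_bar;	/* offset (address match mode) */
	u8			bar;		/* PCI BAR number */
};

struct hl_outbound_pci_region {
	u64	addr;	/* host physical base address */
	u64	size;
};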
@@ -334,6 +334,9 @@ static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj,
 	char *data;
 	int rc;
 
+	if (hl_device_disabled_or_in_reset(hdev))
+		return -ENODEV;
+
 	if (!max_size)
 		return -EINVAL;
 
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
-subdir-ccflags-y += -I$(src)
+subdir-ccflags-y += -I$(src)/common
 
 HL_GAUDI_FILES := gaudi/gaudi.o gaudi/gaudi_hwmgr.o gaudi/gaudi_security.o \
 	gaudi/gaudi_coresight.o

(File diff suppressed because it is too large)
@@ -10,7 +10,7 @@
 
 #include <uapi/misc/habanalabs.h>
 #include "habanalabs.h"
-#include "include/hl_boot_if.h"
+#include "include/common/hl_boot_if.h"
 #include "include/gaudi/gaudi_packets.h"
 #include "include/gaudi/gaudi.h"
 #include "include/gaudi/gaudi_async_events.h"
@@ -57,6 +57,12 @@
 
 #define GAUDI_DEFAULT_CARD_NAME		"HL2000"
 
+#define GAUDI_MAX_PENDING_CS		1024
+
+#if !IS_MAX_PENDING_CS_VALID(GAUDI_MAX_PENDING_CS)
+#error "GAUDI_MAX_PENDING_CS must be power of 2 and greater than 1"
+#endif
+
 #define PCI_DMA_NUMBER_OF_CHNLS		3
 #define HBM_DMA_NUMBER_OF_CHNLS		5
 #define DMA_NUMBER_OF_CHNLS		(PCI_DMA_NUMBER_OF_CHNLS + \
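The validity macro used above is defined in the common driver header; it plausibly reduces to a power-of-2 check along these lines (a sketch, not the verbatim definition). A power-of-2 count matters because hw_queue_schedule_job() masks the CS sequence with (max_pending_cs - 1):

#define IS_POWER_OF_2(n)		(((n) != 0) && (((n) & ((n) - 1)) == 0))
#define IS_MAX_PENDING_CS_VALID(n)	(IS_POWER_OF_2(n) && ((n) > 1))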
@@ -117,14 +123,14 @@
 
 /* Internal QMANs PQ sizes */
 
-#define MME_QMAN_LENGTH			64
+#define MME_QMAN_LENGTH			1024
 #define MME_QMAN_SIZE_IN_BYTES		(MME_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)
 
-#define HBM_DMA_QMAN_LENGTH		64
+#define HBM_DMA_QMAN_LENGTH		1024
 #define HBM_DMA_QMAN_SIZE_IN_BYTES	\
 				(HBM_DMA_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)
 
-#define TPC_QMAN_LENGTH			64
+#define TPC_QMAN_LENGTH			1024
 #define TPC_QMAN_SIZE_IN_BYTES		(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)
 
 #define SRAM_USER_BASE_OFFSET  GAUDI_DRIVER_SRAM_RESERVED_SIZE_FROM_START
@@ -228,7 +234,6 @@ struct gaudi_internal_qman_info {
  *                      engine.
  * @multi_msi_mode: whether we are working in multi MSI single MSI mode.
  *                  Multi MSI is possible only with IOMMU enabled.
- * @ext_queue_idx: helper index for external queues initialization.
  * @mmu_cache_inv_pi: PI for MMU cache invalidation flow. The H/W expects an
  *                    8-bit value so use u8.
  */

@@ -249,7 +254,6 @@ struct gaudi_device {
 	u32				events_stat_aggregate[GAUDI_EVENT_SIZE];
 	u32				hw_cap_initialized;
 	u8				multi_msi_mode;
-	u8				ext_queue_idx;
 	u8				mmu_cache_inv_pi;
 };
 
@@ -392,6 +392,7 @@ static int gaudi_config_stm(struct hl_device *hdev,
 {
 	struct hl_debug_params_stm *input;
 	u64 base_reg;
+	u32 frequency;
 	int rc;
 
 	if (params->reg_idx >= ARRAY_SIZE(debug_stm_regs)) {

@@ -420,7 +421,10 @@ static int gaudi_config_stm(struct hl_device *hdev,
 		WREG32(base_reg + 0xE00, lower_32_bits(input->sp_mask));
 		WREG32(base_reg + 0xEF4, input->id);
 		WREG32(base_reg + 0xDF4, 0x80);
-		WREG32(base_reg + 0xE8C, input->frequency);
+		frequency = hdev->asic_prop.psoc_timestamp_frequency;
+		if (frequency == 0)
+			frequency = input->frequency;
+		WREG32(base_reg + 0xE8C, frequency);
 		WREG32(base_reg + 0xE90, 0x7FF);
 
 		/* SW-2176 - SW WA for HW bug */
@@ -337,11 +337,19 @@ static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
 static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
 static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
 
-void goya_get_fixed_properties(struct hl_device *hdev)
+int goya_get_fixed_properties(struct hl_device *hdev)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	int i;
 
+	prop->max_queues = GOYA_QUEUE_ID_SIZE;
+	prop->hw_queues_props = kcalloc(prop->max_queues,
+			sizeof(struct hw_queue_properties),
+			GFP_KERNEL);
+
+	if (!prop->hw_queues_props)
+		return -ENOMEM;
+
 	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
 		prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
 		prop->hw_queues_props[i].driver_only = 0;

@@ -361,9 +369,6 @@ void goya_get_fixed_properties(struct hl_device *hdev)
 		prop->hw_queues_props[i].requires_kernel_cb = 0;
 	}
 
-	for (; i < HL_MAX_QUEUES; i++)
-		prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
-
 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
 
 	prop->dram_base_address = DRAM_PHYS_BASE;
@@ -426,6 +431,10 @@ void goya_get_fixed_properties(struct hl_device *hdev)
 
 	strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME,
 		CARD_NAME_MAX_LEN);
+
+	prop->max_pending_cs = GOYA_MAX_PENDING_CS;
+
+	return 0;
 }
 
 /*
@@ -456,6 +465,7 @@ static int goya_pci_bars_map(struct hl_device *hdev)
 static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
 {
 	struct goya_device *goya = hdev->asic_specific;
+	struct hl_inbound_pci_region pci_region;
 	u64 old_addr = addr;
 	int rc;
 

@@ -463,7 +473,10 @@ static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
 		return old_addr;
 
 	/* Inbound Region 1 - Bar 4 - Point to DDR */
-	rc = hl_pci_set_dram_bar_base(hdev, 1, 4, addr);
+	pci_region.mode = PCI_BAR_MATCH_MODE;
+	pci_region.bar = DDR_BAR_ID;
+	pci_region.addr = addr;
+	rc = hl_pci_set_inbound_region(hdev, 1, &pci_region);
 	if (rc)
 		return U64_MAX;
 

@@ -485,8 +498,35 @@ static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
  */
 static int goya_init_iatu(struct hl_device *hdev)
 {
-	return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
-				HOST_PHYS_BASE, HOST_PHYS_SIZE);
+	struct hl_inbound_pci_region inbound_region;
+	struct hl_outbound_pci_region outbound_region;
+	int rc;
+
+	/* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */
+	inbound_region.mode = PCI_BAR_MATCH_MODE;
+	inbound_region.bar = SRAM_CFG_BAR_ID;
+	inbound_region.addr = SRAM_BASE_ADDR;
+	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
+	if (rc)
+		goto done;
+
+	/* Inbound Region 1 - Bar 4 - Point to DDR */
+	inbound_region.mode = PCI_BAR_MATCH_MODE;
+	inbound_region.bar = DDR_BAR_ID;
+	inbound_region.addr = DRAM_PHYS_BASE;
+	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
+	if (rc)
+		goto done;
+
+	hdev->asic_funcs->set_dma_mask_from_fw(hdev);
+
+	/* Outbound Region 0 - Point to Host  */
+	outbound_region.addr = HOST_PHYS_BASE;
+	outbound_region.size = HOST_PHYS_SIZE;
+	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
+
+done:
+	return rc;
 }
 
 /*
@@ -507,7 +547,11 @@ static int goya_early_init(struct hl_device *hdev)
 	u32 val;
 	int rc;
 
-	goya_get_fixed_properties(hdev);
+	rc = goya_get_fixed_properties(hdev);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to get fixed properties\n");
+		return rc;
+	}
 
 	/* Check BAR sizes */
 	if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) {

@@ -517,7 +561,8 @@ static int goya_early_init(struct hl_device *hdev)
 			(unsigned long long) pci_resource_len(pdev,
							SRAM_CFG_BAR_ID),
 			CFG_BAR_SIZE);
-		return -ENODEV;
+		rc = -ENODEV;
+		goto free_queue_props;
 	}
 
 	if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) {

@@ -527,14 +572,15 @@ static int goya_early_init(struct hl_device *hdev)
 			(unsigned long long) pci_resource_len(pdev,
								MSIX_BAR_ID),
 			MSIX_BAR_SIZE);
-		return -ENODEV;
+		rc = -ENODEV;
+		goto free_queue_props;
 	}
 
 	prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
 
 	rc = hl_pci_init(hdev);
 	if (rc)
-		return rc;
+		goto free_queue_props;
 
 	if (!hdev->pldm) {
 		val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);

@@ -544,6 +590,10 @@ static int goya_early_init(struct hl_device *hdev)
 	}
 
 	return 0;
+
+free_queue_props:
+	kfree(hdev->asic_prop.hw_queues_props);
+	return rc;
 }
 
 /*

@@ -556,6 +606,7 @@ static int goya_early_init(struct hl_device *hdev)
  */
 static int goya_early_fini(struct hl_device *hdev)
 {
+	kfree(hdev->asic_prop.hw_queues_props);
 	hl_pci_fini(hdev);
 
 	return 0;

@@ -592,11 +643,36 @@ static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
 static void goya_fetch_psoc_frequency(struct hl_device *hdev)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	u32 trace_freq = 0;
+	u32 pll_clk = 0;
+	u32 div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
+	u32 div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
+	u32 nr = RREG32(mmPSOC_PCI_PLL_NR);
+	u32 nf = RREG32(mmPSOC_PCI_PLL_NF);
+	u32 od = RREG32(mmPSOC_PCI_PLL_OD);
 
-	prop->psoc_pci_pll_nr = RREG32(mmPSOC_PCI_PLL_NR);
-	prop->psoc_pci_pll_nf = RREG32(mmPSOC_PCI_PLL_NF);
-	prop->psoc_pci_pll_od = RREG32(mmPSOC_PCI_PLL_OD);
-	prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
+	if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
+		if (div_sel == DIV_SEL_REF_CLK)
+			trace_freq = PLL_REF_CLK;
+		else
+			trace_freq = PLL_REF_CLK / (div_fctr + 1);
+	} else if (div_sel == DIV_SEL_PLL_CLK ||
+					div_sel == DIV_SEL_DIVIDED_PLL) {
+		pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
+		if (div_sel == DIV_SEL_PLL_CLK)
+			trace_freq = pll_clk;
+		else
+			trace_freq = pll_clk / (div_fctr + 1);
+	} else {
+		dev_warn(hdev->dev,
+			"Received invalid div select value: %d", div_sel);
+	}
+
+	prop->psoc_timestamp_frequency = trace_freq;
+	prop->psoc_pci_pll_nr = nr;
+	prop->psoc_pci_pll_nf = nf;
+	prop->psoc_pci_pll_od = od;
+	prop->psoc_pci_pll_div_factor = div_fctr;
 }
 
 int goya_late_init(struct hl_device *hdev)
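To make the trace-frequency derivation above concrete, a worked example with illustrative register values (not read from real hardware): with nf = 31, nr = 0, od = 1 and div_fctr = 1, the 50 MHz reference clock gives:

u32 pll_clk = PLL_REF_CLK * (31 + 1) / ((0 + 1) * (1 + 1));	/* = 800 MHz */
u32 trace_freq = pll_clk / (1 + 1);	/* DIV_SEL_DIVIDED_PLL -> 400 MHz */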
@@ -2164,29 +2240,15 @@ static void goya_disable_timestamp(struct hl_device *hdev)
 
 static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
 {
-	u32 wait_timeout_ms, cpu_timeout_ms;
+	u32 wait_timeout_ms;
 
 	dev_info(hdev->dev,
 		"Halting compute engines and disabling interrupts\n");
 
-	if (hdev->pldm) {
+	if (hdev->pldm)
 		wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
-		cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
-	} else {
+	else
 		wait_timeout_ms = GOYA_RESET_WAIT_MSEC;
-		cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
-	}
-
-	if (hard_reset) {
-		/*
-		 * I don't know what is the state of the CPU so make sure it is
-		 * stopped in any means necessary
-		 */
-		WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
-		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
-			GOYA_ASYNC_EVENT_ID_HALT_MACHINE);
-		msleep(cpu_timeout_ms);
-	}
 
 	goya_stop_external_queues(hdev);
 	goya_stop_internal_queues(hdev);
@@ -2491,14 +2553,26 @@ static int goya_hw_init(struct hl_device *hdev)
 static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
 {
 	struct goya_device *goya = hdev->asic_specific;
-	u32 reset_timeout_ms, status;
+	u32 reset_timeout_ms, cpu_timeout_ms, status;
 
-	if (hdev->pldm)
+	if (hdev->pldm) {
 		reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC;
-	else
+		cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
+	} else {
 		reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC;
+		cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
+	}
 
 	if (hard_reset) {
+		/* I don't know what is the state of the CPU so make sure it is
+		 * stopped in any means necessary
+		 */
+		WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
+		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
+			GOYA_ASYNC_EVENT_ID_HALT_MACHINE);
+
+		msleep(cpu_timeout_ms);
+
 		goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
 		goya_disable_clk_rlx(hdev);
 		goya_set_pll_refclk(hdev);
@@ -3697,9 +3771,8 @@ static int goya_parse_cb_mmu(struct hl_device *hdev,
 	parser->patched_cb_size = parser->user_cb_size +
 			sizeof(struct packet_msg_prot) * 2;
 
-	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
-				parser->patched_cb_size,
-				&patched_cb_handle, HL_KERNEL_ASID_ID);
+	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
+			&patched_cb_handle, HL_KERNEL_ASID_ID, false);
 
 	if (rc) {
 		dev_err(hdev->dev,
@@ -3771,9 +3844,8 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev,
 	if (rc)
 		goto free_userptr;
 
-	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
-				parser->patched_cb_size,
-				&patched_cb_handle, HL_KERNEL_ASID_ID);
+	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
+			&patched_cb_handle, HL_KERNEL_ASID_ID, false);
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to allocate patched CB for DMA CS %d\n", rc);
@@ -4619,7 +4691,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
 	lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
 	cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
 						sizeof(struct packet_msg_prot);
-	cb = hl_cb_kernel_create(hdev, cb_size);
+	cb = hl_cb_kernel_create(hdev, cb_size, false);
 	if (!cb)
 		return -ENOMEM;
 
@@ -5149,21 +5221,16 @@ static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
 	return RREG32(mmHW_STATE);
 }
 
+int goya_ctx_init(struct hl_ctx *ctx)
+{
+	return 0;
+}
+
 u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
 {
 	return cq_idx;
 }
 
-static void goya_ext_queue_init(struct hl_device *hdev, u32 q_idx)
-{
-
-}
-
-static void goya_ext_queue_reset(struct hl_device *hdev, u32 q_idx)
-{
-
-}
-
 static u32 goya_get_signal_cb_size(struct hl_device *hdev)
 {
 	return 0;
@@ -5272,13 +5339,12 @@ static const struct hl_asic_funcs goya_funcs = {
 	.rreg = hl_rreg,
 	.wreg = hl_wreg,
 	.halt_coresight = goya_halt_coresight,
+	.ctx_init = goya_ctx_init,
 	.get_clk_rate = goya_get_clk_rate,
 	.get_queue_id_for_cq = goya_get_queue_id_for_cq,
 	.read_device_fw_version = goya_read_device_fw_version,
 	.load_firmware_to_device = goya_load_firmware_to_device,
 	.load_boot_fit_to_device = goya_load_boot_fit_to_device,
-	.ext_queue_init = goya_ext_queue_init,
-	.ext_queue_reset = goya_ext_queue_reset,
 	.get_signal_cb_size = goya_get_signal_cb_size,
 	.get_wait_cb_size = goya_get_wait_cb_size,
 	.gen_signal_cb = goya_gen_signal_cb,
@@ -10,7 +10,7 @@
 
 #include <uapi/misc/habanalabs.h>
 #include "habanalabs.h"
-#include "include/hl_boot_if.h"
+#include "include/common/hl_boot_if.h"
 #include "include/goya/goya_packets.h"
 #include "include/goya/goya.h"
 #include "include/goya/goya_async_events.h"
@@ -31,10 +31,6 @@
  */
 #define NUMBER_OF_INTERRUPTS		(NUMBER_OF_CMPLT_QUEUES + 1)
 
-#if (NUMBER_OF_HW_QUEUES >= HL_MAX_QUEUES)
-#error "Number of H/W queues must be smaller than HL_MAX_QUEUES"
-#endif
-
 #if (NUMBER_OF_INTERRUPTS > GOYA_MSIX_ENTRIES)
 #error "Number of MSIX interrupts must be smaller or equal to GOYA_MSIX_ENTRIES"
 #endif
@@ -57,6 +53,12 @@
 
 #define GOYA_DEFAULT_CARD_NAME		"HL1000"
 
+#define GOYA_MAX_PENDING_CS		64
+
+#if !IS_MAX_PENDING_CS_VALID(GOYA_MAX_PENDING_CS)
+#error "GOYA_MAX_PENDING_CS must be power of 2 and greater than 1"
+#endif
+
 /* DRAM Memory Map */
 
 #define CPU_FW_IMAGE_SIZE		0x10000000	/* 256MB */
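
IS_MAX_PENDING_CS_VALID() itself is not part of this hunk; a plausible definition matching the #error text above (the real macro lives in the driver's common headers):

/* Assumed shape of the validity check: a usable pending-CS limit is a
 * power of two greater than 1, so the value can double as a ring mask.
 */
#define IS_MAX_PENDING_CS_VALID(n)	(((n) > 1) && !((n) & ((n) - 1)))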
@@ -164,7 +166,7 @@ struct goya_device {
 	u8		device_cpu_mmu_mappings_done;
 };
 
-void goya_get_fixed_properties(struct hl_device *hdev);
+int goya_get_fixed_properties(struct hl_device *hdev);
 int goya_mmu_init(struct hl_device *hdev);
 void goya_init_dma_qmans(struct hl_device *hdev);
 void goya_init_mme_qmans(struct hl_device *hdev);
@@ -232,6 +232,7 @@ static int goya_config_stm(struct hl_device *hdev,
 {
 	struct hl_debug_params_stm *input;
 	u64 base_reg;
+	u32 frequency;
 	int rc;
 
 	if (params->reg_idx >= ARRAY_SIZE(debug_stm_regs)) {
@@ -264,7 +265,10 @@ static int goya_config_stm(struct hl_device *hdev,
 		WREG32(base_reg + 0xE20, 0xFFFFFFFF);
 		WREG32(base_reg + 0xEF4, input->id);
 		WREG32(base_reg + 0xDF4, 0x80);
-		WREG32(base_reg + 0xE8C, input->frequency);
+		frequency = hdev->asic_prop.psoc_timestamp_frequency;
+		if (frequency == 0)
+			frequency = input->frequency;
+		WREG32(base_reg + 0xE8C, frequency);
 		WREG32(base_reg + 0xE90, 0x7FF);
 		WREG32(base_reg + 0xE80, 0x27 | (input->id << 16));
 	} else {
@@ -19,9 +19,19 @@ struct hl_eq_header {
 	__le32 ctl;
 };
 
+struct hl_eq_ecc_data {
+	__le64 ecc_address;
+	__le64 ecc_syndrom;
+	__u8 memory_wrapper_idx;
+	__u8 pad[7];
+};
+
 struct hl_eq_entry {
 	struct hl_eq_header hdr;
-	__le64 data[7];
+	union {
+		struct hl_eq_ecc_data ecc_data;
+		__le64 data[7];
+	};
 };
 
 #define HL_EQ_ENTRY_SIZE		sizeof(struct hl_eq_entry)
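
With the union in place, an event-queue handler can overlay the ECC payload directly on the entry. A minimal sketch of decoding such an event; the handler name is illustrative, and it assumes the event header already identified the entry as an ECC report:

/* Hypothetical consumer of hl_eq_ecc_data, not the driver's own handler. */
static void handle_ecc_event(struct hl_device *hdev,
				struct hl_eq_entry *entry)
{
	u64 addr = le64_to_cpu(entry->ecc_data.ecc_address);
	u64 syndrome = le64_to_cpu(entry->ecc_data.ecc_syndrom);

	dev_err(hdev->dev,
		"ECC error at address 0x%llx, syndrome 0x%llx, wrapper %u\n",
		addr, syndrome, entry->ecc_data.memory_wrapper_idx);
}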
@@ -276,6 +286,8 @@ struct armcp_packet {
 		/* For get Armcp info/EEPROM data */
 		__le32 data_max_size;
 	};
+
+	__le32 reserved;
 };
 
 struct armcp_unmask_irq_arr_packet {
@@ -44,6 +44,15 @@
  *					The NIC FW loading and initialization
  *					failed. This means NICs are not usable.
  *
+ * CPU_BOOT_ERR0_SECURITY_NOT_RDY	Chip security initialization has been
+ *					started, but is not ready yet - chip
+ *					cannot be accessed.
+ *
+ * CPU_BOOT_ERR0_SECURITY_FAIL		Security related tasks have failed.
+ *					The tasks are security init (root of
+ *					trust), boot authentication (chain of
+ *					trust), data packets authentication.
+ *
 * CPU_BOOT_ERR0_ENABLED		Error registers enabled.
 *					This is a main indication that the
 *					running FW populates the error
@@ -57,6 +66,8 @@
 #define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED		(1 << 4)
 #define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY		(1 << 5)
 #define CPU_BOOT_ERR0_NIC_FW_FAIL		(1 << 6)
+#define CPU_BOOT_ERR0_SECURITY_NOT_RDY		(1 << 7)
+#define CPU_BOOT_ERR0_SECURITY_FAIL		(1 << 8)
 #define CPU_BOOT_ERR0_ENABLED			(1 << 31)
 
 enum cpu_boot_status {
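
A small illustrative consumer of the two new bits. Per the doc text above, CPU_BOOT_ERR0_ENABLED gates the validity of all other bits; err_reg stands in for the ASIC-specific boot-error register, and the messages are not the driver's own:

/* Hypothetical sketch; only the bit names come from this header. */
static void report_boot_errors(struct hl_device *hdev, u32 err_reg)
{
	u32 err_val = RREG32(err_reg);

	/* CPU_BOOT_ERR0_ENABLED says firmware populated the other bits */
	if (!(err_val & CPU_BOOT_ERR0_ENABLED))
		return;

	if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY)
		dev_warn(hdev->dev, "security init started but not ready\n");
	if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL)
		dev_err(hdev->dev, "security related tasks failed\n");
}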
@@ -79,7 +90,10 @@ enum cpu_boot_status {
 	CPU_BOOT_STATUS_BMC_WAITING_SKIPPED, /* deprecated - will be removed */
 	/* Last boot loader progress status, ready to receive commands */
 	CPU_BOOT_STATUS_READY_TO_BOOT = 15,
+	/* Internal Boot finished, ready for boot-fit */
+	CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT = 16,
+	/* Internal Security has been initialized, device can be accessed */
+	CPU_BOOT_STATUS_SECURITY_READY = 17,
 };
 
 enum kmd_msg {
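
The two added stages give the driver an intermediate milestone to poll before full boot. A hedged sketch of such a wait; hl_poll_timeout() and the PSOC boot-status register named here are driver internals assumed for illustration:

	/* Illustrative wait for the security-ready stage (assumptions:
	 * the polling helper and the status register name).
	 */
	rc = hl_poll_timeout(
		hdev,
		mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
		status,
		(status == CPU_BOOT_STATUS_SECURITY_READY) ||
		(status == CPU_BOOT_STATUS_READY_TO_BOOT),
		10000,
		timeout);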
@@ -91,18 +91,16 @@
 
 #include "psoc_pci_pll_regs.h"
 #include "psoc_hbm_pll_regs.h"
+#include "psoc_cpu_pll_regs.h"
 
-#define GAUDI_ECC_MEM_SEL_OFFSET	0xF18
-#define GAUDI_ECC_ADDRESS_OFFSET	0xF1C
-#define GAUDI_ECC_SYNDROME_OFFSET	0xF20
-#define GAUDI_ECC_SERR0_OFFSET		0xF30
-#define GAUDI_ECC_SERR1_OFFSET		0xF34
-#define GAUDI_ECC_SERR2_OFFSET		0xF38
-#define GAUDI_ECC_SERR3_OFFSET		0xF3C
-#define GAUDI_ECC_DERR0_OFFSET		0xF40
-#define GAUDI_ECC_DERR1_OFFSET		0xF44
-#define GAUDI_ECC_DERR2_OFFSET		0xF48
-#define GAUDI_ECC_DERR3_OFFSET		0xF4C
+#define GAUDI_ECC_MEM_SEL_OFFSET		0xF18
+#define GAUDI_ECC_ADDRESS_OFFSET		0xF1C
+#define GAUDI_ECC_SYNDROME_OFFSET		0xF20
+#define GAUDI_ECC_MEM_INFO_CLR_OFFSET		0xF28
+#define GAUDI_ECC_MEM_INFO_CLR_SERR_MASK	BIT(8)
+#define GAUDI_ECC_MEM_INFO_CLR_DERR_MASK	BIT(9)
+#define GAUDI_ECC_SERR0_OFFSET			0xF30
+#define GAUDI_ECC_DERR0_OFFSET			0xF40
 
 #define mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0                     0x492000
 #define mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0               0x494000
@@ -294,6 +292,7 @@
 
 #define mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG                           0xC02000
 
+#define mmPCIE_AUX_FLR_CTRL                                          0xC07394
 #define mmPCIE_AUX_DBI                                               0xC07490
 
 #endif /* ASIC_REG_GAUDI_REGS_H_ */
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2018 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+/************************************
+ ** This is an auto-generated file **
+ **       DO NOT EDIT BELOW        **
+ ************************************/
+
+#ifndef ASIC_REG_PSOC_CPU_PLL_REGS_H_
+#define ASIC_REG_PSOC_CPU_PLL_REGS_H_
+
+/*
+ *****************************************
+ *   PSOC_CPU_PLL (Prototype: PLL)
+ *****************************************
+ */
+
+#define mmPSOC_CPU_PLL_NR                                            0xC70100
+
+#define mmPSOC_CPU_PLL_NF                                            0xC70104
+
+#define mmPSOC_CPU_PLL_OD                                            0xC70108
+
+#define mmPSOC_CPU_PLL_NB                                            0xC7010C
+
+#define mmPSOC_CPU_PLL_CFG                                           0xC70110
+
+#define mmPSOC_CPU_PLL_LOSE_MASK                                     0xC70120
+
+#define mmPSOC_CPU_PLL_LOCK_INTR                                     0xC70128
+
+#define mmPSOC_CPU_PLL_LOCK_BYPASS                                   0xC7012C
+
+#define mmPSOC_CPU_PLL_DATA_CHNG                                     0xC70130
+
+#define mmPSOC_CPU_PLL_RST                                           0xC70134
+
+#define mmPSOC_CPU_PLL_SLIP_WD_CNTR                                  0xC70150
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_0                                  0xC70200
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_1                                  0xC70204
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_2                                  0xC70208
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_3                                  0xC7020C
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_CMD_0                              0xC70220
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_CMD_1                              0xC70224
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_CMD_2                              0xC70228
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_CMD_3                              0xC7022C
+
+#define mmPSOC_CPU_PLL_DIV_SEL_0                                     0xC70280
+
+#define mmPSOC_CPU_PLL_DIV_SEL_1                                     0xC70284
+
+#define mmPSOC_CPU_PLL_DIV_SEL_2                                     0xC70288
+
+#define mmPSOC_CPU_PLL_DIV_SEL_3                                     0xC7028C
+
+#define mmPSOC_CPU_PLL_DIV_EN_0                                      0xC702A0
+
+#define mmPSOC_CPU_PLL_DIV_EN_1                                      0xC702A4
+
+#define mmPSOC_CPU_PLL_DIV_EN_2                                      0xC702A8
+
+#define mmPSOC_CPU_PLL_DIV_EN_3                                      0xC702AC
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_BUSY_0                             0xC702C0
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_BUSY_1                             0xC702C4
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_BUSY_2                             0xC702C8
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_BUSY_3                             0xC702CC
+
+#define mmPSOC_CPU_PLL_CLK_GATER                                     0xC70300
+
+#define mmPSOC_CPU_PLL_CLK_RLX_0                                     0xC70310
+
+#define mmPSOC_CPU_PLL_CLK_RLX_1                                     0xC70314
+
+#define mmPSOC_CPU_PLL_CLK_RLX_2                                     0xC70318
+
+#define mmPSOC_CPU_PLL_CLK_RLX_3                                     0xC7031C
+
+#define mmPSOC_CPU_PLL_REF_CNTR_PERIOD                               0xC70400
+
+#define mmPSOC_CPU_PLL_REF_LOW_THRESHOLD                             0xC70410
+
+#define mmPSOC_CPU_PLL_REF_HIGH_THRESHOLD                            0xC70420
+
+#define mmPSOC_CPU_PLL_PLL_NOT_STABLE                                0xC70430
+
+#define mmPSOC_CPU_PLL_FREQ_CALC_EN                                  0xC70440
+
+#define mmPSOC_CPU_PLL_RLX_BITMAP_CFG                                0xC70500
+
+#define mmPSOC_CPU_PLL_RLX_BITMAP_0                                  0xC70510
+
+#define mmPSOC_CPU_PLL_RLX_BITMAP_1                                  0xC70514
+
+#define mmPSOC_CPU_PLL_RLX_BITMAP_2                                  0xC70518
+
+#define mmPSOC_CPU_PLL_RLX_BITMAP_3                                  0xC7051C
+
+#endif /* ASIC_REG_PSOC_CPU_PLL_REGS_H_ */
@@ -455,4 +455,7 @@ enum axi_id {
 					QM_ARB_ERR_MSG_EN_CHOISE_WDT_MASK |\
 					QM_ARB_ERR_MSG_EN_AXI_LBW_ERR_MASK)
 
+#define PCIE_AUX_FLR_CTRL_HW_CTRL_MASK                               0x1
+#define PCIE_AUX_FLR_CTRL_INT_MASK_MASK                              0x2
+
 #endif /* GAUDI_MASKS_H_ */
@@ -85,7 +85,7 @@ struct packet_msg_long {
 };
 
 #define GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_SHIFT	0
-#define GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK	0x0000EFFF
+#define GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK	0x00007FFF
 
 #define GAUDI_PKT_SHORT_VAL_SOB_MOD_SHIFT	31
 #define GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK	0x80000000
@@ -141,7 +141,7 @@ struct packet_msg_prot {
 #define GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK	0x00FF0000
 
 #define GAUDI_PKT_FENCE_CFG_ID_SHIFT		30
-#define GAUDI_PKT_FENCE_CFG_ID_MASK		0xC000000
+#define GAUDI_PKT_FENCE_CFG_ID_MASK		0xC0000000
 
 #define GAUDI_PKT_FENCE_CTL_PRED_SHIFT		0
 #define GAUDI_PKT_FENCE_CTL_PRED_MASK		0x0000001F
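
Both mask fixes in this file follow the same invariant: a field's mask is its width in ones, shifted left by its SHIFT. The old 0x0000EFFF (15-bit sync value at bit 0) and 0xC000000 (2-bit fence ID at bit 30, missing a zero) both violate it. A standalone check of the corrected values:

/* Self-contained sanity check of the corrected masks (illustrative). */
#include <assert.h>

int main(void)
{
	/* 15-bit sync value at shift 0 */
	assert(0x00007FFFu == (((1u << 15) - 1) << 0));
	/* 2-bit fence ID at shift 30 */
	assert(0xC0000000u == (((1u << 2) - 1) << 30));
	return 0;
}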
@@ -263,6 +263,7 @@ enum hl_device_status {
 *                         time the driver was loaded.
 * HL_INFO_TIME_SYNC     - Retrieve the device's time alongside the host's time
 *                         for synchronization.
+ * HL_INFO_CS_COUNTERS   - Retrieve command submission counters
 */
 #define HL_INFO_HW_IP_INFO		0
 #define HL_INFO_HW_EVENTS		1
@@ -274,6 +275,7 @@ enum hl_device_status {
 #define HL_INFO_CLK_RATE		8
 #define HL_INFO_RESET_COUNT		9
 #define HL_INFO_TIME_SYNC		10
+#define HL_INFO_CS_COUNTERS		11
 
 #define HL_INFO_VERSION_MAX_LEN	128
 #define HL_INFO_CARD_NAME_MAX_LEN	16
 | 
			
		|||
	__u64 host_time;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * struct hl_info_cs_counters - command submission counters
 | 
			
		||||
 * @out_of_mem_drop_cnt: dropped due to memory allocation issue
 | 
			
		||||
 * @parsing_drop_cnt: dropped due to error in packet parsing
 | 
			
		||||
 * @queue_full_drop_cnt: dropped due to queue full
 | 
			
		||||
 * @device_in_reset_drop_cnt: dropped due to device in reset
 | 
			
		||||
 */
 | 
			
		||||
struct hl_cs_counters {
 | 
			
		||||
	__u64 out_of_mem_drop_cnt;
 | 
			
		||||
	__u64 parsing_drop_cnt;
 | 
			
		||||
	__u64 queue_full_drop_cnt;
 | 
			
		||||
	__u64 device_in_reset_drop_cnt;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct hl_info_cs_counters {
 | 
			
		||||
	struct hl_cs_counters cs_counters;
 | 
			
		||||
	struct hl_cs_counters ctx_cs_counters;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct hl_info_args {
 | 
			
		||||
	/* Location of relevant struct in userspace */
 | 
			
		||||
	__u64 return_pointer;
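
A hedged userspace sketch of querying the new counters through the info ioctl. It assumes HL_IOCTL_INFO and the hl_info_args layout from this uapi header, with fd an open /dev/hlN descriptor:

/* Illustrative userspace query; field names taken from this header,
 * the ioctl name assumed from the driver's uapi conventions.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>

static int query_cs_counters(int fd, struct hl_info_cs_counters *counters)
{
	struct hl_info_args args;

	memset(&args, 0, sizeof(args));
	args.op = HL_INFO_CS_COUNTERS;
	args.return_pointer = (__u64) (uintptr_t) counters;
	args.return_size = sizeof(*counters);

	/* On success, counters holds both global and per-context drops */
	return ioctl(fd, HL_IOCTL_INFO, &args);
}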
@@ -530,13 +551,13 @@ union hl_wait_cs_args {
 	struct hl_wait_cs_out out;
 };
 
-/* Opcode to alloc device memory */
+/* Opcode to allocate device memory */
 #define HL_MEM_OP_ALLOC			0
 /* Opcode to free previously allocated device memory */
 #define HL_MEM_OP_FREE			1
-/* Opcode to map host memory */
+/* Opcode to map host and device memory */
 #define HL_MEM_OP_MAP			2
-/* Opcode to unmap previously mapped host memory */
+/* Opcode to unmap previously mapped host and device memory */
 #define HL_MEM_OP_UNMAP			3
 
 /* Memory flags */