forked from mirrors/linux
		
	cxl: Prevent adapter reset if an active context exists
This patch prevents resetting the cxl adapter via sysfs in the presence
of one or more active cxl_contexts on it. This protects against an
unrecoverable error that occurs when the PSL still owns a dirty cache
line after the reset and the host then tries to touch that cache line.
If a forced reset of the card is required irrespective of any active
contexts, the integer value -1 can be written to the 'reset' sysfs
attribute of the card.
The patch introduces a new atomic_t member named contexts_num inside
struct cxl that holds the number of active contexts attached to the
card, which is checked against '0' before proceeding with the reset. To
protect against a race condition where a context is activated just after
the reset check is performed, contexts_num is atomically set to '-1'
after the reset check to indicate that no more contexts can be activated
on the card.
Before activating a context we atomically test if contexts_num is
non-negative and, if so, increment its value by one. If the value of
contexts_num is negative, it indicates that the card is about to be
reset, and context activation fails with an error at that point.
Fixes: 62fa19d4b4 ("cxl: Add ability to reset the card")
Cc: stable@vger.kernel.org # v4.0+
Acked-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
			
			
This commit is contained in:
		
							parent
							
								
									65bc3ece84
								
							
						
					
					
						commit
						70b565bbdb
					
				
					 9 changed files with 121 additions and 7 deletions
				
			
		| 
						 | 
					@ -220,8 +220,11 @@ What:           /sys/class/cxl/<card>/reset
 | 
				
			||||||
Date:           October 2014
 | 
					Date:           October 2014
 | 
				
			||||||
Contact:        linuxppc-dev@lists.ozlabs.org
 | 
					Contact:        linuxppc-dev@lists.ozlabs.org
 | 
				
			||||||
Description:    write only
 | 
					Description:    write only
 | 
				
			||||||
                Writing 1 will issue a PERST to card which may cause the card
 | 
					                Writing 1 will issue a PERST to card provided there are no
 | 
				
			||||||
                to reload the FPGA depending on load_image_on_perst.
 | 
					                contexts active on any one of the card AFUs. This may cause
 | 
				
			||||||
 | 
					                the card to reload the FPGA depending on load_image_on_perst.
 | 
				
			||||||
 | 
					                Writing -1 will do a force PERST irrespective of any active
 | 
				
			||||||
 | 
					                contexts on the card AFUs.
 | 
				
			||||||
Users:		https://github.com/ibm-capi/libcxl
 | 
					Users:		https://github.com/ibm-capi/libcxl
 | 
				
			||||||
 | 
					
 | 
				
			||||||
What:		/sys/class/cxl/<card>/perst_reloads_same_image (not in a guest)
 | 
					What:		/sys/class/cxl/<card>/perst_reloads_same_image (not in a guest)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -229,6 +229,14 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
 | 
				
			||||||
	if (ctx->status == STARTED)
 | 
						if (ctx->status == STARTED)
 | 
				
			||||||
		goto out; /* already started */
 | 
							goto out; /* already started */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Increment the mapped context count for adapter. This also checks
 | 
				
			||||||
 | 
						 * if adapter_context_lock is taken.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						rc = cxl_adapter_context_get(ctx->afu->adapter);
 | 
				
			||||||
 | 
						if (rc)
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (task) {
 | 
						if (task) {
 | 
				
			||||||
		ctx->pid = get_task_pid(task, PIDTYPE_PID);
 | 
							ctx->pid = get_task_pid(task, PIDTYPE_PID);
 | 
				
			||||||
		ctx->glpid = get_task_pid(task->group_leader, PIDTYPE_PID);
 | 
							ctx->glpid = get_task_pid(task->group_leader, PIDTYPE_PID);
 | 
				
			||||||
| 
						 | 
					@ -240,6 +248,7 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) {
 | 
						if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) {
 | 
				
			||||||
		put_pid(ctx->pid);
 | 
							put_pid(ctx->pid);
 | 
				
			||||||
 | 
							cxl_adapter_context_put(ctx->afu->adapter);
 | 
				
			||||||
		cxl_ctx_put();
 | 
							cxl_ctx_put();
 | 
				
			||||||
		goto out;
 | 
							goto out;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -238,6 +238,9 @@ int __detach_context(struct cxl_context *ctx)
 | 
				
			||||||
	put_pid(ctx->glpid);
 | 
						put_pid(ctx->glpid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	cxl_ctx_put();
 | 
						cxl_ctx_put();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Decrease the attached context count on the adapter */
 | 
				
			||||||
 | 
						cxl_adapter_context_put(ctx->afu->adapter);
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -618,6 +618,14 @@ struct cxl {
 | 
				
			||||||
	bool perst_select_user;
 | 
						bool perst_select_user;
 | 
				
			||||||
	bool perst_same_image;
 | 
						bool perst_same_image;
 | 
				
			||||||
	bool psl_timebase_synced;
 | 
						bool psl_timebase_synced;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * number of contexts mapped on to this card. Possible values are:
 | 
				
			||||||
 | 
						 * >0: Number of contexts mapped and new one can be mapped.
 | 
				
			||||||
 | 
						 *  0: No active contexts and new ones can be mapped.
 | 
				
			||||||
 | 
						 * -1: No contexts mapped and new ones cannot be mapped.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						atomic_t contexts_num;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int cxl_pci_alloc_one_irq(struct cxl *adapter);
 | 
					int cxl_pci_alloc_one_irq(struct cxl *adapter);
 | 
				
			||||||
| 
						 | 
					@ -944,4 +952,20 @@ bool cxl_pci_is_vphb_device(struct pci_dev *dev);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* decode AFU error bits in the PSL register PSL_SERR_An */
 | 
					/* decode AFU error bits in the PSL register PSL_SERR_An */
 | 
				
			||||||
void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr);
 | 
					void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Increments the number of attached contexts on an adapter.
 | 
				
			||||||
 | 
					 * In case an adapter_context_lock is taken then return -EBUSY.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					int cxl_adapter_context_get(struct cxl *adapter);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Decrements the number of attached contexts on an adapter */
 | 
				
			||||||
 | 
					void cxl_adapter_context_put(struct cxl *adapter);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* If no active contexts then prevents contexts from being attached */
 | 
				
			||||||
 | 
					int cxl_adapter_context_lock(struct cxl *adapter);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Unlock the contexts-lock if taken. Warn and force unlock otherwise */
 | 
				
			||||||
 | 
					void cxl_adapter_context_unlock(struct cxl *adapter);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -205,11 +205,22 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
 | 
				
			||||||
	ctx->pid = get_task_pid(current, PIDTYPE_PID);
 | 
						ctx->pid = get_task_pid(current, PIDTYPE_PID);
 | 
				
			||||||
	ctx->glpid = get_task_pid(current->group_leader, PIDTYPE_PID);
 | 
						ctx->glpid = get_task_pid(current->group_leader, PIDTYPE_PID);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Increment the mapped context count for adapter. This also checks
 | 
				
			||||||
 | 
						 * if adapter_context_lock is taken.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						rc = cxl_adapter_context_get(ctx->afu->adapter);
 | 
				
			||||||
 | 
						if (rc) {
 | 
				
			||||||
 | 
							afu_release_irqs(ctx, ctx);
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr);
 | 
						trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor,
 | 
						if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor,
 | 
				
			||||||
							amr))) {
 | 
												amr))) {
 | 
				
			||||||
		afu_release_irqs(ctx, ctx);
 | 
							afu_release_irqs(ctx, ctx);
 | 
				
			||||||
 | 
							cxl_adapter_context_put(ctx->afu->adapter);
 | 
				
			||||||
		goto out;
 | 
							goto out;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1152,6 +1152,9 @@ struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_devic
 | 
				
			||||||
	if ((rc = cxl_sysfs_adapter_add(adapter)))
 | 
						if ((rc = cxl_sysfs_adapter_add(adapter)))
 | 
				
			||||||
		goto err_put1;
 | 
							goto err_put1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* release the context lock as the adapter is configured */
 | 
				
			||||||
 | 
						cxl_adapter_context_unlock(adapter);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return adapter;
 | 
						return adapter;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
err_put1:
 | 
					err_put1:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -243,8 +243,10 @@ struct cxl *cxl_alloc_adapter(void)
 | 
				
			||||||
	if (dev_set_name(&adapter->dev, "card%i", adapter->adapter_num))
 | 
						if (dev_set_name(&adapter->dev, "card%i", adapter->adapter_num))
 | 
				
			||||||
		goto err2;
 | 
							goto err2;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return adapter;
 | 
						/* start with context lock taken */
 | 
				
			||||||
 | 
						atomic_set(&adapter->contexts_num, -1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return adapter;
 | 
				
			||||||
err2:
 | 
					err2:
 | 
				
			||||||
	cxl_remove_adapter_nr(adapter);
 | 
						cxl_remove_adapter_nr(adapter);
 | 
				
			||||||
err1:
 | 
					err1:
 | 
				
			||||||
| 
						 | 
					@ -286,6 +288,44 @@ int cxl_afu_select_best_mode(struct cxl_afu *afu)
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int cxl_adapter_context_get(struct cxl *adapter)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int rc;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						rc = atomic_inc_unless_negative(&adapter->contexts_num);
 | 
				
			||||||
 | 
						return rc >= 0 ? 0 : -EBUSY;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void cxl_adapter_context_put(struct cxl *adapter)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						atomic_dec_if_positive(&adapter->contexts_num);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int cxl_adapter_context_lock(struct cxl *adapter)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int rc;
 | 
				
			||||||
 | 
						/* no active contexts -> contexts_num == 0 */
 | 
				
			||||||
 | 
						rc = atomic_cmpxchg(&adapter->contexts_num, 0, -1);
 | 
				
			||||||
 | 
						return rc ? -EBUSY : 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void cxl_adapter_context_unlock(struct cxl *adapter)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int val = atomic_cmpxchg(&adapter->contexts_num, -1, 0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * contexts lock taken -> contexts_num == -1
 | 
				
			||||||
 | 
						 * If not true then show a warning and force reset the lock.
 | 
				
			||||||
 | 
						 * This will happen when context_unlock was requested without
 | 
				
			||||||
 | 
						 * doing a context_lock.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (val != -1) {
 | 
				
			||||||
 | 
							atomic_set(&adapter->contexts_num, 0);
 | 
				
			||||||
 | 
							WARN(1, "Adapter context unlocked with %d active contexts",
 | 
				
			||||||
 | 
							     val);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int __init init_cxl(void)
 | 
					static int __init init_cxl(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int rc = 0;
 | 
						int rc = 0;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1487,6 +1487,8 @@ static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev)
 | 
				
			||||||
	if ((rc = cxl_native_register_psl_err_irq(adapter)))
 | 
						if ((rc = cxl_native_register_psl_err_irq(adapter)))
 | 
				
			||||||
		goto err;
 | 
							goto err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Release the context lock as adapter is configured */
 | 
				
			||||||
 | 
						cxl_adapter_context_unlock(adapter);
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
err:
 | 
					err:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -75,12 +75,31 @@ static ssize_t reset_adapter_store(struct device *device,
 | 
				
			||||||
	int val;
 | 
						int val;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	rc = sscanf(buf, "%i", &val);
 | 
						rc = sscanf(buf, "%i", &val);
 | 
				
			||||||
	if ((rc != 1) || (val != 1))
 | 
						if ((rc != 1) || (val != 1 && val != -1))
 | 
				
			||||||
		return -EINVAL;
 | 
							return -EINVAL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if ((rc = cxl_ops->adapter_reset(adapter)))
 | 
						/*
 | 
				
			||||||
		return rc;
 | 
						 * See if we can lock the context mapping that's only allowed
 | 
				
			||||||
	return count;
 | 
						 * when there are no contexts attached to the adapter. Once
 | 
				
			||||||
 | 
						 * taken this will also prevent any context from getting activated.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (val == 1) {
 | 
				
			||||||
 | 
							rc =  cxl_adapter_context_lock(adapter);
 | 
				
			||||||
 | 
							if (rc)
 | 
				
			||||||
 | 
								goto out;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							rc = cxl_ops->adapter_reset(adapter);
 | 
				
			||||||
 | 
							/* In case reset failed release context lock */
 | 
				
			||||||
 | 
							if (rc)
 | 
				
			||||||
 | 
								cxl_adapter_context_unlock(adapter);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						} else if (val == -1) {
 | 
				
			||||||
 | 
							/* Perform a forced adapter reset */
 | 
				
			||||||
 | 
							rc = cxl_ops->adapter_reset(adapter);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					out:
 | 
				
			||||||
 | 
						return rc ? rc : count;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static ssize_t load_image_on_perst_show(struct device *device,
 | 
					static ssize_t load_image_on_perst_show(struct device *device,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue