mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	drm/i915/gt: Use intel_gt as the primary object for handling resets
Having taken the first step in encapsulating the functionality by moving the related files under gt/, the next step is to start encapsulating by passing around the relevant structs rather than the global drm_i915_private. In this step, we pass intel_gt to intel_reset.c

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190712192953.9187-1-chris@chris-wilson.co.uk
This commit is contained in:
		
							parent
							
								
									139ab81151
								
							
						
					
					
						commit
						cb823ed991
					
				
					 49 changed files with 908 additions and 919 deletions
				
			
		| 
						 | 
				
			
			@ -4271,12 +4271,13 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv)
 | 
			
		|||
		return;
 | 
			
		||||
 | 
			
		||||
	/* We have a modeset vs reset deadlock, defensively unbreak it. */
 | 
			
		||||
	set_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags);
 | 
			
		||||
	wake_up_all(&dev_priv->gpu_error.wait_queue);
 | 
			
		||||
	set_bit(I915_RESET_MODESET, &dev_priv->gt.reset.flags);
 | 
			
		||||
	smp_mb__after_atomic();
 | 
			
		||||
	wake_up_bit(&dev_priv->gt.reset.flags, I915_RESET_MODESET);
 | 
			
		||||
 | 
			
		||||
	if (atomic_read(&dev_priv->gpu_error.pending_fb_pin)) {
 | 
			
		||||
		DRM_DEBUG_KMS("Modeset potentially stuck, unbreaking through wedging\n");
 | 
			
		||||
		i915_gem_set_wedged(dev_priv);
 | 
			
		||||
		intel_gt_set_wedged(&dev_priv->gt);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
| 
						 | 
				
			
			@ -4322,7 +4323,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
 | 
			
		|||
	int ret;
 | 
			
		||||
 | 
			
		||||
	/* reset doesn't touch the display */
 | 
			
		||||
	if (!test_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags))
 | 
			
		||||
	if (!test_bit(I915_RESET_MODESET, &dev_priv->gt.reset.flags))
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	state = fetch_and_zero(&dev_priv->modeset_restore_state);
 | 
			
		||||
| 
						 | 
				
			
			@ -4362,7 +4363,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
 | 
			
		|||
	drm_modeset_acquire_fini(ctx);
 | 
			
		||||
	mutex_unlock(&dev->mode_config.mutex);
 | 
			
		||||
 | 
			
		||||
	clear_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags);
 | 
			
		||||
	clear_bit_unlock(I915_RESET_MODESET, &dev_priv->gt.reset.flags);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void icl_set_pipe_chicken(struct intel_crtc *crtc)
 | 
			
		||||
| 
						 | 
				
			
			@ -13873,18 +13874,21 @@ static void intel_atomic_commit_fence_wait(struct intel_atomic_state *intel_stat
 | 
			
		|||
	for (;;) {
 | 
			
		||||
		prepare_to_wait(&intel_state->commit_ready.wait,
 | 
			
		||||
				&wait_fence, TASK_UNINTERRUPTIBLE);
 | 
			
		||||
		prepare_to_wait(&dev_priv->gpu_error.wait_queue,
 | 
			
		||||
		prepare_to_wait(bit_waitqueue(&dev_priv->gt.reset.flags,
 | 
			
		||||
					      I915_RESET_MODESET),
 | 
			
		||||
				&wait_reset, TASK_UNINTERRUPTIBLE);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		if (i915_sw_fence_done(&intel_state->commit_ready)
 | 
			
		||||
		    || test_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags))
 | 
			
		||||
		if (i915_sw_fence_done(&intel_state->commit_ready) ||
 | 
			
		||||
		    test_bit(I915_RESET_MODESET, &dev_priv->gt.reset.flags))
 | 
			
		||||
			break;
 | 
			
		||||
 | 
			
		||||
		schedule();
 | 
			
		||||
	}
 | 
			
		||||
	finish_wait(&intel_state->commit_ready.wait, &wait_fence);
 | 
			
		||||
	finish_wait(&dev_priv->gpu_error.wait_queue, &wait_reset);
 | 
			
		||||
	finish_wait(bit_waitqueue(&dev_priv->gt.reset.flags,
 | 
			
		||||
				  I915_RESET_MODESET),
 | 
			
		||||
		    &wait_reset);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void intel_atomic_cleanup_work(struct work_struct *work)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2127,7 +2127,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 | 
			
		|||
	if (args->flags & I915_CONTEXT_CREATE_FLAGS_UNKNOWN)
 | 
			
		||||
		return -EINVAL;
 | 
			
		||||
 | 
			
		||||
	ret = i915_terminally_wedged(i915);
 | 
			
		||||
	ret = intel_gt_terminally_wedged(&i915->gt);
 | 
			
		||||
	if (ret)
 | 
			
		||||
		return ret;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2130,7 +2130,7 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
 | 
			
		|||
	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
 | 
			
		||||
	 * EIO if the GPU is already wedged.
 | 
			
		||||
	 */
 | 
			
		||||
	err = i915_terminally_wedged(eb->i915);
 | 
			
		||||
	err = intel_gt_terminally_wedged(ce->engine->gt);
 | 
			
		||||
	if (err)
 | 
			
		||||
		return err;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -7,6 +7,8 @@
 | 
			
		|||
#include <linux/mman.h>
 | 
			
		||||
#include <linux/sizes.h>
 | 
			
		||||
 | 
			
		||||
#include "gt/intel_gt.h"
 | 
			
		||||
 | 
			
		||||
#include "i915_drv.h"
 | 
			
		||||
#include "i915_gem_gtt.h"
 | 
			
		||||
#include "i915_gem_ioctls.h"
 | 
			
		||||
| 
						 | 
				
			
			@ -246,7 +248,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 | 
			
		|||
 | 
			
		||||
	wakeref = intel_runtime_pm_get(rpm);
 | 
			
		||||
 | 
			
		||||
	srcu = i915_reset_trylock(i915);
 | 
			
		||||
	srcu = intel_gt_reset_trylock(ggtt->vm.gt);
 | 
			
		||||
	if (srcu < 0) {
 | 
			
		||||
		ret = srcu;
 | 
			
		||||
		goto err_rpm;
 | 
			
		||||
| 
						 | 
				
			
			@ -326,7 +328,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 | 
			
		|||
err_unlock:
 | 
			
		||||
	mutex_unlock(&dev->struct_mutex);
 | 
			
		||||
err_reset:
 | 
			
		||||
	i915_reset_unlock(i915, srcu);
 | 
			
		||||
	intel_gt_reset_unlock(ggtt->vm.gt, srcu);
 | 
			
		||||
err_rpm:
 | 
			
		||||
	intel_runtime_pm_put(rpm, wakeref);
 | 
			
		||||
	i915_gem_object_unpin_pages(obj);
 | 
			
		||||
| 
						 | 
				
			
			@ -339,7 +341,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 | 
			
		|||
		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
 | 
			
		||||
		 * and so needs to be reported.
 | 
			
		||||
		 */
 | 
			
		||||
		if (!i915_terminally_wedged(i915))
 | 
			
		||||
		if (!intel_gt_is_wedged(ggtt->vm.gt))
 | 
			
		||||
			return VM_FAULT_SIGBUS;
 | 
			
		||||
		/* else: fall through */
 | 
			
		||||
	case -EAGAIN:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -5,6 +5,7 @@
 | 
			
		|||
 */
 | 
			
		||||
 | 
			
		||||
#include "gem/i915_gem_pm.h"
 | 
			
		||||
#include "gt/intel_gt.h"
 | 
			
		||||
#include "gt/intel_gt_pm.h"
 | 
			
		||||
 | 
			
		||||
#include "i915_drv.h"
 | 
			
		||||
| 
						 | 
				
			
			@ -106,18 +107,18 @@ static int pm_notifier(struct notifier_block *nb,
 | 
			
		|||
	return NOTIFY_OK;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
 | 
			
		||||
static bool switch_to_kernel_context_sync(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	bool result = !i915_terminally_wedged(i915);
 | 
			
		||||
	bool result = !intel_gt_is_wedged(gt);
 | 
			
		||||
 | 
			
		||||
	do {
 | 
			
		||||
		if (i915_gem_wait_for_idle(i915,
 | 
			
		||||
		if (i915_gem_wait_for_idle(gt->i915,
 | 
			
		||||
					   I915_WAIT_LOCKED |
 | 
			
		||||
					   I915_WAIT_FOR_IDLE_BOOST,
 | 
			
		||||
					   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
 | 
			
		||||
			/* XXX hide warning from gem_eio */
 | 
			
		||||
			if (i915_modparams.reset) {
 | 
			
		||||
				dev_err(i915->drm.dev,
 | 
			
		||||
				dev_err(gt->i915->drm.dev,
 | 
			
		||||
					"Failed to idle engines, declaring wedged!\n");
 | 
			
		||||
				GEM_TRACE_DUMP();
 | 
			
		||||
			}
 | 
			
		||||
| 
						 | 
				
			
			@ -126,18 +127,18 @@ static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
 | 
			
		|||
			 * Forcibly cancel outstanding work and leave
 | 
			
		||||
			 * the gpu quiet.
 | 
			
		||||
			 */
 | 
			
		||||
			i915_gem_set_wedged(i915);
 | 
			
		||||
			intel_gt_set_wedged(gt);
 | 
			
		||||
			result = false;
 | 
			
		||||
		}
 | 
			
		||||
	} while (i915_retire_requests(i915) && result);
 | 
			
		||||
	} while (i915_retire_requests(gt->i915) && result);
 | 
			
		||||
 | 
			
		||||
	GEM_BUG_ON(i915->gt.awake);
 | 
			
		||||
	GEM_BUG_ON(gt->awake);
 | 
			
		||||
	return result;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool i915_gem_load_power_context(struct drm_i915_private *i915)
 | 
			
		||||
{
 | 
			
		||||
	return switch_to_kernel_context_sync(i915);
 | 
			
		||||
	return switch_to_kernel_context_sync(&i915->gt);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void i915_gem_suspend(struct drm_i915_private *i915)
 | 
			
		||||
| 
						 | 
				
			
			@ -158,7 +159,7 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 | 
			
		|||
	 * state. Fortunately, the kernel_context is disposable and we do
 | 
			
		||||
	 * not rely on its state.
 | 
			
		||||
	 */
 | 
			
		||||
	switch_to_kernel_context_sync(i915);
 | 
			
		||||
	switch_to_kernel_context_sync(&i915->gt);
 | 
			
		||||
 | 
			
		||||
	mutex_unlock(&i915->drm.struct_mutex);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -169,7 +170,7 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 | 
			
		|||
	GEM_BUG_ON(i915->gt.awake);
 | 
			
		||||
	flush_work(&i915->gem.idle_work);
 | 
			
		||||
 | 
			
		||||
	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
 | 
			
		||||
	cancel_delayed_work_sync(&i915->gt.hangcheck.work);
 | 
			
		||||
 | 
			
		||||
	i915_gem_drain_freed_objects(i915);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -277,10 +278,10 @@ void i915_gem_resume(struct drm_i915_private *i915)
 | 
			
		|||
	return;
 | 
			
		||||
 | 
			
		||||
err_wedged:
 | 
			
		||||
	if (!i915_reset_failed(i915)) {
 | 
			
		||||
	if (!intel_gt_is_wedged(&i915->gt)) {
 | 
			
		||||
		dev_err(i915->drm.dev,
 | 
			
		||||
			"Failed to re-initialize GPU, declaring it wedged!\n");
 | 
			
		||||
		i915_gem_set_wedged(i915);
 | 
			
		||||
		intel_gt_set_wedged(&i915->gt);
 | 
			
		||||
	}
 | 
			
		||||
	goto out_unlock;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -41,7 +41,7 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
 | 
			
		|||
	long ret;
 | 
			
		||||
 | 
			
		||||
	/* ABI: return -EIO if already wedged */
 | 
			
		||||
	ret = i915_terminally_wedged(to_i915(dev));
 | 
			
		||||
	ret = intel_gt_terminally_wedged(&to_i915(dev)->gt);
 | 
			
		||||
	if (ret)
 | 
			
		||||
		return ret;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1753,7 +1753,7 @@ int i915_gem_huge_page_mock_selftests(void)
 | 
			
		|||
	return err;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
 | 
			
		||||
int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
 | 
			
		||||
{
 | 
			
		||||
	static const struct i915_subtest tests[] = {
 | 
			
		||||
		SUBTEST(igt_shrink_thp),
 | 
			
		||||
| 
						 | 
				
			
			@ -1768,22 +1768,22 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
 | 
			
		|||
	intel_wakeref_t wakeref;
 | 
			
		||||
	int err;
 | 
			
		||||
 | 
			
		||||
	if (!HAS_PPGTT(dev_priv)) {
 | 
			
		||||
	if (!HAS_PPGTT(i915)) {
 | 
			
		||||
		pr_info("PPGTT not supported, skipping live-selftests\n");
 | 
			
		||||
		return 0;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (i915_terminally_wedged(dev_priv))
 | 
			
		||||
	if (intel_gt_is_wedged(&i915->gt))
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
	file = mock_file(dev_priv);
 | 
			
		||||
	file = mock_file(i915);
 | 
			
		||||
	if (IS_ERR(file))
 | 
			
		||||
		return PTR_ERR(file);
 | 
			
		||||
 | 
			
		||||
	mutex_lock(&dev_priv->drm.struct_mutex);
 | 
			
		||||
	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 | 
			
		||||
	mutex_lock(&i915->drm.struct_mutex);
 | 
			
		||||
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 | 
			
		||||
 | 
			
		||||
	ctx = live_context(dev_priv, file);
 | 
			
		||||
	ctx = live_context(i915, file);
 | 
			
		||||
	if (IS_ERR(ctx)) {
 | 
			
		||||
		err = PTR_ERR(ctx);
 | 
			
		||||
		goto out_unlock;
 | 
			
		||||
| 
						 | 
				
			
			@ -1795,10 +1795,10 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
 | 
			
		|||
	err = i915_subtests(tests, ctx);
 | 
			
		||||
 | 
			
		||||
out_unlock:
 | 
			
		||||
	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 | 
			
		||||
	mutex_unlock(&dev_priv->drm.struct_mutex);
 | 
			
		||||
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 | 
			
		||||
	mutex_unlock(&i915->drm.struct_mutex);
 | 
			
		||||
 | 
			
		||||
	mock_file_free(dev_priv, file);
 | 
			
		||||
	mock_file_free(i915, file);
 | 
			
		||||
 | 
			
		||||
	return err;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -5,6 +5,8 @@
 | 
			
		|||
 | 
			
		||||
#include "i915_selftest.h"
 | 
			
		||||
 | 
			
		||||
#include "gt/intel_gt.h"
 | 
			
		||||
 | 
			
		||||
#include "selftests/igt_flush_test.h"
 | 
			
		||||
#include "selftests/mock_drm.h"
 | 
			
		||||
#include "mock_context.h"
 | 
			
		||||
| 
						 | 
				
			
			@ -101,7 +103,7 @@ int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915)
 | 
			
		|||
		SUBTEST(igt_client_fill),
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
	if (i915_terminally_wedged(i915))
 | 
			
		||||
	if (intel_gt_is_wedged(&i915->gt))
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
	if (!HAS_ENGINE(i915, BCS0))
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -6,6 +6,8 @@
 | 
			
		|||
 | 
			
		||||
#include <linux/prime_numbers.h>
 | 
			
		||||
 | 
			
		||||
#include "gt/intel_gt.h"
 | 
			
		||||
 | 
			
		||||
#include "i915_selftest.h"
 | 
			
		||||
#include "selftests/i915_random.h"
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -242,12 +244,12 @@ static bool always_valid(struct drm_i915_private *i915)
 | 
			
		|||
 | 
			
		||||
static bool needs_fence_registers(struct drm_i915_private *i915)
 | 
			
		||||
{
 | 
			
		||||
	return !i915_terminally_wedged(i915);
 | 
			
		||||
	return !intel_gt_is_wedged(&i915->gt);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool needs_mi_store_dword(struct drm_i915_private *i915)
 | 
			
		||||
{
 | 
			
		||||
	if (i915_terminally_wedged(i915))
 | 
			
		||||
	if (intel_gt_is_wedged(&i915->gt))
 | 
			
		||||
		return false;
 | 
			
		||||
 | 
			
		||||
	if (!HAS_ENGINE(i915, RCS0))
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -7,6 +7,7 @@
 | 
			
		|||
#include <linux/prime_numbers.h>
 | 
			
		||||
 | 
			
		||||
#include "gem/i915_gem_pm.h"
 | 
			
		||||
#include "gt/intel_gt.h"
 | 
			
		||||
#include "gt/intel_reset.h"
 | 
			
		||||
#include "i915_selftest.h"
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -83,7 +84,7 @@ static int live_nop_switch(void *arg)
 | 
			
		|||
		}
 | 
			
		||||
		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 | 
			
		||||
			pr_err("Failed to populated %d contexts\n", nctx);
 | 
			
		||||
			i915_gem_set_wedged(i915);
 | 
			
		||||
			intel_gt_set_wedged(&i915->gt);
 | 
			
		||||
			err = -EIO;
 | 
			
		||||
			goto out_unlock;
 | 
			
		||||
		}
 | 
			
		||||
| 
						 | 
				
			
			@ -127,7 +128,7 @@ static int live_nop_switch(void *arg)
 | 
			
		|||
			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 | 
			
		||||
				pr_err("Switching between %ld contexts timed out\n",
 | 
			
		||||
				       prime);
 | 
			
		||||
				i915_gem_set_wedged(i915);
 | 
			
		||||
				intel_gt_set_wedged(&i915->gt);
 | 
			
		||||
				break;
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -956,7 +957,7 @@ __sseu_finish(struct drm_i915_private *i915,
 | 
			
		|||
	int ret = 0;
 | 
			
		||||
 | 
			
		||||
	if (flags & TEST_RESET) {
 | 
			
		||||
		ret = i915_reset_engine(ce->engine, "sseu");
 | 
			
		||||
		ret = intel_engine_reset(ce->engine, "sseu");
 | 
			
		||||
		if (ret)
 | 
			
		||||
			goto out;
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -1059,7 +1060,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
 | 
			
		|||
		return PTR_ERR(file);
 | 
			
		||||
 | 
			
		||||
	if (flags & TEST_RESET)
 | 
			
		||||
		igt_global_reset_lock(i915);
 | 
			
		||||
		igt_global_reset_lock(&i915->gt);
 | 
			
		||||
 | 
			
		||||
	mutex_lock(&i915->drm.struct_mutex);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1120,7 +1121,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
 | 
			
		|||
	mutex_unlock(&i915->drm.struct_mutex);
 | 
			
		||||
 | 
			
		||||
	if (flags & TEST_RESET)
 | 
			
		||||
		igt_global_reset_unlock(i915);
 | 
			
		||||
		igt_global_reset_unlock(&i915->gt);
 | 
			
		||||
 | 
			
		||||
	mock_file_free(i915, file);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1722,7 +1723,7 @@ int i915_gem_context_mock_selftests(void)
 | 
			
		|||
	return err;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
 | 
			
		||||
int i915_gem_context_live_selftests(struct drm_i915_private *i915)
 | 
			
		||||
{
 | 
			
		||||
	static const struct i915_subtest tests[] = {
 | 
			
		||||
		SUBTEST(live_nop_switch),
 | 
			
		||||
| 
						 | 
				
			
			@ -1733,8 +1734,8 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
 | 
			
		|||
		SUBTEST(igt_vm_isolation),
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
	if (i915_terminally_wedged(dev_priv))
 | 
			
		||||
	if (intel_gt_is_wedged(&i915->gt))
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
	return i915_live_subtests(tests, dev_priv);
 | 
			
		||||
	return i915_live_subtests(tests, i915);
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -478,7 +478,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
 | 
			
		|||
 | 
			
		||||
	/* Now fill with busy dead objects that we expect to reap */
 | 
			
		||||
	for (loop = 0; loop < 3; loop++) {
 | 
			
		||||
		if (i915_terminally_wedged(i915))
 | 
			
		||||
		if (intel_gt_is_wedged(&i915->gt))
 | 
			
		||||
			break;
 | 
			
		||||
 | 
			
		||||
		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3,6 +3,8 @@
 | 
			
		|||
 * Copyright © 2019 Intel Corporation
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include "gt/intel_gt.h"
 | 
			
		||||
 | 
			
		||||
#include "i915_selftest.h"
 | 
			
		||||
 | 
			
		||||
#include "selftests/igt_flush_test.h"
 | 
			
		||||
| 
						 | 
				
			
			@ -95,7 +97,7 @@ int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
 | 
			
		|||
		SUBTEST(igt_fill_blt),
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
	if (i915_terminally_wedged(i915))
 | 
			
		||||
	if (intel_gt_is_wedged(&i915->gt))
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
	if (!HAS_ENGINE(i915, BCS0))
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -411,8 +411,8 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
 | 
			
		|||
	return cs;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline void intel_engine_reset(struct intel_engine_cs *engine,
 | 
			
		||||
				      bool stalled)
 | 
			
		||||
static inline void __intel_engine_reset(struct intel_engine_cs *engine,
 | 
			
		||||
					bool stalled)
 | 
			
		||||
{
 | 
			
		||||
	if (engine->reset.reset)
 | 
			
		||||
		engine->reset.reset(engine, stalled);
 | 
			
		||||
| 
						 | 
				
			
			@ -420,9 +420,9 @@ static inline void intel_engine_reset(struct intel_engine_cs *engine,
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
bool intel_engine_is_idle(struct intel_engine_cs *engine);
 | 
			
		||||
bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 | 
			
		||||
bool intel_engines_are_idle(struct intel_gt *gt);
 | 
			
		||||
 | 
			
		||||
void intel_engines_reset_default_submission(struct drm_i915_private *i915);
 | 
			
		||||
void intel_engines_reset_default_submission(struct intel_gt *gt);
 | 
			
		||||
unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
 | 
			
		||||
 | 
			
		||||
bool intel_engine_can_store_dword(struct intel_engine_cs *engine);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1138,7 +1138,7 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
 | 
			
		|||
bool intel_engine_is_idle(struct intel_engine_cs *engine)
 | 
			
		||||
{
 | 
			
		||||
	/* More white lies, if wedged, hw state is inconsistent */
 | 
			
		||||
	if (i915_reset_failed(engine->i915))
 | 
			
		||||
	if (intel_gt_is_wedged(engine->gt))
 | 
			
		||||
		return true;
 | 
			
		||||
 | 
			
		||||
	if (!intel_engine_pm_is_awake(engine))
 | 
			
		||||
| 
						 | 
				
			
			@ -1174,7 +1174,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 | 
			
		|||
	return ring_is_idle(engine);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool intel_engines_are_idle(struct drm_i915_private *i915)
 | 
			
		||||
bool intel_engines_are_idle(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	struct intel_engine_cs *engine;
 | 
			
		||||
	enum intel_engine_id id;
 | 
			
		||||
| 
						 | 
				
			
			@ -1183,14 +1183,14 @@ bool intel_engines_are_idle(struct drm_i915_private *i915)
 | 
			
		|||
	 * If the driver is wedged, HW state may be very inconsistent and
 | 
			
		||||
	 * report that it is still busy, even though we have stopped using it.
 | 
			
		||||
	 */
 | 
			
		||||
	if (i915_reset_failed(i915))
 | 
			
		||||
	if (intel_gt_is_wedged(gt))
 | 
			
		||||
		return true;
 | 
			
		||||
 | 
			
		||||
	/* Already parked (and passed an idleness test); must still be idle */
 | 
			
		||||
	if (!READ_ONCE(i915->gt.awake))
 | 
			
		||||
	if (!READ_ONCE(gt->awake))
 | 
			
		||||
		return true;
 | 
			
		||||
 | 
			
		||||
	for_each_engine(engine, i915, id) {
 | 
			
		||||
	for_each_engine(engine, gt->i915, id) {
 | 
			
		||||
		if (!intel_engine_is_idle(engine))
 | 
			
		||||
			return false;
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -1198,12 +1198,12 @@ bool intel_engines_are_idle(struct drm_i915_private *i915)
 | 
			
		|||
	return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void intel_engines_reset_default_submission(struct drm_i915_private *i915)
 | 
			
		||||
void intel_engines_reset_default_submission(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	struct intel_engine_cs *engine;
 | 
			
		||||
	enum intel_engine_id id;
 | 
			
		||||
 | 
			
		||||
	for_each_engine(engine, i915, id)
 | 
			
		||||
	for_each_engine(engine, gt->i915, id)
 | 
			
		||||
		engine->set_default_submission(engine);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1480,7 +1480,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 | 
			
		|||
		va_end(ap);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (i915_reset_failed(engine->i915))
 | 
			
		||||
	if (intel_gt_is_wedged(engine->gt))
 | 
			
		||||
		drm_printf(m, "*** WEDGED ***\n");
 | 
			
		||||
 | 
			
		||||
	drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -8,6 +8,7 @@
 | 
			
		|||
 | 
			
		||||
#include "intel_engine.h"
 | 
			
		||||
#include "intel_engine_pm.h"
 | 
			
		||||
#include "intel_gt.h"
 | 
			
		||||
#include "intel_gt_pm.h"
 | 
			
		||||
 | 
			
		||||
static int __engine_unpark(struct intel_wakeref *wf)
 | 
			
		||||
| 
						 | 
				
			
			@ -66,7 +67,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 | 
			
		|||
		return true;
 | 
			
		||||
 | 
			
		||||
	/* GPU is pointing to the void, as good as in the kernel context. */
 | 
			
		||||
	if (i915_reset_failed(engine->i915))
 | 
			
		||||
	if (intel_gt_is_wedged(engine->gt))
 | 
			
		||||
		return true;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -19,6 +19,8 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
 | 
			
		|||
 | 
			
		||||
	spin_lock_init(&gt->closed_lock);
 | 
			
		||||
 | 
			
		||||
	intel_gt_init_hangcheck(gt);
 | 
			
		||||
	intel_gt_init_reset(gt);
 | 
			
		||||
	intel_gt_pm_init_early(gt);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -241,3 +243,8 @@ void intel_gt_fini_scratch(struct intel_gt *gt)
 | 
			
		|||
{
 | 
			
		||||
	i915_vma_unpin_and_release(&gt->scratch, 0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void intel_gt_cleanup_early(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	intel_gt_fini_reset(gt);
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -8,12 +8,15 @@
 | 
			
		|||
 | 
			
		||||
#include "intel_engine_types.h"
 | 
			
		||||
#include "intel_gt_types.h"
 | 
			
		||||
#include "intel_reset.h"
 | 
			
		||||
 | 
			
		||||
struct drm_i915_private;
 | 
			
		||||
 | 
			
		||||
void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915);
 | 
			
		||||
void intel_gt_init_hw(struct drm_i915_private *i915);
 | 
			
		||||
 | 
			
		||||
void intel_gt_cleanup_early(struct intel_gt *gt);
 | 
			
		||||
 | 
			
		||||
void intel_gt_check_and_clear_faults(struct intel_gt *gt);
 | 
			
		||||
void intel_gt_clear_error_registers(struct intel_gt *gt,
 | 
			
		||||
				    intel_engine_mask_t engine_mask);
 | 
			
		||||
| 
						 | 
				
			
			@ -21,6 +24,8 @@ void intel_gt_clear_error_registers(struct intel_gt *gt,
 | 
			
		|||
void intel_gt_flush_ggtt_writes(struct intel_gt *gt);
 | 
			
		||||
void intel_gt_chipset_flush(struct intel_gt *gt);
 | 
			
		||||
 | 
			
		||||
void intel_gt_init_hangcheck(struct intel_gt *gt);
 | 
			
		||||
 | 
			
		||||
int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size);
 | 
			
		||||
void intel_gt_fini_scratch(struct intel_gt *gt);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -30,4 +35,11 @@ static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt,
 | 
			
		|||
	return i915_ggtt_offset(gt->scratch) + field;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline bool intel_gt_is_wedged(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	return __intel_reset_failed(&gt->reset);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void intel_gt_queue_hangcheck(struct intel_gt *gt);
 | 
			
		||||
 | 
			
		||||
#endif /* __INTEL_GT_H__ */
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -5,7 +5,9 @@
 | 
			
		|||
 */
 | 
			
		||||
 | 
			
		||||
#include "i915_drv.h"
 | 
			
		||||
#include "i915_params.h"
 | 
			
		||||
#include "intel_engine_pm.h"
 | 
			
		||||
#include "intel_gt.h"
 | 
			
		||||
#include "intel_gt_pm.h"
 | 
			
		||||
#include "intel_pm.h"
 | 
			
		||||
#include "intel_wakeref.h"
 | 
			
		||||
| 
						 | 
				
			
			@ -17,8 +19,8 @@ static void pm_notify(struct drm_i915_private *i915, int state)
 | 
			
		|||
 | 
			
		||||
static int intel_gt_unpark(struct intel_wakeref *wf)
 | 
			
		||||
{
 | 
			
		||||
	struct drm_i915_private *i915 =
 | 
			
		||||
		container_of(wf, typeof(*i915), gt.wakeref);
 | 
			
		||||
	struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref);
 | 
			
		||||
	struct drm_i915_private *i915 = gt->i915;
 | 
			
		||||
 | 
			
		||||
	GEM_TRACE("\n");
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -33,8 +35,8 @@ static int intel_gt_unpark(struct intel_wakeref *wf)
 | 
			
		|||
	 * Work around it by grabbing a GT IRQ power domain whilst there is any
 | 
			
		||||
	 * GT activity, preventing any DC state transitions.
 | 
			
		||||
	 */
 | 
			
		||||
	i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
 | 
			
		||||
	GEM_BUG_ON(!i915->gt.awake);
 | 
			
		||||
	gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
 | 
			
		||||
	GEM_BUG_ON(!gt->awake);
 | 
			
		||||
 | 
			
		||||
	intel_enable_gt_powersave(i915);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -44,7 +46,7 @@ static int intel_gt_unpark(struct intel_wakeref *wf)
 | 
			
		|||
 | 
			
		||||
	i915_pmu_gt_unparked(i915);
 | 
			
		||||
 | 
			
		||||
	i915_queue_hangcheck(i915);
 | 
			
		||||
	intel_gt_queue_hangcheck(gt);
 | 
			
		||||
 | 
			
		||||
	pm_notify(i915, INTEL_GT_UNPARK);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -91,12 +93,12 @@ void intel_gt_pm_init_early(struct intel_gt *gt)
 | 
			
		|||
	BLOCKING_INIT_NOTIFIER_HEAD(&gt->pm_notifications);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool reset_engines(struct drm_i915_private *i915)
 | 
			
		||||
static bool reset_engines(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	if (INTEL_INFO(i915)->gpu_reset_clobbers_display)
 | 
			
		||||
	if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
 | 
			
		||||
		return false;
 | 
			
		||||
 | 
			
		||||
	return intel_gpu_reset(i915, ALL_ENGINES) == 0;
 | 
			
		||||
	return __intel_gt_reset(gt, ALL_ENGINES) == 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
| 
						 | 
				
			
			@ -116,11 +118,11 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force)
 | 
			
		|||
 | 
			
		||||
	GEM_TRACE("\n");
 | 
			
		||||
 | 
			
		||||
	if (!reset_engines(gt->i915) && !force)
 | 
			
		||||
	if (!reset_engines(gt) && !force)
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	for_each_engine(engine, gt->i915, id)
 | 
			
		||||
		intel_engine_reset(engine, false);
 | 
			
		||||
		__intel_engine_reset(engine, false);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int intel_gt_resume(struct intel_gt *gt)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -14,12 +14,21 @@
 | 
			
		|||
#include <linux/types.h>
 | 
			
		||||
 | 
			
		||||
#include "i915_vma.h"
 | 
			
		||||
#include "intel_reset_types.h"
 | 
			
		||||
#include "intel_wakeref.h"
 | 
			
		||||
 | 
			
		||||
struct drm_i915_private;
 | 
			
		||||
struct i915_ggtt;
 | 
			
		||||
struct intel_uncore;
 | 
			
		||||
 | 
			
		||||
struct intel_hangcheck {
 | 
			
		||||
	/* For hangcheck timer */
 | 
			
		||||
#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
 | 
			
		||||
#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)
 | 
			
		||||
 | 
			
		||||
	struct delayed_work work;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct intel_gt {
 | 
			
		||||
	struct drm_i915_private *i915;
 | 
			
		||||
	struct intel_uncore *uncore;
 | 
			
		||||
| 
						 | 
				
			
			@ -41,6 +50,9 @@ struct intel_gt {
 | 
			
		|||
	struct list_head closed_vma;
 | 
			
		||||
	spinlock_t closed_lock; /* guards the list of closed_vma */
 | 
			
		||||
 | 
			
		||||
	struct intel_hangcheck hangcheck;
 | 
			
		||||
	struct intel_reset reset;
 | 
			
		||||
 | 
			
		||||
	/**
 | 
			
		||||
	 * Is the GPU currently considered idle, or busy executing
 | 
			
		||||
	 * userspace requests? Whilst idle, we allow runtime power
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -22,8 +22,10 @@
 | 
			
		|||
 *
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include "intel_reset.h"
 | 
			
		||||
#include "i915_drv.h"
 | 
			
		||||
#include "intel_engine.h"
 | 
			
		||||
#include "intel_gt.h"
 | 
			
		||||
#include "intel_reset.h"
 | 
			
		||||
 | 
			
		||||
struct hangcheck {
 | 
			
		||||
	u64 acthd;
 | 
			
		||||
| 
						 | 
				
			
			@ -100,7 +102,6 @@ head_stuck(struct intel_engine_cs *engine, u64 acthd)
 | 
			
		|||
static enum intel_engine_hangcheck_action
 | 
			
		||||
engine_stuck(struct intel_engine_cs *engine, u64 acthd)
 | 
			
		||||
{
 | 
			
		||||
	struct drm_i915_private *dev_priv = engine->i915;
 | 
			
		||||
	enum intel_engine_hangcheck_action ha;
 | 
			
		||||
	u32 tmp;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -108,7 +109,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
 | 
			
		|||
	if (ha != ENGINE_DEAD)
 | 
			
		||||
		return ha;
 | 
			
		||||
 | 
			
		||||
	if (IS_GEN(dev_priv, 2))
 | 
			
		||||
	if (IS_GEN(engine->i915, 2))
 | 
			
		||||
		return ENGINE_DEAD;
 | 
			
		||||
 | 
			
		||||
	/* Is the chip hanging on a WAIT_FOR_EVENT?
 | 
			
		||||
| 
						 | 
				
			
			@ -118,8 +119,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
 | 
			
		|||
	 */
 | 
			
		||||
	tmp = ENGINE_READ(engine, RING_CTL);
 | 
			
		||||
	if (tmp & RING_WAIT) {
 | 
			
		||||
		i915_handle_error(dev_priv, engine->mask, 0,
 | 
			
		||||
				  "stuck wait on %s", engine->name);
 | 
			
		||||
		intel_gt_handle_error(engine->gt, engine->mask, 0,
 | 
			
		||||
				      "stuck wait on %s", engine->name);
 | 
			
		||||
		ENGINE_WRITE(engine, RING_CTL, tmp);
 | 
			
		||||
		return ENGINE_WAIT_KICK;
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -219,7 +220,7 @@ static void hangcheck_accumulate_sample(struct intel_engine_cs *engine,
 | 
			
		|||
				 I915_ENGINE_WEDGED_TIMEOUT);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void hangcheck_declare_hang(struct drm_i915_private *i915,
 | 
			
		||||
static void hangcheck_declare_hang(struct intel_gt *gt,
 | 
			
		||||
				   intel_engine_mask_t hung,
 | 
			
		||||
				   intel_engine_mask_t stuck)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -235,12 +236,12 @@ static void hangcheck_declare_hang(struct drm_i915_private *i915,
 | 
			
		|||
		hung &= ~stuck;
 | 
			
		||||
	len = scnprintf(msg, sizeof(msg),
 | 
			
		||||
			"%s on ", stuck == hung ? "no progress" : "hang");
 | 
			
		||||
	for_each_engine_masked(engine, i915, hung, tmp)
 | 
			
		||||
	for_each_engine_masked(engine, gt->i915, hung, tmp)
 | 
			
		||||
		len += scnprintf(msg + len, sizeof(msg) - len,
 | 
			
		||||
				 "%s, ", engine->name);
 | 
			
		||||
	msg[len-2] = '\0';
 | 
			
		||||
 | 
			
		||||
	return i915_handle_error(i915, hung, I915_ERROR_CAPTURE, "%s", msg);
 | 
			
		||||
	return intel_gt_handle_error(gt, hung, I915_ERROR_CAPTURE, "%s", msg);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
| 
						 | 
				
			
			@ -251,11 +252,10 @@ static void hangcheck_declare_hang(struct drm_i915_private *i915,
 | 
			
		|||
 * we kick the ring. If we see no progress on three subsequent calls
 | 
			
		||||
 * we assume chip is wedged and try to fix it by resetting the chip.
 | 
			
		||||
 */
 | 
			
		||||
static void i915_hangcheck_elapsed(struct work_struct *work)
 | 
			
		||||
static void hangcheck_elapsed(struct work_struct *work)
 | 
			
		||||
{
 | 
			
		||||
	struct drm_i915_private *dev_priv =
 | 
			
		||||
		container_of(work, typeof(*dev_priv),
 | 
			
		||||
			     gpu_error.hangcheck_work.work);
 | 
			
		||||
	struct intel_gt *gt =
 | 
			
		||||
		container_of(work, typeof(*gt), hangcheck.work.work);
 | 
			
		||||
	intel_engine_mask_t hung = 0, stuck = 0, wedged = 0;
 | 
			
		||||
	struct intel_engine_cs *engine;
 | 
			
		||||
	enum intel_engine_id id;
 | 
			
		||||
| 
						 | 
				
			
			@ -264,13 +264,13 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 | 
			
		|||
	if (!i915_modparams.enable_hangcheck)
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	if (!READ_ONCE(dev_priv->gt.awake))
 | 
			
		||||
	if (!READ_ONCE(gt->awake))
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	if (i915_terminally_wedged(dev_priv))
 | 
			
		||||
	if (intel_gt_is_wedged(gt))
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	wakeref = intel_runtime_pm_get_if_in_use(&dev_priv->runtime_pm);
 | 
			
		||||
	wakeref = intel_runtime_pm_get_if_in_use(&gt->i915->runtime_pm);
 | 
			
		||||
	if (!wakeref)
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -278,9 +278,9 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 | 
			
		|||
	 * periodically arm the mmio checker to see if we are triggering
 | 
			
		||||
	 * any invalid access.
 | 
			
		||||
	 */
 | 
			
		||||
	intel_uncore_arm_unclaimed_mmio_detection(&dev_priv->uncore);
 | 
			
		||||
	intel_uncore_arm_unclaimed_mmio_detection(gt->uncore);
 | 
			
		||||
 | 
			
		||||
	for_each_engine(engine, dev_priv, id) {
 | 
			
		||||
	for_each_engine(engine, gt->i915, id) {
 | 
			
		||||
		struct hangcheck hc;
 | 
			
		||||
 | 
			
		||||
		intel_engine_signal_breadcrumbs(engine);
 | 
			
		||||
| 
						 | 
				
			
			@ -302,7 +302,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 | 
			
		|||
	if (GEM_SHOW_DEBUG() && (hung | stuck)) {
 | 
			
		||||
		struct drm_printer p = drm_debug_printer("hangcheck");
 | 
			
		||||
 | 
			
		||||
		for_each_engine(engine, dev_priv, id) {
 | 
			
		||||
		for_each_engine(engine, gt->i915, id) {
 | 
			
		||||
			if (intel_engine_is_idle(engine))
 | 
			
		||||
				continue;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -311,20 +311,37 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 | 
			
		|||
	}
 | 
			
		||||
 | 
			
		||||
	if (wedged) {
 | 
			
		||||
		dev_err(dev_priv->drm.dev,
 | 
			
		||||
		dev_err(gt->i915->drm.dev,
 | 
			
		||||
			"GPU recovery timed out,"
 | 
			
		||||
			" cancelling all in-flight rendering.\n");
 | 
			
		||||
		GEM_TRACE_DUMP();
 | 
			
		||||
		i915_gem_set_wedged(dev_priv);
 | 
			
		||||
		intel_gt_set_wedged(gt);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (hung)
 | 
			
		||||
		hangcheck_declare_hang(dev_priv, hung, stuck);
 | 
			
		||||
		hangcheck_declare_hang(gt, hung, stuck);
 | 
			
		||||
 | 
			
		||||
	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 | 
			
		||||
	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
 | 
			
		||||
 | 
			
		||||
	/* Reset timer in case GPU hangs without another request being added */
 | 
			
		||||
	i915_queue_hangcheck(dev_priv);
 | 
			
		||||
	intel_gt_queue_hangcheck(gt);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void intel_gt_queue_hangcheck(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	unsigned long delay;
 | 
			
		||||
 | 
			
		||||
	if (unlikely(!i915_modparams.enable_hangcheck))
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Don't continually defer the hangcheck so that it is always run at
 | 
			
		||||
	 * least once after work has been scheduled on any ring. Otherwise,
 | 
			
		||||
	 * we will ignore a hung ring if a second ring is kept busy.
 | 
			
		||||
	 */
 | 
			
		||||
 | 
			
		||||
	delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES);
 | 
			
		||||
	queue_delayed_work(system_long_wq, &gt->hangcheck.work, delay);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
 | 
			
		||||
| 
						 | 
				
			
			@ -333,10 +350,9 @@ void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
 | 
			
		|||
	engine->hangcheck.action_timestamp = jiffies;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void intel_hangcheck_init(struct drm_i915_private *i915)
 | 
			
		||||
void intel_gt_init_hangcheck(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	INIT_DELAYED_WORK(&i915->gpu_error.hangcheck_work,
 | 
			
		||||
			  i915_hangcheck_elapsed);
 | 
			
		||||
	INIT_DELAYED_WORK(&gt->hangcheck.work, hangcheck_elapsed);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2293,7 +2293,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 | 
			
		|||
	 * and have to at least restore the RING register in the context
 | 
			
		||||
	 * image back to the expected values to skip over the guilty request.
 | 
			
		||||
	 */
 | 
			
		||||
	i915_reset_request(rq, stalled);
 | 
			
		||||
	__i915_request_reset(rq, stalled);
 | 
			
		||||
	if (!stalled)
 | 
			
		||||
		goto out_replay;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -114,7 +114,7 @@ static void context_mark_innocent(struct i915_gem_context *ctx)
 | 
			
		|||
	atomic_inc(&ctx->active_count);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void i915_reset_request(struct i915_request *rq, bool guilty)
 | 
			
		||||
void __i915_request_reset(struct i915_request *rq, bool guilty)
 | 
			
		||||
{
 | 
			
		||||
	GEM_TRACE("%s rq=%llx:%lld, guilty? %s\n",
 | 
			
		||||
		  rq->engine->name,
 | 
			
		||||
| 
						 | 
				
			
			@ -164,16 +164,15 @@ static void gen3_stop_engine(struct intel_engine_cs *engine)
 | 
			
		|||
			  intel_uncore_read_fw(uncore, RING_HEAD(base)));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void i915_stop_engines(struct drm_i915_private *i915,
 | 
			
		||||
			      intel_engine_mask_t engine_mask)
 | 
			
		||||
static void stop_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask)
 | 
			
		||||
{
 | 
			
		||||
	struct intel_engine_cs *engine;
 | 
			
		||||
	intel_engine_mask_t tmp;
 | 
			
		||||
 | 
			
		||||
	if (INTEL_GEN(i915) < 3)
 | 
			
		||||
	if (INTEL_GEN(gt->i915) < 3)
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	for_each_engine_masked(engine, i915, engine_mask, tmp)
 | 
			
		||||
	for_each_engine_masked(engine, gt->i915, engine_mask, tmp)
 | 
			
		||||
		gen3_stop_engine(engine);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -185,11 +184,11 @@ static bool i915_in_reset(struct pci_dev *pdev)
 | 
			
		|||
	return gdrst & GRDOM_RESET_STATUS;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int i915_do_reset(struct drm_i915_private *i915,
 | 
			
		||||
static int i915_do_reset(struct intel_gt *gt,
 | 
			
		||||
			 intel_engine_mask_t engine_mask,
 | 
			
		||||
			 unsigned int retry)
 | 
			
		||||
{
 | 
			
		||||
	struct pci_dev *pdev = i915->drm.pdev;
 | 
			
		||||
	struct pci_dev *pdev = gt->i915->drm.pdev;
 | 
			
		||||
	int err;
 | 
			
		||||
 | 
			
		||||
	/* Assert reset for at least 20 usec, and wait for acknowledgement. */
 | 
			
		||||
| 
						 | 
				
			
			@ -214,22 +213,22 @@ static bool g4x_reset_complete(struct pci_dev *pdev)
 | 
			
		|||
	return (gdrst & GRDOM_RESET_ENABLE) == 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int g33_do_reset(struct drm_i915_private *i915,
 | 
			
		||||
static int g33_do_reset(struct intel_gt *gt,
 | 
			
		||||
			intel_engine_mask_t engine_mask,
 | 
			
		||||
			unsigned int retry)
 | 
			
		||||
{
 | 
			
		||||
	struct pci_dev *pdev = i915->drm.pdev;
 | 
			
		||||
	struct pci_dev *pdev = gt->i915->drm.pdev;
 | 
			
		||||
 | 
			
		||||
	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
 | 
			
		||||
	return wait_for_atomic(g4x_reset_complete(pdev), 50);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int g4x_do_reset(struct drm_i915_private *i915,
 | 
			
		||||
static int g4x_do_reset(struct intel_gt *gt,
 | 
			
		||||
			intel_engine_mask_t engine_mask,
 | 
			
		||||
			unsigned int retry)
 | 
			
		||||
{
 | 
			
		||||
	struct pci_dev *pdev = i915->drm.pdev;
 | 
			
		||||
	struct intel_uncore *uncore = &i915->uncore;
 | 
			
		||||
	struct pci_dev *pdev = gt->i915->drm.pdev;
 | 
			
		||||
	struct intel_uncore *uncore = gt->uncore;
 | 
			
		||||
	int ret;
 | 
			
		||||
 | 
			
		||||
	/* WaVcpClkGateDisableForMediaReset:ctg,elk */
 | 
			
		||||
| 
						 | 
				
			
			@ -261,11 +260,11 @@ static int g4x_do_reset(struct drm_i915_private *i915,
 | 
			
		|||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int ironlake_do_reset(struct drm_i915_private *i915,
 | 
			
		||||
static int ironlake_do_reset(struct intel_gt *gt,
 | 
			
		||||
			     intel_engine_mask_t engine_mask,
 | 
			
		||||
			     unsigned int retry)
 | 
			
		||||
{
 | 
			
		||||
	struct intel_uncore *uncore = &i915->uncore;
 | 
			
		||||
	struct intel_uncore *uncore = gt->uncore;
 | 
			
		||||
	int ret;
 | 
			
		||||
 | 
			
		||||
	intel_uncore_write_fw(uncore, ILK_GDSR,
 | 
			
		||||
| 
						 | 
				
			
			@ -297,10 +296,9 @@ static int ironlake_do_reset(struct drm_i915_private *i915,
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
/* Reset the hardware domains (GENX_GRDOM_*) specified by mask */
 | 
			
		||||
static int gen6_hw_domain_reset(struct drm_i915_private *i915,
 | 
			
		||||
				u32 hw_domain_mask)
 | 
			
		||||
static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
 | 
			
		||||
{
 | 
			
		||||
	struct intel_uncore *uncore = &i915->uncore;
 | 
			
		||||
	struct intel_uncore *uncore = gt->uncore;
 | 
			
		||||
	int err;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
| 
						 | 
				
			
			@ -322,7 +320,7 @@ static int gen6_hw_domain_reset(struct drm_i915_private *i915,
 | 
			
		|||
	return err;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int gen6_reset_engines(struct drm_i915_private *i915,
 | 
			
		||||
static int gen6_reset_engines(struct intel_gt *gt,
 | 
			
		||||
			      intel_engine_mask_t engine_mask,
 | 
			
		||||
			      unsigned int retry)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -342,13 +340,13 @@ static int gen6_reset_engines(struct drm_i915_private *i915,
 | 
			
		|||
		intel_engine_mask_t tmp;
 | 
			
		||||
 | 
			
		||||
		hw_mask = 0;
 | 
			
		||||
		for_each_engine_masked(engine, i915, engine_mask, tmp) {
 | 
			
		||||
		for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
 | 
			
		||||
			GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
 | 
			
		||||
			hw_mask |= hw_engine_mask[engine->id];
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return gen6_hw_domain_reset(i915, hw_mask);
 | 
			
		||||
	return gen6_hw_domain_reset(gt, hw_mask);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static u32 gen11_lock_sfc(struct intel_engine_cs *engine)
 | 
			
		||||
| 
						 | 
				
			
			@ -446,7 +444,7 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine)
 | 
			
		|||
	rmw_clear_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int gen11_reset_engines(struct drm_i915_private *i915,
 | 
			
		||||
static int gen11_reset_engines(struct intel_gt *gt,
 | 
			
		||||
			       intel_engine_mask_t engine_mask,
 | 
			
		||||
			       unsigned int retry)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -469,17 +467,17 @@ static int gen11_reset_engines(struct drm_i915_private *i915,
 | 
			
		|||
		hw_mask = GEN11_GRDOM_FULL;
 | 
			
		||||
	} else {
 | 
			
		||||
		hw_mask = 0;
 | 
			
		||||
		for_each_engine_masked(engine, i915, engine_mask, tmp) {
 | 
			
		||||
		for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
 | 
			
		||||
			GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
 | 
			
		||||
			hw_mask |= hw_engine_mask[engine->id];
 | 
			
		||||
			hw_mask |= gen11_lock_sfc(engine);
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	ret = gen6_hw_domain_reset(i915, hw_mask);
 | 
			
		||||
	ret = gen6_hw_domain_reset(gt, hw_mask);
 | 
			
		||||
 | 
			
		||||
	if (engine_mask != ALL_ENGINES)
 | 
			
		||||
		for_each_engine_masked(engine, i915, engine_mask, tmp)
 | 
			
		||||
		for_each_engine_masked(engine, gt->i915, engine_mask, tmp)
 | 
			
		||||
			gen11_unlock_sfc(engine);
 | 
			
		||||
 | 
			
		||||
	return ret;
 | 
			
		||||
| 
						 | 
				
			
			@ -529,7 +527,7 @@ static void gen8_engine_reset_cancel(struct intel_engine_cs *engine)
 | 
			
		|||
			      _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int gen8_reset_engines(struct drm_i915_private *i915,
 | 
			
		||||
static int gen8_reset_engines(struct intel_gt *gt,
 | 
			
		||||
			      intel_engine_mask_t engine_mask,
 | 
			
		||||
			      unsigned int retry)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -538,7 +536,7 @@ static int gen8_reset_engines(struct drm_i915_private *i915,
 | 
			
		|||
	intel_engine_mask_t tmp;
 | 
			
		||||
	int ret;
 | 
			
		||||
 | 
			
		||||
	for_each_engine_masked(engine, i915, engine_mask, tmp) {
 | 
			
		||||
	for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
 | 
			
		||||
		ret = gen8_engine_reset_prepare(engine);
 | 
			
		||||
		if (ret && !reset_non_ready)
 | 
			
		||||
			goto skip_reset;
 | 
			
		||||
| 
						 | 
				
			
			@ -554,23 +552,23 @@ static int gen8_reset_engines(struct drm_i915_private *i915,
 | 
			
		|||
		 * We rather take context corruption instead of
 | 
			
		||||
		 * failed reset with a wedged driver/gpu. And
 | 
			
		||||
		 * active bb execution case should be covered by
 | 
			
		||||
		 * i915_stop_engines we have before the reset.
 | 
			
		||||
		 * stop_engines() we have before the reset.
 | 
			
		||||
		 */
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (INTEL_GEN(i915) >= 11)
 | 
			
		||||
		ret = gen11_reset_engines(i915, engine_mask, retry);
 | 
			
		||||
	if (INTEL_GEN(gt->i915) >= 11)
 | 
			
		||||
		ret = gen11_reset_engines(gt, engine_mask, retry);
 | 
			
		||||
	else
 | 
			
		||||
		ret = gen6_reset_engines(i915, engine_mask, retry);
 | 
			
		||||
		ret = gen6_reset_engines(gt, engine_mask, retry);
 | 
			
		||||
 | 
			
		||||
skip_reset:
 | 
			
		||||
	for_each_engine_masked(engine, i915, engine_mask, tmp)
 | 
			
		||||
	for_each_engine_masked(engine, gt->i915, engine_mask, tmp)
 | 
			
		||||
		gen8_engine_reset_cancel(engine);
 | 
			
		||||
 | 
			
		||||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
typedef int (*reset_func)(struct drm_i915_private *,
 | 
			
		||||
typedef int (*reset_func)(struct intel_gt *,
 | 
			
		||||
			  intel_engine_mask_t engine_mask,
 | 
			
		||||
			  unsigned int retry);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -592,15 +590,14 @@ static reset_func intel_get_gpu_reset(struct drm_i915_private *i915)
 | 
			
		|||
		return NULL;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int intel_gpu_reset(struct drm_i915_private *i915,
 | 
			
		||||
		    intel_engine_mask_t engine_mask)
 | 
			
		||||
int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
 | 
			
		||||
{
 | 
			
		||||
	const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1;
 | 
			
		||||
	reset_func reset;
 | 
			
		||||
	int ret = -ETIMEDOUT;
 | 
			
		||||
	int retry;
 | 
			
		||||
 | 
			
		||||
	reset = intel_get_gpu_reset(i915);
 | 
			
		||||
	reset = intel_get_gpu_reset(gt->i915);
 | 
			
		||||
	if (!reset)
 | 
			
		||||
		return -ENODEV;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -608,7 +605,7 @@ int intel_gpu_reset(struct drm_i915_private *i915,
 | 
			
		|||
	 * If the power well sleeps during the reset, the reset
 | 
			
		||||
	 * request may be dropped and never completes (causing -EIO).
 | 
			
		||||
	 */
 | 
			
		||||
	intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
 | 
			
		||||
	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
 | 
			
		||||
	for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) {
 | 
			
		||||
		/*
 | 
			
		||||
		 * We stop engines, otherwise we might get failed reset and a
 | 
			
		||||
| 
						 | 
				
			
			@ -625,14 +622,14 @@ int intel_gpu_reset(struct drm_i915_private *i915,
 | 
			
		|||
		 * FIXME: Wa for more modern gens needs to be validated
 | 
			
		||||
		 */
 | 
			
		||||
		if (retry)
 | 
			
		||||
			i915_stop_engines(i915, engine_mask);
 | 
			
		||||
			stop_engines(gt, engine_mask);
 | 
			
		||||
 | 
			
		||||
		GEM_TRACE("engine_mask=%x\n", engine_mask);
 | 
			
		||||
		preempt_disable();
 | 
			
		||||
		ret = reset(i915, engine_mask, retry);
 | 
			
		||||
		ret = reset(gt, engine_mask, retry);
 | 
			
		||||
		preempt_enable();
 | 
			
		||||
	}
 | 
			
		||||
	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
 | 
			
		||||
	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
 | 
			
		||||
 | 
			
		||||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -650,17 +647,17 @@ bool intel_has_reset_engine(struct drm_i915_private *i915)
 | 
			
		|||
	return INTEL_INFO(i915)->has_reset_engine && i915_modparams.reset >= 2;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int intel_reset_guc(struct drm_i915_private *i915)
 | 
			
		||||
int intel_reset_guc(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	u32 guc_domain =
 | 
			
		||||
		INTEL_GEN(i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC;
 | 
			
		||||
		INTEL_GEN(gt->i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC;
 | 
			
		||||
	int ret;
 | 
			
		||||
 | 
			
		||||
	GEM_BUG_ON(!HAS_GUC(i915));
 | 
			
		||||
	GEM_BUG_ON(!HAS_GUC(gt->i915));
 | 
			
		||||
 | 
			
		||||
	intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
 | 
			
		||||
	ret = gen6_hw_domain_reset(i915, guc_domain);
 | 
			
		||||
	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
 | 
			
		||||
	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
 | 
			
		||||
	ret = gen6_hw_domain_reset(gt, guc_domain);
 | 
			
		||||
	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
 | 
			
		||||
 | 
			
		||||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -682,56 +679,55 @@ static void reset_prepare_engine(struct intel_engine_cs *engine)
 | 
			
		|||
	engine->reset.prepare(engine);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void revoke_mmaps(struct drm_i915_private *i915)
 | 
			
		||||
static void revoke_mmaps(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	int i;
 | 
			
		||||
 | 
			
		||||
	for (i = 0; i < i915->ggtt.num_fences; i++) {
 | 
			
		||||
	for (i = 0; i < gt->ggtt->num_fences; i++) {
 | 
			
		||||
		struct drm_vma_offset_node *node;
 | 
			
		||||
		struct i915_vma *vma;
 | 
			
		||||
		u64 vma_offset;
 | 
			
		||||
 | 
			
		||||
		vma = READ_ONCE(i915->ggtt.fence_regs[i].vma);
 | 
			
		||||
		vma = READ_ONCE(gt->ggtt->fence_regs[i].vma);
 | 
			
		||||
		if (!vma)
 | 
			
		||||
			continue;
 | 
			
		||||
 | 
			
		||||
		if (!i915_vma_has_userfault(vma))
 | 
			
		||||
			continue;
 | 
			
		||||
 | 
			
		||||
		GEM_BUG_ON(vma->fence != &i915->ggtt.fence_regs[i]);
 | 
			
		||||
		GEM_BUG_ON(vma->fence != &gt->ggtt->fence_regs[i]);
 | 
			
		||||
		node = &vma->obj->base.vma_node;
 | 
			
		||||
		vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT;
 | 
			
		||||
		unmap_mapping_range(i915->drm.anon_inode->i_mapping,
 | 
			
		||||
		unmap_mapping_range(gt->i915->drm.anon_inode->i_mapping,
 | 
			
		||||
				    drm_vma_node_offset_addr(node) + vma_offset,
 | 
			
		||||
				    vma->size,
 | 
			
		||||
				    1);
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static intel_engine_mask_t reset_prepare(struct drm_i915_private *i915)
 | 
			
		||||
static intel_engine_mask_t reset_prepare(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	struct intel_engine_cs *engine;
 | 
			
		||||
	intel_engine_mask_t awake = 0;
 | 
			
		||||
	enum intel_engine_id id;
 | 
			
		||||
 | 
			
		||||
	for_each_engine(engine, i915, id) {
 | 
			
		||||
	for_each_engine(engine, gt->i915, id) {
 | 
			
		||||
		if (intel_engine_pm_get_if_awake(engine))
 | 
			
		||||
			awake |= engine->mask;
 | 
			
		||||
		reset_prepare_engine(engine);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	intel_uc_reset_prepare(i915);
 | 
			
		||||
	intel_uc_reset_prepare(gt->i915);
 | 
			
		||||
 | 
			
		||||
	return awake;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void gt_revoke(struct drm_i915_private *i915)
 | 
			
		||||
static void gt_revoke(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	revoke_mmaps(i915);
 | 
			
		||||
	revoke_mmaps(gt);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int gt_reset(struct drm_i915_private *i915,
 | 
			
		||||
		    intel_engine_mask_t stalled_mask)
 | 
			
		||||
static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
 | 
			
		||||
{
 | 
			
		||||
	struct intel_engine_cs *engine;
 | 
			
		||||
	enum intel_engine_id id;
 | 
			
		||||
| 
						 | 
				
			
			@ -741,14 +737,14 @@ static int gt_reset(struct drm_i915_private *i915,
 | 
			
		|||
	 * Everything depends on having the GTT running, so we need to start
 | 
			
		||||
	 * there.
 | 
			
		||||
	 */
 | 
			
		||||
	err = i915_ggtt_enable_hw(i915);
 | 
			
		||||
	err = i915_ggtt_enable_hw(gt->i915);
 | 
			
		||||
	if (err)
 | 
			
		||||
		return err;
 | 
			
		||||
 | 
			
		||||
	for_each_engine(engine, i915, id)
 | 
			
		||||
		intel_engine_reset(engine, stalled_mask & engine->mask);
 | 
			
		||||
	for_each_engine(engine, gt->i915, id)
 | 
			
		||||
		__intel_engine_reset(engine, stalled_mask & engine->mask);
 | 
			
		||||
 | 
			
		||||
	i915_gem_restore_fences(i915);
 | 
			
		||||
	i915_gem_restore_fences(gt->i915);
 | 
			
		||||
 | 
			
		||||
	return err;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -761,13 +757,12 @@ static void reset_finish_engine(struct intel_engine_cs *engine)
 | 
			
		|||
	intel_engine_signal_breadcrumbs(engine);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void reset_finish(struct drm_i915_private *i915,
 | 
			
		||||
			 intel_engine_mask_t awake)
 | 
			
		||||
static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake)
 | 
			
		||||
{
 | 
			
		||||
	struct intel_engine_cs *engine;
 | 
			
		||||
	enum intel_engine_id id;
 | 
			
		||||
 | 
			
		||||
	for_each_engine(engine, i915, id) {
 | 
			
		||||
	for_each_engine(engine, gt->i915, id) {
 | 
			
		||||
		reset_finish_engine(engine);
 | 
			
		||||
		if (awake & engine->mask)
 | 
			
		||||
			intel_engine_pm_put(engine);
 | 
			
		||||
| 
						 | 
				
			
			@ -791,20 +786,19 @@ static void nop_submit_request(struct i915_request *request)
 | 
			
		|||
	intel_engine_queue_breadcrumbs(engine);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void __i915_gem_set_wedged(struct drm_i915_private *i915)
 | 
			
		||||
static void __intel_gt_set_wedged(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	struct i915_gpu_error *error = &i915->gpu_error;
 | 
			
		||||
	struct intel_engine_cs *engine;
 | 
			
		||||
	intel_engine_mask_t awake;
 | 
			
		||||
	enum intel_engine_id id;
 | 
			
		||||
 | 
			
		||||
	if (test_bit(I915_WEDGED, &error->flags))
 | 
			
		||||
	if (test_bit(I915_WEDGED, &gt->reset.flags))
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(i915)) {
 | 
			
		||||
	if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(gt)) {
 | 
			
		||||
		struct drm_printer p = drm_debug_printer(__func__);
 | 
			
		||||
 | 
			
		||||
		for_each_engine(engine, i915, id)
 | 
			
		||||
		for_each_engine(engine, gt->i915, id)
 | 
			
		||||
			intel_engine_dump(engine, &p, "%s\n", engine->name);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -815,17 +809,17 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915)
 | 
			
		|||
	 * rolling the global seqno forward (since this would complete requests
 | 
			
		||||
	 * for which we haven't set the fence error to EIO yet).
 | 
			
		||||
	 */
 | 
			
		||||
	awake = reset_prepare(i915);
 | 
			
		||||
	awake = reset_prepare(gt);
 | 
			
		||||
 | 
			
		||||
	/* Even if the GPU reset fails, it should still stop the engines */
 | 
			
		||||
	if (!INTEL_INFO(i915)->gpu_reset_clobbers_display)
 | 
			
		||||
		intel_gpu_reset(i915, ALL_ENGINES);
 | 
			
		||||
	if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
 | 
			
		||||
		__intel_gt_reset(gt, ALL_ENGINES);
 | 
			
		||||
 | 
			
		||||
	for_each_engine(engine, i915, id) {
 | 
			
		||||
	for_each_engine(engine, gt->i915, id) {
 | 
			
		||||
		engine->submit_request = nop_submit_request;
 | 
			
		||||
		engine->schedule = NULL;
 | 
			
		||||
	}
 | 
			
		||||
	i915->caps.scheduler = 0;
 | 
			
		||||
	gt->i915->caps.scheduler = 0;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Make sure no request can slip through without getting completed by
 | 
			
		||||
| 
						 | 
				
			
			@ -833,37 +827,36 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915)
 | 
			
		|||
	 * in nop_submit_request.
 | 
			
		||||
	 */
 | 
			
		||||
	synchronize_rcu_expedited();
 | 
			
		||||
	set_bit(I915_WEDGED, &error->flags);
 | 
			
		||||
	set_bit(I915_WEDGED, &gt->reset.flags);
 | 
			
		||||
 | 
			
		||||
	/* Mark all executing requests as skipped */
 | 
			
		||||
	for_each_engine(engine, i915, id)
 | 
			
		||||
	for_each_engine(engine, gt->i915, id)
 | 
			
		||||
		engine->cancel_requests(engine);
 | 
			
		||||
 | 
			
		||||
	reset_finish(i915, awake);
 | 
			
		||||
	reset_finish(gt, awake);
 | 
			
		||||
 | 
			
		||||
	GEM_TRACE("end\n");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void i915_gem_set_wedged(struct drm_i915_private *i915)
 | 
			
		||||
void intel_gt_set_wedged(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	struct i915_gpu_error *error = &i915->gpu_error;
 | 
			
		||||
	intel_wakeref_t wakeref;
 | 
			
		||||
 | 
			
		||||
	mutex_lock(&error->wedge_mutex);
 | 
			
		||||
	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 | 
			
		||||
		__i915_gem_set_wedged(i915);
 | 
			
		||||
	mutex_unlock(&error->wedge_mutex);
 | 
			
		||||
	mutex_lock(&gt->reset.mutex);
 | 
			
		||||
	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
 | 
			
		||||
		__intel_gt_set_wedged(gt);
 | 
			
		||||
	mutex_unlock(&gt->reset.mutex);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool __i915_gem_unset_wedged(struct drm_i915_private *i915)
 | 
			
		||||
static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	struct i915_gpu_error *error = &i915->gpu_error;
 | 
			
		||||
	struct intel_gt_timelines *timelines = &gt->timelines;
 | 
			
		||||
	struct intel_timeline *tl;
 | 
			
		||||
 | 
			
		||||
	if (!test_bit(I915_WEDGED, &error->flags))
 | 
			
		||||
	if (!test_bit(I915_WEDGED, &gt->reset.flags))
 | 
			
		||||
		return true;
 | 
			
		||||
 | 
			
		||||
	if (!i915->gt.scratch) /* Never full initialised, recovery impossible */
 | 
			
		||||
	if (!gt->scratch) /* Never full initialised, recovery impossible */
 | 
			
		||||
		return false;
 | 
			
		||||
 | 
			
		||||
	GEM_TRACE("start\n");
 | 
			
		||||
| 
						 | 
				
			
			@ -878,8 +871,8 @@ static bool __i915_gem_unset_wedged(struct drm_i915_private *i915)
 | 
			
		|||
	 *
 | 
			
		||||
	 * No more can be submitted until we reset the wedged bit.
 | 
			
		||||
	 */
 | 
			
		||||
	mutex_lock(&i915->gt.timelines.mutex);
 | 
			
		||||
	list_for_each_entry(tl, &i915->gt.timelines.active_list, link) {
 | 
			
		||||
	mutex_lock(&timelines->mutex);
 | 
			
		||||
	list_for_each_entry(tl, &timelines->active_list, link) {
 | 
			
		||||
		struct i915_request *rq;
 | 
			
		||||
 | 
			
		||||
		rq = i915_active_request_get_unlocked(&tl->last_request);
 | 
			
		||||
| 
						 | 
				
			
			@ -896,9 +889,9 @@ static bool __i915_gem_unset_wedged(struct drm_i915_private *i915)
 | 
			
		|||
		dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT);
 | 
			
		||||
		i915_request_put(rq);
 | 
			
		||||
	}
 | 
			
		||||
	mutex_unlock(&i915->gt.timelines.mutex);
 | 
			
		||||
	mutex_unlock(&timelines->mutex);
 | 
			
		||||
 | 
			
		||||
	intel_gt_sanitize(&i915->gt, false);
 | 
			
		||||
	intel_gt_sanitize(gt, false);
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Undo nop_submit_request. We prevent all new i915 requests from
 | 
			
		||||
| 
						 | 
				
			
			@ -909,53 +902,51 @@ static bool __i915_gem_unset_wedged(struct drm_i915_private *i915)
 | 
			
		|||
	 * the nop_submit_request on reset, we can do this from normal
 | 
			
		||||
	 * context and do not require stop_machine().
 | 
			
		||||
	 */
 | 
			
		||||
	intel_engines_reset_default_submission(i915);
 | 
			
		||||
	intel_engines_reset_default_submission(gt);
 | 
			
		||||
 | 
			
		||||
	GEM_TRACE("end\n");
 | 
			
		||||
 | 
			
		||||
	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
 | 
			
		||||
	clear_bit(I915_WEDGED, &i915->gpu_error.flags);
 | 
			
		||||
	clear_bit(I915_WEDGED, &gt->reset.flags);
 | 
			
		||||
 | 
			
		||||
	return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 | 
			
		||||
bool intel_gt_unset_wedged(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	struct i915_gpu_error *error = &i915->gpu_error;
 | 
			
		||||
	bool result;
 | 
			
		||||
 | 
			
		||||
	mutex_lock(&error->wedge_mutex);
 | 
			
		||||
	result = __i915_gem_unset_wedged(i915);
 | 
			
		||||
	mutex_unlock(&error->wedge_mutex);
 | 
			
		||||
	mutex_lock(&gt->reset.mutex);
 | 
			
		||||
	result = __intel_gt_unset_wedged(gt);
 | 
			
		||||
	mutex_unlock(&gt->reset.mutex);
 | 
			
		||||
 | 
			
		||||
	return result;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int do_reset(struct drm_i915_private *i915,
 | 
			
		||||
		    intel_engine_mask_t stalled_mask)
 | 
			
		||||
static int do_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
 | 
			
		||||
{
 | 
			
		||||
	int err, i;
 | 
			
		||||
 | 
			
		||||
	gt_revoke(i915);
 | 
			
		||||
	gt_revoke(gt);
 | 
			
		||||
 | 
			
		||||
	err = intel_gpu_reset(i915, ALL_ENGINES);
 | 
			
		||||
	err = __intel_gt_reset(gt, ALL_ENGINES);
 | 
			
		||||
	for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
 | 
			
		||||
		msleep(10 * (i + 1));
 | 
			
		||||
		err = intel_gpu_reset(i915, ALL_ENGINES);
 | 
			
		||||
		err = __intel_gt_reset(gt, ALL_ENGINES);
 | 
			
		||||
	}
 | 
			
		||||
	if (err)
 | 
			
		||||
		return err;
 | 
			
		||||
 | 
			
		||||
	return gt_reset(i915, stalled_mask);
 | 
			
		||||
	return gt_reset(gt, stalled_mask);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int resume(struct drm_i915_private *i915)
 | 
			
		||||
static int resume(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	struct intel_engine_cs *engine;
 | 
			
		||||
	enum intel_engine_id id;
 | 
			
		||||
	int ret;
 | 
			
		||||
 | 
			
		||||
	for_each_engine(engine, i915, id) {
 | 
			
		||||
	for_each_engine(engine, gt->i915, id) {
 | 
			
		||||
		ret = engine->resume(engine);
 | 
			
		||||
		if (ret)
 | 
			
		||||
			return ret;
 | 
			
		||||
| 
						 | 
				
			
			@ -965,8 +956,8 @@ static int resume(struct drm_i915_private *i915)
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * i915_reset - reset chip after a hang
 | 
			
		||||
 * @i915: #drm_i915_private to reset
 | 
			
		||||
 * intel_gt_reset - reset chip after a hang
 | 
			
		||||
 * @gt: #intel_gt to reset
 | 
			
		||||
 * @stalled_mask: mask of the stalled engines with the guilty requests
 | 
			
		||||
 * @reason: user error message for why we are resetting
 | 
			
		||||
 *
 | 
			
		||||
| 
						 | 
				
			
			@ -981,50 +972,50 @@ static int resume(struct drm_i915_private *i915)
 | 
			
		|||
 *   - re-init interrupt state
 | 
			
		||||
 *   - re-init display
 | 
			
		||||
 */
 | 
			
		||||
void i915_reset(struct drm_i915_private *i915,
 | 
			
		||||
		intel_engine_mask_t stalled_mask,
 | 
			
		||||
		const char *reason)
 | 
			
		||||
void intel_gt_reset(struct intel_gt *gt,
 | 
			
		||||
		    intel_engine_mask_t stalled_mask,
 | 
			
		||||
		    const char *reason)
 | 
			
		||||
{
 | 
			
		||||
	struct i915_gpu_error *error = &i915->gpu_error;
 | 
			
		||||
	intel_engine_mask_t awake;
 | 
			
		||||
	int ret;
 | 
			
		||||
 | 
			
		||||
	GEM_TRACE("flags=%lx\n", error->flags);
 | 
			
		||||
	GEM_TRACE("flags=%lx\n", gt->reset.flags);
 | 
			
		||||
 | 
			
		||||
	might_sleep();
 | 
			
		||||
	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
 | 
			
		||||
	mutex_lock(&error->wedge_mutex);
 | 
			
		||||
	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
 | 
			
		||||
	mutex_lock(&gt->reset.mutex);
 | 
			
		||||
 | 
			
		||||
	/* Clear any previous failed attempts at recovery. Time to try again. */
 | 
			
		||||
	if (!__i915_gem_unset_wedged(i915))
 | 
			
		||||
	if (!__intel_gt_unset_wedged(gt))
 | 
			
		||||
		goto unlock;
 | 
			
		||||
 | 
			
		||||
	if (reason)
 | 
			
		||||
		dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
 | 
			
		||||
	error->reset_count++;
 | 
			
		||||
		dev_notice(gt->i915->drm.dev,
 | 
			
		||||
			   "Resetting chip for %s\n", reason);
 | 
			
		||||
	atomic_inc(&gt->i915->gpu_error.reset_count);
 | 
			
		||||
 | 
			
		||||
	awake = reset_prepare(i915);
 | 
			
		||||
	awake = reset_prepare(gt);
 | 
			
		||||
 | 
			
		||||
	if (!intel_has_gpu_reset(i915)) {
 | 
			
		||||
	if (!intel_has_gpu_reset(gt->i915)) {
 | 
			
		||||
		if (i915_modparams.reset)
 | 
			
		||||
			dev_err(i915->drm.dev, "GPU reset not supported\n");
 | 
			
		||||
			dev_err(gt->i915->drm.dev, "GPU reset not supported\n");
 | 
			
		||||
		else
 | 
			
		||||
			DRM_DEBUG_DRIVER("GPU reset disabled\n");
 | 
			
		||||
		goto error;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (INTEL_INFO(i915)->gpu_reset_clobbers_display)
 | 
			
		||||
		intel_runtime_pm_disable_interrupts(i915);
 | 
			
		||||
	if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
 | 
			
		||||
		intel_runtime_pm_disable_interrupts(gt->i915);
 | 
			
		||||
 | 
			
		||||
	if (do_reset(i915, stalled_mask)) {
 | 
			
		||||
		dev_err(i915->drm.dev, "Failed to reset chip\n");
 | 
			
		||||
	if (do_reset(gt, stalled_mask)) {
 | 
			
		||||
		dev_err(gt->i915->drm.dev, "Failed to reset chip\n");
 | 
			
		||||
		goto taint;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (INTEL_INFO(i915)->gpu_reset_clobbers_display)
 | 
			
		||||
		intel_runtime_pm_enable_interrupts(i915);
 | 
			
		||||
	if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
 | 
			
		||||
		intel_runtime_pm_enable_interrupts(gt->i915);
 | 
			
		||||
 | 
			
		||||
	intel_overlay_reset(i915);
 | 
			
		||||
	intel_overlay_reset(gt->i915);
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Next we need to restore the context, but we don't use those
 | 
			
		||||
| 
						 | 
				
			
			@ -1034,23 +1025,23 @@ void i915_reset(struct drm_i915_private *i915,
 | 
			
		|||
	 * was running at the time of the reset (i.e. we weren't VT
 | 
			
		||||
	 * switched away).
 | 
			
		||||
	 */
 | 
			
		||||
	ret = i915_gem_init_hw(i915);
 | 
			
		||||
	ret = i915_gem_init_hw(gt->i915);
 | 
			
		||||
	if (ret) {
 | 
			
		||||
		DRM_ERROR("Failed to initialise HW following reset (%d)\n",
 | 
			
		||||
			  ret);
 | 
			
		||||
		goto taint;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	ret = resume(i915);
 | 
			
		||||
	ret = resume(gt);
 | 
			
		||||
	if (ret)
 | 
			
		||||
		goto taint;
 | 
			
		||||
 | 
			
		||||
	i915_queue_hangcheck(i915);
 | 
			
		||||
	intel_gt_queue_hangcheck(gt);
 | 
			
		||||
 | 
			
		||||
finish:
 | 
			
		||||
	reset_finish(i915, awake);
 | 
			
		||||
	reset_finish(gt, awake);
 | 
			
		||||
unlock:
 | 
			
		||||
	mutex_unlock(&error->wedge_mutex);
 | 
			
		||||
	mutex_unlock(&gt->reset.mutex);
 | 
			
		||||
	return;
 | 
			
		||||
 | 
			
		||||
taint:
 | 
			
		||||
| 
						 | 
				
			
			@ -1068,18 +1059,17 @@ void i915_reset(struct drm_i915_private *i915,
 | 
			
		|||
	 */
 | 
			
		||||
	add_taint_for_CI(TAINT_WARN);
 | 
			
		||||
error:
 | 
			
		||||
	__i915_gem_set_wedged(i915);
 | 
			
		||||
	__intel_gt_set_wedged(gt);
 | 
			
		||||
	goto finish;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline int intel_gt_reset_engine(struct drm_i915_private *i915,
 | 
			
		||||
					struct intel_engine_cs *engine)
 | 
			
		||||
static inline int intel_gt_reset_engine(struct intel_engine_cs *engine)
 | 
			
		||||
{
 | 
			
		||||
	return intel_gpu_reset(i915, engine->mask);
 | 
			
		||||
	return __intel_gt_reset(engine->gt, engine->mask);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * i915_reset_engine - reset GPU engine to recover from a hang
 | 
			
		||||
 * intel_engine_reset - reset GPU engine to recover from a hang
 | 
			
		||||
 * @engine: engine to reset
 | 
			
		||||
 * @msg: reason for GPU reset; or NULL for no dev_notice()
 | 
			
		||||
 *
 | 
			
		||||
| 
						 | 
				
			
			@ -1091,13 +1081,13 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *i915,
 | 
			
		|||
 *  - reset engine (which will force the engine to idle)
 | 
			
		||||
 *  - re-init/configure engine
 | 
			
		||||
 */
 | 
			
		||||
int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
 | 
			
		||||
int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
 | 
			
		||||
{
 | 
			
		||||
	struct i915_gpu_error *error = &engine->i915->gpu_error;
 | 
			
		||||
	struct intel_gt *gt = engine->gt;
 | 
			
		||||
	int ret;
 | 
			
		||||
 | 
			
		||||
	GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
 | 
			
		||||
	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
 | 
			
		||||
	GEM_TRACE("%s flags=%lx\n", engine->name, gt->reset.flags);
 | 
			
		||||
	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags));
 | 
			
		||||
 | 
			
		||||
	if (!intel_engine_pm_get_if_awake(engine))
 | 
			
		||||
		return 0;
 | 
			
		||||
| 
						 | 
				
			
			@ -1107,10 +1097,10 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
 | 
			
		|||
	if (msg)
 | 
			
		||||
		dev_notice(engine->i915->drm.dev,
 | 
			
		||||
			   "Resetting %s for %s\n", engine->name, msg);
 | 
			
		||||
	error->reset_engine_count[engine->id]++;
 | 
			
		||||
	atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);
 | 
			
		||||
 | 
			
		||||
	if (!engine->i915->guc.execbuf_client)
 | 
			
		||||
		ret = intel_gt_reset_engine(engine->i915, engine);
 | 
			
		||||
		ret = intel_gt_reset_engine(engine);
 | 
			
		||||
	else
 | 
			
		||||
		ret = intel_guc_reset_engine(&engine->i915->guc, engine);
 | 
			
		||||
	if (ret) {
 | 
			
		||||
| 
						 | 
				
			
			@ -1126,7 +1116,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
 | 
			
		|||
	 * active request and can drop it, adjust head to skip the offending
 | 
			
		||||
	 * request to resume executing remaining requests in the queue.
 | 
			
		||||
	 */
 | 
			
		||||
	intel_engine_reset(engine, true);
 | 
			
		||||
	__intel_engine_reset(engine, true);
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * The engine and its registers (and workarounds in case of render)
 | 
			
		||||
| 
						 | 
				
			
			@ -1142,16 +1132,15 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
 | 
			
		|||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void i915_reset_device(struct drm_i915_private *i915,
 | 
			
		||||
			      u32 engine_mask,
 | 
			
		||||
			      const char *reason)
 | 
			
		||||
static void intel_gt_reset_global(struct intel_gt *gt,
 | 
			
		||||
				  u32 engine_mask,
 | 
			
		||||
				  const char *reason)
 | 
			
		||||
{
 | 
			
		||||
	struct i915_gpu_error *error = &i915->gpu_error;
 | 
			
		||||
	struct kobject *kobj = &i915->drm.primary->kdev->kobj;
 | 
			
		||||
	struct kobject *kobj = &gt->i915->drm.primary->kdev->kobj;
 | 
			
		||||
	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
 | 
			
		||||
	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
 | 
			
		||||
	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
 | 
			
		||||
	struct i915_wedge_me w;
 | 
			
		||||
	struct intel_wedge_me w;
 | 
			
		||||
 | 
			
		||||
	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1159,24 +1148,24 @@ static void i915_reset_device(struct drm_i915_private *i915,
 | 
			
		|||
	kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
 | 
			
		||||
 | 
			
		||||
	/* Use a watchdog to ensure that our reset completes */
 | 
			
		||||
	i915_wedge_on_timeout(&w, i915, 5 * HZ) {
 | 
			
		||||
		intel_prepare_reset(i915);
 | 
			
		||||
	intel_wedge_on_timeout(&w, gt, 5 * HZ) {
 | 
			
		||||
		intel_prepare_reset(gt->i915);
 | 
			
		||||
 | 
			
		||||
		/* Flush everyone using a resource about to be clobbered */
 | 
			
		||||
		synchronize_srcu_expedited(&error->reset_backoff_srcu);
 | 
			
		||||
		synchronize_srcu_expedited(&gt->reset.backoff_srcu);
 | 
			
		||||
 | 
			
		||||
		i915_reset(i915, engine_mask, reason);
 | 
			
		||||
		intel_gt_reset(gt, engine_mask, reason);
 | 
			
		||||
 | 
			
		||||
		intel_finish_reset(i915);
 | 
			
		||||
		intel_finish_reset(gt->i915);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (!test_bit(I915_WEDGED, &error->flags))
 | 
			
		||||
	if (!test_bit(I915_WEDGED, &gt->reset.flags))
 | 
			
		||||
		kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * i915_handle_error - handle a gpu error
 | 
			
		||||
 * @i915: i915 device private
 | 
			
		||||
 * intel_gt_handle_error - handle a gpu error
 | 
			
		||||
 * @gt: the intel_gt
 | 
			
		||||
 * @engine_mask: mask representing engines that are hung
 | 
			
		||||
 * @flags: control flags
 | 
			
		||||
 * @fmt: Error message format string
 | 
			
		||||
| 
						 | 
				
			
			@ -1187,12 +1176,11 @@ static void i915_reset_device(struct drm_i915_private *i915,
 | 
			
		|||
 * so userspace knows something bad happened (should trigger collection
 | 
			
		||||
 * of a ring dump etc.).
 | 
			
		||||
 */
 | 
			
		||||
void i915_handle_error(struct drm_i915_private *i915,
 | 
			
		||||
		       intel_engine_mask_t engine_mask,
 | 
			
		||||
		       unsigned long flags,
 | 
			
		||||
		       const char *fmt, ...)
 | 
			
		||||
void intel_gt_handle_error(struct intel_gt *gt,
 | 
			
		||||
			   intel_engine_mask_t engine_mask,
 | 
			
		||||
			   unsigned long flags,
 | 
			
		||||
			   const char *fmt, ...)
 | 
			
		||||
{
 | 
			
		||||
	struct i915_gpu_error *error = &i915->gpu_error;
 | 
			
		||||
	struct intel_engine_cs *engine;
 | 
			
		||||
	intel_wakeref_t wakeref;
 | 
			
		||||
	intel_engine_mask_t tmp;
 | 
			
		||||
| 
						 | 
				
			
			@ -1216,33 +1204,31 @@ void i915_handle_error(struct drm_i915_private *i915,
 | 
			
		|||
	 * isn't the case at least when we get here by doing a
 | 
			
		||||
	 * simulated reset via debugfs, so get an RPM reference.
 | 
			
		||||
	 */
 | 
			
		||||
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 | 
			
		||||
	wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm);
 | 
			
		||||
 | 
			
		||||
	engine_mask &= INTEL_INFO(i915)->engine_mask;
 | 
			
		||||
	engine_mask &= INTEL_INFO(gt->i915)->engine_mask;
 | 
			
		||||
 | 
			
		||||
	if (flags & I915_ERROR_CAPTURE) {
 | 
			
		||||
		i915_capture_error_state(i915, engine_mask, msg);
 | 
			
		||||
		intel_gt_clear_error_registers(&i915->gt, engine_mask);
 | 
			
		||||
		i915_capture_error_state(gt->i915, engine_mask, msg);
 | 
			
		||||
		intel_gt_clear_error_registers(gt, engine_mask);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Try engine reset when available. We fall back to full reset if
 | 
			
		||||
	 * single reset fails.
 | 
			
		||||
	 */
 | 
			
		||||
	if (intel_has_reset_engine(i915) && !__i915_wedged(error)) {
 | 
			
		||||
		for_each_engine_masked(engine, i915, engine_mask, tmp) {
 | 
			
		||||
	if (intel_has_reset_engine(gt->i915) && !intel_gt_is_wedged(gt)) {
 | 
			
		||||
		for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
 | 
			
		||||
			BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
 | 
			
		||||
			if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
 | 
			
		||||
					     &error->flags))
 | 
			
		||||
					     &gt->reset.flags))
 | 
			
		||||
				continue;
 | 
			
		||||
 | 
			
		||||
			if (i915_reset_engine(engine, msg) == 0)
 | 
			
		||||
			if (intel_engine_reset(engine, msg) == 0)
 | 
			
		||||
				engine_mask &= ~engine->mask;
 | 
			
		||||
 | 
			
		||||
			clear_bit(I915_RESET_ENGINE + engine->id,
 | 
			
		||||
				  &error->flags);
 | 
			
		||||
			wake_up_bit(&error->flags,
 | 
			
		||||
				    I915_RESET_ENGINE + engine->id);
 | 
			
		||||
			clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
 | 
			
		||||
					      &gt->reset.flags);
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1250,9 +1236,9 @@ void i915_handle_error(struct drm_i915_private *i915,
 | 
			
		|||
		goto out;
 | 
			
		||||
 | 
			
		||||
	/* Full reset needs the mutex, stop any other user trying to do so. */
 | 
			
		||||
	if (test_and_set_bit(I915_RESET_BACKOFF, &error->flags)) {
 | 
			
		||||
		wait_event(error->reset_queue,
 | 
			
		||||
			   !test_bit(I915_RESET_BACKOFF, &error->flags));
 | 
			
		||||
	if (test_and_set_bit(I915_RESET_BACKOFF, &gt->reset.flags)) {
 | 
			
		||||
		wait_event(gt->reset.queue,
 | 
			
		||||
			   !test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
 | 
			
		||||
		goto out; /* piggy-back on the other reset */
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1260,113 +1246,119 @@ void i915_handle_error(struct drm_i915_private *i915,
 | 
			
		|||
	synchronize_rcu_expedited();
 | 
			
		||||
 | 
			
		||||
	/* Prevent any other reset-engine attempt. */
 | 
			
		||||
	for_each_engine(engine, i915, tmp) {
 | 
			
		||||
	for_each_engine(engine, gt->i915, tmp) {
 | 
			
		||||
		while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
 | 
			
		||||
					&error->flags))
 | 
			
		||||
			wait_on_bit(&error->flags,
 | 
			
		||||
					&gt->reset.flags))
 | 
			
		||||
			wait_on_bit(&gt->reset.flags,
 | 
			
		||||
				    I915_RESET_ENGINE + engine->id,
 | 
			
		||||
				    TASK_UNINTERRUPTIBLE);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	i915_reset_device(i915, engine_mask, msg);
 | 
			
		||||
	intel_gt_reset_global(gt, engine_mask, msg);
 | 
			
		||||
 | 
			
		||||
	for_each_engine(engine, i915, tmp) {
 | 
			
		||||
		clear_bit(I915_RESET_ENGINE + engine->id,
 | 
			
		||||
			  &error->flags);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	clear_bit(I915_RESET_BACKOFF, &error->flags);
 | 
			
		||||
	wake_up_all(&error->reset_queue);
 | 
			
		||||
	for_each_engine(engine, gt->i915, tmp)
 | 
			
		||||
		clear_bit_unlock(I915_RESET_ENGINE + engine->id,
 | 
			
		||||
				 &gt->reset.flags);
 | 
			
		||||
	clear_bit_unlock(I915_RESET_BACKOFF, &gt->reset.flags);
 | 
			
		||||
	smp_mb__after_atomic();
 | 
			
		||||
	wake_up_all(&gt->reset.queue);
 | 
			
		||||
 | 
			
		||||
out:
 | 
			
		||||
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 | 
			
		||||
	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int i915_reset_trylock(struct drm_i915_private *i915)
 | 
			
		||||
int intel_gt_reset_trylock(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	struct i915_gpu_error *error = &i915->gpu_error;
 | 
			
		||||
	int srcu;
 | 
			
		||||
 | 
			
		||||
	might_lock(&error->reset_backoff_srcu);
 | 
			
		||||
	might_lock(&gt->reset.backoff_srcu);
 | 
			
		||||
	might_sleep();
 | 
			
		||||
 | 
			
		||||
	rcu_read_lock();
 | 
			
		||||
	while (test_bit(I915_RESET_BACKOFF, &error->flags)) {
 | 
			
		||||
	while (test_bit(I915_RESET_BACKOFF, &gt->reset.flags)) {
 | 
			
		||||
		rcu_read_unlock();
 | 
			
		||||
 | 
			
		||||
		if (wait_event_interruptible(error->reset_queue,
 | 
			
		||||
		if (wait_event_interruptible(gt->reset.queue,
 | 
			
		||||
					     !test_bit(I915_RESET_BACKOFF,
 | 
			
		||||
						       &error->flags)))
 | 
			
		||||
						       &gt->reset.flags)))
 | 
			
		||||
			return -EINTR;
 | 
			
		||||
 | 
			
		||||
		rcu_read_lock();
 | 
			
		||||
	}
 | 
			
		||||
	srcu = srcu_read_lock(&error->reset_backoff_srcu);
 | 
			
		||||
	srcu = srcu_read_lock(&gt->reset.backoff_srcu);
 | 
			
		||||
	rcu_read_unlock();
 | 
			
		||||
 | 
			
		||||
	return srcu;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void i915_reset_unlock(struct drm_i915_private *i915, int tag)
 | 
			
		||||
__releases(&i915->gpu_error.reset_backoff_srcu)
 | 
			
		||||
void intel_gt_reset_unlock(struct intel_gt *gt, int tag)
 | 
			
		||||
__releases(&gt->reset.backoff_srcu)
 | 
			
		||||
{
 | 
			
		||||
	struct i915_gpu_error *error = &i915->gpu_error;
 | 
			
		||||
 | 
			
		||||
	srcu_read_unlock(&error->reset_backoff_srcu, tag);
 | 
			
		||||
	srcu_read_unlock(&gt->reset.backoff_srcu, tag);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int i915_terminally_wedged(struct drm_i915_private *i915)
 | 
			
		||||
int intel_gt_terminally_wedged(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	struct i915_gpu_error *error = &i915->gpu_error;
 | 
			
		||||
 | 
			
		||||
	might_sleep();
 | 
			
		||||
 | 
			
		||||
	if (!__i915_wedged(error))
 | 
			
		||||
	if (!intel_gt_is_wedged(gt))
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
	/* Reset still in progress? Maybe we will recover? */
 | 
			
		||||
	if (!test_bit(I915_RESET_BACKOFF, &error->flags))
 | 
			
		||||
	if (!test_bit(I915_RESET_BACKOFF, &gt->reset.flags))
 | 
			
		||||
		return -EIO;
 | 
			
		||||
 | 
			
		||||
	/* XXX intel_finish_reset() still takes struct_mutex!!! */
 | 
			
		||||
	if (mutex_is_locked(&i915->drm.struct_mutex))
 | 
			
		||||
	if (mutex_is_locked(&gt->i915->drm.struct_mutex))
 | 
			
		||||
		return -EAGAIN;
 | 
			
		||||
 | 
			
		||||
	if (wait_event_interruptible(error->reset_queue,
 | 
			
		||||
	if (wait_event_interruptible(gt->reset.queue,
 | 
			
		||||
				     !test_bit(I915_RESET_BACKOFF,
 | 
			
		||||
					       &error->flags)))
 | 
			
		||||
					       &gt->reset.flags)))
 | 
			
		||||
		return -EINTR;
 | 
			
		||||
 | 
			
		||||
	return __i915_wedged(error) ? -EIO : 0;
 | 
			
		||||
	return intel_gt_is_wedged(gt) ? -EIO : 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void i915_wedge_me(struct work_struct *work)
 | 
			
		||||
void intel_gt_init_reset(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	struct i915_wedge_me *w = container_of(work, typeof(*w), work.work);
 | 
			
		||||
	init_waitqueue_head(&gt->reset.queue);
 | 
			
		||||
	mutex_init(&gt->reset.mutex);
 | 
			
		||||
	init_srcu_struct(&gt->reset.backoff_srcu);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
	dev_err(w->i915->drm.dev,
 | 
			
		||||
void intel_gt_fini_reset(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	cleanup_srcu_struct(&gt->reset.backoff_srcu);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void intel_wedge_me(struct work_struct *work)
 | 
			
		||||
{
 | 
			
		||||
	struct intel_wedge_me *w = container_of(work, typeof(*w), work.work);
 | 
			
		||||
 | 
			
		||||
	dev_err(w->gt->i915->drm.dev,
 | 
			
		||||
		"%s timed out, cancelling all in-flight rendering.\n",
 | 
			
		||||
		w->name);
 | 
			
		||||
	i915_gem_set_wedged(w->i915);
 | 
			
		||||
	intel_gt_set_wedged(w->gt);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void __i915_init_wedge(struct i915_wedge_me *w,
 | 
			
		||||
		       struct drm_i915_private *i915,
 | 
			
		||||
		       long timeout,
 | 
			
		||||
		       const char *name)
 | 
			
		||||
void __intel_init_wedge(struct intel_wedge_me *w,
 | 
			
		||||
			struct intel_gt *gt,
 | 
			
		||||
			long timeout,
 | 
			
		||||
			const char *name)
 | 
			
		||||
{
 | 
			
		||||
	w->i915 = i915;
 | 
			
		||||
	w->gt = gt;
 | 
			
		||||
	w->name = name;
 | 
			
		||||
 | 
			
		||||
	INIT_DELAYED_WORK_ONSTACK(&w->work, i915_wedge_me);
 | 
			
		||||
	INIT_DELAYED_WORK_ONSTACK(&w->work, intel_wedge_me);
 | 
			
		||||
	schedule_delayed_work(&w->work, timeout);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void __i915_fini_wedge(struct i915_wedge_me *w)
 | 
			
		||||
void __intel_fini_wedge(struct intel_wedge_me *w)
 | 
			
		||||
{
 | 
			
		||||
	cancel_delayed_work_sync(&w->work);
 | 
			
		||||
	destroy_delayed_work_on_stack(&w->work);
 | 
			
		||||
	w->i915 = NULL;
 | 
			
		||||
	w->gt = NULL;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -11,56 +11,67 @@
 | 
			
		|||
#include <linux/types.h>
 | 
			
		||||
#include <linux/srcu.h>
 | 
			
		||||
 | 
			
		||||
#include "gt/intel_engine_types.h"
 | 
			
		||||
#include "intel_engine_types.h"
 | 
			
		||||
#include "intel_reset_types.h"
 | 
			
		||||
 | 
			
		||||
struct drm_i915_private;
 | 
			
		||||
struct i915_request;
 | 
			
		||||
struct intel_engine_cs;
 | 
			
		||||
struct intel_gt;
 | 
			
		||||
struct intel_guc;
 | 
			
		||||
 | 
			
		||||
void intel_gt_init_reset(struct intel_gt *gt);
 | 
			
		||||
void intel_gt_fini_reset(struct intel_gt *gt);
 | 
			
		||||
 | 
			
		||||
__printf(4, 5)
 | 
			
		||||
void i915_handle_error(struct drm_i915_private *i915,
 | 
			
		||||
		       intel_engine_mask_t engine_mask,
 | 
			
		||||
		       unsigned long flags,
 | 
			
		||||
		       const char *fmt, ...);
 | 
			
		||||
void intel_gt_handle_error(struct intel_gt *gt,
 | 
			
		||||
			   intel_engine_mask_t engine_mask,
 | 
			
		||||
			   unsigned long flags,
 | 
			
		||||
			   const char *fmt, ...);
 | 
			
		||||
#define I915_ERROR_CAPTURE BIT(0)
 | 
			
		||||
 | 
			
		||||
void i915_reset(struct drm_i915_private *i915,
 | 
			
		||||
		intel_engine_mask_t stalled_mask,
 | 
			
		||||
		const char *reason);
 | 
			
		||||
int i915_reset_engine(struct intel_engine_cs *engine,
 | 
			
		||||
		      const char *reason);
 | 
			
		||||
void intel_gt_reset(struct intel_gt *gt,
 | 
			
		||||
		    intel_engine_mask_t stalled_mask,
 | 
			
		||||
		    const char *reason);
 | 
			
		||||
int intel_engine_reset(struct intel_engine_cs *engine,
 | 
			
		||||
		       const char *reason);
 | 
			
		||||
 | 
			
		||||
void i915_reset_request(struct i915_request *rq, bool guilty);
 | 
			
		||||
void __i915_request_reset(struct i915_request *rq, bool guilty);
 | 
			
		||||
 | 
			
		||||
int __must_check i915_reset_trylock(struct drm_i915_private *i915);
 | 
			
		||||
void i915_reset_unlock(struct drm_i915_private *i915, int tag);
 | 
			
		||||
int __must_check intel_gt_reset_trylock(struct intel_gt *gt);
 | 
			
		||||
void intel_gt_reset_unlock(struct intel_gt *gt, int tag);
 | 
			
		||||
 | 
			
		||||
int i915_terminally_wedged(struct drm_i915_private *i915);
 | 
			
		||||
void intel_gt_set_wedged(struct intel_gt *gt);
 | 
			
		||||
bool intel_gt_unset_wedged(struct intel_gt *gt);
 | 
			
		||||
int intel_gt_terminally_wedged(struct intel_gt *gt);
 | 
			
		||||
 | 
			
		||||
int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask);
 | 
			
		||||
 | 
			
		||||
int intel_reset_guc(struct intel_gt *gt);
 | 
			
		||||
 | 
			
		||||
struct intel_wedge_me {
 | 
			
		||||
	struct delayed_work work;
 | 
			
		||||
	struct intel_gt *gt;
 | 
			
		||||
	const char *name;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
void __intel_init_wedge(struct intel_wedge_me *w,
 | 
			
		||||
			struct intel_gt *gt,
 | 
			
		||||
			long timeout,
 | 
			
		||||
			const char *name);
 | 
			
		||||
void __intel_fini_wedge(struct intel_wedge_me *w);
 | 
			
		||||
 | 
			
		||||
#define intel_wedge_on_timeout(W, GT, TIMEOUT)				\
 | 
			
		||||
	for (__intel_init_wedge((W), (GT), (TIMEOUT), __func__);	\
 | 
			
		||||
	     (W)->gt;							\
 | 
			
		||||
	     __intel_fini_wedge((W)))
 | 
			
		||||
 | 
			
		||||
static inline bool __intel_reset_failed(const struct intel_reset *reset)
 | 
			
		||||
{
 | 
			
		||||
	return unlikely(test_bit(I915_WEDGED, &reset->flags));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool intel_has_gpu_reset(struct drm_i915_private *i915);
 | 
			
		||||
bool intel_has_reset_engine(struct drm_i915_private *i915);
 | 
			
		||||
 | 
			
		||||
int intel_gpu_reset(struct drm_i915_private *i915,
 | 
			
		||||
		    intel_engine_mask_t engine_mask);
 | 
			
		||||
 | 
			
		||||
int intel_reset_guc(struct drm_i915_private *i915);
 | 
			
		||||
 | 
			
		||||
struct i915_wedge_me {
 | 
			
		||||
	struct delayed_work work;
 | 
			
		||||
	struct drm_i915_private *i915;
 | 
			
		||||
	const char *name;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
void __i915_init_wedge(struct i915_wedge_me *w,
 | 
			
		||||
		       struct drm_i915_private *i915,
 | 
			
		||||
		       long timeout,
 | 
			
		||||
		       const char *name);
 | 
			
		||||
void __i915_fini_wedge(struct i915_wedge_me *w);
 | 
			
		||||
 | 
			
		||||
#define i915_wedge_on_timeout(W, DEV, TIMEOUT)				\
 | 
			
		||||
	for (__i915_init_wedge((W), (DEV), (TIMEOUT), __func__);	\
 | 
			
		||||
	     (W)->i915;							\
 | 
			
		||||
	     __i915_fini_wedge((W)))
 | 
			
		||||
 | 
			
		||||
#endif /* I915_RESET_H */
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										50
									
								
								drivers/gpu/drm/i915/gt/intel_reset_types.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								drivers/gpu/drm/i915/gt/intel_reset_types.h
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,50 @@
 | 
			
		|||
/* SPDX-License-Identifier: MIT */
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright © 2019 Intel Corporation
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#ifndef __INTEL_RESET_TYPES_H_
 | 
			
		||||
#define __INTEL_RESET_TYPES_H_
 | 
			
		||||
 | 
			
		||||
#include <linux/mutex.h>
 | 
			
		||||
#include <linux/wait.h>
 | 
			
		||||
#include <linux/srcu.h>
 | 
			
		||||
 | 
			
		||||
struct intel_reset {
 | 
			
		||||
	/**
 | 
			
		||||
	 * flags: Control various stages of the GPU reset
 | 
			
		||||
	 *
 | 
			
		||||
	 * #I915_RESET_BACKOFF - When we start a global reset, we need to
 | 
			
		||||
	 * serialise with any other users attempting to do the same, and
 | 
			
		||||
	 * any global resources that may be clobbered by the reset (such as
 | 
			
		||||
	 * FENCE registers).
 | 
			
		||||
	 *
 | 
			
		||||
	 * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to
 | 
			
		||||
	 * acquire the struct_mutex to reset an engine, we need an explicit
 | 
			
		||||
	 * flag to prevent two concurrent reset attempts in the same engine.
 | 
			
		||||
	 * As the number of engines continues to grow, allocate the flags from
 | 
			
		||||
	 * the most significant bits.
 | 
			
		||||
	 *
 | 
			
		||||
	 * #I915_WEDGED - If reset fails and we can no longer use the GPU,
 | 
			
		||||
	 * we set the #I915_WEDGED bit. Prior to command submission, e.g.
 | 
			
		||||
	 * i915_request_alloc(), this bit is checked and the sequence
 | 
			
		||||
	 * aborted (with -EIO reported to userspace) if set.
 | 
			
		||||
	 */
 | 
			
		||||
	unsigned long flags;
 | 
			
		||||
#define I915_RESET_BACKOFF	0
 | 
			
		||||
#define I915_RESET_MODESET	1
 | 
			
		||||
#define I915_RESET_ENGINE	2
 | 
			
		||||
#define I915_WEDGED		(BITS_PER_LONG - 1)
 | 
			
		||||
 | 
			
		||||
	struct mutex mutex; /* serialises wedging/unwedging */
 | 
			
		||||
 | 
			
		||||
	/**
 | 
			
		||||
	 * Waitqueue to signal when the reset has completed. Used by clients
 | 
			
		||||
	 * that wait for #I915_RESET_BACKOFF to be cleared.
 | 
			
		||||
	 */
 | 
			
		||||
	wait_queue_head_t queue;
 | 
			
		||||
 | 
			
		||||
	struct srcu_struct backoff_srcu;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#endif /* __INTEL_RESET_TYPES_H_ */
 | 
			
		||||
| 
						 | 
				
			
			@ -795,7 +795,7 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled)
 | 
			
		|||
		 * If the request was innocent, we try to replay the request
 | 
			
		||||
		 * with the restored context.
 | 
			
		||||
		 */
 | 
			
		||||
		i915_reset_request(rq, stalled);
 | 
			
		||||
		__i915_request_reset(rq, stalled);
 | 
			
		||||
 | 
			
		||||
		GEM_BUG_ON(rq->ring != engine->buffer);
 | 
			
		||||
		head = rq->head;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							| 
						 | 
				
			
			@ -55,7 +55,7 @@ static int live_sanitycheck(void *arg)
 | 
			
		|||
		if (!igt_wait_for_spinner(&spin, rq)) {
 | 
			
		||||
			GEM_TRACE("spinner failed to start\n");
 | 
			
		||||
			GEM_TRACE_DUMP();
 | 
			
		||||
			i915_gem_set_wedged(i915);
 | 
			
		||||
			intel_gt_set_wedged(&i915->gt);
 | 
			
		||||
			err = -EIO;
 | 
			
		||||
			goto err_ctx;
 | 
			
		||||
		}
 | 
			
		||||
| 
						 | 
				
			
			@ -211,7 +211,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer,
 | 
			
		|||
		pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
 | 
			
		||||
		       count, n);
 | 
			
		||||
		GEM_TRACE_DUMP();
 | 
			
		||||
		i915_gem_set_wedged(outer->i915);
 | 
			
		||||
		intel_gt_set_wedged(outer->gt);
 | 
			
		||||
		err = -EIO;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -445,7 +445,7 @@ static int live_busywait_preempt(void *arg)
 | 
			
		|||
			intel_engine_dump(engine, &p, "%s\n", engine->name);
 | 
			
		||||
			GEM_TRACE_DUMP();
 | 
			
		||||
 | 
			
		||||
			i915_gem_set_wedged(i915);
 | 
			
		||||
			intel_gt_set_wedged(&i915->gt);
 | 
			
		||||
			err = -EIO;
 | 
			
		||||
			goto err_vma;
 | 
			
		||||
		}
 | 
			
		||||
| 
						 | 
				
			
			@ -534,7 +534,7 @@ static int live_preempt(void *arg)
 | 
			
		|||
		if (!igt_wait_for_spinner(&spin_lo, rq)) {
 | 
			
		||||
			GEM_TRACE("lo spinner failed to start\n");
 | 
			
		||||
			GEM_TRACE_DUMP();
 | 
			
		||||
			i915_gem_set_wedged(i915);
 | 
			
		||||
			intel_gt_set_wedged(&i915->gt);
 | 
			
		||||
			err = -EIO;
 | 
			
		||||
			goto err_ctx_lo;
 | 
			
		||||
		}
 | 
			
		||||
| 
						 | 
				
			
			@ -551,7 +551,7 @@ static int live_preempt(void *arg)
 | 
			
		|||
		if (!igt_wait_for_spinner(&spin_hi, rq)) {
 | 
			
		||||
			GEM_TRACE("hi spinner failed to start\n");
 | 
			
		||||
			GEM_TRACE_DUMP();
 | 
			
		||||
			i915_gem_set_wedged(i915);
 | 
			
		||||
			intel_gt_set_wedged(&i915->gt);
 | 
			
		||||
			err = -EIO;
 | 
			
		||||
			goto err_ctx_lo;
 | 
			
		||||
		}
 | 
			
		||||
| 
						 | 
				
			
			@ -688,7 +688,7 @@ static int live_late_preempt(void *arg)
 | 
			
		|||
err_wedged:
 | 
			
		||||
	igt_spinner_end(&spin_hi);
 | 
			
		||||
	igt_spinner_end(&spin_lo);
 | 
			
		||||
	i915_gem_set_wedged(i915);
 | 
			
		||||
	intel_gt_set_wedged(&i915->gt);
 | 
			
		||||
	err = -EIO;
 | 
			
		||||
	goto err_ctx_lo;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -824,7 +824,7 @@ static int live_nopreempt(void *arg)
 | 
			
		|||
err_wedged:
 | 
			
		||||
	igt_spinner_end(&b.spin);
 | 
			
		||||
	igt_spinner_end(&a.spin);
 | 
			
		||||
	i915_gem_set_wedged(i915);
 | 
			
		||||
	intel_gt_set_wedged(&i915->gt);
 | 
			
		||||
	err = -EIO;
 | 
			
		||||
	goto err_client_b;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -934,7 +934,7 @@ static int live_suppress_self_preempt(void *arg)
 | 
			
		|||
err_wedged:
 | 
			
		||||
	igt_spinner_end(&b.spin);
 | 
			
		||||
	igt_spinner_end(&a.spin);
 | 
			
		||||
	i915_gem_set_wedged(i915);
 | 
			
		||||
	intel_gt_set_wedged(&i915->gt);
 | 
			
		||||
	err = -EIO;
 | 
			
		||||
	goto err_client_b;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1105,7 +1105,7 @@ static int live_suppress_wait_preempt(void *arg)
 | 
			
		|||
err_wedged:
 | 
			
		||||
	for (i = 0; i < ARRAY_SIZE(client); i++)
 | 
			
		||||
		igt_spinner_end(&client[i].spin);
 | 
			
		||||
	i915_gem_set_wedged(i915);
 | 
			
		||||
	intel_gt_set_wedged(&i915->gt);
 | 
			
		||||
	err = -EIO;
 | 
			
		||||
	goto err_client_3;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1251,7 +1251,7 @@ static int live_chain_preempt(void *arg)
 | 
			
		|||
err_wedged:
 | 
			
		||||
	igt_spinner_end(&hi.spin);
 | 
			
		||||
	igt_spinner_end(&lo.spin);
 | 
			
		||||
	i915_gem_set_wedged(i915);
 | 
			
		||||
	intel_gt_set_wedged(&i915->gt);
 | 
			
		||||
	err = -EIO;
 | 
			
		||||
	goto err_client_lo;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1310,7 +1310,7 @@ static int live_preempt_hang(void *arg)
 | 
			
		|||
		if (!igt_wait_for_spinner(&spin_lo, rq)) {
 | 
			
		||||
			GEM_TRACE("lo spinner failed to start\n");
 | 
			
		||||
			GEM_TRACE_DUMP();
 | 
			
		||||
			i915_gem_set_wedged(i915);
 | 
			
		||||
			intel_gt_set_wedged(&i915->gt);
 | 
			
		||||
			err = -EIO;
 | 
			
		||||
			goto err_ctx_lo;
 | 
			
		||||
		}
 | 
			
		||||
| 
						 | 
				
			
			@ -1332,21 +1332,21 @@ static int live_preempt_hang(void *arg)
 | 
			
		|||
						 HZ / 10)) {
 | 
			
		||||
			pr_err("Preemption did not occur within timeout!");
 | 
			
		||||
			GEM_TRACE_DUMP();
 | 
			
		||||
			i915_gem_set_wedged(i915);
 | 
			
		||||
			intel_gt_set_wedged(&i915->gt);
 | 
			
		||||
			err = -EIO;
 | 
			
		||||
			goto err_ctx_lo;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
 | 
			
		||||
		i915_reset_engine(engine, NULL);
 | 
			
		||||
		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
 | 
			
		||||
		set_bit(I915_RESET_ENGINE + id, &i915->gt.reset.flags);
 | 
			
		||||
		intel_engine_reset(engine, NULL);
 | 
			
		||||
		clear_bit(I915_RESET_ENGINE + id, &i915->gt.reset.flags);
 | 
			
		||||
 | 
			
		||||
		engine->execlists.preempt_hang.inject_hang = false;
 | 
			
		||||
 | 
			
		||||
		if (!igt_wait_for_spinner(&spin_hi, rq)) {
 | 
			
		||||
			GEM_TRACE("hi spinner failed to start\n");
 | 
			
		||||
			GEM_TRACE_DUMP();
 | 
			
		||||
			i915_gem_set_wedged(i915);
 | 
			
		||||
			intel_gt_set_wedged(&i915->gt);
 | 
			
		||||
			err = -EIO;
 | 
			
		||||
			goto err_ctx_lo;
 | 
			
		||||
		}
 | 
			
		||||
| 
						 | 
				
			
			@ -1726,7 +1726,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
 | 
			
		|||
					  request[nc]->fence.context,
 | 
			
		||||
					  request[nc]->fence.seqno);
 | 
			
		||||
				GEM_TRACE_DUMP();
 | 
			
		||||
				i915_gem_set_wedged(i915);
 | 
			
		||||
				intel_gt_set_wedged(&i915->gt);
 | 
			
		||||
				break;
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
| 
						 | 
				
			
			@ -1873,7 +1873,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
 | 
			
		|||
				  request[n]->fence.context,
 | 
			
		||||
				  request[n]->fence.seqno);
 | 
			
		||||
			GEM_TRACE_DUMP();
 | 
			
		||||
			i915_gem_set_wedged(i915);
 | 
			
		||||
			intel_gt_set_wedged(&i915->gt);
 | 
			
		||||
			err = -EIO;
 | 
			
		||||
			goto out;
 | 
			
		||||
		}
 | 
			
		||||
| 
						 | 
				
			
			@ -2150,7 +2150,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 | 
			
		|||
	if (!HAS_EXECLISTS(i915))
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
	if (i915_terminally_wedged(i915))
 | 
			
		||||
	if (intel_gt_is_wedged(&i915->gt))
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
	return i915_live_subtests(tests, i915);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -9,26 +9,29 @@
 | 
			
		|||
 | 
			
		||||
static int igt_global_reset(void *arg)
 | 
			
		||||
{
 | 
			
		||||
	struct drm_i915_private *i915 = arg;
 | 
			
		||||
	struct intel_gt *gt = arg;
 | 
			
		||||
	unsigned int reset_count;
 | 
			
		||||
	intel_wakeref_t wakeref;
 | 
			
		||||
	int err = 0;
 | 
			
		||||
 | 
			
		||||
	/* Check that we can issue a global GPU reset */
 | 
			
		||||
 | 
			
		||||
	igt_global_reset_lock(i915);
 | 
			
		||||
	igt_global_reset_lock(gt);
 | 
			
		||||
	wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm);
 | 
			
		||||
 | 
			
		||||
	reset_count = i915_reset_count(&i915->gpu_error);
 | 
			
		||||
	reset_count = i915_reset_count(&gt->i915->gpu_error);
 | 
			
		||||
 | 
			
		||||
	i915_reset(i915, ALL_ENGINES, NULL);
 | 
			
		||||
	intel_gt_reset(gt, ALL_ENGINES, NULL);
 | 
			
		||||
 | 
			
		||||
	if (i915_reset_count(&i915->gpu_error) == reset_count) {
 | 
			
		||||
	if (i915_reset_count(&gt->i915->gpu_error) == reset_count) {
 | 
			
		||||
		pr_err("No GPU reset recorded!\n");
 | 
			
		||||
		err = -EINVAL;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	igt_global_reset_unlock(i915);
 | 
			
		||||
	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
 | 
			
		||||
	igt_global_reset_unlock(gt);
 | 
			
		||||
 | 
			
		||||
	if (i915_reset_failed(i915))
 | 
			
		||||
	if (intel_gt_is_wedged(gt))
 | 
			
		||||
		err = -EIO;
 | 
			
		||||
 | 
			
		||||
	return err;
 | 
			
		||||
| 
						 | 
				
			
			@ -36,72 +39,72 @@ static int igt_global_reset(void *arg)
 | 
			
		|||
 | 
			
		||||
static int igt_wedged_reset(void *arg)
 | 
			
		||||
{
 | 
			
		||||
	struct drm_i915_private *i915 = arg;
 | 
			
		||||
	struct intel_gt *gt = arg;
 | 
			
		||||
	intel_wakeref_t wakeref;
 | 
			
		||||
 | 
			
		||||
	/* Check that we can recover a wedged device with a GPU reset */
 | 
			
		||||
 | 
			
		||||
	igt_global_reset_lock(i915);
 | 
			
		||||
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 | 
			
		||||
	igt_global_reset_lock(gt);
 | 
			
		||||
	wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm);
 | 
			
		||||
 | 
			
		||||
	i915_gem_set_wedged(i915);
 | 
			
		||||
	intel_gt_set_wedged(gt);
 | 
			
		||||
 | 
			
		||||
	GEM_BUG_ON(!i915_reset_failed(i915));
 | 
			
		||||
	i915_reset(i915, ALL_ENGINES, NULL);
 | 
			
		||||
	GEM_BUG_ON(!intel_gt_is_wedged(gt));
 | 
			
		||||
	intel_gt_reset(gt, ALL_ENGINES, NULL);
 | 
			
		||||
 | 
			
		||||
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 | 
			
		||||
	igt_global_reset_unlock(i915);
 | 
			
		||||
	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
 | 
			
		||||
	igt_global_reset_unlock(gt);
 | 
			
		||||
 | 
			
		||||
	return i915_reset_failed(i915) ? -EIO : 0;
 | 
			
		||||
	return intel_gt_is_wedged(gt) ? -EIO : 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int igt_atomic_reset(void *arg)
 | 
			
		||||
{
 | 
			
		||||
	struct drm_i915_private *i915 = arg;
 | 
			
		||||
	struct intel_gt *gt = arg;
 | 
			
		||||
	const typeof(*igt_atomic_phases) *p;
 | 
			
		||||
	int err = 0;
 | 
			
		||||
 | 
			
		||||
	/* Check that the resets are usable from atomic context */
 | 
			
		||||
 | 
			
		||||
	intel_gt_pm_get(&i915->gt);
 | 
			
		||||
	igt_global_reset_lock(i915);
 | 
			
		||||
	intel_gt_pm_get(gt);
 | 
			
		||||
	igt_global_reset_lock(gt);
 | 
			
		||||
 | 
			
		||||
	/* Flush any requests before we get started and check basics */
 | 
			
		||||
	if (!igt_force_reset(i915))
 | 
			
		||||
	if (!igt_force_reset(gt))
 | 
			
		||||
		goto unlock;
 | 
			
		||||
 | 
			
		||||
	for (p = igt_atomic_phases; p->name; p++) {
 | 
			
		||||
		intel_engine_mask_t awake;
 | 
			
		||||
 | 
			
		||||
		GEM_TRACE("intel_gpu_reset under %s\n", p->name);
 | 
			
		||||
		GEM_TRACE("__intel_gt_reset under %s\n", p->name);
 | 
			
		||||
 | 
			
		||||
		awake = reset_prepare(i915);
 | 
			
		||||
		awake = reset_prepare(gt);
 | 
			
		||||
		p->critical_section_begin();
 | 
			
		||||
 | 
			
		||||
		err = intel_gpu_reset(i915, ALL_ENGINES);
 | 
			
		||||
		err = __intel_gt_reset(gt, ALL_ENGINES);
 | 
			
		||||
 | 
			
		||||
		p->critical_section_end();
 | 
			
		||||
		reset_finish(i915, awake);
 | 
			
		||||
		reset_finish(gt, awake);
 | 
			
		||||
 | 
			
		||||
		if (err) {
 | 
			
		||||
			pr_err("intel_gpu_reset failed under %s\n", p->name);
 | 
			
		||||
			pr_err("__intel_gt_reset failed under %s\n", p->name);
 | 
			
		||||
			break;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/* As we poke around the guts, do a full reset before continuing. */
 | 
			
		||||
	igt_force_reset(i915);
 | 
			
		||||
	igt_force_reset(gt);
 | 
			
		||||
 | 
			
		||||
unlock:
 | 
			
		||||
	igt_global_reset_unlock(i915);
 | 
			
		||||
	intel_gt_pm_put(&i915->gt);
 | 
			
		||||
	igt_global_reset_unlock(gt);
 | 
			
		||||
	intel_gt_pm_put(gt);
 | 
			
		||||
 | 
			
		||||
	return err;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int igt_atomic_engine_reset(void *arg)
 | 
			
		||||
{
 | 
			
		||||
	struct drm_i915_private *i915 = arg;
 | 
			
		||||
	struct intel_gt *gt = arg;
 | 
			
		||||
	const typeof(*igt_atomic_phases) *p;
 | 
			
		||||
	struct intel_engine_cs *engine;
 | 
			
		||||
	enum intel_engine_id id;
 | 
			
		||||
| 
						 | 
				
			
			@ -109,33 +112,33 @@ static int igt_atomic_engine_reset(void *arg)
 | 
			
		|||
 | 
			
		||||
	/* Check that the resets are usable from atomic context */
 | 
			
		||||
 | 
			
		||||
	if (!intel_has_reset_engine(i915))
 | 
			
		||||
	if (!intel_has_reset_engine(gt->i915))
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
	if (USES_GUC_SUBMISSION(i915))
 | 
			
		||||
	if (USES_GUC_SUBMISSION(gt->i915))
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
	intel_gt_pm_get(&i915->gt);
 | 
			
		||||
	igt_global_reset_lock(i915);
 | 
			
		||||
	intel_gt_pm_get(gt);
 | 
			
		||||
	igt_global_reset_lock(gt);
 | 
			
		||||
 | 
			
		||||
	/* Flush any requests before we get started and check basics */
 | 
			
		||||
	if (!igt_force_reset(i915))
 | 
			
		||||
	if (!igt_force_reset(gt))
 | 
			
		||||
		goto out_unlock;
 | 
			
		||||
 | 
			
		||||
	for_each_engine(engine, i915, id) {
 | 
			
		||||
	for_each_engine(engine, gt->i915, id) {
 | 
			
		||||
		tasklet_disable_nosync(&engine->execlists.tasklet);
 | 
			
		||||
		intel_engine_pm_get(engine);
 | 
			
		||||
 | 
			
		||||
		for (p = igt_atomic_phases; p->name; p++) {
 | 
			
		||||
			GEM_TRACE("i915_reset_engine(%s) under %s\n",
 | 
			
		||||
			GEM_TRACE("intel_engine_reset(%s) under %s\n",
 | 
			
		||||
				  engine->name, p->name);
 | 
			
		||||
 | 
			
		||||
			p->critical_section_begin();
 | 
			
		||||
			err = i915_reset_engine(engine, NULL);
 | 
			
		||||
			err = intel_engine_reset(engine, NULL);
 | 
			
		||||
			p->critical_section_end();
 | 
			
		||||
 | 
			
		||||
			if (err) {
 | 
			
		||||
				pr_err("i915_reset_engine(%s) failed under %s\n",
 | 
			
		||||
				pr_err("intel_engine_reset(%s) failed under %s\n",
 | 
			
		||||
				       engine->name, p->name);
 | 
			
		||||
				break;
 | 
			
		||||
			}
 | 
			
		||||
| 
						 | 
				
			
			@ -148,11 +151,11 @@ static int igt_atomic_engine_reset(void *arg)
 | 
			
		|||
	}
 | 
			
		||||
 | 
			
		||||
	/* As we poke around the guts, do a full reset before continuing. */
 | 
			
		||||
	igt_force_reset(i915);
 | 
			
		||||
	igt_force_reset(gt);
 | 
			
		||||
 | 
			
		||||
out_unlock:
 | 
			
		||||
	igt_global_reset_unlock(i915);
 | 
			
		||||
	intel_gt_pm_put(&i915->gt);
 | 
			
		||||
	igt_global_reset_unlock(gt);
 | 
			
		||||
	intel_gt_pm_put(gt);
 | 
			
		||||
 | 
			
		||||
	return err;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -165,17 +168,13 @@ int intel_reset_live_selftests(struct drm_i915_private *i915)
 | 
			
		|||
		SUBTEST(igt_atomic_reset),
 | 
			
		||||
		SUBTEST(igt_atomic_engine_reset),
 | 
			
		||||
	};
 | 
			
		||||
	intel_wakeref_t wakeref;
 | 
			
		||||
	int err = 0;
 | 
			
		||||
	struct intel_gt *gt = &i915->gt;
 | 
			
		||||
 | 
			
		||||
	if (!intel_has_gpu_reset(i915))
 | 
			
		||||
	if (!intel_has_gpu_reset(gt->i915))
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
	if (i915_terminally_wedged(i915))
 | 
			
		||||
	if (intel_gt_is_wedged(gt))
 | 
			
		||||
		return -EIO; /* we're long past hope of a successful reset */
 | 
			
		||||
 | 
			
		||||
	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 | 
			
		||||
		err = i915_subtests(tests, i915);
 | 
			
		||||
 | 
			
		||||
	return err;
 | 
			
		||||
	return intel_gt_live_subtests(tests, gt);
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -7,6 +7,7 @@
 | 
			
		|||
#include <linux/prime_numbers.h>
 | 
			
		||||
 | 
			
		||||
#include "gem/i915_gem_pm.h"
 | 
			
		||||
#include "intel_gt.h"
 | 
			
		||||
 | 
			
		||||
#include "../selftests/i915_random.h"
 | 
			
		||||
#include "../i915_selftest.h"
 | 
			
		||||
| 
						 | 
				
			
			@ -836,7 +837,7 @@ int intel_timeline_live_selftests(struct drm_i915_private *i915)
 | 
			
		|||
		SUBTEST(live_hwsp_wrap),
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
	if (i915_terminally_wedged(i915))
 | 
			
		||||
	if (intel_gt_is_wedged(&i915->gt))
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
	return i915_live_subtests(tests, i915);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -12,7 +12,6 @@
 | 
			
		|||
#include "selftests/igt_flush_test.h"
 | 
			
		||||
#include "selftests/igt_reset.h"
 | 
			
		||||
#include "selftests/igt_spinner.h"
 | 
			
		||||
#include "selftests/igt_wedge_me.h"
 | 
			
		||||
#include "selftests/mock_drm.h"
 | 
			
		||||
 | 
			
		||||
#include "gem/selftests/igt_gem_utils.h"
 | 
			
		||||
| 
						 | 
				
			
			@ -178,7 +177,7 @@ static int check_whitelist(struct i915_gem_context *ctx,
 | 
			
		|||
			   struct intel_engine_cs *engine)
 | 
			
		||||
{
 | 
			
		||||
	struct drm_i915_gem_object *results;
 | 
			
		||||
	struct igt_wedge_me wedge;
 | 
			
		||||
	struct intel_wedge_me wedge;
 | 
			
		||||
	u32 *vaddr;
 | 
			
		||||
	int err;
 | 
			
		||||
	int i;
 | 
			
		||||
| 
						 | 
				
			
			@ -189,10 +188,10 @@ static int check_whitelist(struct i915_gem_context *ctx,
 | 
			
		|||
 | 
			
		||||
	err = 0;
 | 
			
		||||
	i915_gem_object_lock(results);
 | 
			
		||||
	igt_wedge_on_timeout(&wedge, ctx->i915, HZ / 5) /* a safety net! */
 | 
			
		||||
	intel_wedge_on_timeout(&wedge, &ctx->i915->gt, HZ / 5) /* safety net! */
 | 
			
		||||
		err = i915_gem_object_set_to_cpu_domain(results, false);
 | 
			
		||||
	i915_gem_object_unlock(results);
 | 
			
		||||
	if (i915_terminally_wedged(ctx->i915))
 | 
			
		||||
	if (intel_gt_is_wedged(&ctx->i915->gt))
 | 
			
		||||
		err = -EIO;
 | 
			
		||||
	if (err)
 | 
			
		||||
		goto out_put;
 | 
			
		||||
| 
						 | 
				
			
			@ -225,13 +224,13 @@ static int check_whitelist(struct i915_gem_context *ctx,
 | 
			
		|||
 | 
			
		||||
static int do_device_reset(struct intel_engine_cs *engine)
 | 
			
		||||
{
 | 
			
		||||
	i915_reset(engine->i915, engine->mask, "live_workarounds");
 | 
			
		||||
	intel_gt_reset(engine->gt, engine->mask, "live_workarounds");
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int do_engine_reset(struct intel_engine_cs *engine)
 | 
			
		||||
{
 | 
			
		||||
	return i915_reset_engine(engine, "live_workarounds");
 | 
			
		||||
	return intel_engine_reset(engine, "live_workarounds");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
| 
						 | 
				
			
			@ -572,7 +571,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
 | 
			
		|||
		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 | 
			
		||||
			pr_err("%s: Futzing %x timedout; cancelling test\n",
 | 
			
		||||
			       engine->name, reg);
 | 
			
		||||
			i915_gem_set_wedged(ctx->i915);
 | 
			
		||||
			intel_gt_set_wedged(&ctx->i915->gt);
 | 
			
		||||
			err = -EIO;
 | 
			
		||||
			goto out_batch;
 | 
			
		||||
		}
 | 
			
		||||
| 
						 | 
				
			
			@ -730,7 +729,7 @@ static int live_reset_whitelist(void *arg)
 | 
			
		|||
	if (!engine || engine->whitelist.count == 0)
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
	igt_global_reset_lock(i915);
 | 
			
		||||
	igt_global_reset_lock(&i915->gt);
 | 
			
		||||
 | 
			
		||||
	if (intel_has_reset_engine(i915)) {
 | 
			
		||||
		err = check_whitelist_across_reset(engine,
 | 
			
		||||
| 
						 | 
				
			
			@ -749,7 +748,7 @@ static int live_reset_whitelist(void *arg)
 | 
			
		|||
	}
 | 
			
		||||
 | 
			
		||||
out:
 | 
			
		||||
	igt_global_reset_unlock(i915);
 | 
			
		||||
	igt_global_reset_unlock(&i915->gt);
 | 
			
		||||
	return err;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1118,7 +1117,7 @@ live_gpu_reset_workarounds(void *arg)
 | 
			
		|||
 | 
			
		||||
	pr_info("Verifying after GPU reset...\n");
 | 
			
		||||
 | 
			
		||||
	igt_global_reset_lock(i915);
 | 
			
		||||
	igt_global_reset_lock(&i915->gt);
 | 
			
		||||
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 | 
			
		||||
 | 
			
		||||
	reference_lists_init(i915, &lists);
 | 
			
		||||
| 
						 | 
				
			
			@ -1127,7 +1126,7 @@ live_gpu_reset_workarounds(void *arg)
 | 
			
		|||
	if (!ok)
 | 
			
		||||
		goto out;
 | 
			
		||||
 | 
			
		||||
	i915_reset(i915, ALL_ENGINES, "live_workarounds");
 | 
			
		||||
	intel_gt_reset(&i915->gt, ALL_ENGINES, "live_workarounds");
 | 
			
		||||
 | 
			
		||||
	ok = verify_wa_lists(ctx, &lists, "after reset");
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1135,7 +1134,7 @@ live_gpu_reset_workarounds(void *arg)
 | 
			
		|||
	kernel_context_close(ctx);
 | 
			
		||||
	reference_lists_fini(i915, &lists);
 | 
			
		||||
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 | 
			
		||||
	igt_global_reset_unlock(i915);
 | 
			
		||||
	igt_global_reset_unlock(&i915->gt);
 | 
			
		||||
 | 
			
		||||
	return ok ? 0 : -ESRCH;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1160,7 +1159,7 @@ live_engine_reset_workarounds(void *arg)
 | 
			
		|||
	if (IS_ERR(ctx))
 | 
			
		||||
		return PTR_ERR(ctx);
 | 
			
		||||
 | 
			
		||||
	igt_global_reset_lock(i915);
 | 
			
		||||
	igt_global_reset_lock(&i915->gt);
 | 
			
		||||
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 | 
			
		||||
 | 
			
		||||
	reference_lists_init(i915, &lists);
 | 
			
		||||
| 
						 | 
				
			
			@ -1176,7 +1175,7 @@ live_engine_reset_workarounds(void *arg)
 | 
			
		|||
			goto err;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		i915_reset_engine(engine, "live_workarounds");
 | 
			
		||||
		intel_engine_reset(engine, "live_workarounds");
 | 
			
		||||
 | 
			
		||||
		ok = verify_wa_lists(ctx, &lists, "after idle reset");
 | 
			
		||||
		if (!ok) {
 | 
			
		||||
| 
						 | 
				
			
			@ -1204,7 +1203,7 @@ live_engine_reset_workarounds(void *arg)
 | 
			
		|||
			goto err;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		i915_reset_engine(engine, "live_workarounds");
 | 
			
		||||
		intel_engine_reset(engine, "live_workarounds");
 | 
			
		||||
 | 
			
		||||
		igt_spinner_end(&spin);
 | 
			
		||||
		igt_spinner_fini(&spin);
 | 
			
		||||
| 
						 | 
				
			
			@ -1219,7 +1218,7 @@ live_engine_reset_workarounds(void *arg)
 | 
			
		|||
err:
 | 
			
		||||
	reference_lists_fini(i915, &lists);
 | 
			
		||||
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 | 
			
		||||
	igt_global_reset_unlock(i915);
 | 
			
		||||
	igt_global_reset_unlock(&i915->gt);
 | 
			
		||||
	kernel_context_close(ctx);
 | 
			
		||||
 | 
			
		||||
	igt_flush_test(i915, I915_WAIT_LOCKED);
 | 
			
		||||
| 
						 | 
				
			
			@ -1238,7 +1237,7 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915)
 | 
			
		|||
	};
 | 
			
		||||
	int err;
 | 
			
		||||
 | 
			
		||||
	if (i915_terminally_wedged(i915))
 | 
			
		||||
	if (intel_gt_is_wedged(&i915->gt))
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
	mutex_lock(&i915->drm.struct_mutex);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1074,15 +1074,16 @@ static void i915_instdone_info(struct drm_i915_private *dev_priv,
 | 
			
		|||
 | 
			
		||||
static int i915_hangcheck_info(struct seq_file *m, void *unused)
 | 
			
		||||
{
 | 
			
		||||
	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 | 
			
		||||
	struct drm_i915_private *i915 = node_to_i915(m->private);
 | 
			
		||||
	struct intel_gt *gt = &i915->gt;
 | 
			
		||||
	struct intel_engine_cs *engine;
 | 
			
		||||
	intel_wakeref_t wakeref;
 | 
			
		||||
	enum intel_engine_id id;
 | 
			
		||||
 | 
			
		||||
	seq_printf(m, "Reset flags: %lx\n", dev_priv->gpu_error.flags);
 | 
			
		||||
	if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
 | 
			
		||||
	seq_printf(m, "Reset flags: %lx\n", gt->reset.flags);
 | 
			
		||||
	if (test_bit(I915_WEDGED, &gt->reset.flags))
 | 
			
		||||
		seq_puts(m, "\tWedged\n");
 | 
			
		||||
	if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags))
 | 
			
		||||
	if (test_bit(I915_RESET_BACKOFF, &gt->reset.flags))
 | 
			
		||||
		seq_puts(m, "\tDevice (global) reset in progress\n");
 | 
			
		||||
 | 
			
		||||
	if (!i915_modparams.enable_hangcheck) {
 | 
			
		||||
| 
						 | 
				
			
			@ -1090,19 +1091,19 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 | 
			
		|||
		return 0;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (timer_pending(&dev_priv->gpu_error.hangcheck_work.timer))
 | 
			
		||||
	if (timer_pending(&gt->hangcheck.work.timer))
 | 
			
		||||
		seq_printf(m, "Hangcheck active, timer fires in %dms\n",
 | 
			
		||||
			   jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires -
 | 
			
		||||
			   jiffies_to_msecs(gt->hangcheck.work.timer.expires -
 | 
			
		||||
					    jiffies));
 | 
			
		||||
	else if (delayed_work_pending(&dev_priv->gpu_error.hangcheck_work))
 | 
			
		||||
	else if (delayed_work_pending(&gt->hangcheck.work))
 | 
			
		||||
		seq_puts(m, "Hangcheck active, work pending\n");
 | 
			
		||||
	else
 | 
			
		||||
		seq_puts(m, "Hangcheck inactive\n");
 | 
			
		||||
 | 
			
		||||
	seq_printf(m, "GT active? %s\n", yesno(dev_priv->gt.awake));
 | 
			
		||||
	seq_printf(m, "GT active? %s\n", yesno(gt->awake));
 | 
			
		||||
 | 
			
		||||
	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) {
 | 
			
		||||
		for_each_engine(engine, dev_priv, id) {
 | 
			
		||||
	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
 | 
			
		||||
		for_each_engine(engine, i915, id) {
 | 
			
		||||
			struct intel_instdone instdone;
 | 
			
		||||
 | 
			
		||||
			seq_printf(m, "%s: %d ms ago\n",
 | 
			
		||||
| 
						 | 
				
			
			@ -1117,10 +1118,10 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 | 
			
		|||
			intel_engine_get_instdone(engine, &instdone);
 | 
			
		||||
 | 
			
		||||
			seq_puts(m, "\tinstdone read =\n");
 | 
			
		||||
			i915_instdone_info(dev_priv, m, &instdone);
 | 
			
		||||
			i915_instdone_info(i915, m, &instdone);
 | 
			
		||||
 | 
			
		||||
			seq_puts(m, "\tinstdone accu =\n");
 | 
			
		||||
			i915_instdone_info(dev_priv, m,
 | 
			
		||||
			i915_instdone_info(i915, m,
 | 
			
		||||
					   &engine->hangcheck.instdone);
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -1128,23 +1129,6 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 | 
			
		|||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int i915_reset_info(struct seq_file *m, void *unused)
 | 
			
		||||
{
 | 
			
		||||
	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 | 
			
		||||
	struct i915_gpu_error *error = &dev_priv->gpu_error;
 | 
			
		||||
	struct intel_engine_cs *engine;
 | 
			
		||||
	enum intel_engine_id id;
 | 
			
		||||
 | 
			
		||||
	seq_printf(m, "full gpu reset = %u\n", i915_reset_count(error));
 | 
			
		||||
 | 
			
		||||
	for_each_engine(engine, dev_priv, id) {
 | 
			
		||||
		seq_printf(m, "%s = %u\n", engine->name,
 | 
			
		||||
			   i915_reset_engine_count(error, engine));
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int ironlake_drpc_info(struct seq_file *m)
 | 
			
		||||
{
 | 
			
		||||
	struct drm_i915_private *i915 = node_to_i915(m->private);
 | 
			
		||||
| 
						 | 
				
			
			@ -3616,7 +3600,8 @@ static const struct file_operations i915_cur_wm_latency_fops = {
 | 
			
		|||
static int
 | 
			
		||||
i915_wedged_get(void *data, u64 *val)
 | 
			
		||||
{
 | 
			
		||||
	int ret = i915_terminally_wedged(data);
 | 
			
		||||
	struct drm_i915_private *i915 = data;
 | 
			
		||||
	int ret = intel_gt_terminally_wedged(&i915->gt);
 | 
			
		||||
 | 
			
		||||
	switch (ret) {
 | 
			
		||||
	case -EIO:
 | 
			
		||||
| 
						 | 
				
			
			@ -3636,11 +3621,11 @@ i915_wedged_set(void *data, u64 val)
 | 
			
		|||
	struct drm_i915_private *i915 = data;
 | 
			
		||||
 | 
			
		||||
	/* Flush any previous reset before applying for a new one */
 | 
			
		||||
	wait_event(i915->gpu_error.reset_queue,
 | 
			
		||||
		   !test_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags));
 | 
			
		||||
	wait_event(i915->gt.reset.queue,
 | 
			
		||||
		   !test_bit(I915_RESET_BACKOFF, &i915->gt.reset.flags));
 | 
			
		||||
 | 
			
		||||
	i915_handle_error(i915, val, I915_ERROR_CAPTURE,
 | 
			
		||||
			  "Manually set wedged engine mask = %llx", val);
 | 
			
		||||
	intel_gt_handle_error(&i915->gt, val, I915_ERROR_CAPTURE,
 | 
			
		||||
			      "Manually set wedged engine mask = %llx", val);
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -3683,8 +3668,9 @@ i915_drop_caches_set(void *data, u64 val)
 | 
			
		|||
		  val, val & DROP_ALL);
 | 
			
		||||
 | 
			
		||||
	if (val & DROP_RESET_ACTIVE &&
 | 
			
		||||
	    wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT))
 | 
			
		||||
		i915_gem_set_wedged(i915);
 | 
			
		||||
	    wait_for(intel_engines_are_idle(&i915->gt),
 | 
			
		||||
		     I915_IDLE_ENGINES_TIMEOUT))
 | 
			
		||||
		intel_gt_set_wedged(&i915->gt);
 | 
			
		||||
 | 
			
		||||
	/* No need to check and wait for gpu resets, only libdrm auto-restarts
 | 
			
		||||
	 * on ioctls on -EAGAIN. */
 | 
			
		||||
| 
						 | 
				
			
			@ -3719,8 +3705,8 @@ i915_drop_caches_set(void *data, u64 val)
 | 
			
		|||
		mutex_unlock(&i915->drm.struct_mutex);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (val & DROP_RESET_ACTIVE && i915_terminally_wedged(i915))
 | 
			
		||||
		i915_handle_error(i915, ALL_ENGINES, 0, NULL);
 | 
			
		||||
	if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(&i915->gt))
 | 
			
		||||
		intel_gt_handle_error(&i915->gt, ALL_ENGINES, 0, NULL);
 | 
			
		||||
 | 
			
		||||
	fs_reclaim_acquire(GFP_KERNEL);
 | 
			
		||||
	if (val & DROP_BOUND)
 | 
			
		||||
| 
						 | 
				
			
			@ -4375,7 +4361,6 @@ static const struct drm_info_list i915_debugfs_list[] = {
 | 
			
		|||
	{"i915_huc_load_status", i915_huc_load_status_info, 0},
 | 
			
		||||
	{"i915_frequency_info", i915_frequency_info, 0},
 | 
			
		||||
	{"i915_hangcheck_info", i915_hangcheck_info, 0},
 | 
			
		||||
	{"i915_reset_info", i915_reset_info, 0},
 | 
			
		||||
	{"i915_drpc_info", i915_drpc_info, 0},
 | 
			
		||||
	{"i915_emon_status", i915_emon_status, 0},
 | 
			
		||||
	{"i915_ring_freq_table", i915_ring_freq_table, 0},
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -947,7 +947,6 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
 | 
			
		|||
	if (ret < 0)
 | 
			
		||||
		goto err_uc;
 | 
			
		||||
	intel_irq_init(dev_priv);
 | 
			
		||||
	intel_hangcheck_init(dev_priv);
 | 
			
		||||
	intel_init_display_hooks(dev_priv);
 | 
			
		||||
	intel_init_clock_gating_hooks(dev_priv);
 | 
			
		||||
	intel_init_audio_hooks(dev_priv);
 | 
			
		||||
| 
						 | 
				
			
			@ -1967,7 +1966,7 @@ void i915_driver_remove(struct drm_device *dev)
 | 
			
		|||
	 * all in-flight requests so that we can quickly unbind the active
 | 
			
		||||
	 * resources.
 | 
			
		||||
	 */
 | 
			
		||||
	i915_gem_set_wedged(dev_priv);
 | 
			
		||||
	intel_gt_set_wedged(&dev_priv->gt);
 | 
			
		||||
 | 
			
		||||
	/* Flush any external code that still may be under the RCU lock */
 | 
			
		||||
	synchronize_rcu();
 | 
			
		||||
| 
						 | 
				
			
			@ -1988,7 +1987,7 @@ void i915_driver_remove(struct drm_device *dev)
 | 
			
		|||
	intel_csr_ucode_fini(dev_priv);
 | 
			
		||||
 | 
			
		||||
	/* Free error state after interrupts are fully disabled. */
 | 
			
		||||
	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
 | 
			
		||||
	cancel_delayed_work_sync(&dev_priv->gt.hangcheck.work);
 | 
			
		||||
	i915_reset_error_state(dev_priv);
 | 
			
		||||
 | 
			
		||||
	i915_gem_driver_remove(dev_priv);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2400,28 +2400,10 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
 | 
			
		|||
void i915_driver_remove(struct drm_device *dev);
 | 
			
		||||
 | 
			
		||||
void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
 | 
			
		||||
void intel_hangcheck_init(struct drm_i915_private *dev_priv);
 | 
			
		||||
int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
 | 
			
		||||
 | 
			
		||||
u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv);
 | 
			
		||||
 | 
			
		||||
static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv)
 | 
			
		||||
{
 | 
			
		||||
	unsigned long delay;
 | 
			
		||||
 | 
			
		||||
	if (unlikely(!i915_modparams.enable_hangcheck))
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	/* Don't continually defer the hangcheck so that it is always run at
 | 
			
		||||
	 * least once after work has been scheduled on any ring. Otherwise,
 | 
			
		||||
	 * we will ignore a hung ring if a second ring is kept busy.
 | 
			
		||||
	 */
 | 
			
		||||
 | 
			
		||||
	delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES);
 | 
			
		||||
	queue_delayed_work(system_long_wq,
 | 
			
		||||
			   &dev_priv->gpu_error.hangcheck_work, delay);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline bool intel_gvt_active(struct drm_i915_private *dev_priv)
 | 
			
		||||
{
 | 
			
		||||
	return dev_priv->gvt;
 | 
			
		||||
| 
						 | 
				
			
			@ -2510,30 +2492,17 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 | 
			
		|||
 | 
			
		||||
int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno);
 | 
			
		||||
 | 
			
		||||
static inline bool __i915_wedged(struct i915_gpu_error *error)
 | 
			
		||||
{
 | 
			
		||||
	return unlikely(test_bit(I915_WEDGED, &error->flags));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline bool i915_reset_failed(struct drm_i915_private *i915)
 | 
			
		||||
{
 | 
			
		||||
	return __i915_wedged(&i915->gpu_error);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline u32 i915_reset_count(struct i915_gpu_error *error)
 | 
			
		||||
{
 | 
			
		||||
	return READ_ONCE(error->reset_count);
 | 
			
		||||
	return atomic_read(&error->reset_count);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
 | 
			
		||||
					  struct intel_engine_cs *engine)
 | 
			
		||||
{
 | 
			
		||||
	return READ_ONCE(error->reset_engine_count[engine->id]);
 | 
			
		||||
	return atomic_read(&error->reset_engine_count[engine->uabi_class]);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
 | 
			
		||||
bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv);
 | 
			
		||||
 | 
			
		||||
void i915_gem_init_mmio(struct drm_i915_private *i915);
 | 
			
		||||
int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
 | 
			
		||||
int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -894,13 +894,13 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
 | 
			
		|||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int wait_for_engines(struct drm_i915_private *i915)
 | 
			
		||||
static int wait_for_engines(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
 | 
			
		||||
		dev_err(i915->drm.dev,
 | 
			
		||||
	if (wait_for(intel_engines_are_idle(gt), I915_IDLE_ENGINES_TIMEOUT)) {
 | 
			
		||||
		dev_err(gt->i915->drm.dev,
 | 
			
		||||
			"Failed to idle engines, declaring wedged!\n");
 | 
			
		||||
		GEM_TRACE_DUMP();
 | 
			
		||||
		i915_gem_set_wedged(i915);
 | 
			
		||||
		intel_gt_set_wedged(gt);
 | 
			
		||||
		return -EIO;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -971,7 +971,7 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 | 
			
		|||
 | 
			
		||||
		lockdep_assert_held(&i915->drm.struct_mutex);
 | 
			
		||||
 | 
			
		||||
		err = wait_for_engines(i915);
 | 
			
		||||
		err = wait_for_engines(&i915->gt);
 | 
			
		||||
		if (err)
 | 
			
		||||
			return err;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1149,8 +1149,8 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 | 
			
		|||
	 * back to defaults, recovering from whatever wedged state we left it
 | 
			
		||||
	 * in and so worth trying to use the device once more.
 | 
			
		||||
	 */
 | 
			
		||||
	if (i915_terminally_wedged(i915))
 | 
			
		||||
		i915_gem_unset_wedged(i915);
 | 
			
		||||
	if (intel_gt_is_wedged(&i915->gt))
 | 
			
		||||
		intel_gt_unset_wedged(&i915->gt);
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * If we inherit context state from the BIOS or earlier occupants
 | 
			
		||||
| 
						 | 
				
			
			@ -1202,7 +1202,7 @@ int i915_gem_init_hw(struct drm_i915_private *i915)
 | 
			
		|||
	int ret;
 | 
			
		||||
 | 
			
		||||
	BUG_ON(!i915->kernel_context);
 | 
			
		||||
	ret = i915_terminally_wedged(i915);
 | 
			
		||||
	ret = intel_gt_terminally_wedged(gt);
 | 
			
		||||
	if (ret)
 | 
			
		||||
		return ret;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1384,7 +1384,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 | 
			
		|||
	 * and ready to be torn-down. The quickest way we can accomplish
 | 
			
		||||
	 * this is by declaring ourselves wedged.
 | 
			
		||||
	 */
 | 
			
		||||
	i915_gem_set_wedged(i915);
 | 
			
		||||
	intel_gt_set_wedged(&i915->gt);
 | 
			
		||||
	goto out_ctx;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1539,7 +1539,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 | 
			
		|||
err_gt:
 | 
			
		||||
	mutex_unlock(&dev_priv->drm.struct_mutex);
 | 
			
		||||
 | 
			
		||||
	i915_gem_set_wedged(dev_priv);
 | 
			
		||||
	intel_gt_set_wedged(&dev_priv->gt);
 | 
			
		||||
	i915_gem_suspend(dev_priv);
 | 
			
		||||
	i915_gem_suspend_late(dev_priv);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1581,10 +1581,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 | 
			
		|||
		 * wedged. But we only want to do this where the GPU is angry,
 | 
			
		||||
		 * for all other failure, such as an allocation failure, bail.
 | 
			
		||||
		 */
 | 
			
		||||
		if (!i915_reset_failed(dev_priv)) {
 | 
			
		||||
		if (!intel_gt_is_wedged(&dev_priv->gt)) {
 | 
			
		||||
			i915_probe_error(dev_priv,
 | 
			
		||||
					 "Failed to initialize GPU, declaring it wedged!\n");
 | 
			
		||||
			i915_gem_set_wedged(dev_priv);
 | 
			
		||||
			intel_gt_set_wedged(&dev_priv->gt);
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		/* Minimal basic recovery for KMS */
 | 
			
		||||
| 
						 | 
				
			
			@ -1666,11 +1666,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
 | 
			
		|||
	i915_gem_init__mm(dev_priv);
 | 
			
		||||
	i915_gem_init__pm(dev_priv);
 | 
			
		||||
 | 
			
		||||
	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
 | 
			
		||||
	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
 | 
			
		||||
	mutex_init(&dev_priv->gpu_error.wedge_mutex);
 | 
			
		||||
	init_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
 | 
			
		||||
 | 
			
		||||
	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
 | 
			
		||||
 | 
			
		||||
	spin_lock_init(&dev_priv->fb_tracking.lock);
 | 
			
		||||
| 
						 | 
				
			
			@ -1689,7 +1684,7 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
 | 
			
		|||
	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
 | 
			
		||||
	WARN_ON(dev_priv->mm.shrink_count);
 | 
			
		||||
 | 
			
		||||
	cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
 | 
			
		||||
	intel_gt_cleanup_early(&dev_priv->gt);
 | 
			
		||||
 | 
			
		||||
	i915_gemfs_fini(dev_priv);
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -7,6 +7,7 @@
 | 
			
		|||
#ifndef _I915_GPU_ERROR_H_
 | 
			
		||||
#define _I915_GPU_ERROR_H_
 | 
			
		||||
 | 
			
		||||
#include <linux/atomic.h>
 | 
			
		||||
#include <linux/kref.h>
 | 
			
		||||
#include <linux/ktime.h>
 | 
			
		||||
#include <linux/sched.h>
 | 
			
		||||
| 
						 | 
				
			
			@ -180,12 +181,6 @@ struct i915_gpu_state {
 | 
			
		|||
};
 | 
			
		||||
 | 
			
		||||
struct i915_gpu_error {
 | 
			
		||||
	/* For hangcheck timer */
 | 
			
		||||
#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
 | 
			
		||||
#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)
 | 
			
		||||
 | 
			
		||||
	struct delayed_work hangcheck_work;
 | 
			
		||||
 | 
			
		||||
	/* For reset and error_state handling. */
 | 
			
		||||
	spinlock_t lock;
 | 
			
		||||
	/* Protected by the above dev->gpu_error.lock. */
 | 
			
		||||
| 
						 | 
				
			
			@ -193,52 +188,11 @@ struct i915_gpu_error {
 | 
			
		|||
 | 
			
		||||
	atomic_t pending_fb_pin;
 | 
			
		||||
 | 
			
		||||
	/**
 | 
			
		||||
	 * flags: Control various stages of the GPU reset
 | 
			
		||||
	 *
 | 
			
		||||
	 * #I915_RESET_BACKOFF - When we start a global reset, we need to
 | 
			
		||||
	 * serialise with any other users attempting to do the same, and
 | 
			
		||||
	 * any global resources that may be clobber by the reset (such as
 | 
			
		||||
	 * FENCE registers).
 | 
			
		||||
	 *
 | 
			
		||||
	 * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to
 | 
			
		||||
	 * acquire the struct_mutex to reset an engine, we need an explicit
 | 
			
		||||
	 * flag to prevent two concurrent reset attempts in the same engine.
 | 
			
		||||
	 * As the number of engines continues to grow, allocate the flags from
 | 
			
		||||
	 * the most significant bits.
 | 
			
		||||
	 *
 | 
			
		||||
	 * #I915_WEDGED - If reset fails and we can no longer use the GPU,
 | 
			
		||||
	 * we set the #I915_WEDGED bit. Prior to command submission, e.g.
 | 
			
		||||
	 * i915_request_alloc(), this bit is checked and the sequence
 | 
			
		||||
	 * aborted (with -EIO reported to userspace) if set.
 | 
			
		||||
	 */
 | 
			
		||||
	unsigned long flags;
 | 
			
		||||
#define I915_RESET_BACKOFF	0
 | 
			
		||||
#define I915_RESET_MODESET	1
 | 
			
		||||
#define I915_RESET_ENGINE	2
 | 
			
		||||
#define I915_WEDGED		(BITS_PER_LONG - 1)
 | 
			
		||||
 | 
			
		||||
	/** Number of times the device has been reset (global) */
 | 
			
		||||
	u32 reset_count;
 | 
			
		||||
	atomic_t reset_count;
 | 
			
		||||
 | 
			
		||||
	/** Number of times an engine has been reset */
 | 
			
		||||
	u32 reset_engine_count[I915_NUM_ENGINES];
 | 
			
		||||
 | 
			
		||||
	struct mutex wedge_mutex; /* serialises wedging/unwedging */
 | 
			
		||||
 | 
			
		||||
	/**
 | 
			
		||||
	 * Waitqueue to signal when a hang is detected. Used to for waiters
 | 
			
		||||
	 * to release the struct_mutex for the reset to procede.
 | 
			
		||||
	 */
 | 
			
		||||
	wait_queue_head_t wait_queue;
 | 
			
		||||
 | 
			
		||||
	/**
 | 
			
		||||
	 * Waitqueue to signal when the reset has completed. Used by clients
 | 
			
		||||
	 * that wait for dev_priv->mm.wedged to settle.
 | 
			
		||||
	 */
 | 
			
		||||
	wait_queue_head_t reset_queue;
 | 
			
		||||
 | 
			
		||||
	struct srcu_struct reset_backoff_srcu;
 | 
			
		||||
	atomic_t reset_engine_count[I915_NUM_ENGINES];
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct drm_i915_error_state_buf {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1401,8 +1401,7 @@ long i915_request_wait(struct i915_request *rq,
 | 
			
		|||
	 * serialise wait/reset with an explicit lock, we do want
 | 
			
		||||
	 * lockdep to detect potential dependency cycles.
 | 
			
		||||
	 */
 | 
			
		||||
	mutex_acquire(&rq->i915->gpu_error.wedge_mutex.dep_map,
 | 
			
		||||
		      0, 0, _THIS_IP_);
 | 
			
		||||
	mutex_acquire(&rq->engine->gt->reset.mutex.dep_map, 0, 0, _THIS_IP_);
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Optimistic spin before touching IRQs.
 | 
			
		||||
| 
						 | 
				
			
			@ -1480,7 +1479,7 @@ long i915_request_wait(struct i915_request *rq,
 | 
			
		|||
	dma_fence_remove_callback(&rq->fence, &wait.cb);
 | 
			
		||||
 | 
			
		||||
out:
 | 
			
		||||
	mutex_release(&rq->i915->gpu_error.wedge_mutex.dep_map, 0, _THIS_IP_);
 | 
			
		||||
	mutex_release(&rq->engine->gt->reset.mutex.dep_map, 0, _THIS_IP_);
 | 
			
		||||
	trace_i915_request_wait_end(rq);
 | 
			
		||||
	return timeout;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -72,6 +72,9 @@ int __i915_nop_teardown(int err, void *data);
 | 
			
		|||
int __i915_live_setup(void *data);
 | 
			
		||||
int __i915_live_teardown(int err, void *data);
 | 
			
		||||
 | 
			
		||||
int __intel_gt_live_setup(void *data);
 | 
			
		||||
int __intel_gt_live_teardown(int err, void *data);
 | 
			
		||||
 | 
			
		||||
int __i915_subtests(const char *caller,
 | 
			
		||||
		    int (*setup)(void *data),
 | 
			
		||||
		    int (*teardown)(int err, void *data),
 | 
			
		||||
| 
						 | 
				
			
			@ -88,6 +91,12 @@ int __i915_subtests(const char *caller,
 | 
			
		|||
			__i915_live_setup, __i915_live_teardown, \
 | 
			
		||||
			T, ARRAY_SIZE(T), data); \
 | 
			
		||||
})
 | 
			
		||||
#define intel_gt_live_subtests(T, data) ({ \
 | 
			
		||||
	typecheck(struct intel_gt *, data); \
 | 
			
		||||
	__i915_subtests(__func__, \
 | 
			
		||||
			__intel_gt_live_setup, __intel_gt_live_teardown, \
 | 
			
		||||
			T, ARRAY_SIZE(T), data); \
 | 
			
		||||
})
 | 
			
		||||
 | 
			
		||||
#define SUBTEST(x) { x, #x }
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -667,7 +667,7 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled)
 | 
			
		|||
	if (!i915_request_started(rq))
 | 
			
		||||
		stalled = false;
 | 
			
		||||
 | 
			
		||||
	i915_reset_request(rq, stalled);
 | 
			
		||||
	__i915_request_reset(rq, stalled);
 | 
			
		||||
	intel_lr_context_reset(engine, rq->hw_context, rq->head, stalled);
 | 
			
		||||
 | 
			
		||||
out_unlock:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -38,7 +38,7 @@ static int __intel_uc_reset_hw(struct drm_i915_private *dev_priv)
 | 
			
		|||
	int ret;
 | 
			
		||||
	u32 guc_status;
 | 
			
		||||
 | 
			
		||||
	ret = intel_reset_guc(dev_priv);
 | 
			
		||||
	ret = intel_reset_guc(&dev_priv->gt);
 | 
			
		||||
	if (ret) {
 | 
			
		||||
		DRM_ERROR("Failed to reset GuC, ret = %d\n", ret);
 | 
			
		||||
		return ret;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -7,6 +7,7 @@
 | 
			
		|||
#include <linux/kref.h>
 | 
			
		||||
 | 
			
		||||
#include "gem/i915_gem_pm.h"
 | 
			
		||||
#include "gt/intel_gt.h"
 | 
			
		||||
 | 
			
		||||
#include "i915_selftest.h"
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -221,7 +222,7 @@ int i915_active_live_selftests(struct drm_i915_private *i915)
 | 
			
		|||
		SUBTEST(live_active_retire),
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
	if (i915_terminally_wedged(i915))
 | 
			
		||||
	if (intel_gt_is_wedged(&i915->gt))
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
	return i915_subtests(tests, i915);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -8,6 +8,7 @@
 | 
			
		|||
 | 
			
		||||
#include "gem/selftests/igt_gem_utils.h"
 | 
			
		||||
#include "gem/selftests/mock_context.h"
 | 
			
		||||
#include "gt/intel_gt.h"
 | 
			
		||||
 | 
			
		||||
#include "i915_selftest.h"
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -206,7 +207,7 @@ int i915_gem_live_selftests(struct drm_i915_private *i915)
 | 
			
		|||
		SUBTEST(igt_gem_hibernate),
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
	if (i915_terminally_wedged(i915))
 | 
			
		||||
	if (intel_gt_is_wedged(&i915->gt))
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
	return i915_live_subtests(tests, i915);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -25,6 +25,7 @@
 | 
			
		|||
#include "gem/i915_gem_pm.h"
 | 
			
		||||
#include "gem/selftests/igt_gem_utils.h"
 | 
			
		||||
#include "gem/selftests/mock_context.h"
 | 
			
		||||
#include "gt/intel_gt.h"
 | 
			
		||||
 | 
			
		||||
#include "i915_selftest.h"
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -557,7 +558,7 @@ int i915_gem_evict_live_selftests(struct drm_i915_private *i915)
 | 
			
		|||
		SUBTEST(igt_evict_contexts),
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
	if (i915_terminally_wedged(i915))
 | 
			
		||||
	if (intel_gt_is_wedged(&i915->gt))
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
	return i915_subtests(tests, i915);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -380,7 +380,7 @@ static int __igt_breadcrumbs_smoketest(void *arg)
 | 
			
		|||
			       t->engine->name);
 | 
			
		||||
			GEM_TRACE_DUMP();
 | 
			
		||||
 | 
			
		||||
			i915_gem_set_wedged(t->engine->i915);
 | 
			
		||||
			intel_gt_set_wedged(t->engine->gt);
 | 
			
		||||
			GEM_BUG_ON(!i915_request_completed(rq));
 | 
			
		||||
			i915_sw_fence_wait(wait);
 | 
			
		||||
			err = -EIO;
 | 
			
		||||
| 
						 | 
				
			
			@ -1234,7 +1234,7 @@ int i915_request_live_selftests(struct drm_i915_private *i915)
 | 
			
		|||
		SUBTEST(live_breadcrumbs_smoketest),
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
	if (i915_terminally_wedged(i915))
 | 
			
		||||
	if (intel_gt_is_wedged(&i915->gt))
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
	return i915_subtests(tests, i915);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -256,7 +256,7 @@ int __i915_live_setup(void *data)
 | 
			
		|||
{
 | 
			
		||||
	struct drm_i915_private *i915 = data;
 | 
			
		||||
 | 
			
		||||
	return i915_terminally_wedged(i915);
 | 
			
		||||
	return intel_gt_terminally_wedged(&i915->gt);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int __i915_live_teardown(int err, void *data)
 | 
			
		||||
| 
						 | 
				
			
			@ -273,6 +273,27 @@ int __i915_live_teardown(int err, void *data)
 | 
			
		|||
	return err;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int __intel_gt_live_setup(void *data)
 | 
			
		||||
{
 | 
			
		||||
	struct intel_gt *gt = data;
 | 
			
		||||
 | 
			
		||||
	return intel_gt_terminally_wedged(gt);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int __intel_gt_live_teardown(int err, void *data)
 | 
			
		||||
{
 | 
			
		||||
	struct intel_gt *gt = data;
 | 
			
		||||
 | 
			
		||||
	mutex_lock(&gt->i915->drm.struct_mutex);
 | 
			
		||||
	if (igt_flush_test(gt->i915, I915_WAIT_LOCKED))
 | 
			
		||||
		err = -EIO;
 | 
			
		||||
	mutex_unlock(&gt->i915->drm.struct_mutex);
 | 
			
		||||
 | 
			
		||||
	i915_gem_drain_freed_objects(gt->i915);
 | 
			
		||||
 | 
			
		||||
	return err;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int __i915_subtests(const char *caller,
 | 
			
		||||
		    int (*setup)(void *data),
 | 
			
		||||
		    int (*teardown)(int err, void *data),
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -5,6 +5,7 @@
 | 
			
		|||
 */
 | 
			
		||||
 | 
			
		||||
#include "gem/i915_gem_context.h"
 | 
			
		||||
#include "gt/intel_gt.h"
 | 
			
		||||
 | 
			
		||||
#include "i915_drv.h"
 | 
			
		||||
#include "i915_selftest.h"
 | 
			
		||||
| 
						 | 
				
			
			@ -13,7 +14,7 @@
 | 
			
		|||
 | 
			
		||||
int igt_flush_test(struct drm_i915_private *i915, unsigned int flags)
 | 
			
		||||
{
 | 
			
		||||
	int ret = i915_terminally_wedged(i915) ? -EIO : 0;
 | 
			
		||||
	int ret = intel_gt_is_wedged(&i915->gt) ? -EIO : 0;
 | 
			
		||||
	int repeat = !!(flags & I915_WAIT_LOCKED);
 | 
			
		||||
 | 
			
		||||
	cond_resched();
 | 
			
		||||
| 
						 | 
				
			
			@ -27,7 +28,7 @@ int igt_flush_test(struct drm_i915_private *i915, unsigned int flags)
 | 
			
		|||
				  __builtin_return_address(0));
 | 
			
		||||
			GEM_TRACE_DUMP();
 | 
			
		||||
 | 
			
		||||
			i915_gem_set_wedged(i915);
 | 
			
		||||
			intel_gt_set_wedged(&i915->gt);
 | 
			
		||||
			repeat = 0;
 | 
			
		||||
			ret = -EIO;
 | 
			
		||||
		}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -7,47 +7,45 @@
 | 
			
		|||
#include "igt_reset.h"
 | 
			
		||||
 | 
			
		||||
#include "gt/intel_engine.h"
 | 
			
		||||
#include "gt/intel_gt.h"
 | 
			
		||||
 | 
			
		||||
#include "../i915_drv.h"
 | 
			
		||||
 | 
			
		||||
void igt_global_reset_lock(struct drm_i915_private *i915)
 | 
			
		||||
void igt_global_reset_lock(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	struct intel_engine_cs *engine;
 | 
			
		||||
	enum intel_engine_id id;
 | 
			
		||||
 | 
			
		||||
	pr_debug("%s: current gpu_error=%08lx\n",
 | 
			
		||||
		 __func__, i915->gpu_error.flags);
 | 
			
		||||
	pr_debug("%s: current gpu_error=%08lx\n", __func__, gt->reset.flags);
 | 
			
		||||
 | 
			
		||||
	while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags))
 | 
			
		||||
		wait_event(i915->gpu_error.reset_queue,
 | 
			
		||||
			   !test_bit(I915_RESET_BACKOFF,
 | 
			
		||||
				     &i915->gpu_error.flags));
 | 
			
		||||
	while (test_and_set_bit(I915_RESET_BACKOFF, &gt->reset.flags))
 | 
			
		||||
		wait_event(gt->reset.queue,
 | 
			
		||||
			   !test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
 | 
			
		||||
 | 
			
		||||
	for_each_engine(engine, i915, id) {
 | 
			
		||||
	for_each_engine(engine, gt->i915, id) {
 | 
			
		||||
		while (test_and_set_bit(I915_RESET_ENGINE + id,
 | 
			
		||||
					&i915->gpu_error.flags))
 | 
			
		||||
			wait_on_bit(&i915->gpu_error.flags,
 | 
			
		||||
				    I915_RESET_ENGINE + id,
 | 
			
		||||
					&gt->reset.flags))
 | 
			
		||||
			wait_on_bit(&gt->reset.flags, I915_RESET_ENGINE + id,
 | 
			
		||||
				    TASK_UNINTERRUPTIBLE);
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void igt_global_reset_unlock(struct drm_i915_private *i915)
 | 
			
		||||
void igt_global_reset_unlock(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	struct intel_engine_cs *engine;
 | 
			
		||||
	enum intel_engine_id id;
 | 
			
		||||
 | 
			
		||||
	for_each_engine(engine, i915, id)
 | 
			
		||||
		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
 | 
			
		||||
	for_each_engine(engine, gt->i915, id)
 | 
			
		||||
		clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
 | 
			
		||||
 | 
			
		||||
	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
 | 
			
		||||
	wake_up_all(&i915->gpu_error.reset_queue);
 | 
			
		||||
	clear_bit(I915_RESET_BACKOFF, &gt->reset.flags);
 | 
			
		||||
	wake_up_all(&gt->reset.queue);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool igt_force_reset(struct drm_i915_private *i915)
 | 
			
		||||
bool igt_force_reset(struct intel_gt *gt)
 | 
			
		||||
{
 | 
			
		||||
	i915_gem_set_wedged(i915);
 | 
			
		||||
	i915_reset(i915, 0, NULL);
 | 
			
		||||
	intel_gt_set_wedged(gt);
 | 
			
		||||
	intel_gt_reset(gt, 0, NULL);
 | 
			
		||||
 | 
			
		||||
	return !i915_reset_failed(i915);
 | 
			
		||||
	return !intel_gt_is_wedged(gt);
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -7,10 +7,12 @@
 | 
			
		|||
#ifndef __I915_SELFTESTS_IGT_RESET_H__
 | 
			
		||||
#define __I915_SELFTESTS_IGT_RESET_H__
 | 
			
		||||
 | 
			
		||||
#include "../i915_drv.h"
 | 
			
		||||
#include <linux/types.h>
 | 
			
		||||
 | 
			
		||||
void igt_global_reset_lock(struct drm_i915_private *i915);
 | 
			
		||||
void igt_global_reset_unlock(struct drm_i915_private *i915);
 | 
			
		||||
bool igt_force_reset(struct drm_i915_private *i915);
 | 
			
		||||
struct intel_gt;
 | 
			
		||||
 | 
			
		||||
void igt_global_reset_lock(struct intel_gt *gt);
 | 
			
		||||
void igt_global_reset_unlock(struct intel_gt *gt);
 | 
			
		||||
bool igt_force_reset(struct intel_gt *gt);
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,58 +0,0 @@
 | 
			
		|||
/*
 | 
			
		||||
 * SPDX-License-Identifier: MIT
 | 
			
		||||
 *
 | 
			
		||||
 * Copyright © 2018 Intel Corporation
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#ifndef IGT_WEDGE_ME_H
 | 
			
		||||
#define IGT_WEDGE_ME_H
 | 
			
		||||
 | 
			
		||||
#include <linux/workqueue.h>
 | 
			
		||||
 | 
			
		||||
#include "../i915_gem.h"
 | 
			
		||||
 | 
			
		||||
struct drm_i915_private;
 | 
			
		||||
 | 
			
		||||
struct igt_wedge_me {
 | 
			
		||||
	struct delayed_work work;
 | 
			
		||||
	struct drm_i915_private *i915;
 | 
			
		||||
	const char *name;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static void __igt_wedge_me(struct work_struct *work)
 | 
			
		||||
{
 | 
			
		||||
	struct igt_wedge_me *w = container_of(work, typeof(*w), work.work);
 | 
			
		||||
 | 
			
		||||
	pr_err("%s timed out, cancelling test.\n", w->name);
 | 
			
		||||
 | 
			
		||||
	GEM_TRACE("%s timed out.\n", w->name);
 | 
			
		||||
	GEM_TRACE_DUMP();
 | 
			
		||||
 | 
			
		||||
	i915_gem_set_wedged(w->i915);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void __igt_init_wedge(struct igt_wedge_me *w,
 | 
			
		||||
			     struct drm_i915_private *i915,
 | 
			
		||||
			     long timeout,
 | 
			
		||||
			     const char *name)
 | 
			
		||||
{
 | 
			
		||||
	w->i915 = i915;
 | 
			
		||||
	w->name = name;
 | 
			
		||||
 | 
			
		||||
	INIT_DELAYED_WORK_ONSTACK(&w->work, __igt_wedge_me);
 | 
			
		||||
	schedule_delayed_work(&w->work, timeout);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void __igt_fini_wedge(struct igt_wedge_me *w)
 | 
			
		||||
{
 | 
			
		||||
	cancel_delayed_work_sync(&w->work);
 | 
			
		||||
	destroy_delayed_work_on_stack(&w->work);
 | 
			
		||||
	w->i915 = NULL;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#define igt_wedge_on_timeout(W, DEV, TIMEOUT)				\
 | 
			
		||||
	for (__igt_init_wedge((W), (DEV), (TIMEOUT), __func__);		\
 | 
			
		||||
	     (W)->i915;							\
 | 
			
		||||
	     __igt_fini_wedge((W)))
 | 
			
		||||
 | 
			
		||||
#endif /* IGT_WEDGE_ME_H */
 | 
			
		||||
| 
						 | 
				
			
			@ -183,11 +183,6 @@ struct drm_i915_private *mock_gem_device(void)
 | 
			
		|||
	intel_gt_init_early(&i915->gt, i915);
 | 
			
		||||
	atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */
 | 
			
		||||
 | 
			
		||||
	init_waitqueue_head(&i915->gpu_error.wait_queue);
 | 
			
		||||
	init_waitqueue_head(&i915->gpu_error.reset_queue);
 | 
			
		||||
	init_srcu_struct(&i915->gpu_error.reset_backoff_srcu);
 | 
			
		||||
	mutex_init(&i915->gpu_error.wedge_mutex);
 | 
			
		||||
 | 
			
		||||
	i915->wq = alloc_ordered_workqueue("mock", 0);
 | 
			
		||||
	if (!i915->wq)
 | 
			
		||||
		goto err_drv;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue