drm/i915/guc: reset GuC and retry on firmware load failure
Due to timing issues in the HW, some of the status bits required for GuC authentication occasionally don't get set; when that happens, the GuC cannot be initialized and we will be left with a wedged GPU. The W/A suggested is to perform a soft reset of the GuC and attempt to reload the F/W again for few times before giving up. As the failure is dependent on timing, tests performed by triggering manual full gpu reset (i915_wedged) showed that we could sometimes hit this after several thousand iterations, but sometimes tests ran even longer without any issues. Reset and reload mechanism proved helpful when we indeed hit f/w load failure, so it is better to include this to improve driver stability. This change implements the following WAs, WaEnableuKernelHeaderValidFix:skl,bxt WaEnableGuCBootHashCheckNotSet:skl,bxt Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com> Signed-off-by: Dave Gordon <david.s.gordon@intel.com> Reviewed-by: Alex Dai <yu.dai@intel.com> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
This commit is contained in:
parent
168cf367d7
commit
6b332fa20f
|
@ -2747,6 +2747,7 @@ extern long i915_compat_ioctl(struct file *filp, unsigned int cmd,
|
|||
extern int intel_gpu_reset(struct drm_device *dev, u32 engine_mask);
|
||||
extern bool intel_has_gpu_reset(struct drm_device *dev);
|
||||
extern int i915_reset(struct drm_device *dev);
|
||||
extern int intel_guc_reset(struct drm_i915_private *dev_priv);
|
||||
extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
|
||||
extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv);
|
||||
extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
/* Definitions of GuC H/W registers, bits, etc */
|
||||
|
||||
#define GUC_STATUS _MMIO(0xc000)
|
||||
#define GS_MIA_IN_RESET (1 << 0)
|
||||
#define GS_BOOTROM_SHIFT 1
|
||||
#define GS_BOOTROM_MASK (0x7F << GS_BOOTROM_SHIFT)
|
||||
#define GS_BOOTROM_RSA_FAILED (0x50 << GS_BOOTROM_SHIFT)
|
||||
|
|
|
@ -165,6 +165,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
|
|||
#define GEN6_GRDOM_MEDIA (1 << 2)
|
||||
#define GEN6_GRDOM_BLT (1 << 3)
|
||||
#define GEN6_GRDOM_VECS (1 << 4)
|
||||
#define GEN9_GRDOM_GUC (1 << 5)
|
||||
#define GEN8_GRDOM_MEDIA2 (1 << 7)
|
||||
|
||||
#define RING_PP_DIR_BASE(ring) _MMIO((ring)->mmio_base+0x228)
|
||||
|
|
|
@ -353,6 +353,24 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int i915_reset_guc(struct drm_i915_private *dev_priv)
|
||||
{
|
||||
int ret;
|
||||
u32 guc_status;
|
||||
|
||||
ret = intel_guc_reset(dev_priv);
|
||||
if (ret) {
|
||||
DRM_ERROR("GuC reset failed, ret = %d\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
guc_status = I915_READ(GUC_STATUS);
|
||||
WARN(!(guc_status & GS_MIA_IN_RESET),
|
||||
"GuC status: 0x%x, MIA core expected to be in reset\n", guc_status);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* intel_guc_ucode_load() - load GuC uCode into the device
|
||||
* @dev: drm device
|
||||
|
@ -417,9 +435,36 @@ int intel_guc_ucode_load(struct drm_device *dev)
|
|||
if (err)
|
||||
goto fail;
|
||||
|
||||
/*
|
||||
* WaEnableuKernelHeaderValidFix:skl,bxt
|
||||
* For BXT, this is only upto B0 but below WA is required for later
|
||||
* steppings also so this is extended as well.
|
||||
*/
|
||||
/* WaEnableGuCBootHashCheckNotSet:skl,bxt */
|
||||
err = guc_ucode_xfer(dev_priv);
|
||||
if (err)
|
||||
goto fail;
|
||||
if (err) {
|
||||
int retries = 3;
|
||||
|
||||
DRM_ERROR("GuC fw load failed, err=%d, attempting reset and retry\n", err);
|
||||
|
||||
while (retries--) {
|
||||
err = i915_reset_guc(dev_priv);
|
||||
if (err)
|
||||
break;
|
||||
|
||||
err = guc_ucode_xfer(dev_priv);
|
||||
if (!err) {
|
||||
DRM_DEBUG_DRIVER("GuC fw reload succeeded after reset\n");
|
||||
break;
|
||||
}
|
||||
DRM_DEBUG_DRIVER("GuC fw reload retries left: %d\n", retries);
|
||||
}
|
||||
|
||||
if (err) {
|
||||
DRM_ERROR("GuC fw reload attempt failed, ret=%d\n", err);
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
guc_fw->guc_fw_load_status = GUC_FIRMWARE_SUCCESS;
|
||||
|
||||
|
|
|
@ -1673,6 +1673,25 @@ bool intel_has_gpu_reset(struct drm_device *dev)
|
|||
return intel_get_gpu_reset(dev) != NULL;
|
||||
}
|
||||
|
||||
int intel_guc_reset(struct drm_i915_private *dev_priv)
|
||||
{
|
||||
int ret;
|
||||
unsigned long irqflags;
|
||||
|
||||
if (!i915.enable_guc_submission)
|
||||
return -EINVAL;
|
||||
|
||||
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
|
||||
spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
|
||||
|
||||
ret = gen6_hw_domain_reset(dev_priv, GEN9_GRDOM_GUC);
|
||||
|
||||
spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
|
||||
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv)
|
||||
{
|
||||
return check_for_unclaimed_mmio(dev_priv);
|
||||
|
|
Loading…
Reference in a new issue