From 4f1cb5875ca0e9386ff2f6545cd386d47ab8441b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 Apr 2019 08:56:27 +0100 Subject: [PATCH 001/591] drm/i915: Verify workarounds immediately after application Immediately after writing the workaround, verify that it stuck in the register. References: https://bugs.freedesktop.org/show_bug.cgi?id=108954 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190417075657.19456-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_workarounds.c | 32 +++++++++++++----------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index ccaf63679435..ea9292ee755a 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -913,6 +913,20 @@ wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal) return fw; } +static bool +wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from) +{ + if ((cur ^ wa->val) & wa->mask) { + DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n", + name, from, i915_mmio_reg_offset(wa->reg), cur, + cur & wa->mask, wa->val, wa->mask); + + return false; + } + + return true; +} + static void wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) { @@ -931,6 +945,10 @@ wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val); + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + wa_verify(wa, + intel_uncore_read_fw(uncore, wa->reg), + wal->name, "application"); } intel_uncore_forcewake_put__locked(uncore, fw); @@ -942,20 +960,6 @@ void intel_gt_apply_workarounds(struct drm_i915_private *i915) wa_list_apply(&i915->uncore, &i915->gt_wa_list); } -static bool -wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from) -{ - if ((cur ^ wa->val) & wa->mask) { - DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n", - name, from, i915_mmio_reg_offset(wa->reg), cur, - cur & wa->mask, wa->val, wa->mask); - - return false; - } - - return true; -} - static bool wa_list_verify(struct intel_uncore *uncore, const struct i915_wa_list *wal, const char *from) From 254e11864a36a1d3b362bf5727e89382f1540015 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 Apr 2019 08:56:28 +0100 Subject: [PATCH 002/591] drm/i915: Verify the engine workarounds stick on application Read the engine workarounds back using the GPU after loading the initial context state to verify that we are setting them correctly, and bail if it fails. v2: Break out the verification into its own loop Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190417075657.19456-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 21 +++ drivers/gpu/drm/i915/intel_workarounds.c | 120 ++++++++++++++++++ drivers/gpu/drm/i915/intel_workarounds.h | 2 + .../drm/i915/selftests/intel_workarounds.c | 53 +------- 4 files changed, 149 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0a818a60ad31..a5412323fee1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4842,6 +4842,23 @@ static void i915_gem_fini_scratch(struct drm_i915_private *i915) i915_vma_unpin_and_release(&i915->gt.scratch, 0); } +static int intel_engines_verify_workarounds(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + return 0; + + for_each_engine(engine, i915, id) { + if (intel_engine_verify_workarounds(engine, "load")) + err = -EIO; + } + + return err; +} + int i915_gem_init(struct drm_i915_private *dev_priv) { int ret; @@ -4927,6 +4944,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv) */ intel_init_clock_gating(dev_priv); + ret = intel_engines_verify_workarounds(dev_priv); + if (ret) + goto err_init_hw; + ret = __intel_engines_record_defaults(dev_priv); if (ret) goto err_init_hw; diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index ea9292ee755a..89e2c603e34b 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -1259,6 +1259,126 @@ void intel_engine_apply_workarounds(struct intel_engine_cs *engine) wa_list_apply(engine->uncore, &engine->wa_list); } +static struct i915_vma * +create_scratch(struct i915_address_space *vm, int count) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned int size; + int err; + + size = round_up(count * sizeof(u32), PAGE_SIZE); + obj = i915_gem_object_create_internal(vm->i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, + i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER); + if (err) + goto err_obj; + + return vma; + +err_obj: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static int +wa_list_srm(struct i915_request *rq, + const struct i915_wa_list *wal, + struct i915_vma *vma) +{ + const struct i915_wa *wa; + unsigned int i; + u32 srm, *cs; + + srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + if (INTEL_GEN(rq->i915) >= 8) + srm++; + + cs = intel_ring_begin(rq, 4 * wal->count); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { + *cs++ = srm; + *cs++ = i915_mmio_reg_offset(wa->reg); + *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i; + *cs++ = 0; + } + intel_ring_advance(rq, cs); + + return 0; +} + +static int engine_wa_list_verify(struct intel_engine_cs *engine, + const struct i915_wa_list * const wal, + const char *from) +{ + const struct i915_wa *wa; + struct i915_request *rq; + struct i915_vma *vma; + unsigned int i; + u32 *results; + int err; + + if (!wal->count) + return 0; + + vma = create_scratch(&engine->i915->ggtt.vm, wal->count); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + rq = i915_request_alloc(engine, engine->kernel_context->gem_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_vma; + } + + err = wa_list_srm(rq, wal, vma); + if (err) + goto err_vma; + + i915_request_add(rq); + if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) { + err = -ETIME; + goto err_vma; + } + + results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(results)) { + err = PTR_ERR(results); + goto err_vma; + } + + err = 0; + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) + if (!wa_verify(wa, results[i], wal->name, from)) + err = -ENXIO; + + i915_gem_object_unpin_map(vma->obj); + +err_vma: + i915_vma_unpin(vma); + i915_vma_put(vma); + return err; +} + +int intel_engine_verify_workarounds(struct intel_engine_cs *engine, + const char *from) +{ + return engine_wa_list_verify(engine, &engine->wa_list, from); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/intel_workarounds.c" #endif diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/intel_workarounds.h index 34eee5ec511e..fdf7ebb90f28 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.h +++ b/drivers/gpu/drm/i915/intel_workarounds.h @@ -30,5 +30,7 @@ void intel_engine_apply_whitelist(struct intel_engine_cs *engine); void intel_engine_init_workarounds(struct intel_engine_cs *engine); void intel_engine_apply_workarounds(struct intel_engine_cs *engine); +int intel_engine_verify_workarounds(struct intel_engine_cs *engine, + const char *from); #endif diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index 567b6f8dae86..a363748a7a4f 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -340,49 +340,6 @@ out: return err; } -static struct i915_vma *create_scratch(struct i915_gem_context *ctx) -{ - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - void *ptr; - int err; - - obj = i915_gem_object_create_internal(ctx->i915, PAGE_SIZE); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); - - ptr = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(ptr)) { - err = PTR_ERR(ptr); - goto err_obj; - } - memset(ptr, 0xc5, PAGE_SIZE); - i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); - - vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_obj; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto err_obj; - - err = i915_gem_object_set_to_cpu_domain(obj, false); - if (err) - goto err_obj; - - return vma; - -err_obj: - i915_gem_object_put(obj); - return ERR_PTR(err); -} - static struct i915_vma *create_batch(struct i915_gem_context *ctx) { struct drm_i915_gem_object *obj; @@ -475,7 +432,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, int err = 0, i, v; u32 *cs, *results; - scratch = create_scratch(ctx); + scratch = create_scratch(&ctx->ppgtt->vm, 2 * ARRAY_SIZE(values) + 1); if (IS_ERR(scratch)) return PTR_ERR(scratch); @@ -752,9 +709,11 @@ static bool verify_gt_engine_wa(struct drm_i915_private *i915, ok &= wa_list_verify(&i915->uncore, &lists->gt_wa_list, str); - for_each_engine(engine, i915, id) - ok &= wa_list_verify(engine->uncore, - &lists->engine[id].wa_list, str); + for_each_engine(engine, i915, id) { + ok &= engine_wa_list_verify(engine, + &lists->engine[id].wa_list, + str) == 0; + } return ok; } From 769f0dab622c58e3158fc55d761b62a61e7fa2e5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 Apr 2019 08:56:29 +0100 Subject: [PATCH 003/591] drm/i915: Make workaround verification *optional* Sometimes the HW doesn't even play fair, and completely forgets about register writes. Skip verifying known troublemakers. References: https://bugs.freedesktop.org/show_bug.cgi?id=108954 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190417075657.19456-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_workarounds.c | 40 ++++++++++++++----- .../gpu/drm/i915/intel_workarounds_types.h | 7 ++-- 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index 89e2c603e34b..b3cbed1ee1c9 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -122,6 +122,7 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) wal->wa_count++; wa_->val |= wa->val; wa_->mask |= wa->mask; + wa_->read |= wa->read; return; } } @@ -146,9 +147,10 @@ wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) { struct i915_wa wa = { - .reg = reg, + .reg = reg, .mask = mask, - .val = val + .val = val, + .read = mask, }; _wa_add(wal, &wa); @@ -172,6 +174,19 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) wa_write_masked_or(wal, reg, val, val); } +static void +ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) +{ + struct i915_wa wa = { + .reg = reg, + .mask = mask, + .val = val, + /* Bonkers HW, skip verifying */ + }; + + _wa_add(wal, &wa); +} + #define WA_SET_BIT_MASKED(addr, mask) \ wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask)) @@ -916,10 +931,11 @@ wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal) static bool wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from) { - if ((cur ^ wa->val) & wa->mask) { + if ((cur ^ wa->val) & wa->read) { DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n", - name, from, i915_mmio_reg_offset(wa->reg), cur, - cur & wa->mask, wa->val, wa->mask); + name, from, i915_mmio_reg_offset(wa->reg), + cur, cur & wa->read, + wa->val, wa->mask); return false; } @@ -1122,9 +1138,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE); /* WaPipelineFlushCoherentLines:icl */ - wa_write_or(wal, - GEN8_L3SQCREG4, - GEN8_LQSC_FLUSH_COHERENT_LINES); + ignore_wa_write_or(wal, + GEN8_L3SQCREG4, + GEN8_LQSC_FLUSH_COHERENT_LINES, + GEN8_LQSC_FLUSH_COHERENT_LINES); /* * Wa_1405543622:icl @@ -1151,9 +1168,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * Wa_1405733216:icl * Formerly known as WaDisableCleanEvicts */ - wa_write_or(wal, - GEN8_L3SQCREG4, - GEN11_LQSC_CLEAN_EVICT_DISABLE); + ignore_wa_write_or(wal, + GEN8_L3SQCREG4, + GEN11_LQSC_CLEAN_EVICT_DISABLE, + GEN11_LQSC_CLEAN_EVICT_DISABLE); /* WaForwardProgressSoftReset:icl */ wa_write_or(wal, diff --git a/drivers/gpu/drm/i915/intel_workarounds_types.h b/drivers/gpu/drm/i915/intel_workarounds_types.h index 30918da180ff..42ac1fb99572 100644 --- a/drivers/gpu/drm/i915/intel_workarounds_types.h +++ b/drivers/gpu/drm/i915/intel_workarounds_types.h @@ -12,9 +12,10 @@ #include "i915_reg.h" struct i915_wa { - i915_reg_t reg; - u32 mask; - u32 val; + i915_reg_t reg; + u32 mask; + u32 val; + u32 read; }; struct i915_wa_list { From 99534023490686ce4453c45e5cb813535b9bff95 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 Apr 2019 14:25:07 +0100 Subject: [PATCH 004/591] drm/i915: Avoid use-after-free in reporting create.size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have to avoid chasing after a userspace race! <3>[ 473.114328] BUG: KASAN: use-after-free in i915_gem_create+0x1d2/0x1f0 [i915] <3>[ 473.114389] Read of size 8 at addr ffff88815bf1d840 by task gem_flink_race/1541 <4>[ 473.114464] CPU: 1 PID: 1541 Comm: gem_flink_race Tainted: G U 5.1.0-rc4-g7d07e025e786-kasan_88+ #1 <4>[ 473.114469] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./J4205-ITX, BIOS P1.10 09/29/2016 <4>[ 473.114474] Call Trace: <4>[ 473.114488] dump_stack+0x7c/0xbb <4>[ 473.114612] ? i915_gem_create+0x1d2/0x1f0 [i915] <4>[ 473.114621] print_address_description+0x65/0x270 <4>[ 473.114728] ? i915_gem_create+0x1d2/0x1f0 [i915] <4>[ 473.114839] ? i915_gem_create+0x1d2/0x1f0 [i915] <4>[ 473.114848] kasan_report+0x149/0x18d <4>[ 473.114962] ? i915_gem_create+0x1d2/0x1f0 [i915] <4>[ 473.115069] i915_gem_create+0x1d2/0x1f0 [i915] <4>[ 473.115176] ? i915_gem_object_create.part.28+0x4b0/0x4b0 [i915] <4>[ 473.115289] ? i915_gem_dumb_create+0x1a0/0x1a0 [i915] <4>[ 473.115297] drm_ioctl_kernel+0x192/0x260 <4>[ 473.115306] ? drm_ioctl_permit+0x280/0x280 <4>[ 473.115326] drm_ioctl+0x67c/0x960 <4>[ 473.115438] ? i915_gem_dumb_create+0x1a0/0x1a0 [i915] <4>[ 473.115448] ? drm_getstats+0x20/0x20 <4>[ 473.115459] ? __lock_acquire+0xa66/0x3fe0 <4>[ 473.115474] ? _raw_spin_unlock_irqrestore+0x39/0x60 <4>[ 473.115485] ? debug_object_active_state+0x2ea/0x4e0 <4>[ 473.115496] ? debug_show_all_locks+0x2d0/0x2d0 <4>[ 473.115513] do_vfs_ioctl+0x18d/0xfa0 <4>[ 473.115522] ? check_flags.part.27+0x440/0x440 <4>[ 473.115532] ? ioctl_preallocate+0x1a0/0x1a0 <4>[ 473.115547] ? __fget+0x2ac/0x410 <4>[ 473.115561] ? __ia32_sys_dup3+0xb0/0xb0 <4>[ 473.115569] ? rwlock_bug.part.0+0x90/0x90 <4>[ 473.115590] ksys_ioctl+0x35/0x70 <4>[ 473.115597] ? lockdep_hardirqs_off+0x1cb/0x2b0 <4>[ 473.115608] __x64_sys_ioctl+0x6a/0xb0 <4>[ 473.115614] ? lockdep_hardirqs_on+0x342/0x590 <4>[ 473.115623] do_syscall_64+0x97/0x400 <4>[ 473.115633] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4>[ 473.115641] RIP: 0033:0x7fce590d55d7 <4>[ 473.115649] Code: b3 66 90 48 8b 05 b1 48 2d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 81 48 2d 00 f7 d8 64 89 01 48 <4>[ 473.115655] RSP: 002b:00007fce4d525ba8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 <4>[ 473.115662] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fce590d55d7 <4>[ 473.115667] RDX: 00007fce4d525c10 RSI: 00000000c010645b RDI: 0000000000000007 <4>[ 473.115672] RBP: 00007fce4d525c10 R08: 00007fce4d526700 R09: 00007fce4d526700 <4>[ 473.115677] R10: 0000000000000054 R11: 0000000000000246 R12: 00000000c010645b <4>[ 473.115682] R13: 0000000000000007 R14: 0000000000000000 R15: 00007ffe0e4a7450 <3>[ 473.115731] Allocated by task 1541: <4>[ 473.115766] kmem_cache_alloc+0xce/0x290 <4>[ 473.115895] i915_gem_object_create.part.28+0x1c/0x4b0 [i915] <4>[ 473.116000] i915_gem_create+0xe3/0x1f0 [i915] <4>[ 473.116008] drm_ioctl_kernel+0x192/0x260 <4>[ 473.116013] drm_ioctl+0x67c/0x960 <4>[ 473.116020] do_vfs_ioctl+0x18d/0xfa0 <4>[ 473.116026] ksys_ioctl+0x35/0x70 <4>[ 473.116032] __x64_sys_ioctl+0x6a/0xb0 <4>[ 473.116038] do_syscall_64+0x97/0x400 <4>[ 473.116044] entry_SYSCALL_64_after_hwframe+0x49/0xbe <3>[ 473.116071] Freed by task 1542: <4>[ 473.116101] kmem_cache_free+0xb7/0x2f0 <4>[ 473.116205] __i915_gem_free_objects+0x7d4/0xe10 [i915] <4>[ 473.116311] i915_gem_create_ioctl+0xaa/0xd0 [i915] <4>[ 473.116318] drm_ioctl_kernel+0x192/0x260 <4>[ 473.116323] drm_ioctl+0x67c/0x960 <4>[ 473.116330] do_vfs_ioctl+0x18d/0xfa0 <4>[ 473.116335] ksys_ioctl+0x35/0x70 <4>[ 473.116341] __x64_sys_ioctl+0x6a/0xb0 <4>[ 473.116347] do_syscall_64+0x97/0x400 <4>[ 473.116354] entry_SYSCALL_64_after_hwframe+0x49/0xbe Testcase: igt/gem_flink_race/flink_close Fixes: e163484afa8d ("drm/i915: Update size upon return from GEM_CREATE") Signed-off-by: Chris Wilson Cc: Michał Winiarski Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190417132507.27133-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a5412323fee1..e5462639de0b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -647,7 +647,7 @@ i915_gem_create(struct drm_file *file, return ret; *handle_p = handle; - *size_p = obj->base.size; + *size_p = size; return 0; } From dfe2c8ed23d7524dd363e1941039da63e3982e98 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 18 Apr 2019 14:27:19 +0100 Subject: [PATCH 005/591] drm/i915: Stop overwriting RING_IMR in rcs resume We store the engine->imr mask and set up the RING_IMR register on restarting the engine. We do not then want to overwrite it with an incomplete mask later as we may then lose interrupts! Reported-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190418132720.3716-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 029fd8ec1857..00bd9eeb053d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -873,9 +873,6 @@ static int init_render_ring(struct intel_engine_cs *engine) if (IS_GEN_RANGE(dev_priv, 6, 7)) I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); - if (INTEL_GEN(dev_priv) >= 6) - ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask); - return 0; } From 26ddc068de47e2a7cbbd06c915dca7a0dc22c499 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 18 Apr 2019 14:27:20 +0100 Subject: [PATCH 006/591] drm/i915: Setup the RCS ring prior to execution We need to set the various ring registers prior to restarting the engine, or else we may restart it after reset/resume in an ill-defined state. Reported-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190418132720.3716-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 00bd9eeb053d..3844581f622c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -831,9 +831,6 @@ static int intel_rcs_ctx_init(struct i915_request *rq) static int init_render_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - int ret = init_ring_common(engine); - if (ret) - return ret; /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ if (IS_GEN_RANGE(dev_priv, 4, 6)) @@ -873,7 +870,7 @@ static int init_render_ring(struct intel_engine_cs *engine) if (IS_GEN_RANGE(dev_priv, 6, 7)) I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); - return 0; + return init_ring_common(engine); } static void cancel_requests(struct intel_engine_cs *engine) From 844e33135d3a17686e167af2b6e653a4721c26e5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 18 Apr 2019 21:53:58 +0100 Subject: [PATCH 007/591] drm/i915: Remove unwarranted clamping for hsw/bdw We always start off at an "efficient frequency" and can let the system autotune from there, eliminating the need to clamp the available range. Signed-off-by: Chris Wilson Reviewed-by: Sagar Arun Kamble Link: https://patchwork.freedesktop.org/patch/msgid/20190418205358.11450-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_pm.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 44be676fabd6..87f6fc6d5502 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -8528,18 +8528,9 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) gen6_init_rps_frequencies(dev_priv); /* Derive initial user preferences/limits from the hardware limits */ - rps->idle_freq = rps->min_freq; - rps->cur_freq = rps->idle_freq; - rps->max_freq_softlimit = rps->max_freq; rps->min_freq_softlimit = rps->min_freq; - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - rps->min_freq_softlimit = - max_t(int, - rps->efficient_freq, - intel_freq_opcode(dev_priv, 450)); - /* After setting max-softlimit, find the overclock max freq */ if (IS_GEN(dev_priv, 6) || IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) { @@ -8556,6 +8547,8 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) /* Finally allow us to boost to max by default */ rps->boost_freq = rps->max_freq; + rps->idle_freq = rps->min_freq; + rps->cur_freq = rps->idle_freq; mutex_unlock(&dev_priv->pcu_lock); } From d69990e0c399e4f7f9b50505d3285e5de991148a Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Fri, 5 Apr 2019 15:02:34 +0200 Subject: [PATCH 008/591] drm/i915: Use drm_dev_unplug() The driver does not currently support unbinding from a device which is in use. Since open file descriptors may still be pointing into kernel memory where the device structures used to be, entirely correct kernel panics protect the driver from being unbound as we should not be unbinding it before those dangling pointers have been made safe. According to the documentation found inside drivers/gpu/drm/drm_drv.c, drm_dev_unplug() should be used instead of drm_dev_unregister() in order to make a device inaccessible to users as soon as it is unpluged. Follow that advice to make those possibly dangling pointers safe, protected by DRM layer from a user who is otherwise left pointing into possibly reused kernel memory after the driver has been unbound from the device. Once done, also cancel inflight operations immediately by calling i915_gem_set_wedged(). Signed-off-by: Janusz Krzysztofik Reviewed-by: Chris Wilson Reviewed-by: Daniel Vetter Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190405130235.7707-2-janusz.krzysztofik@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 1ad88e6d7c04..5e2ae2300454 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1760,7 +1760,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) i915_pmu_unregister(dev_priv); i915_teardown_sysfs(dev_priv); - drm_dev_unregister(&dev_priv->drm); + drm_dev_unplug(&dev_priv->drm); i915_gem_shrinker_unregister(dev_priv); } From 91cbdb83d3aee84b6aaaa3fc3b4a6084a35e19c1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 19 Apr 2019 14:48:36 +0100 Subject: [PATCH 009/591] drm/i915: Track HAS_RPS alongside HAS_RC6 in the device info For consistency (and elegance!), add intel_device_info.has_rps. The immediate boon is that RPS support is now emitted along the other capabilities in the debug log and after errors. Signed-off-by: Chris Wilson Reviewed-by: Sagar Arun Kamble Link: https://patchwork.freedesktop.org/patch/msgid/20190419134836.5626-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_pci.c | 5 +++++ drivers/gpu/drm/i915/intel_device_info.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 7 +++++-- 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 066fd2a12851..71612e7fc8bc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2585,6 +2585,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_RC6p(dev_priv) (INTEL_INFO(dev_priv)->has_rc6p) #define HAS_RC6pp(dev_priv) (false) /* HW was never validated */ +#define HAS_RPS(dev_priv) (INTEL_INFO(dev_priv)->has_rps) + #define HAS_CSR(dev_priv) (INTEL_INFO(dev_priv)->display.has_csr) #define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index f893c2cbce15..ffa2ee70a03d 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -370,6 +370,7 @@ static const struct intel_device_info intel_ironlake_m_info = { .has_llc = 1, \ .has_rc6 = 1, \ .has_rc6p = 1, \ + .has_rps = true, \ .ppgtt_type = INTEL_PPGTT_ALIASING, \ .ppgtt_size = 31, \ I9XX_PIPE_OFFSETS, \ @@ -417,6 +418,7 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = { .has_llc = 1, \ .has_rc6 = 1, \ .has_rc6p = 1, \ + .has_rps = true, \ .ppgtt_type = INTEL_PPGTT_FULL, \ .ppgtt_size = 31, \ IVB_PIPE_OFFSETS, \ @@ -470,6 +472,7 @@ static const struct intel_device_info intel_valleyview_info = { .num_pipes = 2, .has_runtime_pm = 1, .has_rc6 = 1, + .has_rps = true, .display.has_gmch = 1, .display.has_hotplug = 1, .ppgtt_type = INTEL_PPGTT_FULL, @@ -565,6 +568,7 @@ static const struct intel_device_info intel_cherryview_info = { .has_64bit_reloc = 1, .has_runtime_pm = 1, .has_rc6 = 1, + .has_rps = true, .has_logical_ring_contexts = 1, .display.has_gmch = 1, .ppgtt_type = INTEL_PPGTT_FULL, @@ -640,6 +644,7 @@ static const struct intel_device_info intel_skylake_gt4_info = { .has_runtime_pm = 1, \ .display.has_csr = 1, \ .has_rc6 = 1, \ + .has_rps = true, \ .display.has_dp_mst = 1, \ .has_logical_ring_contexts = 1, \ .has_logical_ring_preemption = 1, \ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 0e579f158016..7a2f14eff699 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -118,6 +118,7 @@ enum intel_ppgtt_type { func(has_pooled_eu); \ func(has_rc6); \ func(has_rc6p); \ + func(has_rps); \ func(has_runtime_pm); \ func(has_snoop); \ func(has_coherent_ggtt); \ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 87f6fc6d5502..7aa9a8c12b54 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7013,8 +7013,10 @@ static bool sanitize_rc6(struct drm_i915_private *i915) struct intel_device_info *info = mkwrite_device_info(i915); /* Powersaving is controlled by the host when inside a VM */ - if (intel_vgpu_active(i915)) + if (intel_vgpu_active(i915)) { info->has_rc6 = 0; + info->has_rps = false; + } if (info->has_rc6 && IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) { @@ -8716,7 +8718,8 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) if (HAS_RC6(dev_priv)) intel_enable_rc6(dev_priv); - intel_enable_rps(dev_priv); + if (HAS_RPS(dev_priv)) + intel_enable_rps(dev_priv); if (HAS_LLC(dev_priv)) intel_enable_llc_pstate(dev_priv); From 7ce99d24ed7265b8f83e0213252aa4f65755f872 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 19 Apr 2019 19:26:25 +0100 Subject: [PATCH 010/591] drm/i915: Expose the busyspin durations for i915_wait_request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An interesting discussion regarding "hybrid interrupt polling" for NVMe came to the conclusion that the ideal busyspin before sleeping was half of the expected request latency (and better if it was already halfway through that request). This suggested that we too should look again at our tradeoff between spinning and waiting. Currently, our spin simply tries to hide the cost of enabling the interrupt, which is good to avoid penalising nop requests (i.e. test throughput) and not much else. Studying real world workloads suggests that a spin of upto 500us can dramatically boost performance, but the suggestion is that this is not from avoiding interrupt latency per-se, but from secondary effects of sleeping such as allowing the CPU reduce cstate and context switch away. In a truly hybrid interrupt polling scheme, we would aim to sleep until just before the request completed and then wake up in advance of the interrupt and do a quick poll to handle completion. This is tricky for ourselves at the moment as we are not recording request times, and since we allow preemption, our requests are not on as a nicely ordered timeline as IO. However, the idea is interesting, for it will certainly help us decide when busyspinning is worthwhile. v2: Expose the spin setting via Kconfig options for easier adjustment and testing. v3: Don't get caught sneaking in a change to the busyspin parameters. v4: Explain more about the "hybrid interrupt polling" scheme that we want to migrate towards. Suggested-by: Sagar Kamble References: http://events.linuxfoundation.org/sites/events/files/slides/lemoal-nvme-polling-vault-2017-final_0.pdf Signed-off-by: Chris Wilson Cc: Sagar Kamble Cc: Eero Tamminen Cc: Tvrtko Ursulin Cc: Ben Widawsky Cc: Joonas Lahtinen Cc: Michał Winiarski Reviewed-by: Sagar Kamble Link: https://patchwork.freedesktop.org/patch/msgid/20190419182625.11186-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Kconfig | 6 ++++++ drivers/gpu/drm/i915/Kconfig.profile | 13 +++++++++++++ drivers/gpu/drm/i915/i915_request.c | 27 +++++++++++++++++++++++++-- 3 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/i915/Kconfig.profile diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 148be8e1a090..f0556310b851 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -133,3 +133,9 @@ depends on DRM_I915 depends on EXPERT source "drivers/gpu/drm/i915/Kconfig.debug" endmenu + +menu "drm/i915 Profile Guided Optimisation" + visible if EXPERT + depends on DRM_I915 + source "drivers/gpu/drm/i915/Kconfig.profile" +endmenu diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile new file mode 100644 index 000000000000..0e5db98da8f3 --- /dev/null +++ b/drivers/gpu/drm/i915/Kconfig.profile @@ -0,0 +1,13 @@ +config DRM_I915_SPIN_REQUEST + int + default 5 # microseconds + help + Before sleeping waiting for a request (GPU operation) to complete, + we may spend some time polling for its completion. As the IRQ may + take a non-negligible time to setup, we do a short spin first to + check if the request will complete in the time it would have taken + us to enable the interrupt. + + May be 0 to disable the initial spin. In practice, we estimate + the cost of enabling the interrupt (if currently disabled) to be + a few microseconds. diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index b836721d3b13..b1f00b59bb95 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1340,8 +1340,31 @@ long i915_request_wait(struct i915_request *rq, trace_i915_request_wait_begin(rq, flags); - /* Optimistic short spin before touching IRQs */ - if (__i915_spin_request(rq, state, 5)) + /* + * Optimistic spin before touching IRQs. + * + * We may use a rather large value here to offset the penalty of + * switching away from the active task. Frequently, the client will + * wait upon an old swapbuffer to throttle itself to remain within a + * frame of the gpu. If the client is running in lockstep with the gpu, + * then it should not be waiting long at all, and a sleep now will incur + * extra scheduler latency in producing the next frame. To try to + * avoid adding the cost of enabling/disabling the interrupt to the + * short wait, we first spin to see if the request would have completed + * in the time taken to setup the interrupt. + * + * We need upto 5us to enable the irq, and upto 20us to hide the + * scheduler latency of a context switch, ignoring the secondary + * impacts from a context switch such as cache eviction. + * + * The scheme used for low-latency IO is called "hybrid interrupt + * polling". The suggestion there is to sleep until just before you + * expect to be woken by the device interrupt and then poll for its + * completion. That requires having a good predictor for the request + * duration, which we currently lack. + */ + if (CONFIG_DRM_I915_SPIN_REQUEST && + __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST)) goto out; /* From b972fffa114b18a120a7bbde105d69a080d24970 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 17 Apr 2019 13:25:24 +0200 Subject: [PATCH 011/591] drm/i915: remove DRM_AUTH from IOCTLs which also have DRM_RENDER_ALLOW MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is to work around problems with libva and vainfo. Signed-off-by: Christian König Reviewed-by: Chris Wilson Reviewed-by: Daniel Vetter Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190417112525.16848-1-christian.koenig@amd.com --- drivers/gpu/drm/i915/i915_drv.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 5e2ae2300454..6354c68c94b3 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -3098,7 +3098,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_BATCHBUFFER, drm_noop, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_IRQ_EMIT, drm_noop, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_IRQ_WAIT, drm_noop, DRM_AUTH), - DRM_IOCTL_DEF_DRV(I915_GETPARAM, i915_getparam_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GETPARAM, i915_getparam_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_SETPARAM, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_ALLOC, drm_noop, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_FREE, drm_noop, DRM_AUTH), @@ -3111,13 +3111,13 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer_ioctl, DRM_AUTH), - DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), - DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_SET_CACHING, i915_gem_set_caching_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_GET_CACHING, i915_gem_get_caching_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(I915_GEM_THROTTLE, i915_gem_throttle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_THROTTLE, i915_gem_throttle_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_ENTERVT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_LEAVEVT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_CREATE, i915_gem_create_ioctl, DRM_RENDER_ALLOW), @@ -3136,7 +3136,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs_ioctl, DRM_MASTER), DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey_ioctl, DRM_MASTER), DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, drm_noop, DRM_MASTER), - DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE_EXT, i915_gem_context_create_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_RENDER_ALLOW), From 267e80ee6a341bc694406ef7c4f30fa2721610b7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 19 Apr 2019 21:12:07 +0100 Subject: [PATCH 012/591] drm/i915/gtt: Skip clearing the GGTT under gen6+ full-ppgtt If we know that the user cannot access the GGTT, by virtue of having a segregated memory area, we can skip clearing the unused entries as they cannot be accessed. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190419201207.5477-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 8f460cc4cc1f..10558bc8bf90 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3280,7 +3280,9 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) size = gen6_get_total_gtt_size(snb_gmch_ctl); ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE; - ggtt->vm.clear_range = gen6_ggtt_clear_range; + ggtt->vm.clear_range = nop_clear_range; + if (!HAS_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv)) + ggtt->vm.clear_range = gen6_ggtt_clear_range; ggtt->vm.insert_page = gen6_ggtt_insert_page; ggtt->vm.insert_entries = gen6_ggtt_insert_entries; ggtt->vm.cleanup = gen6_gmch_remove; From 95ebcda3ef4fa2c928e2e0dbe0f707ca90852110 Mon Sep 17 00:00:00 2001 From: Fernando Pacheco Date: Fri, 19 Apr 2019 16:00:11 -0700 Subject: [PATCH 013/591] drm/i915/uc: Rename uC firmware init/fini functions he uC firmware init function is called during GuC/HuC init early phases. Rename to include "_early" and properly reflect which phase we are at. The uC firmware fini function is cleaning up the state set/created on firmware fetch. Replace "_fini" with "_cleanup_fetch". v2: also rename uC fw fini function Signed-off-by: Fernando Pacheco Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190419230015.18121-2-fernando.pacheco@intel.com --- drivers/gpu/drm/i915/intel_guc.c | 6 +++--- drivers/gpu/drm/i915/intel_guc_fw.c | 2 +- drivers/gpu/drm/i915/intel_huc.h | 2 +- drivers/gpu/drm/i915/intel_huc_fw.c | 2 +- drivers/gpu/drm/i915/intel_uc_fw.c | 4 ++-- drivers/gpu/drm/i915/intel_uc_fw.h | 5 +++-- 6 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index 3aabfa2d9198..d81a02b0f525 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -154,7 +154,7 @@ int intel_guc_init_misc(struct intel_guc *guc) void intel_guc_fini_misc(struct intel_guc *guc) { - intel_uc_fw_fini(&guc->fw); + intel_uc_fw_cleanup_fetch(&guc->fw); guc_fini_wq(guc); } @@ -221,7 +221,7 @@ err_log: err_shared: guc_shared_data_destroy(guc); err_fetch: - intel_uc_fw_fini(&guc->fw); + intel_uc_fw_cleanup_fetch(&guc->fw); return ret; } @@ -237,7 +237,7 @@ void intel_guc_fini(struct intel_guc *guc) intel_guc_ads_destroy(guc); intel_guc_log_destroy(&guc->log); guc_shared_data_destroy(guc); - intel_uc_fw_fini(&guc->fw); + intel_uc_fw_cleanup_fetch(&guc->fw); } static u32 guc_ctl_debug_flags(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c index 792a551450c7..4385d9ef02bb 100644 --- a/drivers/gpu/drm/i915/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/intel_guc_fw.c @@ -90,7 +90,7 @@ void intel_guc_fw_init_early(struct intel_guc *guc) { struct intel_uc_fw *guc_fw = &guc->fw; - intel_uc_fw_init(guc_fw, INTEL_UC_FW_TYPE_GUC); + intel_uc_fw_init_early(guc_fw, INTEL_UC_FW_TYPE_GUC); guc_fw_select(guc_fw); } diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h index 7e41d870b509..ce129e301961 100644 --- a/drivers/gpu/drm/i915/intel_huc.h +++ b/drivers/gpu/drm/i915/intel_huc.h @@ -42,7 +42,7 @@ int intel_huc_check_status(struct intel_huc *huc); static inline void intel_huc_fini_misc(struct intel_huc *huc) { - intel_uc_fw_fini(&huc->fw); + intel_uc_fw_cleanup_fetch(&huc->fw); } static inline int intel_huc_sanitize(struct intel_huc *huc) diff --git a/drivers/gpu/drm/i915/intel_huc_fw.c b/drivers/gpu/drm/i915/intel_huc_fw.c index 68d47c105939..80a176d91edc 100644 --- a/drivers/gpu/drm/i915/intel_huc_fw.c +++ b/drivers/gpu/drm/i915/intel_huc_fw.c @@ -89,7 +89,7 @@ void intel_huc_fw_init_early(struct intel_huc *huc) { struct intel_uc_fw *huc_fw = &huc->fw; - intel_uc_fw_init(huc_fw, INTEL_UC_FW_TYPE_HUC); + intel_uc_fw_init_early(huc_fw, INTEL_UC_FW_TYPE_HUC); huc_fw_select(huc_fw); } diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c index becf05ebae4d..e3e74207a102 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/intel_uc_fw.c @@ -274,13 +274,13 @@ fail: } /** - * intel_uc_fw_fini - cleanup uC firmware + * intel_uc_fw_cleanup_fetch - cleanup uC firmware * * @uc_fw: uC firmware * * Cleans up uC firmware by releasing the firmware GEM obj. */ -void intel_uc_fw_fini(struct intel_uc_fw *uc_fw) +void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h index 0e3bd580e267..e6fa8599757c 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/intel_uc_fw.h @@ -102,7 +102,8 @@ static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type) } static inline -void intel_uc_fw_init(struct intel_uc_fw *uc_fw, enum intel_uc_fw_type type) +void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, + enum intel_uc_fw_type type) { uc_fw->path = NULL; uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; @@ -144,10 +145,10 @@ static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw) void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, struct intel_uc_fw *uc_fw); +void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw); int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, int (*xfer)(struct intel_uc_fw *uc_fw, struct i915_vma *vma)); -void intel_uc_fw_fini(struct intel_uc_fw *uc_fw); void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p); #endif From 911800765ef6cdcb9103da7557aa5dd9ebb4cda0 Mon Sep 17 00:00:00 2001 From: Fernando Pacheco Date: Fri, 19 Apr 2019 16:00:12 -0700 Subject: [PATCH 014/591] drm/i915/uc: Reserve upper range of GGTT GuC and HuC depend on struct_mutex for device reinitialization. Moving away from this dependency requires perma-pinning the firmware images in GGTT. The upper portion of the GuC address space has a sizeable hole (several MB) that is inaccessible by GuC. Reserve this range within GGTT as it can comfortably hold GuC/HuC firmware images. v2: Reserve node rather than insert (Chris) Simpler determination of node start/size (Daniele) Move reserve/release out to intel_guc.* files v3: Reserve starting at GUC_GGTT_TOP only and bail if this fails (Chris) Signed-off-by: Fernando Pacheco Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190419230015.18121-3-fernando.pacheco@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 25 ++++++++++++------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + drivers/gpu/drm/i915/intel_guc.c | 27 +++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_guc.h | 2 ++ 4 files changed, 42 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 10558bc8bf90..3557233de0f5 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2752,6 +2752,12 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) if (ret) return ret; + if (USES_GUC(dev_priv)) { + ret = intel_guc_reserve_ggtt_top(&dev_priv->guc); + if (ret) + goto err_reserve; + } + /* Clear any non-preallocated blocks */ drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) { DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", @@ -2766,12 +2772,14 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) if (INTEL_PPGTT(dev_priv) == INTEL_PPGTT_ALIASING) { ret = i915_gem_init_aliasing_ppgtt(dev_priv); if (ret) - goto err; + goto err_appgtt; } return 0; -err: +err_appgtt: + intel_guc_release_ggtt_top(&dev_priv->guc); +err_reserve: drm_mm_remove_node(&ggtt->error_capture); return ret; } @@ -2797,6 +2805,8 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) if (drm_mm_node_allocated(&ggtt->error_capture)) drm_mm_remove_node(&ggtt->error_capture); + intel_guc_release_ggtt_top(&dev_priv->guc); + if (drm_mm_initialized(&ggtt->vm.mm)) { intel_vgt_deballoon(dev_priv); i915_address_space_fini(&ggtt->vm); @@ -3371,17 +3381,6 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) if (ret) return ret; - /* Trim the GGTT to fit the GuC mappable upper range (when enabled). - * This is easier than doing range restriction on the fly, as we - * currently don't have any bits spare to pass in this upper - * restriction! - */ - if (USES_GUC(dev_priv)) { - ggtt->vm.total = min_t(u64, ggtt->vm.total, GUC_GGTT_TOP); - ggtt->mappable_end = - min_t(u64, ggtt->mappable_end, ggtt->vm.total); - } - if ((ggtt->vm.total - 1) >> 32) { DRM_ERROR("We never expected a Global GTT with more than 32bits" " of address space! Found %lldM!\n", diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index f597f35b109b..b51e779732c3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -384,6 +384,7 @@ struct i915_ggtt { u32 pin_bias; struct drm_mm_node error_capture; + struct drm_mm_node uc_fw; }; struct i915_hw_ppgtt { diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index d81a02b0f525..a10a68e0ffce 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -721,3 +721,30 @@ u32 intel_guc_reserved_gtt_size(struct intel_guc *guc) { return guc_to_i915(guc)->wopcm.guc.size; } + +int intel_guc_reserve_ggtt_top(struct intel_guc *guc) +{ + struct drm_i915_private *i915 = guc_to_i915(guc); + struct i915_ggtt *ggtt = &i915->ggtt; + u64 size; + int ret; + + size = ggtt->vm.total - GUC_GGTT_TOP; + + ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size, + GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE, + PIN_NOEVICT); + if (ret) + DRM_DEBUG_DRIVER("GuC: failed to reserve top of ggtt\n"); + + return ret; +} + +void intel_guc_release_ggtt_top(struct intel_guc *guc) +{ + struct drm_i915_private *i915 = guc_to_i915(guc); + struct i915_ggtt *ggtt = &i915->ggtt; + + if (drm_mm_node_allocated(&ggtt->uc_fw)) + drm_mm_remove_node(&ggtt->uc_fw); +} diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 2c59ff8d9f39..2494e84831a2 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -173,6 +173,8 @@ int intel_guc_suspend(struct intel_guc *guc); int intel_guc_resume(struct intel_guc *guc); struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); u32 intel_guc_reserved_gtt_size(struct intel_guc *guc); +int intel_guc_reserve_ggtt_top(struct intel_guc *guc); +void intel_guc_release_ggtt_top(struct intel_guc *guc); static inline int intel_guc_sanitize(struct intel_guc *guc) { From fc488b59034aa4519f4971f4b2b842718e56af79 Mon Sep 17 00:00:00 2001 From: Fernando Pacheco Date: Fri, 19 Apr 2019 16:00:13 -0700 Subject: [PATCH 015/591] drm/i915/uc: Place uC firmware in upper range of GGTT Currently we pin the GuC or HuC firmware image just before uploading. Perma-pin during uC initialization instead and use the range reserved at the top of the address space. Moving the firmware resulted in needing to: - use an additional pinning for the rsa signature which will be used during HuC auth as addresses above GUC_GGTT_TOP do not map through GTT. v2: Remove call to set to gtt domain Do not restore fw gtt mapping unconditionally Separate out pin/unpin functions and drop usage of pin/unpin Use uc_fw init/fini functions to bind/unbind fw object v3: Bind is only needed during xfer (Chris) Remove attempts to bind outside of xfer (Chris) Mark fw bind/unbind static Signed-off-by: Fernando Pacheco Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190419230015.18121-4-fernando.pacheco@intel.com --- drivers/gpu/drm/i915/intel_guc.c | 9 ++- drivers/gpu/drm/i915/intel_guc_fw.c | 18 +++--- drivers/gpu/drm/i915/intel_huc.c | 74 ++++++++++++++++----- drivers/gpu/drm/i915/intel_huc.h | 4 ++ drivers/gpu/drm/i915/intel_huc_fw.c | 47 ++++++++++---- drivers/gpu/drm/i915/intel_uc.c | 23 +++++-- drivers/gpu/drm/i915/intel_uc_fw.c | 99 ++++++++++++++++++++--------- drivers/gpu/drm/i915/intel_uc_fw.h | 7 +- 8 files changed, 208 insertions(+), 73 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index a10a68e0ffce..c4ac29309fcc 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -189,9 +189,13 @@ int intel_guc_init(struct intel_guc *guc) struct drm_i915_private *dev_priv = guc_to_i915(guc); int ret; - ret = guc_shared_data_create(guc); + ret = intel_uc_fw_init(&guc->fw); if (ret) goto err_fetch; + + ret = guc_shared_data_create(guc); + if (ret) + goto err_fw; GEM_BUG_ON(!guc->shared_data); ret = intel_guc_log_create(&guc->log); @@ -220,6 +224,8 @@ err_log: intel_guc_log_destroy(&guc->log); err_shared: guc_shared_data_destroy(guc); +err_fw: + intel_uc_fw_fini(&guc->fw); err_fetch: intel_uc_fw_cleanup_fetch(&guc->fw); return ret; @@ -237,6 +243,7 @@ void intel_guc_fini(struct intel_guc *guc) intel_guc_ads_destroy(guc); intel_guc_log_destroy(&guc->log); guc_shared_data_destroy(guc); + intel_uc_fw_fini(&guc->fw); intel_uc_fw_cleanup_fetch(&guc->fw); } diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c index 4385d9ef02bb..8b2dcc70b956 100644 --- a/drivers/gpu/drm/i915/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/intel_guc_fw.c @@ -122,14 +122,16 @@ static void guc_prepare_xfer(struct intel_guc *guc) } /* Copy RSA signature from the fw image to HW for verification */ -static void guc_xfer_rsa(struct intel_guc *guc, struct i915_vma *vma) +static void guc_xfer_rsa(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_uc_fw *fw = &guc->fw; + struct sg_table *pages = fw->obj->mm.pages; u32 rsa[UOS_RSA_SCRATCH_COUNT]; int i; - sg_pcopy_to_buffer(vma->pages->sgl, vma->pages->nents, - rsa, sizeof(rsa), guc->fw.rsa_offset); + sg_pcopy_to_buffer(pages->sgl, pages->nents, + rsa, sizeof(rsa), fw->rsa_offset); for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++) I915_WRITE(UOS_RSA_SCRATCH(i), rsa[i]); @@ -201,7 +203,7 @@ static int guc_wait_ucode(struct intel_guc *guc) * transfer between GTT locations. This functionality is left out of the API * for now as there is no need for it. */ -static int guc_xfer_ucode(struct intel_guc *guc, struct i915_vma *vma) +static int guc_xfer_ucode(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); struct intel_uc_fw *guc_fw = &guc->fw; @@ -214,7 +216,7 @@ static int guc_xfer_ucode(struct intel_guc *guc, struct i915_vma *vma) I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size); /* Set the source address for the new blob */ - offset = intel_guc_ggtt_offset(guc, vma) + guc_fw->header_offset; + offset = intel_uc_fw_ggtt_offset(guc_fw) + guc_fw->header_offset; I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF); @@ -233,7 +235,7 @@ static int guc_xfer_ucode(struct intel_guc *guc, struct i915_vma *vma) /* * Load the GuC firmware blob into the MinuteIA. */ -static int guc_fw_xfer(struct intel_uc_fw *guc_fw, struct i915_vma *vma) +static int guc_fw_xfer(struct intel_uc_fw *guc_fw) { struct intel_guc *guc = container_of(guc_fw, struct intel_guc, fw); struct drm_i915_private *dev_priv = guc_to_i915(guc); @@ -250,9 +252,9 @@ static int guc_fw_xfer(struct intel_uc_fw *guc_fw, struct i915_vma *vma) * by the DMA engine in one operation, whereas the RSA signature is * loaded via MMIO. */ - guc_xfer_rsa(guc, vma); + guc_xfer_rsa(guc); - ret = guc_xfer_ucode(guc, vma); + ret = guc_xfer_ucode(guc); intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 94c04f16a2ad..1ff1fb015e58 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -40,6 +40,61 @@ int intel_huc_init_misc(struct intel_huc *huc) return 0; } +static int intel_huc_rsa_data_create(struct intel_huc *huc) +{ + struct drm_i915_private *i915 = huc_to_i915(huc); + struct intel_guc *guc = &i915->guc; + struct i915_vma *vma; + void *vaddr; + + /* + * HuC firmware will sit above GUC_GGTT_TOP and will not map + * through GTT. Unfortunately, this means GuC cannot perform + * the HuC auth. as the rsa offset now falls within the GuC + * inaccessible range. We resort to perma-pinning an additional + * vma within the accessible range that only contains the rsa + * signature. The GuC can use this extra pinning to perform + * the authentication since its GGTT offset will be GuC + * accessible. + */ + vma = intel_guc_allocate_vma(guc, PAGE_SIZE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + i915_vma_unpin_and_release(&vma, 0); + return PTR_ERR(vaddr); + } + + huc->rsa_data = vma; + huc->rsa_data_vaddr = vaddr; + + return 0; +} + +static void intel_huc_rsa_data_destroy(struct intel_huc *huc) +{ + i915_vma_unpin_and_release(&huc->rsa_data, I915_VMA_RELEASE_MAP); +} + +int intel_huc_init(struct intel_huc *huc) +{ + int err; + + err = intel_huc_rsa_data_create(huc); + if (err) + return err; + + return intel_uc_fw_init(&huc->fw); +} + +void intel_huc_fini(struct intel_huc *huc) +{ + intel_uc_fw_fini(&huc->fw); + intel_huc_rsa_data_destroy(huc); +} + /** * intel_huc_auth() - Authenticate HuC uCode * @huc: intel_huc structure @@ -55,27 +110,17 @@ int intel_huc_auth(struct intel_huc *huc) { struct drm_i915_private *i915 = huc_to_i915(huc); struct intel_guc *guc = &i915->guc; - struct i915_vma *vma; u32 status; int ret; if (huc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) return -ENOEXEC; - vma = i915_gem_object_ggtt_pin(huc->fw.obj, NULL, 0, 0, - PIN_OFFSET_BIAS | i915->ggtt.pin_bias); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - DRM_ERROR("HuC: Failed to pin huc fw object %d\n", ret); - goto fail; - } - ret = intel_guc_auth_huc(guc, - intel_guc_ggtt_offset(guc, vma) + - huc->fw.rsa_offset); + intel_guc_ggtt_offset(guc, huc->rsa_data)); if (ret) { DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret); - goto fail_unpin; + goto fail; } /* Check authentication status, it should be done by now */ @@ -86,14 +131,11 @@ int intel_huc_auth(struct intel_huc *huc) 2, 50, &status); if (ret) { DRM_ERROR("HuC: Firmware not verified %#x\n", status); - goto fail_unpin; + goto fail; } - i915_vma_unpin(vma); return 0; -fail_unpin: - i915_vma_unpin(vma); fail: huc->fw.load_status = INTEL_UC_FIRMWARE_FAIL; diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h index ce129e301961..a0c21ae02a99 100644 --- a/drivers/gpu/drm/i915/intel_huc.h +++ b/drivers/gpu/drm/i915/intel_huc.h @@ -33,10 +33,14 @@ struct intel_huc { struct intel_uc_fw fw; /* HuC-specific additions */ + struct i915_vma *rsa_data; + void *rsa_data_vaddr; }; void intel_huc_init_early(struct intel_huc *huc); int intel_huc_init_misc(struct intel_huc *huc); +int intel_huc_init(struct intel_huc *huc); +void intel_huc_fini(struct intel_huc *huc); int intel_huc_auth(struct intel_huc *huc); int intel_huc_check_status(struct intel_huc *huc); diff --git a/drivers/gpu/drm/i915/intel_huc_fw.c b/drivers/gpu/drm/i915/intel_huc_fw.c index 80a176d91edc..44c559526072 100644 --- a/drivers/gpu/drm/i915/intel_huc_fw.c +++ b/drivers/gpu/drm/i915/intel_huc_fw.c @@ -93,18 +93,24 @@ void intel_huc_fw_init_early(struct intel_huc *huc) huc_fw_select(huc_fw); } -/** - * huc_fw_xfer() - DMA's the firmware - * @huc_fw: the firmware descriptor - * @vma: the firmware image (bound into the GGTT) - * - * Transfer the firmware image to RAM for execution by the microcontroller. - * - * Return: 0 on success, non-zero on failure - */ -static int huc_fw_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma) +static void huc_xfer_rsa(struct intel_huc *huc) { - struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw); + struct intel_uc_fw *fw = &huc->fw; + struct sg_table *pages = fw->obj->mm.pages; + + /* + * HuC firmware image is outside GuC accessible range. + * Copy the RSA signature out of the image into + * the perma-pinned region set aside for it + */ + sg_pcopy_to_buffer(pages->sgl, pages->nents, + huc->rsa_data_vaddr, fw->rsa_size, + fw->rsa_offset); +} + +static int huc_xfer_ucode(struct intel_huc *huc) +{ + struct intel_uc_fw *huc_fw = &huc->fw; struct drm_i915_private *dev_priv = huc_to_i915(huc); struct intel_uncore *uncore = &dev_priv->uncore; unsigned long offset = 0; @@ -116,7 +122,7 @@ static int huc_fw_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma) intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); /* Set the source address for the uCode */ - offset = intel_guc_ggtt_offset(&dev_priv->guc, vma) + + offset = intel_uc_fw_ggtt_offset(huc_fw) + huc_fw->header_offset; intel_uncore_write(uncore, DMA_ADDR_0_LOW, lower_32_bits(offset)); @@ -150,6 +156,23 @@ static int huc_fw_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma) return ret; } +/** + * huc_fw_xfer() - DMA's the firmware + * @huc_fw: the firmware descriptor + * + * Transfer the firmware image to RAM for execution by the microcontroller. + * + * Return: 0 on success, non-zero on failure + */ +static int huc_fw_xfer(struct intel_uc_fw *huc_fw) +{ + struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw); + + huc_xfer_rsa(huc); + + return huc_xfer_ucode(huc); +} + /** * intel_huc_fw_upload() - load HuC uCode to device * @huc: intel_huc structure diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 25b80ffe71ad..488dffba04d2 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -280,6 +280,7 @@ void intel_uc_fini_misc(struct drm_i915_private *i915) int intel_uc_init(struct drm_i915_private *i915) { struct intel_guc *guc = &i915->guc; + struct intel_huc *huc = &i915->huc; int ret; if (!USES_GUC(i915)) @@ -292,19 +293,30 @@ int intel_uc_init(struct drm_i915_private *i915) if (ret) return ret; + if (USES_HUC(i915)) { + ret = intel_huc_init(huc); + if (ret) + goto err_guc; + } + if (USES_GUC_SUBMISSION(i915)) { /* * This is stuff we need to have available at fw load time * if we are planning to enable submission later */ ret = intel_guc_submission_init(guc); - if (ret) { - intel_guc_fini(guc); - return ret; - } + if (ret) + goto err_huc; } return 0; + +err_huc: + if (USES_HUC(i915)) + intel_huc_fini(huc); +err_guc: + intel_guc_fini(guc); + return ret; } void intel_uc_fini(struct drm_i915_private *i915) @@ -319,6 +331,9 @@ void intel_uc_fini(struct drm_i915_private *i915) if (USES_GUC_SUBMISSION(i915)) intel_guc_submission_fini(guc); + if (USES_HUC(i915)) + intel_huc_fini(&i915->huc); + intel_guc_fini(guc); } diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c index e3e74207a102..b9cb6fea9332 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/intel_uc_fw.c @@ -191,6 +191,35 @@ fail: release_firmware(fw); /* OK even if fw is NULL */ } +static void intel_uc_fw_ggtt_bind(struct intel_uc_fw *uc_fw) +{ + struct drm_i915_gem_object *obj = uc_fw->obj; + struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt; + struct i915_vma dummy = { + .node.start = intel_uc_fw_ggtt_offset(uc_fw), + .node.size = obj->base.size, + .pages = obj->mm.pages, + .vm = &ggtt->vm, + }; + + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + GEM_BUG_ON(dummy.node.size > ggtt->uc_fw.size); + + /* uc_fw->obj cache domains were not controlled across suspend */ + drm_clflush_sg(dummy.pages); + + ggtt->vm.insert_entries(&ggtt->vm, &dummy, I915_CACHE_NONE, 0); +} + +static void intel_uc_fw_ggtt_unbind(struct intel_uc_fw *uc_fw) +{ + struct drm_i915_gem_object *obj = uc_fw->obj; + struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt; + u64 start = intel_uc_fw_ggtt_offset(uc_fw); + + ggtt->vm.clear_range(&ggtt->vm, start, obj->base.size); +} + /** * intel_uc_fw_upload - load uC firmware using custom loader * @uc_fw: uC firmware @@ -201,11 +230,8 @@ fail: * Return: 0 on success, non-zero on failure. */ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, - int (*xfer)(struct intel_uc_fw *uc_fw, - struct i915_vma *vma)) + int (*xfer)(struct intel_uc_fw *uc_fw)) { - struct i915_vma *vma; - u32 ggtt_pin_bias; int err; DRM_DEBUG_DRIVER("%s fw load %s\n", @@ -219,36 +245,15 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, intel_uc_fw_type_repr(uc_fw->type), intel_uc_fw_status_repr(uc_fw->load_status)); - /* Pin object with firmware */ - err = i915_gem_object_set_to_gtt_domain(uc_fw->obj, false); - if (err) { - DRM_DEBUG_DRIVER("%s fw set-domain err=%d\n", - intel_uc_fw_type_repr(uc_fw->type), err); - goto fail; - } - - ggtt_pin_bias = to_i915(uc_fw->obj->base.dev)->ggtt.pin_bias; - vma = i915_gem_object_ggtt_pin(uc_fw->obj, NULL, 0, 0, - PIN_OFFSET_BIAS | ggtt_pin_bias); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - DRM_DEBUG_DRIVER("%s fw ggtt-pin err=%d\n", - intel_uc_fw_type_repr(uc_fw->type), err); - goto fail; - } + intel_uc_fw_ggtt_bind(uc_fw); /* Call custom loader */ - err = xfer(uc_fw, vma); - - /* - * We keep the object pages for reuse during resume. But we can unpin it - * now that DMA has completed, so it doesn't continue to take up space. - */ - i915_vma_unpin(vma); - + err = xfer(uc_fw); if (err) goto fail; + intel_uc_fw_ggtt_unbind(uc_fw); + uc_fw->load_status = INTEL_UC_FIRMWARE_SUCCESS; DRM_DEBUG_DRIVER("%s fw load %s\n", intel_uc_fw_type_repr(uc_fw->type), @@ -273,6 +278,42 @@ fail: return err; } +int intel_uc_fw_init(struct intel_uc_fw *uc_fw) +{ + int err; + + if (uc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS) + return -ENOEXEC; + + err = i915_gem_object_pin_pages(uc_fw->obj); + if (err) + DRM_DEBUG_DRIVER("%s fw pin-pages err=%d\n", + intel_uc_fw_type_repr(uc_fw->type), err); + + return err; +} + +void intel_uc_fw_fini(struct intel_uc_fw *uc_fw) +{ + if (uc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS) + return; + + i915_gem_object_unpin_pages(uc_fw->obj); +} + +u32 intel_uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw) +{ + struct drm_i915_private *i915 = to_i915(uc_fw->obj->base.dev); + struct i915_ggtt *ggtt = &i915->ggtt; + struct drm_mm_node *node = &ggtt->uc_fw; + + GEM_BUG_ON(!node->allocated); + GEM_BUG_ON(upper_32_bits(node->start)); + GEM_BUG_ON(upper_32_bits(node->start + node->size - 1)); + + return lower_32_bits(node->start); +} + /** * intel_uc_fw_cleanup_fetch - cleanup uC firmware * diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h index e6fa8599757c..ff98f8661d72 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/intel_uc_fw.h @@ -27,7 +27,6 @@ struct drm_printer; struct drm_i915_private; -struct i915_vma; /* Home of GuC, HuC and DMC firmwares */ #define INTEL_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/i915" @@ -147,8 +146,10 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, struct intel_uc_fw *uc_fw); void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw); int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, - int (*xfer)(struct intel_uc_fw *uc_fw, - struct i915_vma *vma)); + int (*xfer)(struct intel_uc_fw *uc_fw)); +int intel_uc_fw_init(struct intel_uc_fw *uc_fw); +void intel_uc_fw_fini(struct intel_uc_fw *uc_fw); +u32 intel_uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw); void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p); #endif From 40d211ef62de3efdfb0a78894fc0bc3b24061b40 Mon Sep 17 00:00:00 2001 From: Fernando Pacheco Date: Fri, 19 Apr 2019 16:00:14 -0700 Subject: [PATCH 016/591] Revert "drm/i915/guc: Disable global reset" We have now prepared the guc reset paths to avoid taking struct_mutex, or any other lock, and so it is now safe to re-enable. References: fe62365f9f80 ("drm/i915/guc: Disable global reset") Signed-off-by: Fernando Pacheco Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190419230015.18121-5-fernando.pacheco@intel.com --- drivers/gpu/drm/i915/i915_reset.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index 677d59304e78..1092d16c289c 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -641,9 +641,6 @@ int intel_gpu_reset(struct drm_i915_private *i915, bool intel_has_gpu_reset(struct drm_i915_private *i915) { - if (USES_GUC(i915)) - return false; - if (!i915_modparams.reset) return NULL; From f3c2b76ef25e73e2065614108fe33bf2d790cac3 Mon Sep 17 00:00:00 2001 From: Fernando Pacheco Date: Fri, 19 Apr 2019 16:00:15 -0700 Subject: [PATCH 017/591] drm/i915/selftests: Check that gpu reset is usable from atomic context GPU reset is now available with GuC enabled, so re-enable our check that this reset is usable from atomic context. Signed-off-by: Fernando Pacheco Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190419230015.18121-6-fernando.pacheco@intel.com --- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 050bd1e19e02..2fd33aad8683 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -1814,9 +1814,6 @@ static int igt_atomic_reset(void *arg) /* Check that the resets are usable from atomic context */ - if (USES_GUC_SUBMISSION(i915)) - return 0; /* guc is dead; long live the guc */ - igt_global_reset_lock(i915); mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(i915); @@ -1846,6 +1843,9 @@ static int igt_atomic_reset(void *arg) force_reset(i915); } + if (USES_GUC_SUBMISSION(i915)) + goto unlock; + if (intel_has_reset_engine(i915)) { struct intel_engine_cs *engine; enum intel_engine_id id; From 2d6692e642e7ca02883524350038e2a431ef44e8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 20 Apr 2019 12:55:39 +0100 Subject: [PATCH 018/591] drm/i915: Start writeback from the shrinker When we are called to relieve mempressue via the shrinker, the only way we can make progress is either by discarding unwanted pages (those objects that userspace has marked MADV_DONTNEED) or by reclaiming the dirty objects via swap. As we know that is the only way to make further progress, we can initiate the writeback as we invalidate the objects. This means the objects we put onto the inactive anon lru list are already marked for reclaim+writeback and so will trigger a wait upon the writeback inside direct reclaim, greatly improving the success rate of direct reclaim on i915 objects. The corollary is that we may start a slow swap on opportunistic mempressure from the likes of the compaction + migration kthreads. This is limited by those threads only being allowed to shrink idle pages, but also that if we reactivate the page before it is swapped out by gpu activity, we only page the cost of repinning the page. The cost is most felt when an object is reused after mempressure, which hopefully excludes the latency sensitive tasks (as we are just extending the impact of swap thrashing to them). Apparently this is not the first time we've had this idea. Back in commit 5537252b6b6d ("drm/i915: Invalidate our pages under memory pressure") we wanted to start writeback but settled on invalidate after Hugh Dickins warned us about a possibility of a deadlock within shmemfs if we started writeback from shrink_slab. Looking at the callchain, using writeback from i915_gem_shrink should be equivalent to the pageout also employed by shrink_slab, i.e. it should not be any riskier afaict. v2: Leave mmapings intact. At this point, the only mmapings of our objects will be via CPU mmaps on the shmemfs filp, which are out-of-scope for our LRU tracking. Instead leave those pages to the inactive anon LRU page list for aging and pageout as normal. v3: Be selective on which paths trigger writeback, in particular excluding paths shrinking just to reclaim vm space (e.g. mmap, vmap reapers) and avoid starting writeback on the entire process space from within the pm freezer. References: https://bugs.freedesktop.org/show_bug.cgi?id=108686 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Matthew Auld Cc: Daniel Vetter Cc: Michal Hocko Reviewed-by: Joonas Lahtinen #v1 Link: https://patchwork.freedesktop.org/patch/msgid/20190420115539.29081-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 13 ++-- drivers/gpu/drm/i915/i915_gem.c | 27 +-------- drivers/gpu/drm/i915/i915_gem_shrinker.c | 75 ++++++++++++++++++++++-- 3 files changed, 79 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 71612e7fc8bc..dc74d33c20aa 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3007,7 +3007,7 @@ enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, enum i915_mm_subclass subclass); -void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj); +void __i915_gem_object_truncate(struct drm_i915_gem_object *obj); enum i915_map_type { I915_MAP_WB = 0, @@ -3268,11 +3268,12 @@ unsigned long i915_gem_shrink(struct drm_i915_private *i915, unsigned long target, unsigned long *nr_scanned, unsigned flags); -#define I915_SHRINK_PURGEABLE 0x1 -#define I915_SHRINK_UNBOUND 0x2 -#define I915_SHRINK_BOUND 0x4 -#define I915_SHRINK_ACTIVE 0x8 -#define I915_SHRINK_VMAPS 0x10 +#define I915_SHRINK_PURGEABLE BIT(0) +#define I915_SHRINK_UNBOUND BIT(1) +#define I915_SHRINK_BOUND BIT(2) +#define I915_SHRINK_ACTIVE BIT(3) +#define I915_SHRINK_VMAPS BIT(4) +#define I915_SHRINK_WRITEBACK BIT(5) unsigned long i915_gem_shrink_all(struct drm_i915_private *i915); void i915_gem_shrinker_register(struct drm_i915_private *i915); void i915_gem_shrinker_unregister(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e5462639de0b..a2bf94c3cfca 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2143,8 +2143,7 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, } /* Immediately discard the backing storage */ -static void -i915_gem_object_truncate(struct drm_i915_gem_object *obj) +void __i915_gem_object_truncate(struct drm_i915_gem_object *obj) { i915_gem_object_free_mmap_offset(obj); @@ -2161,28 +2160,6 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj) obj->mm.pages = ERR_PTR(-EFAULT); } -/* Try to discard unwanted pages */ -void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) -{ - struct address_space *mapping; - - lockdep_assert_held(&obj->mm.lock); - GEM_BUG_ON(i915_gem_object_has_pages(obj)); - - switch (obj->mm.madv) { - case I915_MADV_DONTNEED: - i915_gem_object_truncate(obj); - case __I915_MADV_PURGED: - return; - } - - if (obj->base.filp == NULL) - return; - - mapping = obj->base.filp->f_mapping, - invalidate_mapping_pages(mapping, 0, (loff_t)-1); -} - /* * Move pages to appropriate lru and release the pagevec, decrementing the * ref count of those pages. @@ -4023,7 +4000,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, /* if the object is no longer attached, discard its backing storage */ if (obj->mm.madv == I915_MADV_DONTNEED && !i915_gem_object_has_pages(obj)) - i915_gem_object_truncate(obj); + __i915_gem_object_truncate(obj); args->retained = obj->mm.madv != __I915_MADV_PURGED; mutex_unlock(&obj->mm.lock); diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 6da795c7e62e..588e3898b120 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -114,6 +114,67 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj) return !i915_gem_object_has_pages(obj); } +static void __start_writeback(struct drm_i915_gem_object *obj, + unsigned int flags) +{ + struct address_space *mapping; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_NONE, + .nr_to_write = SWAP_CLUSTER_MAX, + .range_start = 0, + .range_end = LLONG_MAX, + .for_reclaim = 1, + }; + unsigned long i; + + lockdep_assert_held(&obj->mm.lock); + GEM_BUG_ON(i915_gem_object_has_pages(obj)); + + switch (obj->mm.madv) { + case I915_MADV_DONTNEED: + __i915_gem_object_truncate(obj); + case __I915_MADV_PURGED: + return; + } + + if (!obj->base.filp) + return; + + if (!(flags & I915_SHRINK_WRITEBACK)) + return; + + /* + * Leave mmapings intact (GTT will have been revoked on unbinding, + * leaving only CPU mmapings around) and add those pages to the LRU + * instead of invoking writeback so they are aged and paged out + * as normal. + */ + mapping = obj->base.filp->f_mapping; + + /* Begin writeback on each dirty page */ + for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) { + struct page *page; + + page = find_lock_entry(mapping, i); + if (!page || xa_is_value(page)) + continue; + + if (!page_mapped(page) && clear_page_dirty_for_io(page)) { + int ret; + + SetPageReclaim(page); + ret = mapping->a_ops->writepage(page, &wbc); + if (!PageWriteback(page)) + ClearPageReclaim(page); + if (!ret) + goto put; + } + unlock_page(page); +put: + put_page(page); + } +} + /** * i915_gem_shrink - Shrink buffer object caches * @i915: i915 device @@ -254,7 +315,7 @@ i915_gem_shrink(struct drm_i915_private *i915, mutex_lock_nested(&obj->mm.lock, I915_MM_SHRINKER); if (!i915_gem_object_has_pages(obj)) { - __i915_gem_object_invalidate(obj); + __start_writeback(obj, flags); count += obj->base.size >> PAGE_SHIFT; } mutex_unlock(&obj->mm.lock); @@ -366,13 +427,15 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) &sc->nr_scanned, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | - I915_SHRINK_PURGEABLE); + I915_SHRINK_PURGEABLE | + I915_SHRINK_WRITEBACK); if (sc->nr_scanned < sc->nr_to_scan) freed += i915_gem_shrink(i915, sc->nr_to_scan - sc->nr_scanned, &sc->nr_scanned, I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND); + I915_SHRINK_UNBOUND | + I915_SHRINK_WRITEBACK); if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) { intel_wakeref_t wakeref; @@ -382,7 +445,8 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) &sc->nr_scanned, I915_SHRINK_ACTIVE | I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND); + I915_SHRINK_UNBOUND | + I915_SHRINK_WRITEBACK); } } @@ -404,7 +468,8 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) with_intel_runtime_pm(i915, wakeref) freed_pages += i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND); + I915_SHRINK_UNBOUND | + I915_SHRINK_WRITEBACK); /* Because we may be allocating inside our own driver, we cannot * assert that there are no objects with pinned pages that are not From 9c11b12184bb01d8ba2c48e655509b184f02c769 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 19 Apr 2019 10:10:26 +0300 Subject: [PATCH 019/591] drm/i915/icl: Fix MG_DP_MODE() register programming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the order of lane, port parameters passed to the register macro. Note that this was already partly fixed by commit 37fc7845df7b6 ("drm/i915: Call MG_DP_MODE() macro with the right parameters order") While at it simplify things by using the macro directly instead of an unnecessary redirection via an array. v2: - Add a note the commit message about simplifying things. (José) Fixes: 58106b7d816e1 ("drm/i915: Make MG PHY macros semantically consistent") Cc: José Roberto de Souza Cc: Lucas De Marchi Cc: Aditya Swarup Signed-off-by: Imre Deak Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20190419071026.32370-1-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 24f9106efcc6..f181c26f62fd 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2905,21 +2905,20 @@ static void icl_enable_phy_clock_gating(struct intel_digital_port *dig_port) struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); enum port port = dig_port->base.port; enum tc_port tc_port = intel_port_to_tc(dev_priv, port); - i915_reg_t mg_regs[2] = { MG_DP_MODE(0, port), MG_DP_MODE(1, port) }; u32 val; - int i; + int ln; if (tc_port == PORT_TC_NONE) return; - for (i = 0; i < ARRAY_SIZE(mg_regs); i++) { - val = I915_READ(mg_regs[i]); + for (ln = 0; ln < 2; ln++) { + val = I915_READ(MG_DP_MODE(ln, port)); val |= MG_DP_MODE_CFG_TR2PWR_GATING | MG_DP_MODE_CFG_TRPWR_GATING | MG_DP_MODE_CFG_CLNPWR_GATING | MG_DP_MODE_CFG_DIGPWR_GATING | MG_DP_MODE_CFG_GAONPWR_GATING; - I915_WRITE(mg_regs[i], val); + I915_WRITE(MG_DP_MODE(ln, port), val); } val = I915_READ(MG_MISC_SUS0(tc_port)); @@ -2938,21 +2937,20 @@ static void icl_disable_phy_clock_gating(struct intel_digital_port *dig_port) struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); enum port port = dig_port->base.port; enum tc_port tc_port = intel_port_to_tc(dev_priv, port); - i915_reg_t mg_regs[2] = { MG_DP_MODE(port, 0), MG_DP_MODE(port, 1) }; u32 val; - int i; + int ln; if (tc_port == PORT_TC_NONE) return; - for (i = 0; i < ARRAY_SIZE(mg_regs); i++) { - val = I915_READ(mg_regs[i]); + for (ln = 0; ln < 2; ln++) { + val = I915_READ(MG_DP_MODE(ln, port)); val &= ~(MG_DP_MODE_CFG_TR2PWR_GATING | MG_DP_MODE_CFG_TRPWR_GATING | MG_DP_MODE_CFG_CLNPWR_GATING | MG_DP_MODE_CFG_DIGPWR_GATING | MG_DP_MODE_CFG_GAONPWR_GATING); - I915_WRITE(mg_regs[i], val); + I915_WRITE(MG_DP_MODE(ln, port), val); } val = I915_READ(MG_MISC_SUS0(tc_port)); From 372b9ffb5799a3d5f4a9c51ac35cf65ed8f40101 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 18 Apr 2019 22:59:07 +0300 Subject: [PATCH 020/591] drm/i915: Fix skl+ max plane width MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The spec has changed since skl_max_plane_width() was written. Now the SKL limits are lower than what they were initially, and GLK and ICL have different limits. Update the code to match the spec. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190418195907.23912-1-ville.syrjala@linux.intel.com Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/i915/intel_display.c | 72 ++++++++++++++++++---------- 1 file changed, 47 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3bd40a4a6739..da9285228557 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2964,41 +2964,55 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb, switch (fb->modifier) { case DRM_FORMAT_MOD_LINEAR: case I915_FORMAT_MOD_X_TILED: - switch (cpp) { - case 8: - return 4096; - case 4: - case 2: - case 1: - return 8192; - default: - MISSING_CASE(cpp); - break; - } - break; + return 4096; case I915_FORMAT_MOD_Y_TILED_CCS: case I915_FORMAT_MOD_Yf_TILED_CCS: /* FIXME AUX plane? */ case I915_FORMAT_MOD_Y_TILED: case I915_FORMAT_MOD_Yf_TILED: - switch (cpp) { - case 8: + if (cpp == 8) return 2048; - case 4: + else return 4096; - case 2: - case 1: - return 8192; - default: - MISSING_CASE(cpp); - break; - } - break; default: MISSING_CASE(fb->modifier); + return 2048; } +} - return 2048; +static int glk_max_plane_width(const struct drm_framebuffer *fb, + int color_plane, + unsigned int rotation) +{ + int cpp = fb->format->cpp[color_plane]; + + switch (fb->modifier) { + case DRM_FORMAT_MOD_LINEAR: + case I915_FORMAT_MOD_X_TILED: + if (cpp == 8) + return 4096; + else + return 5120; + case I915_FORMAT_MOD_Y_TILED_CCS: + case I915_FORMAT_MOD_Yf_TILED_CCS: + /* FIXME AUX plane? */ + case I915_FORMAT_MOD_Y_TILED: + case I915_FORMAT_MOD_Yf_TILED: + if (cpp == 8) + return 2048; + else + return 5120; + default: + MISSING_CASE(fb->modifier); + return 2048; + } +} + +static int icl_max_plane_width(const struct drm_framebuffer *fb, + int color_plane, + unsigned int rotation) +{ + return 5120; } static bool skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state, @@ -3041,16 +3055,24 @@ static bool skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state static int skl_check_main_surface(struct intel_plane_state *plane_state) { + struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev); const struct drm_framebuffer *fb = plane_state->base.fb; unsigned int rotation = plane_state->base.rotation; int x = plane_state->base.src.x1 >> 16; int y = plane_state->base.src.y1 >> 16; int w = drm_rect_width(&plane_state->base.src) >> 16; int h = drm_rect_height(&plane_state->base.src) >> 16; - int max_width = skl_max_plane_width(fb, 0, rotation); + int max_width; int max_height = 4096; u32 alignment, offset, aux_offset = plane_state->color_plane[1].offset; + if (INTEL_GEN(dev_priv) >= 11) + max_width = icl_max_plane_width(fb, 0, rotation); + else if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + max_width = glk_max_plane_width(fb, 0, rotation); + else + max_width = skl_max_plane_width(fb, 0, rotation); + if (w > max_width || h > max_height) { DRM_DEBUG_KMS("requested Y/RGB source size %dx%d too big (limit %dx%d)\n", w, h, max_width, max_height); From 51eb1a1de7a92a812a3834986260834d5f52e566 Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Wed, 17 Apr 2019 11:59:01 -0700 Subject: [PATCH 021/591] drm/i915/icl: Fix clockgating issue when using scalers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the clock-gating issue when pipe scaling is enabled. (Lineage #2006604312) V2: Fix typo in headline(Chris) Handle the non double buffered nature of the register(Ville) V3: Fix checkpatch warning. BAT failure for V2 on gen3 looks unrelated. V4: Split the icl and skl wa's(Ville) V5: Split the checks for icl and skl(Ville) V6: Correct the flipped checks in intel_pre_plane_update(Ville) V7: Use enum for pipe and extend the WA for plane scalers(Ville) V8: Eliminate the redundant use of pch_pfit(Ville) Cc: Chris Wilson Cc: Ville Syrjala Cc: Rodrigo Vivi Cc: Clint Taylor Cc: Aditya Swarup Signed-off-by: Radhakrishna Sripada Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190417185901.14833-1-radhakrishna.sripada@intel.com --- drivers/gpu/drm/i915/intel_display.c | 40 ++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index da9285228557..62d663e506ab 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -476,6 +476,7 @@ static const struct intel_limit intel_limits_bxt = { .p2 = { .p2_slow = 1, .p2_fast = 20 }, }; +/* WA Display #0827: Gen9:all */ static void skl_wa_827(struct drm_i915_private *dev_priv, int pipe, bool enable) { @@ -489,6 +490,19 @@ skl_wa_827(struct drm_i915_private *dev_priv, int pipe, bool enable) ~(DUPS1_GATING_DIS | DUPS2_GATING_DIS)); } +/* Wa_2006604312:icl */ +static void +icl_wa_scalerclkgating(struct drm_i915_private *dev_priv, enum pipe pipe, + bool enable) +{ + if (enable) + I915_WRITE(CLKGATE_DIS_PSL(pipe), + I915_READ(CLKGATE_DIS_PSL(pipe)) | DPFR_GATING_DIS); + else + I915_WRITE(CLKGATE_DIS_PSL(pipe), + I915_READ(CLKGATE_DIS_PSL(pipe)) & ~DPFR_GATING_DIS); +} + static bool needs_modeset(const struct drm_crtc_state *state) { @@ -5527,6 +5541,16 @@ static bool needs_nv12_wa(struct drm_i915_private *dev_priv, return false; } +static bool needs_scalerclk_wa(struct drm_i915_private *dev_priv, + const struct intel_crtc_state *crtc_state) +{ + /* Wa_2006604312:icl */ + if (crtc_state->scaler_state.scaler_users > 0 && IS_ICELAKE(dev_priv)) + return true; + + return false; +} + static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) { struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); @@ -5560,11 +5584,13 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) intel_post_enable_primary(&crtc->base, pipe_config); } - /* Display WA 827 */ if (needs_nv12_wa(dev_priv, old_crtc_state) && - !needs_nv12_wa(dev_priv, pipe_config)) { + !needs_nv12_wa(dev_priv, pipe_config)) skl_wa_827(dev_priv, crtc->pipe, false); - } + + if (needs_scalerclk_wa(dev_priv, old_crtc_state) && + !needs_scalerclk_wa(dev_priv, pipe_config)) + icl_wa_scalerclkgating(dev_priv, crtc->pipe, false); } static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, @@ -5601,9 +5627,13 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, /* Display WA 827 */ if (!needs_nv12_wa(dev_priv, old_crtc_state) && - needs_nv12_wa(dev_priv, pipe_config)) { + needs_nv12_wa(dev_priv, pipe_config)) skl_wa_827(dev_priv, crtc->pipe, true); - } + + /* Wa_2006604312:icl */ + if (!needs_scalerclk_wa(dev_priv, old_crtc_state) && + needs_scalerclk_wa(dev_priv, pipe_config)) + icl_wa_scalerclkgating(dev_priv, crtc->pipe, true); /* * Vblank time updates from the shadow to live plane control register From 09407579abf55a8f472c221325dda81cca324326 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 24 Apr 2019 10:51:34 +0100 Subject: [PATCH 022/591] drm/i915: Store the default sseu setup on the engine As we push for better compartmentalisation, it is more convenient to copy the default sseu configuration from the engine into the derived logical context, than it is to dig it out from i915->runtime_info. v2: Use intel_sseu_from_device_info() to describe the converter Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190424095134.30249-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/Makefile.header-test | 1 + drivers/gpu/drm/i915/i915_drv.h | 14 -- drivers/gpu/drm/i915/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/i915_perf.c | 2 +- drivers/gpu/drm/i915/intel_context.c | 4 +- drivers/gpu/drm/i915/intel_context_types.h | 11 +- drivers/gpu/drm/i915/intel_device_info.h | 28 +--- drivers/gpu/drm/i915/intel_engine_cs.c | 4 + drivers/gpu/drm/i915/intel_engine_types.h | 3 + drivers/gpu/drm/i915/intel_lrc.c | 134 +---------------- drivers/gpu/drm/i915/intel_lrc.h | 2 - drivers/gpu/drm/i915/intel_sseu.c | 142 ++++++++++++++++++ drivers/gpu/drm/i915/intel_sseu.h | 67 +++++++++ .../gpu/drm/i915/selftests/i915_gem_context.c | 5 +- 15 files changed, 226 insertions(+), 194 deletions(-) create mode 100644 drivers/gpu/drm/i915/intel_sseu.c create mode 100644 drivers/gpu/drm/i915/intel_sseu.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index fbcb0904f4a8..53ff209b91bb 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -95,6 +95,7 @@ i915-y += \ intel_lrc.o \ intel_mocs.o \ intel_ringbuffer.o \ + intel_sseu.o \ intel_uncore.o \ intel_wopcm.o diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test index c1c391816fa7..5bcc78d7ac96 100644 --- a/drivers/gpu/drm/i915/Makefile.header-test +++ b/drivers/gpu/drm/i915/Makefile.header-test @@ -33,6 +33,7 @@ header_test := \ intel_psr.h \ intel_sdvo.h \ intel_sprite.h \ + intel_sseu.h \ intel_tv.h \ intel_workarounds_types.h diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index dc74d33c20aa..e6f9a5ddac3d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3390,20 +3390,6 @@ mkwrite_device_info(struct drm_i915_private *dev_priv) return (struct intel_device_info *)INTEL_INFO(dev_priv); } -static inline struct intel_sseu -intel_device_default_sseu(struct drm_i915_private *i915) -{ - const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; - struct intel_sseu value = { - .slice_mask = sseu->slice_mask, - .subslice_mask = sseu->subslice_mask[0], - .min_eus_per_subslice = sseu->max_eus_per_subslice, - .max_eus_per_subslice = sseu->max_eus_per_subslice, - }; - - return value; -} - /* modesetting */ extern void intel_modeset_init_hw(struct drm_device *dev); extern int intel_modeset_init(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index dd728b26b5aa..c02a30612df9 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -1156,7 +1156,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq, *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = lower_32_bits(offset); *cs++ = upper_32_bits(offset); - *cs++ = gen8_make_rpcs(rq->i915, &sseu); + *cs++ = intel_sseu_make_rpcs(rq->i915, &sseu); intel_ring_advance(rq, cs); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 39a4804091d7..56da457bed21 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1679,7 +1679,7 @@ gen8_update_reg_state_unlocked(struct intel_context *ce, CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, - gen8_make_rpcs(i915, &ce->sseu)); + intel_sseu_make_rpcs(i915, &ce->sseu)); } /* diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c index 8931e0fee873..961d1445833d 100644 --- a/drivers/gpu/drm/i915/intel_context.c +++ b/drivers/gpu/drm/i915/intel_context.c @@ -230,15 +230,13 @@ intel_context_init(struct intel_context *ce, ce->gem_context = ctx; ce->engine = engine; ce->ops = engine->cops; + ce->sseu = engine->sseu; INIT_LIST_HEAD(&ce->signal_link); INIT_LIST_HEAD(&ce->signals); mutex_init(&ce->pin_mutex); - /* Use the whole device by default */ - ce->sseu = intel_device_default_sseu(ctx->i915); - i915_active_request_init(&ce->active_tracker, NULL, intel_context_retire); } diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h index 68b4ca1611e0..9ec4f787c908 100644 --- a/drivers/gpu/drm/i915/intel_context_types.h +++ b/drivers/gpu/drm/i915/intel_context_types.h @@ -14,6 +14,7 @@ #include #include "i915_active_types.h" +#include "intel_sseu.h" struct i915_gem_context; struct i915_vma; @@ -28,16 +29,6 @@ struct intel_context_ops { void (*destroy)(struct kref *kref); }; -/* - * Powergating configuration for a particular (context,engine). - */ -struct intel_sseu { - u8 slice_mask; - u8 subslice_mask; - u8 min_eus_per_subslice; - u8 max_eus_per_subslice; -}; - struct intel_context { struct kref ref; diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 7a2f14eff699..1598c7079ffd 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -29,6 +29,7 @@ #include "intel_engine_types.h" #include "intel_display.h" +#include "intel_sseu.h" struct drm_printer; struct drm_i915_private; @@ -140,33 +141,6 @@ enum intel_ppgtt_type { func(overlay_needs_physical); \ func(supports_tv); -#define GEN_MAX_SLICES (6) /* CNL upper bound */ -#define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ - -struct sseu_dev_info { - u8 slice_mask; - u8 subslice_mask[GEN_MAX_SLICES]; - u16 eu_total; - u8 eu_per_subslice; - u8 min_eu_in_pool; - /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */ - u8 subslice_7eu[3]; - u8 has_slice_pg:1; - u8 has_subslice_pg:1; - u8 has_eu_pg:1; - - /* Topology fields */ - u8 max_slices; - u8 max_subslices; - u8 max_eus_per_subslice; - - /* We don't have more than 8 eus per subslice at the moment and as we - * store eus enabled using bits, no need to multiply by eus per - * subslice. - */ - u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; -}; - struct intel_device_info { u16 gen_mask; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index eea9bec04f1b..202b4b7a24f1 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -588,6 +588,10 @@ int intel_engine_setup_common(struct intel_engine_cs *engine) intel_engine_init_batch_pool(engine); intel_engine_init_cmd_parser(engine); + /* Use the whole device by default */ + engine->sseu = + intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu); + return 0; err_hwsp: diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h index 1f970c76b6a6..d07a01b3ed0b 100644 --- a/drivers/gpu/drm/i915/intel_engine_types.h +++ b/drivers/gpu/drm/i915/intel_engine_types.h @@ -17,6 +17,7 @@ #include "i915_priolist_types.h" #include "i915_selftest.h" #include "i915_timeline_types.h" +#include "intel_sseu.h" #include "intel_workarounds_types.h" #include "i915_gem_batch_pool.h" @@ -278,6 +279,8 @@ struct intel_engine_cs { u32 context_size; u32 mmio_base; + struct intel_sseu sseu; + struct intel_ring *buffer; struct i915_timeline timeline; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 4e0a351bfbca..18a9dc6ca877 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1232,7 +1232,7 @@ __execlists_update_reg_state(struct intel_context *ce, /* RPCS */ if (engine->class == RENDER_CLASS) regs[CTX_R_PWR_CLK_STATE + 1] = - gen8_make_rpcs(engine->i915, &ce->sseu); + intel_sseu_make_rpcs(engine->i915, &ce->sseu); } static int @@ -2551,138 +2551,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine) return logical_ring_init(engine); } -u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu) -{ - const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; - bool subslice_pg = sseu->has_subslice_pg; - struct intel_sseu ctx_sseu; - u8 slices, subslices; - u32 rpcs = 0; - - /* - * No explicit RPCS request is needed to ensure full - * slice/subslice/EU enablement prior to Gen9. - */ - if (INTEL_GEN(i915) < 9) - return 0; - - /* - * If i915/perf is active, we want a stable powergating configuration - * on the system. - * - * We could choose full enablement, but on ICL we know there are use - * cases which disable slices for functional, apart for performance - * reasons. So in this case we select a known stable subset. - */ - if (!i915->perf.oa.exclusive_stream) { - ctx_sseu = *req_sseu; - } else { - ctx_sseu = intel_device_default_sseu(i915); - - if (IS_GEN(i915, 11)) { - /* - * We only need subslice count so it doesn't matter - * which ones we select - just turn off low bits in the - * amount of half of all available subslices per slice. - */ - ctx_sseu.subslice_mask = - ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2)); - ctx_sseu.slice_mask = 0x1; - } - } - - slices = hweight8(ctx_sseu.slice_mask); - subslices = hweight8(ctx_sseu.subslice_mask); - - /* - * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits - * wide and Icelake has up to eight subslices, specfial programming is - * needed in order to correctly enable all subslices. - * - * According to documentation software must consider the configuration - * as 2x4x8 and hardware will translate this to 1x8x8. - * - * Furthemore, even though SScount is three bits, maximum documented - * value for it is four. From this some rules/restrictions follow: - * - * 1. - * If enabled subslice count is greater than four, two whole slices must - * be enabled instead. - * - * 2. - * When more than one slice is enabled, hardware ignores the subslice - * count altogether. - * - * From these restrictions it follows that it is not possible to enable - * a count of subslices between the SScount maximum of four restriction, - * and the maximum available number on a particular SKU. Either all - * subslices are enabled, or a count between one and four on the first - * slice. - */ - if (IS_GEN(i915, 11) && - slices == 1 && - subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) { - GEM_BUG_ON(subslices & 1); - - subslice_pg = false; - slices *= 2; - } - - /* - * Starting in Gen9, render power gating can leave - * slice/subslice/EU in a partially enabled state. We - * must make an explicit request through RPCS for full - * enablement. - */ - if (sseu->has_slice_pg) { - u32 mask, val = slices; - - if (INTEL_GEN(i915) >= 11) { - mask = GEN11_RPCS_S_CNT_MASK; - val <<= GEN11_RPCS_S_CNT_SHIFT; - } else { - mask = GEN8_RPCS_S_CNT_MASK; - val <<= GEN8_RPCS_S_CNT_SHIFT; - } - - GEM_BUG_ON(val & ~mask); - val &= mask; - - rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val; - } - - if (subslice_pg) { - u32 val = subslices; - - val <<= GEN8_RPCS_SS_CNT_SHIFT; - - GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK); - val &= GEN8_RPCS_SS_CNT_MASK; - - rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val; - } - - if (sseu->has_eu_pg) { - u32 val; - - val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT; - GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK); - val &= GEN8_RPCS_EU_MIN_MASK; - - rpcs |= val; - - val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT; - GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK); - val &= GEN8_RPCS_EU_MAX_MASK; - - rpcs |= val; - - rpcs |= GEN8_RPCS_ENABLE; - } - - return rpcs; -} - static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) { u32 indirect_ctx_offset; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 84aa230ea27b..99f75ee9d087 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -115,6 +115,4 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, const char *prefix), unsigned int max); -u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu); - #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_sseu.c b/drivers/gpu/drm/i915/intel_sseu.c new file mode 100644 index 000000000000..7f448f3bea0b --- /dev/null +++ b/drivers/gpu/drm/i915/intel_sseu.c @@ -0,0 +1,142 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_lrc_reg.h" +#include "intel_sseu.h" + +u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, + const struct intel_sseu *req_sseu) +{ + const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; + bool subslice_pg = sseu->has_subslice_pg; + struct intel_sseu ctx_sseu; + u8 slices, subslices; + u32 rpcs = 0; + + /* + * No explicit RPCS request is needed to ensure full + * slice/subslice/EU enablement prior to Gen9. + */ + if (INTEL_GEN(i915) < 9) + return 0; + + /* + * If i915/perf is active, we want a stable powergating configuration + * on the system. + * + * We could choose full enablement, but on ICL we know there are use + * cases which disable slices for functional, apart for performance + * reasons. So in this case we select a known stable subset. + */ + if (!i915->perf.oa.exclusive_stream) { + ctx_sseu = *req_sseu; + } else { + ctx_sseu = intel_sseu_from_device_info(sseu); + + if (IS_GEN(i915, 11)) { + /* + * We only need subslice count so it doesn't matter + * which ones we select - just turn off low bits in the + * amount of half of all available subslices per slice. + */ + ctx_sseu.subslice_mask = + ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2)); + ctx_sseu.slice_mask = 0x1; + } + } + + slices = hweight8(ctx_sseu.slice_mask); + subslices = hweight8(ctx_sseu.subslice_mask); + + /* + * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits + * wide and Icelake has up to eight subslices, specfial programming is + * needed in order to correctly enable all subslices. + * + * According to documentation software must consider the configuration + * as 2x4x8 and hardware will translate this to 1x8x8. + * + * Furthemore, even though SScount is three bits, maximum documented + * value for it is four. From this some rules/restrictions follow: + * + * 1. + * If enabled subslice count is greater than four, two whole slices must + * be enabled instead. + * + * 2. + * When more than one slice is enabled, hardware ignores the subslice + * count altogether. + * + * From these restrictions it follows that it is not possible to enable + * a count of subslices between the SScount maximum of four restriction, + * and the maximum available number on a particular SKU. Either all + * subslices are enabled, or a count between one and four on the first + * slice. + */ + if (IS_GEN(i915, 11) && + slices == 1 && + subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) { + GEM_BUG_ON(subslices & 1); + + subslice_pg = false; + slices *= 2; + } + + /* + * Starting in Gen9, render power gating can leave + * slice/subslice/EU in a partially enabled state. We + * must make an explicit request through RPCS for full + * enablement. + */ + if (sseu->has_slice_pg) { + u32 mask, val = slices; + + if (INTEL_GEN(i915) >= 11) { + mask = GEN11_RPCS_S_CNT_MASK; + val <<= GEN11_RPCS_S_CNT_SHIFT; + } else { + mask = GEN8_RPCS_S_CNT_MASK; + val <<= GEN8_RPCS_S_CNT_SHIFT; + } + + GEM_BUG_ON(val & ~mask); + val &= mask; + + rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val; + } + + if (subslice_pg) { + u32 val = subslices; + + val <<= GEN8_RPCS_SS_CNT_SHIFT; + + GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK); + val &= GEN8_RPCS_SS_CNT_MASK; + + rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val; + } + + if (sseu->has_eu_pg) { + u32 val; + + val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT; + GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK); + val &= GEN8_RPCS_EU_MIN_MASK; + + rpcs |= val; + + val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT; + GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK); + val &= GEN8_RPCS_EU_MAX_MASK; + + rpcs |= val; + + rpcs |= GEN8_RPCS_ENABLE; + } + + return rpcs; +} diff --git a/drivers/gpu/drm/i915/intel_sseu.h b/drivers/gpu/drm/i915/intel_sseu.h new file mode 100644 index 000000000000..73bc824094e8 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_sseu.h @@ -0,0 +1,67 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_SSEU_H__ +#define __INTEL_SSEU_H__ + +#include + +struct drm_i915_private; + +#define GEN_MAX_SLICES (6) /* CNL upper bound */ +#define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ + +struct sseu_dev_info { + u8 slice_mask; + u8 subslice_mask[GEN_MAX_SLICES]; + u16 eu_total; + u8 eu_per_subslice; + u8 min_eu_in_pool; + /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */ + u8 subslice_7eu[3]; + u8 has_slice_pg:1; + u8 has_subslice_pg:1; + u8 has_eu_pg:1; + + /* Topology fields */ + u8 max_slices; + u8 max_subslices; + u8 max_eus_per_subslice; + + /* We don't have more than 8 eus per subslice at the moment and as we + * store eus enabled using bits, no need to multiply by eus per + * subslice. + */ + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; +}; + +/* + * Powergating configuration for a particular (context,engine). + */ +struct intel_sseu { + u8 slice_mask; + u8 subslice_mask; + u8 min_eus_per_subslice; + u8 max_eus_per_subslice; +}; + +static inline struct intel_sseu +intel_sseu_from_device_info(const struct sseu_dev_info *sseu) +{ + struct intel_sseu value = { + .slice_mask = sseu->slice_mask, + .subslice_mask = sseu->subslice_mask[0], + .min_eus_per_subslice = sseu->max_eus_per_subslice, + .max_eus_per_subslice = sseu->max_eus_per_subslice, + }; + + return value; +} + +u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, + const struct intel_sseu *req_sseu); + +#endif /* __INTEL_SSEU_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 4e1b6efc6b22..e1cb22f03e8e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -962,8 +962,7 @@ __sseu_finish(struct drm_i915_private *i915, unsigned int expected, struct igt_spinner *spin) { - unsigned int slices = - hweight32(intel_device_default_sseu(i915).slice_mask); + unsigned int slices = hweight32(engine->sseu.slice_mask); u32 rpcs = 0; int ret = 0; @@ -1047,8 +1046,8 @@ __igt_ctx_sseu(struct drm_i915_private *i915, const char *name, unsigned int flags) { - struct intel_sseu default_sseu = intel_device_default_sseu(i915); struct intel_engine_cs *engine = i915->engine[RCS0]; + struct intel_sseu default_sseu = engine->sseu; struct drm_i915_gem_object *obj; struct i915_gem_context *ctx; struct intel_sseu pg_sseu; From 86554f48e511faa58f729cc077b1733179882804 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 24 Apr 2019 12:09:41 +0100 Subject: [PATCH 023/591] drm/i915/selftests: Verify whitelist of context registers The RING_NONPRIV allows us to add registers to a whitelist that allows userspace to modify them. Ideally such registers should be safe and saved within the context such that they do not impact system behaviour for other users. This selftest verifies that those registers we do add are (a) then writable by userspace and (b) only affect a single client. Opens: - Is GEN9_SLICE_COMMON_ECO_CHICKEN1 really write-only? v2: Remove the blatant copy-paste. v3: Emulate userspace register writes via the batch again. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190424110941.9869-1-chris@chris-wilson.co.uk --- .../drm/i915/selftests/intel_workarounds.c | 312 ++++++++++++++++++ 1 file changed, 312 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index a363748a7a4f..aa841e4d3031 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -700,6 +700,317 @@ out: return err; } +static int read_whitelisted_registers(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct i915_vma *results) +{ + intel_wakeref_t wakeref; + struct i915_request *rq; + int i, err = 0; + u32 srm, *cs; + + rq = ERR_PTR(-ENODEV); + with_intel_runtime_pm(engine->i915, wakeref) + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + srm = MI_STORE_REGISTER_MEM; + if (INTEL_GEN(ctx->i915) >= 8) + srm++; + + cs = intel_ring_begin(rq, 4 * engine->whitelist.count); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_req; + } + + for (i = 0; i < engine->whitelist.count; i++) { + u64 offset = results->node.start + sizeof(u32) * i; + + *cs++ = srm; + *cs++ = i915_mmio_reg_offset(engine->whitelist.list[i].reg); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + } + intel_ring_advance(rq, cs); + +err_req: + i915_request_add(rq); + + if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) + err = -EIO; + + return err; +} + +static int scrub_whitelisted_registers(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + intel_wakeref_t wakeref; + struct i915_request *rq; + struct i915_vma *batch; + int i, err = 0; + u32 *cs; + + batch = create_batch(ctx); + if (IS_ERR(batch)) + return PTR_ERR(batch); + + cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_batch; + } + + *cs++ = MI_LOAD_REGISTER_IMM(engine->whitelist.count); + for (i = 0; i < engine->whitelist.count; i++) { + *cs++ = i915_mmio_reg_offset(engine->whitelist.list[i].reg); + *cs++ = 0xffffffff; + } + *cs++ = MI_BATCH_BUFFER_END; + + i915_gem_object_flush_map(batch->obj); + i915_gem_chipset_flush(ctx->i915); + + rq = ERR_PTR(-ENODEV); + with_intel_runtime_pm(engine->i915, wakeref) + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_unpin; + } + + if (engine->emit_init_breadcrumb) { /* Be nice if we hang */ + err = engine->emit_init_breadcrumb(rq); + if (err) + goto err_request; + } + + /* Perform the writes from an unprivileged "user" batch */ + err = engine->emit_bb_start(rq, batch->node.start, 0, 0); + +err_request: + i915_request_add(rq); + if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) + err = -EIO; + +err_unpin: + i915_gem_object_unpin_map(batch->obj); +err_batch: + i915_vma_unpin_and_release(&batch, 0); + return err; +} + +struct regmask { + i915_reg_t reg; + unsigned long gen_mask; +}; + +static bool find_reg(struct drm_i915_private *i915, + i915_reg_t reg, + const struct regmask *tbl, + unsigned long count) +{ + u32 offset = i915_mmio_reg_offset(reg); + + while (count--) { + if (INTEL_INFO(i915)->gen_mask & tbl->gen_mask && + i915_mmio_reg_offset(tbl->reg) == offset) + return true; + tbl++; + } + + return false; +} + +static bool pardon_reg(struct drm_i915_private *i915, i915_reg_t reg) +{ + /* Alas, we must pardon some whitelists. Mistakes already made */ + static const struct regmask pardon[] = { + { GEN9_CTX_PREEMPT_REG, INTEL_GEN_MASK(9, 9) }, + { GEN8_L3SQCREG4, INTEL_GEN_MASK(9, 9) }, + }; + + return find_reg(i915, reg, pardon, ARRAY_SIZE(pardon)); +} + +static bool result_eq(struct intel_engine_cs *engine, + u32 a, u32 b, i915_reg_t reg) +{ + if (a != b && !pardon_reg(engine->i915, reg)) { + pr_err("Whitelisted register 0x%4x not context saved: A=%08x, B=%08x\n", + i915_mmio_reg_offset(reg), a, b); + return false; + } + + return true; +} + +static bool writeonly_reg(struct drm_i915_private *i915, i915_reg_t reg) +{ + /* Some registers do not seem to behave and our writes unreadable */ + static const struct regmask wo[] = { + { GEN9_SLICE_COMMON_ECO_CHICKEN1, INTEL_GEN_MASK(9, 9) }, + }; + + return find_reg(i915, reg, wo, ARRAY_SIZE(wo)); +} + +static bool result_neq(struct intel_engine_cs *engine, + u32 a, u32 b, i915_reg_t reg) +{ + if (a == b && !writeonly_reg(engine->i915, reg)) { + pr_err("Whitelist register 0x%4x:%08x was unwritable\n", + i915_mmio_reg_offset(reg), a); + return false; + } + + return true; +} + +static int +check_whitelisted_registers(struct intel_engine_cs *engine, + struct i915_vma *A, + struct i915_vma *B, + bool (*fn)(struct intel_engine_cs *engine, + u32 a, u32 b, + i915_reg_t reg)) +{ + u32 *a, *b; + int i, err; + + a = i915_gem_object_pin_map(A->obj, I915_MAP_WB); + if (IS_ERR(a)) + return PTR_ERR(a); + + b = i915_gem_object_pin_map(B->obj, I915_MAP_WB); + if (IS_ERR(b)) { + err = PTR_ERR(b); + goto err_a; + } + + err = 0; + for (i = 0; i < engine->whitelist.count; i++) { + if (!fn(engine, a[i], b[i], engine->whitelist.list[i].reg)) + err = -EINVAL; + } + + i915_gem_object_unpin_map(B->obj); +err_a: + i915_gem_object_unpin_map(A->obj); + return err; +} + +static int live_isolated_whitelist(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct { + struct i915_gem_context *ctx; + struct i915_vma *scratch[2]; + } client[2] = {}; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int i, err = 0; + + /* + * Check that a write into a whitelist register works, but + * invisible to a second context. + */ + + if (!intel_engines_has_context_isolation(i915)) + return 0; + + if (!i915->kernel_context->ppgtt) + return 0; + + for (i = 0; i < ARRAY_SIZE(client); i++) { + struct i915_gem_context *c; + + c = kernel_context(i915); + if (IS_ERR(c)) { + err = PTR_ERR(c); + goto err; + } + + client[i].scratch[0] = create_scratch(&c->ppgtt->vm, 1024); + if (IS_ERR(client[i].scratch[0])) { + err = PTR_ERR(client[i].scratch[0]); + kernel_context_close(c); + goto err; + } + + client[i].scratch[1] = create_scratch(&c->ppgtt->vm, 1024); + if (IS_ERR(client[i].scratch[1])) { + err = PTR_ERR(client[i].scratch[1]); + i915_vma_unpin_and_release(&client[i].scratch[0], 0); + kernel_context_close(c); + goto err; + } + + client[i].ctx = c; + } + + for_each_engine(engine, i915, id) { + if (!engine->whitelist.count) + continue; + + /* Read default values */ + err = read_whitelisted_registers(client[0].ctx, engine, + client[0].scratch[0]); + if (err) + goto err; + + /* Try to overwrite registers (should only affect ctx0) */ + err = scrub_whitelisted_registers(client[0].ctx, engine); + if (err) + goto err; + + /* Read values from ctx1, we expect these to be defaults */ + err = read_whitelisted_registers(client[1].ctx, engine, + client[1].scratch[0]); + if (err) + goto err; + + /* Verify that both reads return the same default values */ + err = check_whitelisted_registers(engine, + client[0].scratch[0], + client[1].scratch[0], + result_eq); + if (err) + goto err; + + /* Read back the updated values in ctx0 */ + err = read_whitelisted_registers(client[0].ctx, engine, + client[0].scratch[1]); + if (err) + goto err; + + /* User should be granted privilege to overwhite regs */ + err = check_whitelisted_registers(engine, + client[0].scratch[0], + client[0].scratch[1], + result_neq); + if (err) + goto err; + } + +err: + for (i = 0; i < ARRAY_SIZE(client); i++) { + if (!client[i].ctx) + break; + + i915_vma_unpin_and_release(&client[i].scratch[1], 0); + i915_vma_unpin_and_release(&client[i].scratch[0], 0); + kernel_context_close(client[i].ctx); + } + + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + return err; +} + static bool verify_gt_engine_wa(struct drm_i915_private *i915, struct wa_lists *lists, const char *str) { @@ -844,6 +1155,7 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915) static const struct i915_subtest tests[] = { SUBTEST(live_dirty_whitelist), SUBTEST(live_reset_whitelist), + SUBTEST(live_isolated_whitelist), SUBTEST(live_gpu_reset_gt_engine_workarounds), SUBTEST(live_engine_reset_gt_engine_workarounds), }; From 112ed2d31a46f4704085ad925435b77e62b8abee Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 24 Apr 2019 18:48:39 +0100 Subject: [PATCH 024/591] drm/i915: Move GraphicsTechnology files under gt/ Start partitioning off the code that talks to the hardware (GT) from the uapi layers and move the device facing code under gt/ One casualty is s/intel_ringbuffer.h/intel_engine.h/ with the plan to subdivide that header and body further (and split out the submission code from the ringbuffer and logical context handling). This patch aims to be simple motion so git can fixup inflight patches with little mess. Signed-off-by: Chris Wilson Acked-by: Joonas Lahtinen Acked-by: Jani Nikula Acked-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20190424174839.7141-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 46 ++++++++++++------- drivers/gpu/drm/i915/Makefile.header-test | 6 +-- drivers/gpu/drm/i915/gt/Makefile | 2 + drivers/gpu/drm/i915/gt/Makefile.header-test | 16 +++++++ .../gpu/drm/i915/{ => gt}/intel_breadcrumbs.c | 0 drivers/gpu/drm/i915/{ => gt}/intel_context.c | 3 +- drivers/gpu/drm/i915/{ => gt}/intel_context.h | 0 .../drm/i915/{ => gt}/intel_context_types.h | 0 .../{intel_ringbuffer.h => gt/intel_engine.h} | 0 .../gpu/drm/i915/{ => gt}/intel_engine_cs.c | 8 ++-- .../drm/i915/{ => gt}/intel_engine_types.h | 5 +- .../drm/i915/{ => gt}/intel_gpu_commands.h | 0 .../gpu/drm/i915/{ => gt}/intel_hangcheck.c | 4 +- drivers/gpu/drm/i915/{ => gt}/intel_lrc.c | 5 +- drivers/gpu/drm/i915/{ => gt}/intel_lrc.h | 4 +- drivers/gpu/drm/i915/{ => gt}/intel_lrc_reg.h | 0 drivers/gpu/drm/i915/{ => gt}/intel_mocs.c | 4 +- drivers/gpu/drm/i915/{ => gt}/intel_mocs.h | 4 +- .../i915/{i915_reset.c => gt/intel_reset.c} | 2 +- .../i915/{i915_reset.h => gt/intel_reset.h} | 2 +- .../gpu/drm/i915/{ => gt}/intel_ringbuffer.c | 3 +- drivers/gpu/drm/i915/{ => gt}/intel_sseu.c | 0 drivers/gpu/drm/i915/{ => gt}/intel_sseu.h | 0 .../gpu/drm/i915/{ => gt}/intel_workarounds.c | 2 +- .../gpu/drm/i915/{ => gt}/intel_workarounds.h | 8 +++- .../i915/{ => gt}/intel_workarounds_types.h | 0 .../drm/i915/{selftests => gt}/mock_engine.c | 10 ++-- .../drm/i915/{selftests => gt}/mock_engine.h | 2 +- .../selftest_engine_cs.c} | 0 .../selftest_hangcheck.c} | 14 +++--- .../intel_lrc.c => gt/selftest_lrc.c} | 16 +++---- .../selftest_workarounds.c} | 16 +++---- drivers/gpu/drm/i915/i915_cmd_parser.c | 3 +- drivers/gpu/drm/i915/i915_debugfs.c | 3 +- drivers/gpu/drm/i915/i915_drv.c | 5 +- drivers/gpu/drm/i915/i915_drv.h | 7 +-- drivers/gpu/drm/i915/i915_gem.c | 7 +-- drivers/gpu/drm/i915/i915_gem_context.c | 7 ++- drivers/gpu/drm/i915/i915_gem_context.h | 3 +- drivers/gpu/drm/i915/i915_gem_context_types.h | 3 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 1 - drivers/gpu/drm/i915/i915_gem_gtt.h | 2 +- drivers/gpu/drm/i915/i915_gpu_error.h | 3 +- drivers/gpu/drm/i915/i915_perf.c | 3 +- drivers/gpu/drm/i915/i915_pmu.c | 4 +- drivers/gpu/drm/i915/i915_request.c | 1 - drivers/gpu/drm/i915/i915_scheduler_types.h | 2 +- drivers/gpu/drm/i915/i915_trace.h | 3 +- drivers/gpu/drm/i915/i915_vma.c | 3 +- drivers/gpu/drm/i915/intel_device_info.h | 6 ++- drivers/gpu/drm/i915/intel_display.c | 1 - drivers/gpu/drm/i915/intel_guc_submission.c | 3 +- drivers/gpu/drm/i915/intel_guc_submission.h | 3 +- drivers/gpu/drm/i915/intel_uc.c | 2 +- .../gpu/drm/i915/selftests/i915_gem_context.c | 5 +- drivers/gpu/drm/i915/selftests/igt_reset.c | 3 +- drivers/gpu/drm/i915/selftests/igt_spinner.h | 3 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 3 +- drivers/gpu/drm/i915/selftests/mock_request.c | 3 +- 59 files changed, 164 insertions(+), 110 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/Makefile create mode 100644 drivers/gpu/drm/i915/gt/Makefile.header-test rename drivers/gpu/drm/i915/{ => gt}/intel_breadcrumbs.c (100%) rename drivers/gpu/drm/i915/{ => gt}/intel_context.c (99%) rename drivers/gpu/drm/i915/{ => gt}/intel_context.h (100%) rename drivers/gpu/drm/i915/{ => gt}/intel_context_types.h (100%) rename drivers/gpu/drm/i915/{intel_ringbuffer.h => gt/intel_engine.h} (100%) rename drivers/gpu/drm/i915/{ => gt}/intel_engine_cs.c (99%) rename drivers/gpu/drm/i915/{ => gt}/intel_engine_types.h (99%) rename drivers/gpu/drm/i915/{ => gt}/intel_gpu_commands.h (100%) rename drivers/gpu/drm/i915/{ => gt}/intel_hangcheck.c (99%) rename drivers/gpu/drm/i915/{ => gt}/intel_lrc.c (99%) rename drivers/gpu/drm/i915/{ => gt}/intel_lrc.h (98%) rename drivers/gpu/drm/i915/{ => gt}/intel_lrc_reg.h (100%) rename drivers/gpu/drm/i915/{ => gt}/intel_mocs.c (99%) rename drivers/gpu/drm/i915/{ => gt}/intel_mocs.h (97%) rename drivers/gpu/drm/i915/{i915_reset.c => gt/intel_reset.c} (99%) rename drivers/gpu/drm/i915/{i915_reset.h => gt/intel_reset.h} (98%) rename drivers/gpu/drm/i915/{ => gt}/intel_ringbuffer.c (99%) rename drivers/gpu/drm/i915/{ => gt}/intel_sseu.c (100%) rename drivers/gpu/drm/i915/{ => gt}/intel_sseu.h (100%) rename drivers/gpu/drm/i915/{ => gt}/intel_workarounds.c (99%) rename drivers/gpu/drm/i915/{ => gt}/intel_workarounds.h (88%) rename drivers/gpu/drm/i915/{ => gt}/intel_workarounds_types.h (100%) rename drivers/gpu/drm/i915/{selftests => gt}/mock_engine.c (97%) rename drivers/gpu/drm/i915/{selftests => gt}/mock_engine.h (98%) rename drivers/gpu/drm/i915/{selftests/intel_engine_cs.c => gt/selftest_engine_cs.c} (100%) rename drivers/gpu/drm/i915/{selftests/intel_hangcheck.c => gt/selftest_hangcheck.c} (99%) rename drivers/gpu/drm/i915/{selftests/intel_lrc.c => gt/selftest_lrc.c} (99%) rename drivers/gpu/drm/i915/{selftests/intel_workarounds.c => gt/selftest_workarounds.c} (98%) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 53ff209b91bb..40130cf5c003 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -35,32 +35,53 @@ subdir-ccflags-y += \ # Extra header tests include $(src)/Makefile.header-test +subdir-ccflags-y += -I$(src) + # Please keep these build lists sorted! # core driver code i915-y += i915_drv.o \ i915_irq.o \ - i915_memcpy.o \ - i915_mm.o \ i915_params.o \ i915_pci.o \ - i915_reset.o \ i915_suspend.o \ - i915_sw_fence.o \ - i915_syncmap.o \ i915_sysfs.o \ - i915_user_extensions.o \ intel_csr.o \ intel_device_info.o \ intel_pm.o \ intel_runtime_pm.o \ - intel_workarounds.o + intel_uncore.o + +# core library code +i915-y += \ + i915_memcpy.o \ + i915_mm.o \ + i915_sw_fence.o \ + i915_syncmap.o \ + i915_user_extensions.o i915-$(CONFIG_COMPAT) += i915_ioc32.o i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o -# GEM code +# "Graphics Technology" (aka we talk to the gpu) +obj-y += gt/ +gt-y += \ + gt/intel_breadcrumbs.o \ + gt/intel_context.o \ + gt/intel_engine_cs.o \ + gt/intel_hangcheck.o \ + gt/intel_lrc.o \ + gt/intel_reset.o \ + gt/intel_ringbuffer.o \ + gt/intel_mocs.o \ + gt/intel_sseu.o \ + gt/intel_workarounds.o +gt-$(CONFIG_DRM_I915_SELFTEST) += \ + gt/mock_engine.o +i915-y += $(gt-y) + +# GEM (Graphics Execution Management) code i915-y += \ i915_active.o \ i915_cmd_parser.o \ @@ -88,15 +109,6 @@ i915-y += \ i915_timeline.o \ i915_trace_points.o \ i915_vma.o \ - intel_breadcrumbs.o \ - intel_context.o \ - intel_engine_cs.o \ - intel_hangcheck.o \ - intel_lrc.o \ - intel_mocs.o \ - intel_ringbuffer.o \ - intel_sseu.o \ - intel_uncore.o \ intel_wopcm.o # general-purpose microcontroller (GuC) support diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test index 5bcc78d7ac96..96a5d90629ec 100644 --- a/drivers/gpu/drm/i915/Makefile.header-test +++ b/drivers/gpu/drm/i915/Makefile.header-test @@ -13,13 +13,11 @@ header_test := \ intel_cdclk.h \ intel_color.h \ intel_connector.h \ - intel_context_types.h \ intel_crt.h \ intel_csr.h \ intel_ddi.h \ intel_dp.h \ intel_dvo.h \ - intel_engine_types.h \ intel_fbc.h \ intel_fbdev.h \ intel_frontbuffer.h \ @@ -33,9 +31,7 @@ header_test := \ intel_psr.h \ intel_sdvo.h \ intel_sprite.h \ - intel_sseu.h \ - intel_tv.h \ - intel_workarounds_types.h + intel_tv.h quiet_cmd_header_test = HDRTEST $@ cmd_header_test = echo "\#include \"$( $@ diff --git a/drivers/gpu/drm/i915/gt/Makefile b/drivers/gpu/drm/i915/gt/Makefile new file mode 100644 index 000000000000..1c75b5c9790c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/Makefile @@ -0,0 +1,2 @@ +# Extra header tests +include $(src)/Makefile.header-test diff --git a/drivers/gpu/drm/i915/gt/Makefile.header-test b/drivers/gpu/drm/i915/gt/Makefile.header-test new file mode 100644 index 000000000000..61e06cbb4b32 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/Makefile.header-test @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: MIT +# Copyright © 2019 Intel Corporation + +# Test the headers are compilable as standalone units +header_test := $(notdir $(wildcard $(src)/*.h)) + +quiet_cmd_header_test = HDRTEST $@ + cmd_header_test = echo "\#include \"$( $@ + +header_test_%.c: %.h + $(call cmd,header_test) + +extra-$(CONFIG_DRM_I915_WERROR) += \ + $(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h))) + +clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h))) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c similarity index 100% rename from drivers/gpu/drm/i915/intel_breadcrumbs.c rename to drivers/gpu/drm/i915/gt/intel_breadcrumbs.c diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c similarity index 99% rename from drivers/gpu/drm/i915/intel_context.c rename to drivers/gpu/drm/i915/gt/intel_context.c index 961d1445833d..ebd1e5919a4a 100644 --- a/drivers/gpu/drm/i915/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -7,8 +7,9 @@ #include "i915_drv.h" #include "i915_gem_context.h" #include "i915_globals.h" + #include "intel_context.h" -#include "intel_ringbuffer.h" +#include "intel_engine.h" static struct i915_global_context { struct i915_global base; diff --git a/drivers/gpu/drm/i915/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h similarity index 100% rename from drivers/gpu/drm/i915/intel_context.h rename to drivers/gpu/drm/i915/gt/intel_context.h diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h similarity index 100% rename from drivers/gpu/drm/i915/intel_context_types.h rename to drivers/gpu/drm/i915/gt/intel_context_types.h diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/gt/intel_engine.h similarity index 100% rename from drivers/gpu/drm/i915/intel_ringbuffer.h rename to drivers/gpu/drm/i915/gt/intel_engine.h diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c similarity index 99% rename from drivers/gpu/drm/i915/intel_engine_cs.c rename to drivers/gpu/drm/i915/gt/intel_engine_cs.c index 202b4b7a24f1..79ac56748b90 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -25,9 +25,10 @@ #include #include "i915_drv.h" -#include "i915_reset.h" -#include "intel_ringbuffer.h" + +#include "intel_engine.h" #include "intel_lrc.h" +#include "intel_reset.h" /* Haswell does have the CXT_SIZE register however it does not appear to be * valid. Now, docs explain in dwords what is in the context object. The full @@ -1756,6 +1757,5 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/mock_engine.c" -#include "selftests/intel_engine_cs.c" +#include "selftest_engine_cs.c" #endif diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h similarity index 99% rename from drivers/gpu/drm/i915/intel_engine_types.h rename to drivers/gpu/drm/i915/gt/intel_engine_types.h index d07a01b3ed0b..3adf58da6d2c 100644 --- a/drivers/gpu/drm/i915/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -14,15 +14,14 @@ #include #include "i915_gem.h" +#include "i915_gem_batch_pool.h" +#include "i915_pmu.h" #include "i915_priolist_types.h" #include "i915_selftest.h" #include "i915_timeline_types.h" #include "intel_sseu.h" #include "intel_workarounds_types.h" -#include "i915_gem_batch_pool.h" -#include "i915_pmu.h" - #define I915_MAX_SLICES 3 #define I915_MAX_SUBSLICES 8 diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h similarity index 100% rename from drivers/gpu/drm/i915/intel_gpu_commands.h rename to drivers/gpu/drm/i915/gt/intel_gpu_commands.h diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c similarity index 99% rename from drivers/gpu/drm/i915/intel_hangcheck.c rename to drivers/gpu/drm/i915/gt/intel_hangcheck.c index 3d51ed1428d4..3053a706a561 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c @@ -22,8 +22,8 @@ * */ +#include "intel_reset.h" #include "i915_drv.h" -#include "i915_reset.h" struct hangcheck { u64 acthd; @@ -330,5 +330,5 @@ void intel_hangcheck_init(struct drm_i915_private *i915) } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/intel_hangcheck.c" +#include "selftest_hangcheck.c" #endif diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c similarity index 99% rename from drivers/gpu/drm/i915/intel_lrc.c rename to drivers/gpu/drm/i915/gt/intel_lrc.c index 18a9dc6ca877..5cadf8f6a23d 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -133,13 +133,12 @@ */ #include -#include #include "i915_drv.h" #include "i915_gem_render_state.h" -#include "i915_reset.h" #include "i915_vgpu.h" #include "intel_lrc_reg.h" #include "intel_mocs.h" +#include "intel_reset.h" #include "intel_workarounds.h" #define RING_EXECLIST_QFULL (1 << 0x2) @@ -2905,5 +2904,5 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/intel_lrc.c" +#include "selftest_lrc.c" #endif diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h similarity index 98% rename from drivers/gpu/drm/i915/intel_lrc.h rename to drivers/gpu/drm/i915/gt/intel_lrc.h index 99f75ee9d087..1a33ec74af8c 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -24,8 +24,7 @@ #ifndef _INTEL_LRC_H_ #define _INTEL_LRC_H_ -#include "intel_ringbuffer.h" -#include "i915_gem_context.h" +#include "intel_engine.h" /* Execlists regs */ #define RING_ELSP(base) _MMIO((base) + 0x230) @@ -99,7 +98,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine); struct drm_printer; struct drm_i915_private; -struct i915_gem_context; void intel_execlists_set_default_submission(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h similarity index 100% rename from drivers/gpu/drm/i915/intel_lrc_reg.h rename to drivers/gpu/drm/i915/gt/intel_lrc_reg.h diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c similarity index 99% rename from drivers/gpu/drm/i915/intel_mocs.c rename to drivers/gpu/drm/i915/gt/intel_mocs.c index 274ba78500c0..79df66022d3a 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -20,9 +20,11 @@ * SOFTWARE. */ +#include "i915_drv.h" + +#include "intel_engine.h" #include "intel_mocs.h" #include "intel_lrc.h" -#include "intel_ringbuffer.h" /* structures required */ struct drm_i915_mocs_entry { diff --git a/drivers/gpu/drm/i915/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h similarity index 97% rename from drivers/gpu/drm/i915/intel_mocs.h rename to drivers/gpu/drm/i915/gt/intel_mocs.h index 3d99d1271b2b..0913704a1af2 100644 --- a/drivers/gpu/drm/i915/intel_mocs.h +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h @@ -49,7 +49,9 @@ * context handling keep the MOCS in step. */ -#include "i915_drv.h" +struct drm_i915_private; +struct i915_request; +struct intel_engine_cs; int intel_rcs_context_init_mocs(struct i915_request *rq); void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c similarity index 99% rename from drivers/gpu/drm/i915/i915_reset.c rename to drivers/gpu/drm/i915/gt/intel_reset.c index 1092d16c289c..7db498567843 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -9,7 +9,7 @@ #include "i915_drv.h" #include "i915_gpu_error.h" -#include "i915_reset.h" +#include "intel_reset.h" #include "intel_guc.h" diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h similarity index 98% rename from drivers/gpu/drm/i915/i915_reset.h rename to drivers/gpu/drm/i915/gt/intel_reset.h index 3c0450289b8f..8e662bb43a9b 100644 --- a/drivers/gpu/drm/i915/i915_reset.h +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -11,7 +11,7 @@ #include #include -#include "intel_engine_types.h" +#include "gt/intel_engine_types.h" struct drm_i915_private; struct i915_request; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c similarity index 99% rename from drivers/gpu/drm/i915/intel_ringbuffer.c rename to drivers/gpu/drm/i915/gt/intel_ringbuffer.c index 3844581f622c..ac84a383748e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -33,9 +33,8 @@ #include "i915_drv.h" #include "i915_gem_render_state.h" -#include "i915_reset.h" #include "i915_trace.h" -#include "intel_drv.h" +#include "intel_reset.h" #include "intel_workarounds.h" /* Rough estimate of the typical request size, performing a flush, diff --git a/drivers/gpu/drm/i915/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c similarity index 100% rename from drivers/gpu/drm/i915/intel_sseu.c rename to drivers/gpu/drm/i915/gt/intel_sseu.c diff --git a/drivers/gpu/drm/i915/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h similarity index 100% rename from drivers/gpu/drm/i915/intel_sseu.h rename to drivers/gpu/drm/i915/gt/intel_sseu.h diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c similarity index 99% rename from drivers/gpu/drm/i915/intel_workarounds.c rename to drivers/gpu/drm/i915/gt/intel_workarounds.c index b3cbed1ee1c9..f46ed0e2f07c 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1398,5 +1398,5 @@ int intel_engine_verify_workarounds(struct intel_engine_cs *engine, } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/intel_workarounds.c" +#include "selftest_workarounds.c" #endif diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/gt/intel_workarounds.h similarity index 88% rename from drivers/gpu/drm/i915/intel_workarounds.h rename to drivers/gpu/drm/i915/gt/intel_workarounds.h index fdf7ebb90f28..3761a6ee58bb 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.h +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.h @@ -4,13 +4,17 @@ * Copyright © 2014-2018 Intel Corporation */ -#ifndef _I915_WORKAROUNDS_H_ -#define _I915_WORKAROUNDS_H_ +#ifndef _INTEL_WORKAROUNDS_H_ +#define _INTEL_WORKAROUNDS_H_ #include #include "intel_workarounds_types.h" +struct drm_i915_private; +struct i915_request; +struct intel_engine_cs; + static inline void intel_wa_list_free(struct i915_wa_list *wal) { kfree(wal->list); diff --git a/drivers/gpu/drm/i915/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h similarity index 100% rename from drivers/gpu/drm/i915/intel_workarounds_types.h rename to drivers/gpu/drm/i915/gt/intel_workarounds_types.h diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c similarity index 97% rename from drivers/gpu/drm/i915/selftests/mock_engine.c rename to drivers/gpu/drm/i915/gt/mock_engine.c index 61a8206ed677..414afd2f27fe 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -22,8 +22,11 @@ * */ +#include "i915_drv.h" +#include "intel_context.h" + #include "mock_engine.h" -#include "mock_request.h" +#include "selftests/mock_request.h" struct mock_ring { struct intel_ring base; @@ -268,8 +271,9 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, timer_setup(&engine->hw_delay, hw_delay_complete, 0); INIT_LIST_HEAD(&engine->hw_queue); - if (pin_context(i915->kernel_context, &engine->base, - &engine->base.kernel_context)) + engine->base.kernel_context = + intel_context_pin(i915->kernel_context, &engine->base); + if (IS_ERR(engine->base.kernel_context)) goto err_breadcrumbs; return &engine->base; diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.h b/drivers/gpu/drm/i915/gt/mock_engine.h similarity index 98% rename from drivers/gpu/drm/i915/selftests/mock_engine.h rename to drivers/gpu/drm/i915/gt/mock_engine.h index b9cc3a245f16..44b35a85e9d1 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.h +++ b/drivers/gpu/drm/i915/gt/mock_engine.h @@ -29,7 +29,7 @@ #include #include -#include "../intel_ringbuffer.h" +#include "gt/intel_engine.h" struct mock_engine { struct intel_engine_cs base; diff --git a/drivers/gpu/drm/i915/selftests/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c similarity index 100% rename from drivers/gpu/drm/i915/selftests/intel_engine_cs.c rename to drivers/gpu/drm/i915/gt/selftest_engine_cs.c diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c similarity index 99% rename from drivers/gpu/drm/i915/selftests/intel_hangcheck.c rename to drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 2fd33aad8683..acd33aa46068 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -24,14 +24,14 @@ #include -#include "../i915_selftest.h" -#include "i915_random.h" -#include "igt_flush_test.h" -#include "igt_reset.h" -#include "igt_wedge_me.h" +#include "i915_selftest.h" +#include "selftests/i915_random.h" +#include "selftests/igt_flush_test.h" +#include "selftests/igt_reset.h" +#include "selftests/igt_wedge_me.h" -#include "mock_context.h" -#include "mock_drm.h" +#include "selftests/mock_context.h" +#include "selftests/mock_drm.h" #define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */ diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c similarity index 99% rename from drivers/gpu/drm/i915/selftests/intel_lrc.c rename to drivers/gpu/drm/i915/gt/selftest_lrc.c index fbee030db940..cd0551f97c2f 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -6,15 +6,13 @@ #include -#include "../i915_reset.h" - -#include "../i915_selftest.h" -#include "igt_flush_test.h" -#include "igt_live_test.h" -#include "igt_spinner.h" -#include "i915_random.h" - -#include "mock_context.h" +#include "gt/intel_reset.h" +#include "i915_selftest.h" +#include "selftests/i915_random.h" +#include "selftests/igt_flush_test.h" +#include "selftests/igt_live_test.h" +#include "selftests/igt_spinner.h" +#include "selftests/mock_context.h" static int live_sanitycheck(void *arg) { diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c similarity index 98% rename from drivers/gpu/drm/i915/selftests/intel_workarounds.c rename to drivers/gpu/drm/i915/gt/selftest_workarounds.c index aa841e4d3031..e61e47421ed2 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -4,15 +4,15 @@ * Copyright © 2018 Intel Corporation */ -#include "../i915_selftest.h" -#include "../i915_reset.h" +#include "i915_selftest.h" +#include "intel_reset.h" -#include "igt_flush_test.h" -#include "igt_reset.h" -#include "igt_spinner.h" -#include "igt_wedge_me.h" -#include "mock_context.h" -#include "mock_drm.h" +#include "selftests/igt_flush_test.h" +#include "selftests/igt_reset.h" +#include "selftests/igt_spinner.h" +#include "selftests/igt_wedge_me.h" +#include "selftests/mock_context.h" +#include "selftests/mock_drm.h" static const struct wo_register { enum intel_platform platform; diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 503d548a55f7..e9fadcb4d592 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -25,8 +25,9 @@ * */ +#include "gt/intel_engine.h" + #include "i915_drv.h" -#include "intel_ringbuffer.h" /** * DOC: batch buffer command parser diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e0bfdf31032c..b3fbd9e361ae 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -32,7 +32,8 @@ #include #include -#include "i915_reset.h" +#include "gt/intel_reset.h" + #include "intel_dp.h" #include "intel_drv.h" #include "intel_fbc.h" diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 6354c68c94b3..ac416d2c02ca 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -47,10 +47,12 @@ #include #include +#include "gt/intel_workarounds.h" +#include "gt/intel_reset.h" + #include "i915_drv.h" #include "i915_pmu.h" #include "i915_query.h" -#include "i915_reset.h" #include "i915_trace.h" #include "i915_vgpu.h" #include "intel_audio.h" @@ -62,7 +64,6 @@ #include "intel_pm.h" #include "intel_sprite.h" #include "intel_uc.h" -#include "intel_workarounds.h" static struct drm_driver driver; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e6f9a5ddac3d..d37832ffb471 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -62,18 +62,19 @@ #include "i915_reg.h" #include "i915_utils.h" +#include "gt/intel_lrc.h" +#include "gt/intel_engine.h" +#include "gt/intel_workarounds.h" + #include "intel_bios.h" #include "intel_device_info.h" #include "intel_display.h" #include "intel_dpll_mgr.h" #include "intel_frontbuffer.h" -#include "intel_lrc.h" #include "intel_opregion.h" -#include "intel_ringbuffer.h" #include "intel_uc.h" #include "intel_uncore.h" #include "intel_wopcm.h" -#include "intel_workarounds.h" #include "i915_gem.h" #include "i915_gem_context.h" diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a2bf94c3cfca..21adeb340357 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -39,19 +39,20 @@ #include #include +#include "gt/intel_mocs.h" +#include "gt/intel_reset.h" +#include "gt/intel_workarounds.h" + #include "i915_drv.h" #include "i915_gem_clflush.h" #include "i915_gemfs.h" #include "i915_globals.h" -#include "i915_reset.h" #include "i915_trace.h" #include "i915_vgpu.h" #include "intel_drv.h" #include "intel_frontbuffer.h" -#include "intel_mocs.h" #include "intel_pm.h" -#include "intel_workarounds.h" static void i915_gem_flush_free_objects(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index c02a30612df9..37dff694456c 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -86,13 +86,16 @@ */ #include + #include + +#include "gt/intel_lrc_reg.h" +#include "gt/intel_workarounds.h" + #include "i915_drv.h" #include "i915_globals.h" #include "i915_trace.h" #include "i915_user_extensions.h" -#include "intel_lrc_reg.h" -#include "intel_workarounds.h" #define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1 << 1) #define I915_CONTEXT_PARAM_VM 0x9 diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 23dcb01bfd82..cec278ab04e2 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -27,9 +27,10 @@ #include "i915_gem_context_types.h" +#include "gt/intel_context.h" + #include "i915_gem.h" #include "i915_scheduler.h" -#include "intel_context.h" #include "intel_device_info.h" struct drm_device; diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h index e2ec58b10fb2..d282a6ab3b9f 100644 --- a/drivers/gpu/drm/i915/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/i915_gem_context_types.h @@ -17,8 +17,9 @@ #include #include +#include "gt/intel_context_types.h" + #include "i915_scheduler.h" -#include "intel_context_types.h" struct pid; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 3557233de0f5..8f5db787b7f2 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -37,7 +37,6 @@ #include "i915_drv.h" #include "i915_vgpu.h" -#include "i915_reset.h" #include "i915_trace.h" #include "intel_drv.h" #include "intel_frontbuffer.h" diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index b51e779732c3..f85b75db1f98 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -38,8 +38,8 @@ #include #include +#include "gt/intel_reset.h" #include "i915_request.h" -#include "i915_reset.h" #include "i915_selftest.h" #include "i915_timeline.h" diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 5dc761e85d9d..b419d0f59275 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -13,8 +13,9 @@ #include +#include "gt/intel_engine.h" + #include "intel_device_info.h" -#include "intel_ringbuffer.h" #include "intel_uc_fw.h" #include "i915_gem.h" diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 56da457bed21..a87f790335c1 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -195,6 +195,8 @@ #include #include +#include "gt/intel_lrc_reg.h" + #include "i915_drv.h" #include "i915_oa_hsw.h" #include "i915_oa_bdw.h" @@ -210,7 +212,6 @@ #include "i915_oa_cflgt3.h" #include "i915_oa_cnl.h" #include "i915_oa_icl.h" -#include "intel_lrc_reg.h" /* HW requires this to be a power of two, between 128k and 16M, though driver * is currently generally designed assuming the largest 16M size is used such diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 46a52da3db29..35e502481f29 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -6,8 +6,10 @@ #include #include + +#include "gt/intel_engine.h" + #include "i915_pmu.h" -#include "intel_ringbuffer.h" #include "i915_drv.h" /* Frequency for the sampling timer for events which need it. */ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index b1f00b59bb95..64ca8b3ea12f 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -32,7 +32,6 @@ #include "i915_active.h" #include "i915_drv.h" #include "i915_globals.h" -#include "i915_reset.h" #include "intel_pm.h" struct execute_cb { diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index f1af3916a808..166a457884b2 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -9,8 +9,8 @@ #include +#include "gt/intel_engine_types.h" #include "i915_priolist_types.h" -#include "intel_engine_types.h" struct drm_i915_private; struct i915_request; diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 12893304c8f8..b5286f3d8146 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -8,9 +8,10 @@ #include +#include "gt/intel_engine.h" + #include "i915_drv.h" #include "intel_drv.h" -#include "intel_ringbuffer.h" #undef TRACE_SYSTEM #define TRACE_SYSTEM i915 diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 36726392e737..d4d308b6d1d8 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -22,11 +22,12 @@ * */ +#include "gt/intel_engine.h" + #include "i915_vma.h" #include "i915_drv.h" #include "i915_globals.h" -#include "intel_ringbuffer.h" #include "intel_frontbuffer.h" #include diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 1598c7079ffd..5a2e17d6146b 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -27,9 +27,11 @@ #include -#include "intel_engine_types.h" +#include "gt/intel_engine_types.h" +#include "gt/intel_context_types.h" +#include "gt/intel_sseu.h" + #include "intel_display.h" -#include "intel_sseu.h" struct drm_printer; struct drm_i915_private; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 62d663e506ab..c3d1d38ccf4d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -46,7 +46,6 @@ #include "i915_drv.h" #include "i915_gem_clflush.h" -#include "i915_reset.h" #include "i915_trace.h" #include "intel_atomic_plane.h" #include "intel_color.h" diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 37f60cb8e9e1..1b6d6403ee92 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -25,8 +25,9 @@ #include #include +#include "gt/intel_lrc_reg.h" + #include "intel_guc_submission.h" -#include "intel_lrc_reg.h" #include "i915_drv.h" #define GUC_PREEMPT_FINISHED 0x1 diff --git a/drivers/gpu/drm/i915/intel_guc_submission.h b/drivers/gpu/drm/i915/intel_guc_submission.h index aa5e6749c925..7d823a513b9c 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/intel_guc_submission.h @@ -27,9 +27,10 @@ #include +#include "gt/intel_engine_types.h" + #include "i915_gem.h" #include "i915_selftest.h" -#include "intel_engine_types.h" struct drm_i915_private; diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 488dffba04d2..30a8e376d19f 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -22,11 +22,11 @@ * */ +#include "gt/intel_reset.h" #include "intel_uc.h" #include "intel_guc_submission.h" #include "intel_guc.h" #include "i915_drv.h" -#include "i915_reset.h" static void guc_free_load_err_log(struct intel_guc *guc); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index e1cb22f03e8e..6f52ca881173 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -24,8 +24,9 @@ #include -#include "../i915_reset.h" -#include "../i915_selftest.h" +#include "gt/intel_reset.h" +#include "i915_selftest.h" + #include "i915_random.h" #include "igt_flush_test.h" #include "igt_live_test.h" diff --git a/drivers/gpu/drm/i915/selftests/igt_reset.c b/drivers/gpu/drm/i915/selftests/igt_reset.c index 208a966da8ca..4f31b137c428 100644 --- a/drivers/gpu/drm/i915/selftests/igt_reset.c +++ b/drivers/gpu/drm/i915/selftests/igt_reset.c @@ -6,8 +6,9 @@ #include "igt_reset.h" +#include "gt/intel_engine.h" + #include "../i915_drv.h" -#include "../intel_ringbuffer.h" void igt_global_reset_lock(struct drm_i915_private *i915) { diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.h b/drivers/gpu/drm/i915/selftests/igt_spinner.h index 391777c76dc7..d312e7cdab68 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.h +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.h @@ -9,9 +9,10 @@ #include "../i915_selftest.h" +#include "gt/intel_engine.h" + #include "../i915_drv.h" #include "../i915_request.h" -#include "../intel_ringbuffer.h" #include "../i915_gem_context.h" struct igt_spinner { diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 60bbf8b4df40..f444ee5add27 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -25,7 +25,8 @@ #include #include -#include "mock_engine.h" +#include "gt/mock_engine.h" + #include "mock_context.h" #include "mock_request.h" #include "mock_gem_device.h" diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c index d1a7c9608712..f739ba63057f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_request.c +++ b/drivers/gpu/drm/i915/selftests/mock_request.c @@ -22,7 +22,8 @@ * */ -#include "mock_engine.h" +#include "gt/mock_engine.h" + #include "mock_request.h" struct i915_request * From d91e657876a96af4f00cc374e26a7a9e8c40d6de Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 24 Apr 2019 21:07:13 +0100 Subject: [PATCH 025/591] drm/i915: Introduce struct intel_wakeref For controlling runtime pm of the GT and engines, we would like to have a callback to do extra work the first time we wake up and the last time we drop the wakeref. This first/last access needs serialisation and so we encompass a mutex with the regular intel_wakeref_t tracker. v2: Drop the _once naming and report the errors. Signed-off-by: Chris Wilson Cc; Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190424200717.1686-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/Makefile.header-test | 3 +- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/intel_wakeref.c | 61 ++++++++++ drivers/gpu/drm/i915/intel_wakeref.h | 133 ++++++++++++++++++++++ 5 files changed, 198 insertions(+), 3 deletions(-) create mode 100644 drivers/gpu/drm/i915/intel_wakeref.c create mode 100644 drivers/gpu/drm/i915/intel_wakeref.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 40130cf5c003..233bad5e361f 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -50,6 +50,7 @@ i915-y += i915_drv.o \ intel_device_info.o \ intel_pm.o \ intel_runtime_pm.o \ + intel_wakeref.o \ intel_uncore.o # core library code diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test index 96a5d90629ec..e6b3e7588860 100644 --- a/drivers/gpu/drm/i915/Makefile.header-test +++ b/drivers/gpu/drm/i915/Makefile.header-test @@ -31,7 +31,8 @@ header_test := \ intel_psr.h \ intel_sdvo.h \ intel_sprite.h \ - intel_tv.h + intel_tv.h \ + intel_wakeref.h quiet_cmd_header_test = HDRTEST $@ cmd_header_test = echo "\#include \"$( $@ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d37832ffb471..437e394d9fa6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -74,6 +74,7 @@ #include "intel_opregion.h" #include "intel_uc.h" #include "intel_uncore.h" +#include "intel_wakeref.h" #include "intel_wopcm.h" #include "i915_gem.h" @@ -134,8 +135,6 @@ bool i915_error_injected(void); __i915_printk(i915, i915_error_injected() ? KERN_DEBUG : KERN_ERR, \ fmt, ##__VA_ARGS__) -typedef depot_stack_handle_t intel_wakeref_t; - enum hpd_pin { HPD_NONE = 0, HPD_TV = HPD_NONE, /* TV is known to be unreliable */ diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c new file mode 100644 index 000000000000..1f94bc4ff9e4 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_wakeref.c @@ -0,0 +1,61 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "intel_drv.h" +#include "intel_wakeref.h" + +int __intel_wakeref_get_first(struct drm_i915_private *i915, + struct intel_wakeref *wf, + int (*fn)(struct intel_wakeref *wf)) +{ + /* + * Treat get/put as different subclasses, as we may need to run + * the put callback from under the shrinker and do not want to + * cross-contanimate that callback with any extra work performed + * upon acquiring the wakeref. + */ + mutex_lock_nested(&wf->mutex, SINGLE_DEPTH_NESTING); + if (!atomic_read(&wf->count)) { + int err; + + wf->wakeref = intel_runtime_pm_get(i915); + + err = fn(wf); + if (unlikely(err)) { + intel_runtime_pm_put(i915, wf->wakeref); + mutex_unlock(&wf->mutex); + return err; + } + + smp_mb__before_atomic(); /* release wf->count */ + } + atomic_inc(&wf->count); + mutex_unlock(&wf->mutex); + + return 0; +} + +int __intel_wakeref_put_last(struct drm_i915_private *i915, + struct intel_wakeref *wf, + int (*fn)(struct intel_wakeref *wf)) +{ + int err; + + err = fn(wf); + if (likely(!err)) + intel_runtime_pm_put(i915, wf->wakeref); + else + atomic_inc(&wf->count); + mutex_unlock(&wf->mutex); + + return err; +} + +void __intel_wakeref_init(struct intel_wakeref *wf, struct lock_class_key *key) +{ + __mutex_init(&wf->mutex, "wakeref", key); + atomic_set(&wf->count, 0); +} diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h new file mode 100644 index 000000000000..a979d638344b --- /dev/null +++ b/drivers/gpu/drm/i915/intel_wakeref.h @@ -0,0 +1,133 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_WAKEREF_H +#define INTEL_WAKEREF_H + +#include +#include +#include + +struct drm_i915_private; + +typedef depot_stack_handle_t intel_wakeref_t; + +struct intel_wakeref { + atomic_t count; + struct mutex mutex; + intel_wakeref_t wakeref; +}; + +void __intel_wakeref_init(struct intel_wakeref *wf, + struct lock_class_key *key); +#define intel_wakeref_init(wf) do { \ + static struct lock_class_key __key; \ + \ + __intel_wakeref_init((wf), &__key); \ +} while (0) + +int __intel_wakeref_get_first(struct drm_i915_private *i915, + struct intel_wakeref *wf, + int (*fn)(struct intel_wakeref *wf)); +int __intel_wakeref_put_last(struct drm_i915_private *i915, + struct intel_wakeref *wf, + int (*fn)(struct intel_wakeref *wf)); + +/** + * intel_wakeref_get: Acquire the wakeref + * @i915: the drm_i915_private device + * @wf: the wakeref + * @fn: callback for acquired the wakeref, called only on first acquire. + * + * Acquire a hold on the wakeref. The first user to do so, will acquire + * the runtime pm wakeref and then call the @fn underneath the wakeref + * mutex. + * + * Note that @fn is allowed to fail, in which case the runtime-pm wakeref + * will be released and the acquisition unwound, and an error reported. + * + * Returns: 0 if the wakeref was acquired successfully, or a negative error + * code otherwise. + */ +static inline int +intel_wakeref_get(struct drm_i915_private *i915, + struct intel_wakeref *wf, + int (*fn)(struct intel_wakeref *wf)) +{ + if (unlikely(!atomic_inc_not_zero(&wf->count))) + return __intel_wakeref_get_first(i915, wf, fn); + + return 0; +} + +/** + * intel_wakeref_put: Release the wakeref + * @i915: the drm_i915_private device + * @wf: the wakeref + * @fn: callback for releasing the wakeref, called only on final release. + * + * Release our hold on the wakeref. When there are no more users, + * the runtime pm wakeref will be released after the @fn callback is called + * underneath the wakeref mutex. + * + * Note that @fn is allowed to fail, in which case the runtime-pm wakeref + * is retained and an error reported. + * + * Returns: 0 if the wakeref was released successfully, or a negative error + * code otherwise. + */ +static inline int +intel_wakeref_put(struct drm_i915_private *i915, + struct intel_wakeref *wf, + int (*fn)(struct intel_wakeref *wf)) +{ + if (atomic_dec_and_mutex_lock(&wf->count, &wf->mutex)) + return __intel_wakeref_put_last(i915, wf, fn); + + return 0; +} + +/** + * intel_wakeref_lock: Lock the wakeref (mutex) + * @wf: the wakeref + * + * Locks the wakeref to prevent it being acquired or released. New users + * can still adjust the counter, but the wakeref itself (and callback) + * cannot be acquired or released. + */ +static inline void +intel_wakeref_lock(struct intel_wakeref *wf) + __acquires(wf->mutex) +{ + mutex_lock(&wf->mutex); +} + +/** + * intel_wakeref_unlock: Unlock the wakeref + * @wf: the wakeref + * + * Releases a previously acquired intel_wakeref_lock(). + */ +static inline void +intel_wakeref_unlock(struct intel_wakeref *wf) + __releases(wf->mutex) +{ + mutex_unlock(&wf->mutex); +} + +/** + * intel_wakeref_active: Query whether the wakeref is currently held + * @wf: the wakeref + * + * Returns: true if the wakeref is currently held. + */ +static inline bool +intel_wakeref_active(struct intel_wakeref *wf) +{ + return atomic_read(&wf->count); +} + +#endif /* INTEL_WAKEREF_H */ From 23c3c3d04fa7fcc60c91f1368cc5652a6774626b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 24 Apr 2019 21:07:14 +0100 Subject: [PATCH 026/591] drm/i915: Pull the GEM powermangement coupling into its own file Split out the powermanagement portion (GT wakeref, suspend/resume) of GEM from i915_gem.c into its own file. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190424200717.1686-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/Makefile.header-test | 1 + drivers/gpu/drm/i915/i915_debugfs.c | 4 +- drivers/gpu/drm/i915/i915_drv.h | 12 +- drivers/gpu/drm/i915/i915_gem.c | 363 +---------------- drivers/gpu/drm/i915/i915_gem_pm.c | 365 ++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_pm.h | 28 ++ .../gpu/drm/i915/selftests/i915_gem_context.c | 2 +- .../gpu/drm/i915/selftests/i915_gem_object.c | 8 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 10 +- 10 files changed, 418 insertions(+), 376 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_gem_pm.c create mode 100644 drivers/gpu/drm/i915/i915_gem_pm.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 233bad5e361f..858642c7bc40 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -97,6 +97,7 @@ i915-y += \ i915_gem_internal.o \ i915_gem.o \ i915_gem_object.o \ + i915_gem_pm.o \ i915_gem_render_state.o \ i915_gem_shrinker.o \ i915_gem_stolen.o \ diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test index e6b3e7588860..702e3a7ade4c 100644 --- a/drivers/gpu/drm/i915/Makefile.header-test +++ b/drivers/gpu/drm/i915/Makefile.header-test @@ -5,6 +5,7 @@ header_test := \ i915_active_types.h \ i915_gem_context_types.h \ + i915_gem_pm.h \ i915_priolist_types.h \ i915_scheduler_types.h \ i915_timeline_types.h \ diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index b3fbd9e361ae..f77263d42253 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3942,8 +3942,8 @@ i915_drop_caches_set(void *data, u64 val) if (val & DROP_IDLE) { do { if (READ_ONCE(i915->gt.active_requests)) - flush_delayed_work(&i915->gt.retire_work); - drain_delayed_work(&i915->gt.idle_work); + flush_delayed_work(&i915->gem.retire_work); + drain_delayed_work(&i915->gem.idle_work); } while (READ_ONCE(i915->gt.awake)); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 437e394d9fa6..45e027f45e62 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2020,6 +2020,12 @@ struct drm_i915_private { */ intel_wakeref_t awake; + ktime_t last_init_time; + + struct i915_vma *scratch; + } gt; + + struct { /** * We leave the user IRQ off as much as possible, * but this means that requests will finish and never @@ -2037,11 +2043,7 @@ struct drm_i915_private { * off the idle_work. */ struct delayed_work idle_work; - - ktime_t last_init_time; - - struct i915_vma *scratch; - } gt; + } gem; /* For i945gm vblank irq vs. C3 workaround */ struct { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 21adeb340357..7f833c97138e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -46,7 +46,7 @@ #include "i915_drv.h" #include "i915_gem_clflush.h" #include "i915_gemfs.h" -#include "i915_globals.h" +#include "i915_gem_pm.h" #include "i915_trace.h" #include "i915_vgpu.h" @@ -103,105 +103,6 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, spin_unlock(&dev_priv->mm.object_stat_lock); } -static void __i915_gem_park(struct drm_i915_private *i915) -{ - intel_wakeref_t wakeref; - - GEM_TRACE("\n"); - - lockdep_assert_held(&i915->drm.struct_mutex); - GEM_BUG_ON(i915->gt.active_requests); - GEM_BUG_ON(!list_empty(&i915->gt.active_rings)); - - if (!i915->gt.awake) - return; - - /* - * Be paranoid and flush a concurrent interrupt to make sure - * we don't reactivate any irq tasklets after parking. - * - * FIXME: Note that even though we have waited for execlists to be idle, - * there may still be an in-flight interrupt even though the CSB - * is now empty. synchronize_irq() makes sure that a residual interrupt - * is completed before we continue, but it doesn't prevent the HW from - * raising a spurious interrupt later. To complete the shield we should - * coordinate disabling the CS irq with flushing the interrupts. - */ - synchronize_irq(i915->drm.irq); - - intel_engines_park(i915); - i915_timelines_park(i915); - - i915_pmu_gt_parked(i915); - i915_vma_parked(i915); - - wakeref = fetch_and_zero(&i915->gt.awake); - GEM_BUG_ON(!wakeref); - - if (INTEL_GEN(i915) >= 6) - gen6_rps_idle(i915); - - intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref); - - i915_globals_park(); -} - -void i915_gem_park(struct drm_i915_private *i915) -{ - GEM_TRACE("\n"); - - lockdep_assert_held(&i915->drm.struct_mutex); - GEM_BUG_ON(i915->gt.active_requests); - - if (!i915->gt.awake) - return; - - /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */ - mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100)); -} - -void i915_gem_unpark(struct drm_i915_private *i915) -{ - GEM_TRACE("\n"); - - lockdep_assert_held(&i915->drm.struct_mutex); - GEM_BUG_ON(!i915->gt.active_requests); - assert_rpm_wakelock_held(i915); - - if (i915->gt.awake) - return; - - /* - * It seems that the DMC likes to transition between the DC states a lot - * when there are no connected displays (no active power domains) during - * command submission. - * - * This activity has negative impact on the performance of the chip with - * huge latencies observed in the interrupt handler and elsewhere. - * - * Work around it by grabbing a GT IRQ power domain whilst there is any - * GT activity, preventing any DC state transitions. - */ - i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); - GEM_BUG_ON(!i915->gt.awake); - - i915_globals_unpark(); - - intel_enable_gt_powersave(i915); - i915_update_gfx_val(i915); - if (INTEL_GEN(i915) >= 6) - gen6_rps_busy(i915); - i915_pmu_gt_unparked(i915); - - intel_engines_unpark(i915); - - i915_queue_hangcheck(i915); - - queue_delayed_work(i915->wq, - &i915->gt.retire_work, - round_jiffies_up_relative(HZ)); -} - int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -2088,7 +1989,7 @@ static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) if (!err) break; - } while (flush_delayed_work(&dev_priv->gt.retire_work)); + } while (flush_delayed_work(&dev_priv->gem.retire_work)); return err; } @@ -2848,132 +2749,6 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, return 0; } -static void -i915_gem_retire_work_handler(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), gt.retire_work.work); - struct drm_device *dev = &dev_priv->drm; - - /* Come back later if the device is busy... */ - if (mutex_trylock(&dev->struct_mutex)) { - i915_retire_requests(dev_priv); - mutex_unlock(&dev->struct_mutex); - } - - /* - * Keep the retire handler running until we are finally idle. - * We do not need to do this test under locking as in the worst-case - * we queue the retire worker once too often. - */ - if (READ_ONCE(dev_priv->gt.awake)) - queue_delayed_work(dev_priv->wq, - &dev_priv->gt.retire_work, - round_jiffies_up_relative(HZ)); -} - -static bool switch_to_kernel_context_sync(struct drm_i915_private *i915, - unsigned long mask) -{ - bool result = true; - - /* - * Even if we fail to switch, give whatever is running a small chance - * to save itself before we report the failure. Yes, this may be a - * false positive due to e.g. ENOMEM, caveat emptor! - */ - if (i915_gem_switch_to_kernel_context(i915, mask)) - result = false; - - if (i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED | - I915_WAIT_FOR_IDLE_BOOST, - I915_GEM_IDLE_TIMEOUT)) - result = false; - - if (!result) { - if (i915_modparams.reset) { /* XXX hide warning from gem_eio */ - dev_err(i915->drm.dev, - "Failed to idle engines, declaring wedged!\n"); - GEM_TRACE_DUMP(); - } - - /* Forcibly cancel outstanding work and leave the gpu quiet. */ - i915_gem_set_wedged(i915); - } - - i915_retire_requests(i915); /* ensure we flush after wedging */ - return result; -} - -static bool load_power_context(struct drm_i915_private *i915) -{ - /* Force loading the kernel context on all engines */ - if (!switch_to_kernel_context_sync(i915, ALL_ENGINES)) - return false; - - /* - * Immediately park the GPU so that we enable powersaving and - * treat it as idle. The next time we issue a request, we will - * unpark and start using the engine->pinned_default_state, otherwise - * it is in limbo and an early reset may fail. - */ - __i915_gem_park(i915); - - return true; -} - -static void -i915_gem_idle_work_handler(struct work_struct *work) -{ - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), gt.idle_work.work); - bool rearm_hangcheck; - - if (!READ_ONCE(i915->gt.awake)) - return; - - if (READ_ONCE(i915->gt.active_requests)) - return; - - rearm_hangcheck = - cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); - - if (!mutex_trylock(&i915->drm.struct_mutex)) { - /* Currently busy, come back later */ - mod_delayed_work(i915->wq, - &i915->gt.idle_work, - msecs_to_jiffies(50)); - goto out_rearm; - } - - /* - * Flush out the last user context, leaving only the pinned - * kernel context resident. Should anything unfortunate happen - * while we are idle (such as the GPU being power cycled), no users - * will be harmed. - */ - if (!work_pending(&i915->gt.idle_work.work) && - !i915->gt.active_requests) { - ++i915->gt.active_requests; /* don't requeue idle */ - - switch_to_kernel_context_sync(i915, i915->gt.active_engines); - - if (!--i915->gt.active_requests) { - __i915_gem_park(i915); - rearm_hangcheck = false; - } - } - - mutex_unlock(&i915->drm.struct_mutex); - -out_rearm: - if (rearm_hangcheck) { - GEM_BUG_ON(!i915->gt.awake); - i915_queue_hangcheck(i915); - } -} - void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) { struct drm_i915_private *i915 = to_i915(gem->dev); @@ -4389,133 +4164,6 @@ void i915_gem_sanitize(struct drm_i915_private *i915) mutex_unlock(&i915->drm.struct_mutex); } -void i915_gem_suspend(struct drm_i915_private *i915) -{ - intel_wakeref_t wakeref; - - GEM_TRACE("\n"); - - wakeref = intel_runtime_pm_get(i915); - - flush_workqueue(i915->wq); - - mutex_lock(&i915->drm.struct_mutex); - - /* - * We have to flush all the executing contexts to main memory so - * that they can saved in the hibernation image. To ensure the last - * context image is coherent, we have to switch away from it. That - * leaves the i915->kernel_context still active when - * we actually suspend, and its image in memory may not match the GPU - * state. Fortunately, the kernel_context is disposable and we do - * not rely on its state. - */ - switch_to_kernel_context_sync(i915, i915->gt.active_engines); - - mutex_unlock(&i915->drm.struct_mutex); - i915_reset_flush(i915); - - drain_delayed_work(&i915->gt.retire_work); - - /* - * As the idle_work is rearming if it detects a race, play safe and - * repeat the flush until it is definitely idle. - */ - drain_delayed_work(&i915->gt.idle_work); - - /* - * Assert that we successfully flushed all the work and - * reset the GPU back to its idle, low power state. - */ - GEM_BUG_ON(i915->gt.awake); - - intel_uc_suspend(i915); - - intel_runtime_pm_put(i915, wakeref); -} - -void i915_gem_suspend_late(struct drm_i915_private *i915) -{ - struct drm_i915_gem_object *obj; - struct list_head *phases[] = { - &i915->mm.unbound_list, - &i915->mm.bound_list, - NULL - }, **phase; - - /* - * Neither the BIOS, ourselves or any other kernel - * expects the system to be in execlists mode on startup, - * so we need to reset the GPU back to legacy mode. And the only - * known way to disable logical contexts is through a GPU reset. - * - * So in order to leave the system in a known default configuration, - * always reset the GPU upon unload and suspend. Afterwards we then - * clean up the GEM state tracking, flushing off the requests and - * leaving the system in a known idle state. - * - * Note that is of the upmost importance that the GPU is idle and - * all stray writes are flushed *before* we dismantle the backing - * storage for the pinned objects. - * - * However, since we are uncertain that resetting the GPU on older - * machines is a good idea, we don't - just in case it leaves the - * machine in an unusable condition. - */ - - mutex_lock(&i915->drm.struct_mutex); - for (phase = phases; *phase; phase++) { - list_for_each_entry(obj, *phase, mm.link) - WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); - } - mutex_unlock(&i915->drm.struct_mutex); - - intel_uc_sanitize(i915); - i915_gem_sanitize(i915); -} - -void i915_gem_resume(struct drm_i915_private *i915) -{ - GEM_TRACE("\n"); - - WARN_ON(i915->gt.awake); - - mutex_lock(&i915->drm.struct_mutex); - intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); - - i915_gem_restore_gtt_mappings(i915); - i915_gem_restore_fences(i915); - - /* - * As we didn't flush the kernel context before suspend, we cannot - * guarantee that the context image is complete. So let's just reset - * it and start again. - */ - intel_gt_resume(i915); - - if (i915_gem_init_hw(i915)) - goto err_wedged; - - intel_uc_resume(i915); - - /* Always reload a context for powersaving. */ - if (!load_power_context(i915)) - goto err_wedged; - -out_unlock: - intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); - mutex_unlock(&i915->drm.struct_mutex); - return; - -err_wedged: - if (!i915_reset_failed(i915)) { - dev_err(i915->drm.dev, - "Failed to re-initialize GPU, declaring it wedged!\n"); - i915_gem_set_wedged(i915); - } - goto out_unlock; -} - void i915_gem_init_swizzling(struct drm_i915_private *dev_priv) { if (INTEL_GEN(dev_priv) < 5 || @@ -4698,7 +4346,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) } /* Flush the default context image to memory, and enable powersaving. */ - if (!load_power_context(i915)) { + if (!i915_gem_load_power_context(i915)) { err = -EIO; goto err_active; } @@ -5113,11 +4761,8 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) INIT_LIST_HEAD(&dev_priv->gt.closed_vma); i915_gem_init__mm(dev_priv); + i915_gem_init__pm(dev_priv); - INIT_DELAYED_WORK(&dev_priv->gt.retire_work, - i915_gem_retire_work_handler); - INIT_DELAYED_WORK(&dev_priv->gt.idle_work, - i915_gem_idle_work_handler); init_waitqueue_head(&dev_priv->gpu_error.wait_queue); init_waitqueue_head(&dev_priv->gpu_error.reset_queue); mutex_init(&dev_priv->gpu_error.wedge_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c new file mode 100644 index 000000000000..9fb0e8d567a2 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_pm.c @@ -0,0 +1,365 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "i915_gem_pm.h" +#include "i915_globals.h" +#include "intel_pm.h" + +static void __i915_gem_park(struct drm_i915_private *i915) +{ + intel_wakeref_t wakeref; + + GEM_TRACE("\n"); + + lockdep_assert_held(&i915->drm.struct_mutex); + GEM_BUG_ON(i915->gt.active_requests); + GEM_BUG_ON(!list_empty(&i915->gt.active_rings)); + + if (!i915->gt.awake) + return; + + /* + * Be paranoid and flush a concurrent interrupt to make sure + * we don't reactivate any irq tasklets after parking. + * + * FIXME: Note that even though we have waited for execlists to be idle, + * there may still be an in-flight interrupt even though the CSB + * is now empty. synchronize_irq() makes sure that a residual interrupt + * is completed before we continue, but it doesn't prevent the HW from + * raising a spurious interrupt later. To complete the shield we should + * coordinate disabling the CS irq with flushing the interrupts. + */ + synchronize_irq(i915->drm.irq); + + intel_engines_park(i915); + i915_timelines_park(i915); + + i915_pmu_gt_parked(i915); + i915_vma_parked(i915); + + wakeref = fetch_and_zero(&i915->gt.awake); + GEM_BUG_ON(!wakeref); + + if (INTEL_GEN(i915) >= 6) + gen6_rps_idle(i915); + + intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref); + + i915_globals_park(); +} + +static bool switch_to_kernel_context_sync(struct drm_i915_private *i915, + unsigned long mask) +{ + bool result = true; + + /* + * Even if we fail to switch, give whatever is running a small chance + * to save itself before we report the failure. Yes, this may be a + * false positive due to e.g. ENOMEM, caveat emptor! + */ + if (i915_gem_switch_to_kernel_context(i915, mask)) + result = false; + + if (i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED | + I915_WAIT_FOR_IDLE_BOOST, + I915_GEM_IDLE_TIMEOUT)) + result = false; + + if (!result) { + if (i915_modparams.reset) { /* XXX hide warning from gem_eio */ + dev_err(i915->drm.dev, + "Failed to idle engines, declaring wedged!\n"); + GEM_TRACE_DUMP(); + } + + /* Forcibly cancel outstanding work and leave the gpu quiet. */ + i915_gem_set_wedged(i915); + } + + i915_retire_requests(i915); /* ensure we flush after wedging */ + return result; +} + +static void idle_work_handler(struct work_struct *work) +{ + struct drm_i915_private *i915 = + container_of(work, typeof(*i915), gem.idle_work.work); + bool rearm_hangcheck; + + if (!READ_ONCE(i915->gt.awake)) + return; + + if (READ_ONCE(i915->gt.active_requests)) + return; + + rearm_hangcheck = + cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); + + if (!mutex_trylock(&i915->drm.struct_mutex)) { + /* Currently busy, come back later */ + mod_delayed_work(i915->wq, + &i915->gem.idle_work, + msecs_to_jiffies(50)); + goto out_rearm; + } + + /* + * Flush out the last user context, leaving only the pinned + * kernel context resident. Should anything unfortunate happen + * while we are idle (such as the GPU being power cycled), no users + * will be harmed. + */ + if (!work_pending(&i915->gem.idle_work.work) && + !i915->gt.active_requests) { + ++i915->gt.active_requests; /* don't requeue idle */ + + switch_to_kernel_context_sync(i915, i915->gt.active_engines); + + if (!--i915->gt.active_requests) { + __i915_gem_park(i915); + rearm_hangcheck = false; + } + } + + mutex_unlock(&i915->drm.struct_mutex); + +out_rearm: + if (rearm_hangcheck) { + GEM_BUG_ON(!i915->gt.awake); + i915_queue_hangcheck(i915); + } +} + +static void retire_work_handler(struct work_struct *work) +{ + struct drm_i915_private *i915 = + container_of(work, typeof(*i915), gem.retire_work.work); + + /* Come back later if the device is busy... */ + if (mutex_trylock(&i915->drm.struct_mutex)) { + i915_retire_requests(i915); + mutex_unlock(&i915->drm.struct_mutex); + } + + /* + * Keep the retire handler running until we are finally idle. + * We do not need to do this test under locking as in the worst-case + * we queue the retire worker once too often. + */ + if (READ_ONCE(i915->gt.awake)) + queue_delayed_work(i915->wq, + &i915->gem.retire_work, + round_jiffies_up_relative(HZ)); +} + +void i915_gem_park(struct drm_i915_private *i915) +{ + GEM_TRACE("\n"); + + lockdep_assert_held(&i915->drm.struct_mutex); + GEM_BUG_ON(i915->gt.active_requests); + + if (!i915->gt.awake) + return; + + /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */ + mod_delayed_work(i915->wq, &i915->gem.idle_work, msecs_to_jiffies(100)); +} + +void i915_gem_unpark(struct drm_i915_private *i915) +{ + GEM_TRACE("\n"); + + lockdep_assert_held(&i915->drm.struct_mutex); + GEM_BUG_ON(!i915->gt.active_requests); + assert_rpm_wakelock_held(i915); + + if (i915->gt.awake) + return; + + /* + * It seems that the DMC likes to transition between the DC states a lot + * when there are no connected displays (no active power domains) during + * command submission. + * + * This activity has negative impact on the performance of the chip with + * huge latencies observed in the interrupt handler and elsewhere. + * + * Work around it by grabbing a GT IRQ power domain whilst there is any + * GT activity, preventing any DC state transitions. + */ + i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + GEM_BUG_ON(!i915->gt.awake); + + i915_globals_unpark(); + + intel_enable_gt_powersave(i915); + i915_update_gfx_val(i915); + if (INTEL_GEN(i915) >= 6) + gen6_rps_busy(i915); + i915_pmu_gt_unparked(i915); + + intel_engines_unpark(i915); + + i915_queue_hangcheck(i915); + + queue_delayed_work(i915->wq, + &i915->gem.retire_work, + round_jiffies_up_relative(HZ)); +} + +bool i915_gem_load_power_context(struct drm_i915_private *i915) +{ + /* Force loading the kernel context on all engines */ + if (!switch_to_kernel_context_sync(i915, ALL_ENGINES)) + return false; + + /* + * Immediately park the GPU so that we enable powersaving and + * treat it as idle. The next time we issue a request, we will + * unpark and start using the engine->pinned_default_state, otherwise + * it is in limbo and an early reset may fail. + */ + __i915_gem_park(i915); + + return true; +} + +void i915_gem_suspend(struct drm_i915_private *i915) +{ + intel_wakeref_t wakeref; + + GEM_TRACE("\n"); + + wakeref = intel_runtime_pm_get(i915); + + mutex_lock(&i915->drm.struct_mutex); + + /* + * We have to flush all the executing contexts to main memory so + * that they can saved in the hibernation image. To ensure the last + * context image is coherent, we have to switch away from it. That + * leaves the i915->kernel_context still active when + * we actually suspend, and its image in memory may not match the GPU + * state. Fortunately, the kernel_context is disposable and we do + * not rely on its state. + */ + switch_to_kernel_context_sync(i915, i915->gt.active_engines); + + mutex_unlock(&i915->drm.struct_mutex); + i915_reset_flush(i915); + + drain_delayed_work(&i915->gem.retire_work); + + /* + * As the idle_work is rearming if it detects a race, play safe and + * repeat the flush until it is definitely idle. + */ + drain_delayed_work(&i915->gem.idle_work); + + flush_workqueue(i915->wq); + + /* + * Assert that we successfully flushed all the work and + * reset the GPU back to its idle, low power state. + */ + GEM_BUG_ON(i915->gt.awake); + + intel_uc_suspend(i915); + + intel_runtime_pm_put(i915, wakeref); +} + +void i915_gem_suspend_late(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + struct list_head *phases[] = { + &i915->mm.unbound_list, + &i915->mm.bound_list, + NULL + }, **phase; + + /* + * Neither the BIOS, ourselves or any other kernel + * expects the system to be in execlists mode on startup, + * so we need to reset the GPU back to legacy mode. And the only + * known way to disable logical contexts is through a GPU reset. + * + * So in order to leave the system in a known default configuration, + * always reset the GPU upon unload and suspend. Afterwards we then + * clean up the GEM state tracking, flushing off the requests and + * leaving the system in a known idle state. + * + * Note that is of the upmost importance that the GPU is idle and + * all stray writes are flushed *before* we dismantle the backing + * storage for the pinned objects. + * + * However, since we are uncertain that resetting the GPU on older + * machines is a good idea, we don't - just in case it leaves the + * machine in an unusable condition. + */ + + mutex_lock(&i915->drm.struct_mutex); + for (phase = phases; *phase; phase++) { + list_for_each_entry(obj, *phase, mm.link) + WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); + } + mutex_unlock(&i915->drm.struct_mutex); + + intel_uc_sanitize(i915); + i915_gem_sanitize(i915); +} + +void i915_gem_resume(struct drm_i915_private *i915) +{ + GEM_TRACE("\n"); + + WARN_ON(i915->gt.awake); + + mutex_lock(&i915->drm.struct_mutex); + intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); + + i915_gem_restore_gtt_mappings(i915); + i915_gem_restore_fences(i915); + + /* + * As we didn't flush the kernel context before suspend, we cannot + * guarantee that the context image is complete. So let's just reset + * it and start again. + */ + intel_gt_resume(i915); + + if (i915_gem_init_hw(i915)) + goto err_wedged; + + intel_uc_resume(i915); + + /* Always reload a context for powersaving. */ + if (!i915_gem_load_power_context(i915)) + goto err_wedged; + +out_unlock: + intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); + mutex_unlock(&i915->drm.struct_mutex); + return; + +err_wedged: + if (!i915_reset_failed(i915)) { + dev_err(i915->drm.dev, + "Failed to re-initialize GPU, declaring it wedged!\n"); + i915_gem_set_wedged(i915); + } + goto out_unlock; +} + +void i915_gem_init__pm(struct drm_i915_private *i915) +{ + INIT_DELAYED_WORK(&i915->gem.idle_work, idle_work_handler); + INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler); +} diff --git a/drivers/gpu/drm/i915/i915_gem_pm.h b/drivers/gpu/drm/i915/i915_gem_pm.h new file mode 100644 index 000000000000..52f65e3f06b5 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_pm.h @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_PM_H__ +#define __I915_GEM_PM_H__ + +#include + +struct drm_i915_private; +struct work_struct; + +void i915_gem_init__pm(struct drm_i915_private *i915); + +bool i915_gem_load_power_context(struct drm_i915_private *i915); +void i915_gem_resume(struct drm_i915_private *i915); + +void i915_gem_unpark(struct drm_i915_private *i915); +void i915_gem_park(struct drm_i915_private *i915); + +void i915_gem_idle_work_handler(struct work_struct *work); + +void i915_gem_suspend(struct drm_i915_private *i915); +void i915_gem_suspend_late(struct drm_i915_private *i915); + +#endif /* __I915_GEM_PM_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 6f52ca881173..9d646fa1b74e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -1658,7 +1658,7 @@ static int __igt_switch_to_kernel_context(struct drm_i915_private *i915, /* XXX Bonus points for proving we are the kernel context! */ mutex_unlock(&i915->drm.struct_mutex); - drain_delayed_work(&i915->gt.idle_work); + drain_delayed_work(&i915->gem.idle_work); mutex_lock(&i915->drm.struct_mutex); } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 971148fbe6f5..12fc53c694a6 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -514,8 +514,8 @@ static void disable_retire_worker(struct drm_i915_private *i915) } mutex_unlock(&i915->drm.struct_mutex); - cancel_delayed_work_sync(&i915->gt.retire_work); - cancel_delayed_work_sync(&i915->gt.idle_work); + cancel_delayed_work_sync(&i915->gem.retire_work); + cancel_delayed_work_sync(&i915->gem.idle_work); } static int igt_mmap_offset_exhaustion(void *arg) @@ -617,9 +617,9 @@ out: out_park: mutex_lock(&i915->drm.struct_mutex); if (--i915->gt.active_requests) - queue_delayed_work(i915->wq, &i915->gt.retire_work, 0); + queue_delayed_work(i915->wq, &i915->gem.retire_work, 0); else - queue_delayed_work(i915->wq, &i915->gt.idle_work, 0); + queue_delayed_work(i915->wq, &i915->gem.idle_work, 0); mutex_unlock(&i915->drm.struct_mutex); i915_gem_shrinker_register(i915); return err; diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index f444ee5add27..fb677b4019a0 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -59,8 +59,8 @@ static void mock_device_release(struct drm_device *dev) i915_gem_contexts_lost(i915); mutex_unlock(&i915->drm.struct_mutex); - drain_delayed_work(&i915->gt.retire_work); - drain_delayed_work(&i915->gt.idle_work); + drain_delayed_work(&i915->gem.retire_work); + drain_delayed_work(&i915->gem.idle_work); i915_gem_drain_workqueue(i915); mutex_lock(&i915->drm.struct_mutex); @@ -111,7 +111,7 @@ static void mock_retire_work_handler(struct work_struct *work) static void mock_idle_work_handler(struct work_struct *work) { struct drm_i915_private *i915 = - container_of(work, typeof(*i915), gt.idle_work.work); + container_of(work, typeof(*i915), gem.idle_work.work); i915->gt.active_engines = 0; } @@ -197,8 +197,8 @@ struct drm_i915_private *mock_gem_device(void) mock_init_contexts(i915); - INIT_DELAYED_WORK(&i915->gt.retire_work, mock_retire_work_handler); - INIT_DELAYED_WORK(&i915->gt.idle_work, mock_idle_work_handler); + INIT_DELAYED_WORK(&i915->gem.retire_work, mock_retire_work_handler); + INIT_DELAYED_WORK(&i915->gem.idle_work, mock_idle_work_handler); i915->gt.awake = true; From 6eee33e87f6d1f6263162ce0874c1ef503eff041 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 24 Apr 2019 21:07:15 +0100 Subject: [PATCH 027/591] drm/i915: Introduce context->enter() and context->exit() We wish to start segregating the power management into different control domains, both with respect to the hardware and the user interface. The first step is that at the lowest level flow of requests, we want to process a context event (and not a global GEM operation). In this patch, we introduce the context callbacks that in future patches will be redirected to per-engine interfaces leading to global operations as required. The intent is that this will be guarded by the timeline->mutex, except that retiring has not quite finished transitioning over from being guarded by struct_mutex. So at the moment it is protected by struct_mutex with a reminded to switch. v2: Rename default handlers to intel_context_enter_engine. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190424200717.1686-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_context.c | 17 ++++++++++++++ drivers/gpu/drm/i915/gt/intel_context.h | 21 ++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_context_types.h | 5 +++++ drivers/gpu/drm/i915/gt/intel_lrc.c | 3 +++ drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 3 +++ drivers/gpu/drm/i915/gt/mock_engine.c | 3 +++ drivers/gpu/drm/i915/i915_request.c | 22 ++++--------------- 7 files changed, 56 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index ebd1e5919a4a..4410e20e8e13 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -266,3 +266,20 @@ int __init i915_global_context_init(void) i915_global_register(&global.base); return 0; } + +void intel_context_enter_engine(struct intel_context *ce) +{ + struct drm_i915_private *i915 = ce->gem_context->i915; + + if (!i915->gt.active_requests++) + i915_gem_unpark(i915); +} + +void intel_context_exit_engine(struct intel_context *ce) +{ + struct drm_i915_private *i915 = ce->gem_context->i915; + + GEM_BUG_ON(!i915->gt.active_requests); + if (!--i915->gt.active_requests) + i915_gem_park(i915); +} diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index ebc861b1a49e..b732cf99efcb 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -73,6 +73,27 @@ static inline void __intel_context_pin(struct intel_context *ce) void intel_context_unpin(struct intel_context *ce); +void intel_context_enter_engine(struct intel_context *ce); +void intel_context_exit_engine(struct intel_context *ce); + +static inline void intel_context_enter(struct intel_context *ce) +{ + if (!ce->active_count++) + ce->ops->enter(ce); +} + +static inline void intel_context_mark_active(struct intel_context *ce) +{ + ++ce->active_count; +} + +static inline void intel_context_exit(struct intel_context *ce) +{ + GEM_BUG_ON(!ce->active_count); + if (!--ce->active_count) + ce->ops->exit(ce); +} + static inline struct intel_context *intel_context_get(struct intel_context *ce) { kref_get(&ce->ref); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 9ec4f787c908..f02d27734e3b 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -25,6 +25,9 @@ struct intel_context_ops { int (*pin)(struct intel_context *ce); void (*unpin)(struct intel_context *ce); + void (*enter)(struct intel_context *ce); + void (*exit)(struct intel_context *ce); + void (*reset)(struct intel_context *ce); void (*destroy)(struct kref *kref); }; @@ -46,6 +49,8 @@ struct intel_context { u32 *lrc_reg_state; u64 lrc_desc; + unsigned int active_count; /* notionally protected by timeline->mutex */ + atomic_t pin_count; struct mutex pin_mutex; /* guards pinning and associated on-gpuing */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 5cadf8f6a23d..edec7f183688 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1315,6 +1315,9 @@ static const struct intel_context_ops execlists_context_ops = { .pin = execlists_context_pin, .unpin = execlists_context_unpin, + .enter = intel_context_enter_engine, + .exit = intel_context_exit_engine, + .reset = execlists_context_reset, .destroy = execlists_context_destroy, }; diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index ac84a383748e..5404fe382691 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1510,6 +1510,9 @@ static const struct intel_context_ops ring_context_ops = { .pin = ring_context_pin, .unpin = ring_context_unpin, + .enter = intel_context_enter_engine, + .exit = intel_context_exit_engine, + .reset = ring_context_reset, .destroy = ring_context_destroy, }; diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 414afd2f27fe..bcfeb0c67997 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -157,6 +157,9 @@ static const struct intel_context_ops mock_context_ops = { .pin = mock_context_pin, .unpin = mock_context_unpin, + .enter = intel_context_enter_engine, + .exit = intel_context_exit_engine, + .destroy = mock_context_destroy, }; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 64ca8b3ea12f..9a2665ee012a 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -131,19 +131,6 @@ i915_request_remove_from_client(struct i915_request *request) spin_unlock(&file_priv->mm.lock); } -static void reserve_gt(struct drm_i915_private *i915) -{ - if (!i915->gt.active_requests++) - i915_gem_unpark(i915); -} - -static void unreserve_gt(struct drm_i915_private *i915) -{ - GEM_BUG_ON(!i915->gt.active_requests); - if (!--i915->gt.active_requests) - i915_gem_park(i915); -} - static void advance_ring(struct i915_request *request) { struct intel_ring *ring = request->ring; @@ -301,11 +288,10 @@ static void i915_request_retire(struct i915_request *request) i915_request_remove_from_client(request); - intel_context_unpin(request->hw_context); - __retire_engine_upto(request->engine, request); - unreserve_gt(request->i915); + intel_context_exit(request->hw_context); + intel_context_unpin(request->hw_context); i915_sched_node_fini(&request->sched); i915_request_put(request); @@ -659,8 +645,8 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) if (IS_ERR(ce)) return ERR_CAST(ce); - reserve_gt(i915); mutex_lock(&ce->ring->timeline->mutex); + intel_context_enter(ce); /* Move our oldest request to the slab-cache (if not in use!) */ rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link); @@ -791,8 +777,8 @@ err_unwind: err_free: kmem_cache_free(global.slab_requests, rq); err_unreserve: + intel_context_exit(ce); mutex_unlock(&ce->ring->timeline->mutex); - unreserve_gt(i915); intel_context_unpin(ce); return ERR_PTR(ret); } From 2ccdf6a1c3f7ff51d721ee7a5bed96e03da77205 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 24 Apr 2019 21:07:16 +0100 Subject: [PATCH 028/591] drm/i915: Pass intel_context to i915_request_create() Start acquiring the logical intel_context and using that as our primary means for request allocation. This is the initial step to allow us to avoid requiring struct_mutex for request allocation along the perma-pinned kernel context, but it also provides a foundation for breaking up the complex request allocation to handle different scenarios inside execbuf. For the purpose of emitting a request from inside retirement (see the next patch for engine power management), we also need to lift control over the timeline mutex to the caller. v2: Note that the request carries the active reference upon construction. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190424200717.1686-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_context.h | 12 + drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 3 - drivers/gpu/drm/i915/gt/intel_workarounds.c | 2 +- drivers/gpu/drm/i915/i915_gem_context.c | 4 +- drivers/gpu/drm/i915/i915_perf.c | 2 +- drivers/gpu/drm/i915/i915_request.c | 247 ++++++++++-------- drivers/gpu/drm/i915/i915_request.h | 7 + drivers/gpu/drm/i915/intel_overlay.c | 5 +- drivers/gpu/drm/i915/selftests/i915_active.c | 2 +- .../drm/i915/selftests/i915_gem_coherency.c | 2 +- .../gpu/drm/i915/selftests/i915_gem_object.c | 2 +- drivers/gpu/drm/i915/selftests/i915_request.c | 9 +- .../gpu/drm/i915/selftests/i915_timeline.c | 4 +- 14 files changed, 177 insertions(+), 126 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index b732cf99efcb..60379eb37949 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -105,4 +105,16 @@ static inline void intel_context_put(struct intel_context *ce) kref_put(&ce->ref, ce->ops->destroy); } +static inline void intel_context_timeline_lock(struct intel_context *ce) + __acquires(&ce->ring->timeline->mutex) +{ + mutex_lock(&ce->ring->timeline->mutex); +} + +static inline void intel_context_timeline_unlock(struct intel_context *ce) + __releases(&ce->ring->timeline->mutex) +{ + mutex_unlock(&ce->ring->timeline->mutex); +} + #endif /* __INTEL_CONTEXT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 7db498567843..cdf184403d46 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -783,7 +783,7 @@ static void restart_work(struct work_struct *work) if (!intel_engine_is_idle(engine)) continue; - rq = i915_request_alloc(engine, i915->kernel_context); + rq = i915_request_create(engine->kernel_context); if (!IS_ERR(rq)) i915_request_add(rq); } diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index 5404fe382691..f89541274d44 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1772,7 +1772,6 @@ static int switch_context(struct i915_request *rq) u32 hw_flags = 0; int ret, i; - lockdep_assert_held(&rq->i915->drm.struct_mutex); GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); if (ppgtt) { @@ -1902,8 +1901,6 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) struct i915_request *target; long timeout; - lockdep_assert_held(&ring->vma->vm->i915->drm.struct_mutex); - if (intel_ring_update_space(ring) >= bytes) return 0; diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index f46ed0e2f07c..364696221fd7 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1356,7 +1356,7 @@ static int engine_wa_list_verify(struct intel_engine_cs *engine, if (IS_ERR(vma)) return PTR_ERR(vma); - rq = i915_request_alloc(engine, engine->kernel_context->gem_context); + rq = i915_request_create(engine->kernel_context); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_vma; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 37dff694456c..3eb1a664b5fa 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -942,7 +942,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915, struct intel_ring *ring; struct i915_request *rq; - rq = i915_request_alloc(engine, i915->kernel_context); + rq = i915_request_create(engine->kernel_context); if (IS_ERR(rq)) return PTR_ERR(rq); @@ -1188,7 +1188,7 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) /* Submitting requests etc needs the hw awake. */ wakeref = intel_runtime_pm_get(i915); - rq = i915_request_alloc(ce->engine, i915->kernel_context); + rq = i915_request_create(ce->engine->kernel_context); if (IS_ERR(rq)) { ret = PTR_ERR(rq); goto out_put; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index a87f790335c1..328a740e72cb 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1762,7 +1762,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, * Apply the configuration by doing one context restore of the edited * context image. */ - rq = i915_request_alloc(engine, dev_priv->kernel_context); + rq = i915_request_create(engine->kernel_context); if (IS_ERR(rq)) return PTR_ERR(rq); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 9a2665ee012a..705c125bafc6 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -581,7 +581,7 @@ static void ring_retire_requests(struct intel_ring *ring) } static noinline struct i915_request * -i915_request_alloc_slow(struct intel_context *ce) +request_alloc_slow(struct intel_context *ce, gfp_t gfp) { struct intel_ring *ring = ce->ring; struct i915_request *rq; @@ -589,6 +589,9 @@ i915_request_alloc_slow(struct intel_context *ce) if (list_empty(&ring->request_list)) goto out; + if (!gfpflags_allow_blocking(gfp)) + goto out; + /* Ratelimit ourselves to prevent oom from malicious clients */ rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link); cond_synchronize_rcu(rq->rcustate); @@ -597,62 +600,21 @@ i915_request_alloc_slow(struct intel_context *ce) ring_retire_requests(ring); out: - return kmem_cache_alloc(global.slab_requests, GFP_KERNEL); + return kmem_cache_alloc(global.slab_requests, gfp); } -/** - * i915_request_alloc - allocate a request structure - * - * @engine: engine that we wish to issue the request on. - * @ctx: context that the request will be associated with. - * - * Returns a pointer to the allocated request if successful, - * or an error code if not. - */ struct i915_request * -i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) +__i915_request_create(struct intel_context *ce, gfp_t gfp) { - struct drm_i915_private *i915 = engine->i915; - struct intel_context *ce; - struct i915_timeline *tl; + struct i915_timeline *tl = ce->ring->timeline; struct i915_request *rq; u32 seqno; int ret; - lockdep_assert_held(&i915->drm.struct_mutex); + might_sleep_if(gfpflags_allow_blocking(gfp)); - /* - * Preempt contexts are reserved for exclusive use to inject a - * preemption context switch. They are never to be used for any trivial - * request! - */ - GEM_BUG_ON(ctx == i915->preempt_context); - - /* - * ABI: Before userspace accesses the GPU (e.g. execbuffer), report - * EIO if the GPU is already wedged. - */ - ret = i915_terminally_wedged(i915); - if (ret) - return ERR_PTR(ret); - - /* - * Pinning the contexts may generate requests in order to acquire - * GGTT space, so do this first before we reserve a seqno for - * ourselves. - */ - ce = intel_context_pin(ctx, engine); - if (IS_ERR(ce)) - return ERR_CAST(ce); - - mutex_lock(&ce->ring->timeline->mutex); - intel_context_enter(ce); - - /* Move our oldest request to the slab-cache (if not in use!) */ - rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link); - if (!list_is_last(&rq->ring_link, &ce->ring->request_list) && - i915_request_completed(rq)) - i915_request_retire(rq); + /* Check that the caller provided an already pinned context */ + __intel_context_pin(ce); /* * Beware: Dragons be flying overhead. @@ -684,30 +646,26 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * Do not use kmem_cache_zalloc() here! */ rq = kmem_cache_alloc(global.slab_requests, - GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); + gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (unlikely(!rq)) { - rq = i915_request_alloc_slow(ce); + rq = request_alloc_slow(ce, gfp); if (!rq) { ret = -ENOMEM; goto err_unreserve; } } - INIT_LIST_HEAD(&rq->active_list); - INIT_LIST_HEAD(&rq->execute_cb); - - tl = ce->ring->timeline; ret = i915_timeline_get_seqno(tl, rq, &seqno); if (ret) goto err_free; - rq->i915 = i915; - rq->engine = engine; - rq->gem_context = ctx; + rq->i915 = ce->engine->i915; rq->hw_context = ce; + rq->gem_context = ce->gem_context; + rq->engine = ce->engine; rq->ring = ce->ring; rq->timeline = tl; - GEM_BUG_ON(rq->timeline == &engine->timeline); + GEM_BUG_ON(rq->timeline == &ce->engine->timeline); rq->hwsp_seqno = tl->hwsp_seqno; rq->hwsp_cacheline = tl->hwsp_cacheline; rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ @@ -728,6 +686,9 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) rq->capture_list = NULL; rq->waitboost = false; + INIT_LIST_HEAD(&rq->active_list); + INIT_LIST_HEAD(&rq->execute_cb); + /* * Reserve space in the ring buffer for all the commands required to * eventually emit this request. This is to guarantee that the @@ -740,7 +701,8 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * around inside i915_request_add() there is sufficient space at * the beginning of the ring as well. */ - rq->reserved_space = 2 * engine->emit_fini_breadcrumb_dw * sizeof(u32); + rq->reserved_space = + 2 * rq->engine->emit_fini_breadcrumb_dw * sizeof(u32); /* * Record the position of the start of the request so that @@ -750,20 +712,16 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) */ rq->head = rq->ring->emit; - ret = engine->request_alloc(rq); + ret = rq->engine->request_alloc(rq); if (ret) goto err_unwind; + rq->infix = rq->ring->emit; /* end of header; start of user payload */ + /* Keep a second pin for the dual retirement along engine and ring */ __intel_context_pin(ce); - rq->infix = rq->ring->emit; /* end of header; start of user payload */ - - /* Check that we didn't interrupt ourselves with a new request */ - lockdep_assert_held(&rq->timeline->mutex); - GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno); - rq->cookie = lockdep_pin_lock(&rq->timeline->mutex); - + intel_context_mark_active(ce); return rq; err_unwind: @@ -777,12 +735,86 @@ err_unwind: err_free: kmem_cache_free(global.slab_requests, rq); err_unreserve: - intel_context_exit(ce); - mutex_unlock(&ce->ring->timeline->mutex); intel_context_unpin(ce); return ERR_PTR(ret); } +struct i915_request * +i915_request_create(struct intel_context *ce) +{ + struct i915_request *rq; + + intel_context_timeline_lock(ce); + + /* Move our oldest request to the slab-cache (if not in use!) */ + rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link); + if (!list_is_last(&rq->ring_link, &ce->ring->request_list) && + i915_request_completed(rq)) + i915_request_retire(rq); + + intel_context_enter(ce); + rq = __i915_request_create(ce, GFP_KERNEL); + intel_context_exit(ce); /* active reference transferred to request */ + if (IS_ERR(rq)) + goto err_unlock; + + /* Check that we do not interrupt ourselves with a new request */ + rq->cookie = lockdep_pin_lock(&ce->ring->timeline->mutex); + + return rq; + +err_unlock: + intel_context_timeline_unlock(ce); + return rq; +} + +/** + * i915_request_alloc - allocate a request structure + * + * @engine: engine that we wish to issue the request on. + * @ctx: context that the request will be associated with. + * + * Returns a pointer to the allocated request if successful, + * or an error code if not. + */ +struct i915_request * +i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) +{ + struct drm_i915_private *i915 = engine->i915; + struct intel_context *ce; + struct i915_request *rq; + int ret; + + /* + * Preempt contexts are reserved for exclusive use to inject a + * preemption context switch. They are never to be used for any trivial + * request! + */ + GEM_BUG_ON(ctx == i915->preempt_context); + + /* + * ABI: Before userspace accesses the GPU (e.g. execbuffer), report + * EIO if the GPU is already wedged. + */ + ret = i915_terminally_wedged(i915); + if (ret) + return ERR_PTR(ret); + + /* + * Pinning the contexts may generate requests in order to acquire + * GGTT space, so do this first before we reserve a seqno for + * ourselves. + */ + ce = intel_context_pin(ctx, engine); + if (IS_ERR(ce)) + return ERR_CAST(ce); + + rq = i915_request_create(ce); + intel_context_unpin(ce); + + return rq; +} + static int emit_semaphore_wait(struct i915_request *to, struct i915_request *from, @@ -1043,8 +1075,7 @@ __i915_request_add_to_timeline(struct i915_request *rq) * precludes optimising to use semaphores serialisation of a single * timeline across engines. */ - prev = i915_active_request_raw(&timeline->last_request, - &rq->i915->drm.struct_mutex); + prev = rcu_dereference_protected(timeline->last_request.request, 1); if (prev && !i915_request_completed(prev)) { if (is_power_of_2(prev->engine->mask | rq->engine->mask)) i915_sw_fence_await_sw_fence(&rq->submit, @@ -1065,6 +1096,11 @@ __i915_request_add_to_timeline(struct i915_request *rq) list_add_tail(&rq->link, &timeline->requests); spin_unlock_irq(&timeline->lock); + /* + * Make sure that no request gazumped us - if it was allocated after + * our i915_request_alloc() and called __i915_request_add() before + * us, the timeline will hold its seqno which is later than ours. + */ GEM_BUG_ON(timeline->seqno != rq->fence.seqno); __i915_active_request_set(&timeline->last_request, rq); @@ -1076,36 +1112,23 @@ __i915_request_add_to_timeline(struct i915_request *rq) * request is not being tracked for completion but the work itself is * going to happen on the hardware. This would be a Bad Thing(tm). */ -void i915_request_add(struct i915_request *request) +struct i915_request *__i915_request_commit(struct i915_request *rq) { - struct intel_engine_cs *engine = request->engine; - struct i915_timeline *timeline = request->timeline; - struct intel_ring *ring = request->ring; + struct intel_engine_cs *engine = rq->engine; + struct intel_ring *ring = rq->ring; struct i915_request *prev; u32 *cs; GEM_TRACE("%s fence %llx:%lld\n", - engine->name, request->fence.context, request->fence.seqno); - - lockdep_assert_held(&request->timeline->mutex); - lockdep_unpin_lock(&request->timeline->mutex, request->cookie); - - trace_i915_request_add(request); - - /* - * Make sure that no request gazumped us - if it was allocated after - * our i915_request_alloc() and called __i915_request_add() before - * us, the timeline will hold its seqno which is later than ours. - */ - GEM_BUG_ON(timeline->seqno != request->fence.seqno); + engine->name, rq->fence.context, rq->fence.seqno); /* * To ensure that this call will not fail, space for its emissions * should already have been reserved in the ring buffer. Let the ring * know that it is time to use that space up. */ - GEM_BUG_ON(request->reserved_space > request->ring->space); - request->reserved_space = 0; + GEM_BUG_ON(rq->reserved_space > ring->space); + rq->reserved_space = 0; /* * Record the position of the start of the breadcrumb so that @@ -1113,17 +1136,17 @@ void i915_request_add(struct i915_request *request) * GPU processing the request, we never over-estimate the * position of the ring's HEAD. */ - cs = intel_ring_begin(request, engine->emit_fini_breadcrumb_dw); + cs = intel_ring_begin(rq, engine->emit_fini_breadcrumb_dw); GEM_BUG_ON(IS_ERR(cs)); - request->postfix = intel_ring_offset(request, cs); + rq->postfix = intel_ring_offset(rq, cs); - prev = __i915_request_add_to_timeline(request); + prev = __i915_request_add_to_timeline(rq); - list_add_tail(&request->ring_link, &ring->request_list); - if (list_is_first(&request->ring_link, &ring->request_list)) - list_add(&ring->active_link, &request->i915->gt.active_rings); - request->i915->gt.active_engines |= request->engine->mask; - request->emitted_jiffies = jiffies; + list_add_tail(&rq->ring_link, &ring->request_list); + if (list_is_first(&rq->ring_link, &ring->request_list)) + list_add(&ring->active_link, &rq->i915->gt.active_rings); + rq->i915->gt.active_engines |= rq->engine->mask; + rq->emitted_jiffies = jiffies; /* * Let the backend know a new request has arrived that may need @@ -1137,10 +1160,10 @@ void i915_request_add(struct i915_request *request) * run at the earliest possible convenience. */ local_bh_disable(); - i915_sw_fence_commit(&request->semaphore); + i915_sw_fence_commit(&rq->semaphore); rcu_read_lock(); /* RCU serialisation for set-wedged protection */ if (engine->schedule) { - struct i915_sched_attr attr = request->gem_context->sched; + struct i915_sched_attr attr = rq->gem_context->sched; /* * Boost actual workloads past semaphores! @@ -1154,7 +1177,7 @@ void i915_request_add(struct i915_request *request) * far in the distance past over useful work, we keep a history * of any semaphore use along our dependency chain. */ - if (!(request->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) + if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) attr.priority |= I915_PRIORITY_NOSEMAPHORE; /* @@ -1163,15 +1186,29 @@ void i915_request_add(struct i915_request *request) * Allow interactive/synchronous clients to jump ahead of * the bulk clients. (FQ_CODEL) */ - if (list_empty(&request->sched.signalers_list)) + if (list_empty(&rq->sched.signalers_list)) attr.priority |= I915_PRIORITY_NEWCLIENT; - engine->schedule(request, &attr); + engine->schedule(rq, &attr); } rcu_read_unlock(); - i915_sw_fence_commit(&request->submit); + i915_sw_fence_commit(&rq->submit); local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ + return prev; +} + +void i915_request_add(struct i915_request *rq) +{ + struct i915_request *prev; + + lockdep_assert_held(&rq->timeline->mutex); + lockdep_unpin_lock(&rq->timeline->mutex, rq->cookie); + + trace_i915_request_add(rq); + + prev = __i915_request_commit(rq); + /* * In typical scenarios, we do not expect the previous request on * the timeline to be still tracked by timeline->last_request if it @@ -1192,7 +1229,7 @@ void i915_request_add(struct i915_request *request) if (prev && i915_request_completed(prev)) i915_request_retire_upto(prev); - mutex_unlock(&request->timeline->mutex); + mutex_unlock(&rq->timeline->mutex); } static unsigned long local_clock_us(unsigned int *cpu) diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index a982664618c2..36f13b74ec58 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -239,6 +239,13 @@ static inline bool dma_fence_is_i915(const struct dma_fence *fence) return fence->ops == &i915_fence_ops; } +struct i915_request * __must_check +__i915_request_create(struct intel_context *ce, gfp_t gfp); +struct i915_request * __must_check +i915_request_create(struct intel_context *ce); + +struct i915_request *__i915_request_commit(struct i915_request *request); + struct i915_request * __must_check i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx); diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index eb317759b5d3..5c496b11ab5c 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -235,10 +235,9 @@ static int intel_overlay_do_wait_request(struct intel_overlay *overlay, static struct i915_request *alloc_request(struct intel_overlay *overlay) { - struct drm_i915_private *dev_priv = overlay->i915; - struct intel_engine_cs *engine = dev_priv->engine[RCS0]; + struct intel_engine_cs *engine = overlay->i915->engine[RCS0]; - return i915_request_alloc(engine, dev_priv->kernel_context); + return i915_request_create(engine->kernel_context); } /* overlay needs to be disable in OCMD reg */ diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c index 27d8f853111b..eee838dc0634 100644 --- a/drivers/gpu/drm/i915/selftests/i915_active.c +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -46,7 +46,7 @@ static int __live_active_setup(struct drm_i915_private *i915, for_each_engine(engine, i915, id) { struct i915_request *rq; - rq = i915_request_alloc(engine, i915->kernel_context); + rq = i915_request_create(engine->kernel_context); if (IS_ERR(rq)) { err = PTR_ERR(rq); break; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index e43630b40fce..046a38743152 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -202,7 +202,7 @@ static int gpu_set(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return PTR_ERR(vma); - rq = i915_request_alloc(i915->engine[RCS0], i915->kernel_context); + rq = i915_request_create(i915->engine[RCS0]->kernel_context); if (IS_ERR(rq)) { i915_vma_unpin(vma); return PTR_ERR(rq); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 12fc53c694a6..12203d665a4e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -468,7 +468,7 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) if (err) return err; - rq = i915_request_alloc(i915->engine[RCS0], i915->kernel_context); + rq = i915_request_create(i915->engine[RCS0]->kernel_context); if (IS_ERR(rq)) { i915_vma_unpin(vma); return PTR_ERR(rq); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index e6ffe2240126..098d7b3aa131 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -551,8 +551,7 @@ static int live_nop_request(void *arg) times[1] = ktime_get_raw(); for (n = 0; n < prime; n++) { - request = i915_request_alloc(engine, - i915->kernel_context); + request = i915_request_create(engine->kernel_context); if (IS_ERR(request)) { err = PTR_ERR(request); goto out_unlock; @@ -649,7 +648,7 @@ empty_request(struct intel_engine_cs *engine, struct i915_request *request; int err; - request = i915_request_alloc(engine, engine->i915->kernel_context); + request = i915_request_create(engine->kernel_context); if (IS_ERR(request)) return request; @@ -853,7 +852,7 @@ static int live_all_engines(void *arg) } for_each_engine(engine, i915, id) { - request[id] = i915_request_alloc(engine, i915->kernel_context); + request[id] = i915_request_create(engine->kernel_context); if (IS_ERR(request[id])) { err = PTR_ERR(request[id]); pr_err("%s: Request allocation failed with err=%d\n", @@ -962,7 +961,7 @@ static int live_sequential_engines(void *arg) goto out_unlock; } - request[id] = i915_request_alloc(engine, i915->kernel_context); + request[id] = i915_request_create(engine->kernel_context); if (IS_ERR(request[id])) { err = PTR_ERR(request[id]); pr_err("%s: Request allocation failed for %s with err=%d\n", diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c index bd96afcadfe7..ff9ebe50fae8 100644 --- a/drivers/gpu/drm/i915/selftests/i915_timeline.c +++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c @@ -454,7 +454,7 @@ tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value) goto out; } - rq = i915_request_alloc(engine, engine->i915->kernel_context); + rq = i915_request_create(engine->kernel_context); if (IS_ERR(rq)) goto out_unpin; @@ -678,7 +678,7 @@ static int live_hwsp_wrap(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - rq = i915_request_alloc(engine, i915->kernel_context); + rq = i915_request_create(engine->kernel_context); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out; From 79ffac8599c4d8aa84d313920d3d86d7361c252b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 24 Apr 2019 21:07:17 +0100 Subject: [PATCH 029/591] drm/i915: Invert the GEM wakeref hierarchy In the current scheme, on submitting a request we take a single global GEM wakeref, which trickles down to wake up all GT power domains. This is undesirable as we would like to be able to localise our power management to the available power domains and to remove the global GEM operations from the heart of the driver. (The intent there is to push global GEM decisions to the boundary as used by the GEM user interface.) Now during request construction, each request is responsible via its logical context to acquire a wakeref on each power domain it intends to utilize. Currently, each request takes a wakeref on the engine(s) and the engines themselves take a chipset wakeref. This gives us a transition on each engine which we can extend if we want to insert more powermangement control (such as soft rc6). The global GEM operations that currently require a struct_mutex are reduced to listening to pm events from the chipset GT wakeref. As we reduce the struct_mutex requirement, these listeners should evaporate. Perhaps the biggest immediate change is that this removes the struct_mutex requirement around GT power management, allowing us greater flexibility in request construction. Another important knock-on effect, is that by tracking engine usage, we can insert a switch back to the kernel context on that engine immediately, avoiding any extra delay or inserting global synchronisation barriers. This makes tracking when an engine and its associated contexts are idle much easier -- important for when we forgo our assumed execution ordering and need idle barriers to unpin used contexts. In the process, it means we remove a large chunk of code whose only purpose was to switch back to the kernel context. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Imre Deak Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190424200717.1686-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 2 + drivers/gpu/drm/i915/gt/intel_context.c | 18 +- drivers/gpu/drm/i915/gt/intel_engine.h | 9 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 142 +--------- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 147 ++++++++++ drivers/gpu/drm/i915/gt/intel_engine_pm.h | 20 ++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 7 +- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 143 ++++++++++ drivers/gpu/drm/i915/gt/intel_gt_pm.h | 27 ++ drivers/gpu/drm/i915/gt/intel_hangcheck.c | 7 + drivers/gpu/drm/i915/gt/intel_lrc.c | 6 +- drivers/gpu/drm/i915/gt/intel_reset.c | 101 +------ drivers/gpu/drm/i915/gt/intel_reset.h | 1 - drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 13 +- drivers/gpu/drm/i915/gt/mock_engine.c | 3 + drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 49 +--- .../gpu/drm/i915/gt/selftest_workarounds.c | 5 +- drivers/gpu/drm/i915/i915_debugfs.c | 16 +- drivers/gpu/drm/i915/i915_drv.c | 5 +- drivers/gpu/drm/i915/i915_drv.h | 8 +- drivers/gpu/drm/i915/i915_gem.c | 41 +-- drivers/gpu/drm/i915/i915_gem.h | 3 - drivers/gpu/drm/i915/i915_gem_context.c | 85 +----- drivers/gpu/drm/i915/i915_gem_context.h | 4 - drivers/gpu/drm/i915/i915_gem_evict.c | 47 +--- drivers/gpu/drm/i915/i915_gem_pm.c | 262 ++++++------------ drivers/gpu/drm/i915/i915_gem_pm.h | 3 - drivers/gpu/drm/i915/i915_gpu_error.h | 4 - drivers/gpu/drm/i915/i915_request.c | 10 +- drivers/gpu/drm/i915/i915_request.h | 2 +- drivers/gpu/drm/i915/intel_uc.c | 22 +- drivers/gpu/drm/i915/intel_uc.h | 2 +- drivers/gpu/drm/i915/selftests/i915_gem.c | 16 +- .../gpu/drm/i915/selftests/i915_gem_context.c | 114 +------- .../gpu/drm/i915/selftests/i915_gem_object.c | 29 +- .../gpu/drm/i915/selftests/igt_flush_test.c | 32 ++- .../gpu/drm/i915/selftests/mock_gem_device.c | 15 +- 37 files changed, 591 insertions(+), 829 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_pm.c create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_pm.h create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_pm.c create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_pm.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 858642c7bc40..dd8d923aa1c6 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -71,6 +71,8 @@ gt-y += \ gt/intel_breadcrumbs.o \ gt/intel_context.o \ gt/intel_engine_cs.o \ + gt/intel_engine_pm.o \ + gt/intel_gt_pm.o \ gt/intel_hangcheck.o \ gt/intel_lrc.o \ gt/intel_reset.o \ diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 4410e20e8e13..298e463ad082 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -10,6 +10,7 @@ #include "intel_context.h" #include "intel_engine.h" +#include "intel_engine_pm.h" static struct i915_global_context { struct i915_global base; @@ -162,7 +163,11 @@ intel_context_pin(struct i915_gem_context *ctx, return ERR_PTR(-EINTR); if (likely(!atomic_read(&ce->pin_count))) { - err = ce->ops->pin(ce); + intel_wakeref_t wakeref; + + err = 0; + with_intel_runtime_pm(ce->engine->i915, wakeref) + err = ce->ops->pin(ce); if (err) goto err; @@ -269,17 +274,10 @@ int __init i915_global_context_init(void) void intel_context_enter_engine(struct intel_context *ce) { - struct drm_i915_private *i915 = ce->gem_context->i915; - - if (!i915->gt.active_requests++) - i915_gem_unpark(i915); + intel_engine_pm_get(ce->engine); } void intel_context_exit_engine(struct intel_context *ce) { - struct drm_i915_private *i915 = ce->gem_context->i915; - - GEM_BUG_ON(!i915->gt.active_requests); - if (!--i915->gt.active_requests) - i915_gem_park(i915); + intel_engine_pm_put(ce->engine); } diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 72c7c337ace9..a228dc1774d8 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -382,6 +382,8 @@ u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine); void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone); +void intel_engine_init_execlists(struct intel_engine_cs *engine); + void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); @@ -458,19 +460,14 @@ static inline void intel_engine_reset(struct intel_engine_cs *engine, { if (engine->reset.reset) engine->reset.reset(engine, stalled); + engine->serial++; /* contexts lost */ } -void intel_engines_sanitize(struct drm_i915_private *i915, bool force); -void intel_gt_resume(struct drm_i915_private *i915); - bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engines_are_idle(struct drm_i915_private *dev_priv); void intel_engine_lost_context(struct intel_engine_cs *engine); -void intel_engines_park(struct drm_i915_private *i915); -void intel_engines_unpark(struct drm_i915_private *i915); - void intel_engines_reset_default_submission(struct drm_i915_private *i915); unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 79ac56748b90..cbebe812b317 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -27,6 +27,7 @@ #include "i915_drv.h" #include "intel_engine.h" +#include "intel_engine_pm.h" #include "intel_lrc.h" #include "intel_reset.h" @@ -451,7 +452,7 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine) i915_gem_batch_pool_init(&engine->batch_pool, engine); } -static void intel_engine_init_execlist(struct intel_engine_cs *engine) +void intel_engine_init_execlists(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -584,10 +585,11 @@ int intel_engine_setup_common(struct intel_engine_cs *engine) i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); intel_engine_init_breadcrumbs(engine); - intel_engine_init_execlist(engine); + intel_engine_init_execlists(engine); intel_engine_init_hangcheck(engine); intel_engine_init_batch_pool(engine); intel_engine_init_cmd_parser(engine); + intel_engine_init__pm(engine); /* Use the whole device by default */ engine->sseu = @@ -758,30 +760,6 @@ err_unpin: return ret; } -void intel_gt_resume(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - /* - * After resume, we may need to poke into the pinned kernel - * contexts to paper over any damage caused by the sudden suspend. - * Only the kernel contexts should remain pinned over suspend, - * allowing us to fixup the user contexts on their first pin. - */ - for_each_engine(engine, i915, id) { - struct intel_context *ce; - - ce = engine->kernel_context; - if (ce) - ce->ops->reset(ce); - - ce = engine->preempt_context; - if (ce) - ce->ops->reset(ce); - } -} - /** * intel_engines_cleanup_common - cleans up the engine state created by * the common initiailizers. @@ -1128,117 +1106,6 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915) engine->set_default_submission(engine); } -static bool reset_engines(struct drm_i915_private *i915) -{ - if (INTEL_INFO(i915)->gpu_reset_clobbers_display) - return false; - - return intel_gpu_reset(i915, ALL_ENGINES) == 0; -} - -/** - * intel_engines_sanitize: called after the GPU has lost power - * @i915: the i915 device - * @force: ignore a failed reset and sanitize engine state anyway - * - * Anytime we reset the GPU, either with an explicit GPU reset or through a - * PCI power cycle, the GPU loses state and we must reset our state tracking - * to match. Note that calling intel_engines_sanitize() if the GPU has not - * been reset results in much confusion! - */ -void intel_engines_sanitize(struct drm_i915_private *i915, bool force) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - GEM_TRACE("\n"); - - if (!reset_engines(i915) && !force) - return; - - for_each_engine(engine, i915, id) - intel_engine_reset(engine, false); -} - -/** - * intel_engines_park: called when the GT is transitioning from busy->idle - * @i915: the i915 device - * - * The GT is now idle and about to go to sleep (maybe never to wake again?). - * Time for us to tidy and put away our toys (release resources back to the - * system). - */ -void intel_engines_park(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) { - /* Flush the residual irq tasklets first. */ - intel_engine_disarm_breadcrumbs(engine); - tasklet_kill(&engine->execlists.tasklet); - - /* - * We are committed now to parking the engines, make sure there - * will be no more interrupts arriving later and the engines - * are truly idle. - */ - if (wait_for(intel_engine_is_idle(engine), 10)) { - struct drm_printer p = drm_debug_printer(__func__); - - dev_err(i915->drm.dev, - "%s is not idle before parking\n", - engine->name); - intel_engine_dump(engine, &p, NULL); - } - - /* Must be reset upon idling, or we may miss the busy wakeup. */ - GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); - - if (engine->park) - engine->park(engine); - - if (engine->pinned_default_state) { - i915_gem_object_unpin_map(engine->default_state); - engine->pinned_default_state = NULL; - } - - i915_gem_batch_pool_fini(&engine->batch_pool); - engine->execlists.no_priolist = false; - } - - i915->gt.active_engines = 0; -} - -/** - * intel_engines_unpark: called when the GT is transitioning from idle->busy - * @i915: the i915 device - * - * The GT was idle and now about to fire up with some new user requests. - */ -void intel_engines_unpark(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) { - void *map; - - /* Pin the default state for fast resets from atomic context. */ - map = NULL; - if (engine->default_state) - map = i915_gem_object_pin_map(engine->default_state, - I915_MAP_WB); - if (!IS_ERR_OR_NULL(map)) - engine->pinned_default_state = map; - - if (engine->unpark) - engine->unpark(engine); - - intel_engine_init_hangcheck(engine); - } -} - /** * intel_engine_lost_context: called when the GPU is reset into unknown state * @engine: the engine @@ -1523,6 +1390,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, if (i915_reset_failed(engine->i915)) drm_printf(m, "*** WEDGED ***\n"); + drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count)); drm_printf(m, "\tHangcheck %x:%x [%d ms]\n", engine->hangcheck.last_seqno, engine->hangcheck.next_seqno, diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c new file mode 100644 index 000000000000..3976aea3c1d1 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -0,0 +1,147 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" + +#include "intel_engine.h" +#include "intel_engine_pm.h" +#include "intel_gt_pm.h" + +static int intel_engine_unpark(struct intel_wakeref *wf) +{ + struct intel_engine_cs *engine = + container_of(wf, typeof(*engine), wakeref); + void *map; + + GEM_TRACE("%s\n", engine->name); + + intel_gt_pm_get(engine->i915); + + /* Pin the default state for fast resets from atomic context. */ + map = NULL; + if (engine->default_state) + map = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (!IS_ERR_OR_NULL(map)) + engine->pinned_default_state = map; + + if (engine->unpark) + engine->unpark(engine); + + intel_engine_init_hangcheck(engine); + return 0; +} + +void intel_engine_pm_get(struct intel_engine_cs *engine) +{ + intel_wakeref_get(engine->i915, &engine->wakeref, intel_engine_unpark); +} + +static bool switch_to_kernel_context(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + /* Already inside the kernel context, safe to power down. */ + if (engine->wakeref_serial == engine->serial) + return true; + + /* GPU is pointing to the void, as good as in the kernel context. */ + if (i915_reset_failed(engine->i915)) + return true; + + /* + * Note, we do this without taking the timeline->mutex. We cannot + * as we may be called while retiring the kernel context and so + * already underneath the timeline->mutex. Instead we rely on the + * exclusive property of the intel_engine_park that prevents anyone + * else from creating a request on this engine. This also requires + * that the ring is empty and we avoid any waits while constructing + * the context, as they assume protection by the timeline->mutex. + * This should hold true as we can only park the engine after + * retiring the last request, thus all rings should be empty and + * all timelines idle. + */ + rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT); + if (IS_ERR(rq)) + /* Context switch failed, hope for the best! Maybe reset? */ + return true; + + /* Check again on the next retirement. */ + engine->wakeref_serial = engine->serial + 1; + __i915_request_commit(rq); + + return false; +} + +static int intel_engine_park(struct intel_wakeref *wf) +{ + struct intel_engine_cs *engine = + container_of(wf, typeof(*engine), wakeref); + + /* + * If one and only one request is completed between pm events, + * we know that we are inside the kernel context and it is + * safe to power down. (We are paranoid in case that runtime + * suspend causes corruption to the active context image, and + * want to avoid that impacting userspace.) + */ + if (!switch_to_kernel_context(engine)) + return -EBUSY; + + GEM_TRACE("%s\n", engine->name); + + intel_engine_disarm_breadcrumbs(engine); + + /* Must be reset upon idling, or we may miss the busy wakeup. */ + GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); + + if (engine->park) + engine->park(engine); + + if (engine->pinned_default_state) { + i915_gem_object_unpin_map(engine->default_state); + engine->pinned_default_state = NULL; + } + + engine->execlists.no_priolist = false; + + intel_gt_pm_put(engine->i915); + return 0; +} + +void intel_engine_pm_put(struct intel_engine_cs *engine) +{ + intel_wakeref_put(engine->i915, &engine->wakeref, intel_engine_park); +} + +void intel_engine_init__pm(struct intel_engine_cs *engine) +{ + intel_wakeref_init(&engine->wakeref); +} + +int intel_engines_resume(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + intel_gt_pm_get(i915); + for_each_engine(engine, i915, id) { + intel_engine_pm_get(engine); + engine->serial++; /* kernel context lost */ + err = engine->resume(engine); + intel_engine_pm_put(engine); + if (err) { + dev_err(i915->drm.dev, + "Failed to restart %s (%d)\n", + engine->name, err); + break; + } + } + intel_gt_pm_put(i915); + + return err; +} diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h new file mode 100644 index 000000000000..143ac90ba117 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -0,0 +1,20 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_ENGINE_PM_H +#define INTEL_ENGINE_PM_H + +struct drm_i915_private; +struct intel_engine_cs; + +void intel_engine_pm_get(struct intel_engine_cs *engine); +void intel_engine_pm_put(struct intel_engine_cs *engine); + +void intel_engine_init__pm(struct intel_engine_cs *engine); + +int intel_engines_resume(struct drm_i915_private *i915); + +#endif /* INTEL_ENGINE_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 3adf58da6d2c..d972c339309c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -20,6 +20,7 @@ #include "i915_selftest.h" #include "i915_timeline_types.h" #include "intel_sseu.h" +#include "intel_wakeref.h" #include "intel_workarounds_types.h" #define I915_MAX_SLICES 3 @@ -287,6 +288,10 @@ struct intel_engine_cs { struct intel_context *kernel_context; /* pinned */ struct intel_context *preempt_context; /* pinned; optional */ + unsigned long serial; + + unsigned long wakeref_serial; + struct intel_wakeref wakeref; struct drm_i915_gem_object *default_state; void *pinned_default_state; @@ -359,7 +364,7 @@ struct intel_engine_cs { void (*irq_enable)(struct intel_engine_cs *engine); void (*irq_disable)(struct intel_engine_cs *engine); - int (*init_hw)(struct intel_engine_cs *engine); + int (*resume)(struct intel_engine_cs *engine); struct { void (*prepare)(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c new file mode 100644 index 000000000000..ae7155f0e063 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -0,0 +1,143 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_gt_pm.h" +#include "intel_pm.h" +#include "intel_wakeref.h" + +static void pm_notify(struct drm_i915_private *i915, int state) +{ + blocking_notifier_call_chain(&i915->gt.pm_notifications, state, i915); +} + +static int intel_gt_unpark(struct intel_wakeref *wf) +{ + struct drm_i915_private *i915 = + container_of(wf, typeof(*i915), gt.wakeref); + + GEM_TRACE("\n"); + + /* + * It seems that the DMC likes to transition between the DC states a lot + * when there are no connected displays (no active power domains) during + * command submission. + * + * This activity has negative impact on the performance of the chip with + * huge latencies observed in the interrupt handler and elsewhere. + * + * Work around it by grabbing a GT IRQ power domain whilst there is any + * GT activity, preventing any DC state transitions. + */ + i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + GEM_BUG_ON(!i915->gt.awake); + + intel_enable_gt_powersave(i915); + + i915_update_gfx_val(i915); + if (INTEL_GEN(i915) >= 6) + gen6_rps_busy(i915); + + i915_pmu_gt_unparked(i915); + + i915_queue_hangcheck(i915); + + pm_notify(i915, INTEL_GT_UNPARK); + + return 0; +} + +void intel_gt_pm_get(struct drm_i915_private *i915) +{ + intel_wakeref_get(i915, &i915->gt.wakeref, intel_gt_unpark); +} + +static int intel_gt_park(struct intel_wakeref *wf) +{ + struct drm_i915_private *i915 = + container_of(wf, typeof(*i915), gt.wakeref); + intel_wakeref_t wakeref = fetch_and_zero(&i915->gt.awake); + + GEM_TRACE("\n"); + + pm_notify(i915, INTEL_GT_PARK); + + i915_pmu_gt_parked(i915); + if (INTEL_GEN(i915) >= 6) + gen6_rps_idle(i915); + + GEM_BUG_ON(!wakeref); + intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref); + + return 0; +} + +void intel_gt_pm_put(struct drm_i915_private *i915) +{ + intel_wakeref_put(i915, &i915->gt.wakeref, intel_gt_park); +} + +void intel_gt_pm_init(struct drm_i915_private *i915) +{ + intel_wakeref_init(&i915->gt.wakeref); + BLOCKING_INIT_NOTIFIER_HEAD(&i915->gt.pm_notifications); +} + +static bool reset_engines(struct drm_i915_private *i915) +{ + if (INTEL_INFO(i915)->gpu_reset_clobbers_display) + return false; + + return intel_gpu_reset(i915, ALL_ENGINES) == 0; +} + +/** + * intel_gt_sanitize: called after the GPU has lost power + * @i915: the i915 device + * @force: ignore a failed reset and sanitize engine state anyway + * + * Anytime we reset the GPU, either with an explicit GPU reset or through a + * PCI power cycle, the GPU loses state and we must reset our state tracking + * to match. Note that calling intel_gt_sanitize() if the GPU has not + * been reset results in much confusion! + */ +void intel_gt_sanitize(struct drm_i915_private *i915, bool force) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + GEM_TRACE("\n"); + + if (!reset_engines(i915) && !force) + return; + + for_each_engine(engine, i915, id) + intel_engine_reset(engine, false); +} + +void intel_gt_resume(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* + * After resume, we may need to poke into the pinned kernel + * contexts to paper over any damage caused by the sudden suspend. + * Only the kernel contexts should remain pinned over suspend, + * allowing us to fixup the user contexts on their first pin. + */ + for_each_engine(engine, i915, id) { + struct intel_context *ce; + + ce = engine->kernel_context; + if (ce) + ce->ops->reset(ce); + + ce = engine->preempt_context; + if (ce) + ce->ops->reset(ce); + } +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h new file mode 100644 index 000000000000..7dd1130a19a4 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_GT_PM_H +#define INTEL_GT_PM_H + +#include + +struct drm_i915_private; + +enum { + INTEL_GT_UNPARK, + INTEL_GT_PARK, +}; + +void intel_gt_pm_get(struct drm_i915_private *i915); +void intel_gt_pm_put(struct drm_i915_private *i915); + +void intel_gt_pm_init(struct drm_i915_private *i915); + +void intel_gt_sanitize(struct drm_i915_private *i915, bool force); +void intel_gt_resume(struct drm_i915_private *i915); + +#endif /* INTEL_GT_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c index 3053a706a561..e5eaa06fe74d 100644 --- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c @@ -256,6 +256,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) struct intel_engine_cs *engine; enum intel_engine_id id; unsigned int hung = 0, stuck = 0, wedged = 0; + intel_wakeref_t wakeref; if (!i915_modparams.enable_hangcheck) return; @@ -266,6 +267,10 @@ static void i915_hangcheck_elapsed(struct work_struct *work) if (i915_terminally_wedged(dev_priv)) return; + wakeref = intel_runtime_pm_get_if_in_use(dev_priv); + if (!wakeref) + return; + /* As enabling the GPU requires fairly extensive mmio access, * periodically arm the mmio checker to see if we are triggering * any invalid access. @@ -313,6 +318,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work) if (hung) hangcheck_declare_hang(dev_priv, hung, stuck); + intel_runtime_pm_put(dev_priv, wakeref); + /* Reset timer in case GPU hangs without another request being added */ i915_queue_hangcheck(dev_priv); } diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index edec7f183688..d17c08e26935 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1789,7 +1789,7 @@ static bool unexpected_starting_state(struct intel_engine_cs *engine) return unexpected; } -static int gen8_init_common_ring(struct intel_engine_cs *engine) +static int execlists_resume(struct intel_engine_cs *engine) { intel_engine_apply_workarounds(engine); intel_engine_apply_whitelist(engine); @@ -1822,7 +1822,7 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) * completed the reset in i915_gem_reset_finish(). If a request * is completed by one engine, it may then queue a request * to a second via its execlists->tasklet *just* as we are - * calling engine->init_hw() and also writing the ELSP. + * calling engine->resume() and also writing the ELSP. * Turning off the execlists->tasklet until the reset is over * prevents the race. */ @@ -2391,7 +2391,7 @@ static void logical_ring_default_vfuncs(struct intel_engine_cs *engine) { /* Default vfuncs which can be overriden by each engine. */ - engine->init_hw = gen8_init_common_ring; + engine->resume = execlists_resume; engine->reset.prepare = execlists_reset_prepare; engine->reset.reset = execlists_reset; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index cdf184403d46..3424d28650af 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -9,6 +9,8 @@ #include "i915_drv.h" #include "i915_gpu_error.h" +#include "intel_engine_pm.h" +#include "intel_gt_pm.h" #include "intel_reset.h" #include "intel_guc.h" @@ -680,6 +682,7 @@ static void reset_prepare_engine(struct intel_engine_cs *engine) * written to the powercontext is undefined and so we may lose * GPU state upon resume, i.e. fail to restart after a reset. */ + intel_engine_pm_get(engine); intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); engine->reset.prepare(engine); } @@ -715,6 +718,7 @@ static void reset_prepare(struct drm_i915_private *i915) struct intel_engine_cs *engine; enum intel_engine_id id; + intel_gt_pm_get(i915); for_each_engine(engine, i915, id) reset_prepare_engine(engine); @@ -752,48 +756,10 @@ static int gt_reset(struct drm_i915_private *i915, static void reset_finish_engine(struct intel_engine_cs *engine) { engine->reset.finish(engine); + intel_engine_pm_put(engine); intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); } -struct i915_gpu_restart { - struct work_struct work; - struct drm_i915_private *i915; -}; - -static void restart_work(struct work_struct *work) -{ - struct i915_gpu_restart *arg = container_of(work, typeof(*arg), work); - struct drm_i915_private *i915 = arg->i915; - struct intel_engine_cs *engine; - enum intel_engine_id id; - intel_wakeref_t wakeref; - - wakeref = intel_runtime_pm_get(i915); - mutex_lock(&i915->drm.struct_mutex); - WRITE_ONCE(i915->gpu_error.restart, NULL); - - for_each_engine(engine, i915, id) { - struct i915_request *rq; - - /* - * Ostensibily, we always want a context loaded for powersaving, - * so if the engine is idle after the reset, send a request - * to load our scratch kernel_context. - */ - if (!intel_engine_is_idle(engine)) - continue; - - rq = i915_request_create(engine->kernel_context); - if (!IS_ERR(rq)) - i915_request_add(rq); - } - - mutex_unlock(&i915->drm.struct_mutex); - intel_runtime_pm_put(i915, wakeref); - - kfree(arg); -} - static void reset_finish(struct drm_i915_private *i915) { struct intel_engine_cs *engine; @@ -803,29 +769,7 @@ static void reset_finish(struct drm_i915_private *i915) reset_finish_engine(engine); intel_engine_signal_breadcrumbs(engine); } -} - -static void reset_restart(struct drm_i915_private *i915) -{ - struct i915_gpu_restart *arg; - - /* - * Following the reset, ensure that we always reload context for - * powersaving, and to correct engine->last_retired_context. Since - * this requires us to submit a request, queue a worker to do that - * task for us to evade any locking here. - */ - if (READ_ONCE(i915->gpu_error.restart)) - return; - - arg = kmalloc(sizeof(*arg), GFP_KERNEL); - if (arg) { - arg->i915 = i915; - INIT_WORK(&arg->work, restart_work); - - WRITE_ONCE(i915->gpu_error.restart, arg); - queue_work(i915->wq, &arg->work); - } + intel_gt_pm_put(i915); } static void nop_submit_request(struct i915_request *request) @@ -886,6 +830,7 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915) * in nop_submit_request. */ synchronize_rcu_expedited(); + set_bit(I915_WEDGED, &error->flags); /* Mark all executing requests as skipped */ for_each_engine(engine, i915, id) @@ -893,9 +838,6 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915) reset_finish(i915); - smp_mb__before_atomic(); - set_bit(I915_WEDGED, &error->flags); - GEM_TRACE("end\n"); } @@ -953,7 +895,7 @@ static bool __i915_gem_unset_wedged(struct drm_i915_private *i915) } mutex_unlock(&i915->gt.timelines.mutex); - intel_engines_sanitize(i915, false); + intel_gt_sanitize(i915, false); /* * Undo nop_submit_request. We prevent all new i915 requests from @@ -1031,7 +973,6 @@ void i915_reset(struct drm_i915_private *i915, GEM_TRACE("flags=%lx\n", error->flags); might_sleep(); - assert_rpm_wakelock_held(i915); GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); /* Clear any previous failed attempts at recovery. Time to try again. */ @@ -1084,8 +1025,6 @@ void i915_reset(struct drm_i915_private *i915, finish: reset_finish(i915); - if (!__i915_wedged(error)) - reset_restart(i915); return; taint: @@ -1134,6 +1073,9 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) GEM_TRACE("%s flags=%lx\n", engine->name, error->flags); GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); + if (!intel_wakeref_active(&engine->wakeref)) + return 0; + reset_prepare_engine(engine); if (msg) @@ -1165,7 +1107,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) * have been reset to their default values. Follow the init_ring * process to program RING_MODE, HWSP and re-enable submission. */ - ret = engine->init_hw(engine); + ret = engine->resume(engine); if (ret) goto out; @@ -1422,25 +1364,6 @@ int i915_terminally_wedged(struct drm_i915_private *i915) return __i915_wedged(error) ? -EIO : 0; } -bool i915_reset_flush(struct drm_i915_private *i915) -{ - int err; - - cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); - - flush_workqueue(i915->wq); - GEM_BUG_ON(READ_ONCE(i915->gpu_error.restart)); - - mutex_lock(&i915->drm.struct_mutex); - err = i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED | - I915_WAIT_FOR_IDLE_BOOST, - MAX_SCHEDULE_TIMEOUT); - mutex_unlock(&i915->drm.struct_mutex); - - return !err; -} - static void i915_wedge_me(struct work_struct *work) { struct i915_wedge_me *w = container_of(work, typeof(*w), work.work); diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h index 8e662bb43a9b..b52efaab4941 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.h +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -34,7 +34,6 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *reason); void i915_reset_request(struct i915_request *rq, bool guilty); -bool i915_reset_flush(struct drm_i915_private *i915); int __must_check i915_reset_trylock(struct drm_i915_private *i915); void i915_reset_unlock(struct drm_i915_private *i915, int tag); diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index f89541274d44..b791da2711e0 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -637,12 +637,15 @@ static bool stop_ring(struct intel_engine_cs *engine) return (ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) == 0; } -static int init_ring_common(struct intel_engine_cs *engine) +static int xcs_resume(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; struct intel_ring *ring = engine->buffer; int ret = 0; + GEM_TRACE("%s: ring:{HEAD:%04x, TAIL:%04x}\n", + engine->name, ring->head, ring->tail); + intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); if (!stop_ring(engine)) { @@ -827,7 +830,7 @@ static int intel_rcs_ctx_init(struct i915_request *rq) return 0; } -static int init_render_ring(struct intel_engine_cs *engine) +static int rcs_resume(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -869,7 +872,7 @@ static int init_render_ring(struct intel_engine_cs *engine) if (IS_GEN_RANGE(dev_priv, 6, 7)) I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); - return init_ring_common(engine); + return xcs_resume(engine); } static void cancel_requests(struct intel_engine_cs *engine) @@ -2201,7 +2204,7 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, intel_ring_init_irq(dev_priv, engine); - engine->init_hw = init_ring_common; + engine->resume = xcs_resume; engine->reset.prepare = reset_prepare; engine->reset.reset = reset_ring; engine->reset.finish = reset_finish; @@ -2263,7 +2266,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) if (IS_HASWELL(dev_priv)) engine->emit_bb_start = hsw_emit_bb_start; - engine->init_hw = init_render_ring; + engine->resume = rcs_resume; ret = intel_init_ring_buffer(engine); if (ret) diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index bcfeb0c67997..a97a0ab35703 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -24,6 +24,7 @@ #include "i915_drv.h" #include "intel_context.h" +#include "intel_engine_pm.h" #include "mock_engine.h" #include "selftests/mock_request.h" @@ -268,6 +269,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE); intel_engine_init_breadcrumbs(&engine->base); + intel_engine_init_execlists(&engine->base); + intel_engine_init__pm(&engine->base); /* fake hw queue */ spin_lock_init(&engine->hw_lock); diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index acd33aa46068..9dece55a091c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -24,6 +24,8 @@ #include +#include "intel_engine_pm.h" + #include "i915_selftest.h" #include "selftests/i915_random.h" #include "selftests/igt_flush_test.h" @@ -479,19 +481,6 @@ static int igt_reset_nop(void *arg) break; } - if (!i915_reset_flush(i915)) { - struct drm_printer p = - drm_info_printer(i915->drm.dev); - - pr_err("%s failed to idle after reset\n", - engine->name); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - - err = -EIO; - break; - } - err = igt_flush_test(i915, 0); if (err) break; @@ -594,19 +583,6 @@ static int igt_reset_nop_engine(void *arg) err = -EINVAL; break; } - - if (!i915_reset_flush(i915)) { - struct drm_printer p = - drm_info_printer(i915->drm.dev); - - pr_err("%s failed to idle after reset\n", - engine->name); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - - err = -EIO; - break; - } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); pr_info("%s(%s): %d resets\n", __func__, engine->name, count); @@ -669,6 +645,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) reset_engine_count = i915_reset_engine_count(&i915->gpu_error, engine); + intel_engine_pm_get(engine); set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { if (active) { @@ -721,21 +698,9 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) err = -EINVAL; break; } - - if (!i915_reset_flush(i915)) { - struct drm_printer p = - drm_info_printer(i915->drm.dev); - - pr_err("%s failed to idle after reset\n", - engine->name); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - - err = -EIO; - break; - } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + intel_engine_pm_put(engine); if (err) break; @@ -942,6 +907,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, get_task_struct(tsk); } + intel_engine_pm_get(engine); set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { struct i915_request *rq = NULL; @@ -1018,6 +984,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + intel_engine_pm_put(engine); pr_info("i915_reset_engine(%s:%s): %lu resets\n", engine->name, test_name, count); @@ -1069,7 +1036,9 @@ unwind: if (err) break; - err = igt_flush_test(i915, 0); + mutex_lock(&i915->drm.struct_mutex); + err = igt_flush_test(i915, I915_WAIT_LOCKED); + mutex_unlock(&i915->drm.struct_mutex); if (err) break; } diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index e61e47421ed2..6b9e9b6d82f7 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -71,7 +71,6 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { const u32 base = engine->mmio_base; struct drm_i915_gem_object *result; - intel_wakeref_t wakeref; struct i915_request *rq; struct i915_vma *vma; u32 srm, *cs; @@ -103,9 +102,7 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) if (err) goto err_obj; - rq = ERR_PTR(-ENODEV); - with_intel_runtime_pm(engine->i915, wakeref) - rq = i915_request_alloc(engine, ctx); + rq = i915_request_alloc(engine, ctx); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_pin; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index f77263d42253..6972f9b6ae83 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2041,8 +2041,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) } seq_printf(m, "RPS enabled? %d\n", rps->enabled); - seq_printf(m, "GPU busy? %s [%d requests]\n", - yesno(dev_priv->gt.awake), dev_priv->gt.active_requests); + seq_printf(m, "GPU busy? %s\n", yesno(dev_priv->gt.awake)); seq_printf(m, "Boosts outstanding? %d\n", atomic_read(&rps->num_waiters)); seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive)); @@ -2061,9 +2060,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts)); - if (INTEL_GEN(dev_priv) >= 6 && - rps->enabled && - dev_priv->gt.active_requests) { + if (INTEL_GEN(dev_priv) >= 6 && rps->enabled && dev_priv->gt.awake) { u32 rpup, rpupei; u32 rpdown, rpdownei; @@ -3093,9 +3090,9 @@ static int i915_engine_info(struct seq_file *m, void *unused) wakeref = intel_runtime_pm_get(dev_priv); - seq_printf(m, "GT awake? %s\n", yesno(dev_priv->gt.awake)); - seq_printf(m, "Global active requests: %d\n", - dev_priv->gt.active_requests); + seq_printf(m, "GT awake? %s [%d]\n", + yesno(dev_priv->gt.awake), + atomic_read(&dev_priv->gt.wakeref.count)); seq_printf(m, "CS timestamp frequency: %u kHz\n", RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz); @@ -3941,8 +3938,7 @@ i915_drop_caches_set(void *data, u64 val) if (val & DROP_IDLE) { do { - if (READ_ONCE(i915->gt.active_requests)) - flush_delayed_work(&i915->gem.retire_work); + flush_delayed_work(&i915->gem.retire_work); drain_delayed_work(&i915->gem.idle_work); } while (READ_ONCE(i915->gt.awake)); } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index ac416d2c02ca..824409ffd03f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -47,8 +47,9 @@ #include #include -#include "gt/intel_workarounds.h" +#include "gt/intel_gt_pm.h" #include "gt/intel_reset.h" +#include "gt/intel_workarounds.h" #include "i915_drv.h" #include "i915_pmu.h" @@ -2323,7 +2324,7 @@ static int i915_drm_resume_early(struct drm_device *dev) intel_power_domains_resume(dev_priv); - intel_engines_sanitize(dev_priv, true); + intel_gt_sanitize(dev_priv, true); enable_rpm_wakeref_asserts(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 45e027f45e62..5c77bf5b735b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2006,10 +2006,10 @@ struct drm_i915_private { struct list_head hwsp_free_list; } timelines; - intel_engine_mask_t active_engines; struct list_head active_rings; struct list_head closed_vma; - u32 active_requests; + + struct intel_wakeref wakeref; /** * Is the GPU currently considered idle, or busy executing @@ -2020,12 +2020,16 @@ struct drm_i915_private { */ intel_wakeref_t awake; + struct blocking_notifier_head pm_notifications; + ktime_t last_init_time; struct i915_vma *scratch; } gt; struct { + struct notifier_block pm_notifier; + /** * We leave the user IRQ off as much as possible, * but this means that requests will finish and never diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7f833c97138e..54f27cabae2a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -39,6 +39,8 @@ #include #include +#include "gt/intel_engine_pm.h" +#include "gt/intel_gt_pm.h" #include "gt/intel_mocs.h" #include "gt/intel_reset.h" #include "gt/intel_workarounds.h" @@ -2888,9 +2890,6 @@ wait_for_timelines(struct drm_i915_private *i915, struct i915_gt_timelines *gt = &i915->gt.timelines; struct i915_timeline *tl; - if (!READ_ONCE(i915->gt.active_requests)) - return timeout; - mutex_lock(>->mutex); list_for_each_entry(tl, >->active_list, link) { struct i915_request *rq; @@ -2930,9 +2929,10 @@ wait_for_timelines(struct drm_i915_private *i915, int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags, long timeout) { - GEM_TRACE("flags=%x (%s), timeout=%ld%s\n", + GEM_TRACE("flags=%x (%s), timeout=%ld%s, awake?=%s\n", flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked", - timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : ""); + timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "", + yesno(i915->gt.awake)); /* If the device is asleep, we have no requests outstanding */ if (!READ_ONCE(i915->gt.awake)) @@ -4154,7 +4154,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915) * it may impact the display and we are uncertain about the stability * of the reset, so this could be applied to even earlier gen. */ - intel_engines_sanitize(i915, false); + intel_gt_sanitize(i915, false); intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); intel_runtime_pm_put(i915, wakeref); @@ -4212,27 +4212,6 @@ static void init_unused_rings(struct drm_i915_private *dev_priv) } } -static int __i915_gem_restart_engines(void *data) -{ - struct drm_i915_private *i915 = data; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err; - - for_each_engine(engine, i915, id) { - err = engine->init_hw(engine); - if (err) { - DRM_ERROR("Failed to restart %s (%d)\n", - engine->name, err); - return err; - } - } - - intel_engines_set_scheduler_caps(i915); - - return 0; -} - int i915_gem_init_hw(struct drm_i915_private *dev_priv) { int ret; @@ -4291,12 +4270,13 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) intel_mocs_init_l3cc_table(dev_priv); /* Only when the HW is re-initialised, can we replay the requests */ - ret = __i915_gem_restart_engines(dev_priv); + ret = intel_engines_resume(dev_priv); if (ret) goto cleanup_uc; intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); + intel_engines_set_scheduler_caps(dev_priv); return 0; cleanup_uc: @@ -4602,6 +4582,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) err_init_hw: mutex_unlock(&dev_priv->drm.struct_mutex); + i915_gem_set_wedged(dev_priv); i915_gem_suspend(dev_priv); i915_gem_suspend_late(dev_priv); @@ -4663,6 +4644,8 @@ err_uc_misc: void i915_gem_fini(struct drm_i915_private *dev_priv) { + GEM_BUG_ON(dev_priv->gt.awake); + i915_gem_suspend_late(dev_priv); intel_disable_gt_powersave(dev_priv); @@ -4757,6 +4740,8 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) { int err; + intel_gt_pm_init(dev_priv); + INIT_LIST_HEAD(&dev_priv->gt.active_rings); INIT_LIST_HEAD(&dev_priv->gt.closed_vma); diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 9074eb1e843f..67f8a4a807a0 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -75,9 +75,6 @@ struct drm_i915_private; #define I915_GEM_IDLE_TIMEOUT (HZ / 5) -void i915_gem_park(struct drm_i915_private *i915); -void i915_gem_unpark(struct drm_i915_private *i915); - static inline void __tasklet_disable_sync_once(struct tasklet_struct *t) { if (!atomic_fetch_inc(&t->count)) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 3eb1a664b5fa..76ed74e75d82 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -824,26 +824,6 @@ int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data, return 0; } -static struct i915_request * -last_request_on_engine(struct i915_timeline *timeline, - struct intel_engine_cs *engine) -{ - struct i915_request *rq; - - GEM_BUG_ON(timeline == &engine->timeline); - - rq = i915_active_request_raw(&timeline->last_request, - &engine->i915->drm.struct_mutex); - if (rq && rq->engine->mask & engine->mask) { - GEM_TRACE("last request on engine %s: %llx:%llu\n", - engine->name, rq->fence.context, rq->fence.seqno); - GEM_BUG_ON(rq->timeline != timeline); - return rq; - } - - return NULL; -} - struct context_barrier_task { struct i915_active base; void (*task)(void *data); @@ -871,7 +851,6 @@ static int context_barrier_task(struct i915_gem_context *ctx, struct drm_i915_private *i915 = ctx->i915; struct context_barrier_task *cb; struct intel_context *ce, *next; - intel_wakeref_t wakeref; int err = 0; lockdep_assert_held(&i915->drm.struct_mutex); @@ -884,7 +863,6 @@ static int context_barrier_task(struct i915_gem_context *ctx, i915_active_init(i915, &cb->base, cb_retire); i915_active_acquire(&cb->base); - wakeref = intel_runtime_pm_get(i915); rbtree_postorder_for_each_entry_safe(ce, next, &ctx->hw_contexts, node) { struct intel_engine_cs *engine = ce->engine; struct i915_request *rq; @@ -914,7 +892,6 @@ static int context_barrier_task(struct i915_gem_context *ctx, if (err) break; } - intel_runtime_pm_put(i915, wakeref); cb->task = err ? NULL : task; /* caller needs to unwind instead */ cb->data = data; @@ -924,54 +901,6 @@ static int context_barrier_task(struct i915_gem_context *ctx, return err; } -int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915, - intel_engine_mask_t mask) -{ - struct intel_engine_cs *engine; - - GEM_TRACE("awake?=%s\n", yesno(i915->gt.awake)); - - lockdep_assert_held(&i915->drm.struct_mutex); - GEM_BUG_ON(!i915->kernel_context); - - /* Inoperable, so presume the GPU is safely pointing into the void! */ - if (i915_terminally_wedged(i915)) - return 0; - - for_each_engine_masked(engine, i915, mask, mask) { - struct intel_ring *ring; - struct i915_request *rq; - - rq = i915_request_create(engine->kernel_context); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - /* Queue this switch after all other activity */ - list_for_each_entry(ring, &i915->gt.active_rings, active_link) { - struct i915_request *prev; - - prev = last_request_on_engine(ring->timeline, engine); - if (!prev) - continue; - - if (prev->gem_context == i915->kernel_context) - continue; - - GEM_TRACE("add barrier on %s for %llx:%lld\n", - engine->name, - prev->fence.context, - prev->fence.seqno); - i915_sw_fence_await_sw_fence_gfp(&rq->submit, - &prev->submit, - I915_FENCE_GFP); - } - - i915_request_add(rq); - } - - return 0; -} - static int get_ppgtt(struct drm_i915_file_private *file_priv, struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args) @@ -1169,9 +1098,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq, static int gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) { - struct drm_i915_private *i915 = ce->engine->i915; struct i915_request *rq; - intel_wakeref_t wakeref; int ret; lockdep_assert_held(&ce->pin_mutex); @@ -1185,14 +1112,9 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) if (!intel_context_is_pinned(ce)) return 0; - /* Submitting requests etc needs the hw awake. */ - wakeref = intel_runtime_pm_get(i915); - rq = i915_request_create(ce->engine->kernel_context); - if (IS_ERR(rq)) { - ret = PTR_ERR(rq); - goto out_put; - } + if (IS_ERR(rq)) + return PTR_ERR(rq); /* Queue this switch after all other activity by this context. */ ret = i915_active_request_set(&ce->ring->timeline->last_request, rq); @@ -1216,9 +1138,6 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) out_add: i915_request_add(rq); -out_put: - intel_runtime_pm_put(i915, wakeref); - return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index cec278ab04e2..5a8e080499fb 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -141,10 +141,6 @@ int i915_gem_context_open(struct drm_i915_private *i915, struct drm_file *file); void i915_gem_context_close(struct drm_file *file); -int i915_switch_context(struct i915_request *rq); -int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask); - void i915_gem_context_release(struct kref *ctx_ref); struct i915_gem_context * i915_gem_context_create_gvt(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 060f5903544a..0bdb3e072ba5 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -36,15 +36,8 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl { bool fail_if_busy:1; } igt_evict_ctl;) -static bool ggtt_is_idle(struct drm_i915_private *i915) -{ - return !i915->gt.active_requests; -} - static int ggtt_flush(struct drm_i915_private *i915) { - int err; - /* * Not everything in the GGTT is tracked via vma (otherwise we * could evict as required with minimal stalling) so we are forced @@ -52,19 +45,10 @@ static int ggtt_flush(struct drm_i915_private *i915) * the hopes that we can then remove contexts and the like only * bound by their active reference. */ - err = i915_gem_switch_to_kernel_context(i915, i915->gt.active_engines); - if (err) - return err; - - err = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (err) - return err; - - GEM_BUG_ON(!ggtt_is_idle(i915)); - return 0; + return i915_gem_wait_for_idle(i915, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); } static bool @@ -222,24 +206,17 @@ search_again: * us a termination condition, when the last retired context is * the kernel's there is no more we can evict. */ - if (!ggtt_is_idle(dev_priv)) { - if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy)) - return -EBUSY; + if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy)) + return -EBUSY; - ret = ggtt_flush(dev_priv); - if (ret) - return ret; + ret = ggtt_flush(dev_priv); + if (ret) + return ret; - cond_resched(); - goto search_again; - } + cond_resched(); - /* - * If we still have pending pageflip completions, drop - * back to userspace to give our workqueues time to - * acquire our locks and unpin the old scanouts. - */ - return intel_has_pending_fb_unpin(dev_priv) ? -EAGAIN : -ENOSPC; + flags |= PIN_NONBLOCK; + goto search_again; found: /* drm_mm doesn't allow any other other operations while diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c index 9fb0e8d567a2..3554d55dae35 100644 --- a/drivers/gpu/drm/i915/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/i915_gem_pm.c @@ -4,136 +4,63 @@ * Copyright © 2019 Intel Corporation */ +#include "gt/intel_gt_pm.h" + #include "i915_drv.h" #include "i915_gem_pm.h" #include "i915_globals.h" -#include "intel_pm.h" -static void __i915_gem_park(struct drm_i915_private *i915) +static void i915_gem_park(struct drm_i915_private *i915) { - intel_wakeref_t wakeref; - - GEM_TRACE("\n"); + struct intel_engine_cs *engine; + enum intel_engine_id id; lockdep_assert_held(&i915->drm.struct_mutex); - GEM_BUG_ON(i915->gt.active_requests); - GEM_BUG_ON(!list_empty(&i915->gt.active_rings)); - if (!i915->gt.awake) - return; + for_each_engine(engine, i915, id) { + /* + * We are committed now to parking the engines, make sure there + * will be no more interrupts arriving later and the engines + * are truly idle. + */ + if (wait_for(intel_engine_is_idle(engine), 10)) { + struct drm_printer p = drm_debug_printer(__func__); - /* - * Be paranoid and flush a concurrent interrupt to make sure - * we don't reactivate any irq tasklets after parking. - * - * FIXME: Note that even though we have waited for execlists to be idle, - * there may still be an in-flight interrupt even though the CSB - * is now empty. synchronize_irq() makes sure that a residual interrupt - * is completed before we continue, but it doesn't prevent the HW from - * raising a spurious interrupt later. To complete the shield we should - * coordinate disabling the CS irq with flushing the interrupts. - */ - synchronize_irq(i915->drm.irq); - - intel_engines_park(i915); - i915_timelines_park(i915); - - i915_pmu_gt_parked(i915); - i915_vma_parked(i915); - - wakeref = fetch_and_zero(&i915->gt.awake); - GEM_BUG_ON(!wakeref); - - if (INTEL_GEN(i915) >= 6) - gen6_rps_idle(i915); - - intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref); - - i915_globals_park(); -} - -static bool switch_to_kernel_context_sync(struct drm_i915_private *i915, - unsigned long mask) -{ - bool result = true; - - /* - * Even if we fail to switch, give whatever is running a small chance - * to save itself before we report the failure. Yes, this may be a - * false positive due to e.g. ENOMEM, caveat emptor! - */ - if (i915_gem_switch_to_kernel_context(i915, mask)) - result = false; - - if (i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED | - I915_WAIT_FOR_IDLE_BOOST, - I915_GEM_IDLE_TIMEOUT)) - result = false; - - if (!result) { - if (i915_modparams.reset) { /* XXX hide warning from gem_eio */ dev_err(i915->drm.dev, - "Failed to idle engines, declaring wedged!\n"); - GEM_TRACE_DUMP(); + "%s is not idle before parking\n", + engine->name); + intel_engine_dump(engine, &p, NULL); } + tasklet_kill(&engine->execlists.tasklet); - /* Forcibly cancel outstanding work and leave the gpu quiet. */ - i915_gem_set_wedged(i915); + i915_gem_batch_pool_fini(&engine->batch_pool); } - i915_retire_requests(i915); /* ensure we flush after wedging */ - return result; + i915_timelines_park(i915); + i915_vma_parked(i915); + + i915_globals_park(); } static void idle_work_handler(struct work_struct *work) { struct drm_i915_private *i915 = container_of(work, typeof(*i915), gem.idle_work.work); - bool rearm_hangcheck; - - if (!READ_ONCE(i915->gt.awake)) - return; - - if (READ_ONCE(i915->gt.active_requests)) - return; - - rearm_hangcheck = - cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); if (!mutex_trylock(&i915->drm.struct_mutex)) { /* Currently busy, come back later */ mod_delayed_work(i915->wq, &i915->gem.idle_work, msecs_to_jiffies(50)); - goto out_rearm; + return; } - /* - * Flush out the last user context, leaving only the pinned - * kernel context resident. Should anything unfortunate happen - * while we are idle (such as the GPU being power cycled), no users - * will be harmed. - */ - if (!work_pending(&i915->gem.idle_work.work) && - !i915->gt.active_requests) { - ++i915->gt.active_requests; /* don't requeue idle */ - - switch_to_kernel_context_sync(i915, i915->gt.active_engines); - - if (!--i915->gt.active_requests) { - __i915_gem_park(i915); - rearm_hangcheck = false; - } - } + intel_wakeref_lock(&i915->gt.wakeref); + if (!intel_wakeref_active(&i915->gt.wakeref)) + i915_gem_park(i915); + intel_wakeref_unlock(&i915->gt.wakeref); mutex_unlock(&i915->drm.struct_mutex); - -out_rearm: - if (rearm_hangcheck) { - GEM_BUG_ON(!i915->gt.awake); - i915_queue_hangcheck(i915); - } } static void retire_work_handler(struct work_struct *work) @@ -147,97 +74,76 @@ static void retire_work_handler(struct work_struct *work) mutex_unlock(&i915->drm.struct_mutex); } - /* - * Keep the retire handler running until we are finally idle. - * We do not need to do this test under locking as in the worst-case - * we queue the retire worker once too often. - */ - if (READ_ONCE(i915->gt.awake)) + if (intel_wakeref_active(&i915->gt.wakeref)) queue_delayed_work(i915->wq, &i915->gem.retire_work, round_jiffies_up_relative(HZ)); } -void i915_gem_park(struct drm_i915_private *i915) +static int pm_notifier(struct notifier_block *nb, + unsigned long action, + void *data) { - GEM_TRACE("\n"); + struct drm_i915_private *i915 = + container_of(nb, typeof(*i915), gem.pm_notifier); - lockdep_assert_held(&i915->drm.struct_mutex); - GEM_BUG_ON(i915->gt.active_requests); + switch (action) { + case INTEL_GT_UNPARK: + i915_globals_unpark(); + queue_delayed_work(i915->wq, + &i915->gem.retire_work, + round_jiffies_up_relative(HZ)); + break; - if (!i915->gt.awake) - return; + case INTEL_GT_PARK: + mod_delayed_work(i915->wq, + &i915->gem.idle_work, + msecs_to_jiffies(100)); + break; + } - /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */ - mod_delayed_work(i915->wq, &i915->gem.idle_work, msecs_to_jiffies(100)); + return NOTIFY_OK; } -void i915_gem_unpark(struct drm_i915_private *i915) +static bool switch_to_kernel_context_sync(struct drm_i915_private *i915) { - GEM_TRACE("\n"); + bool result = true; - lockdep_assert_held(&i915->drm.struct_mutex); - GEM_BUG_ON(!i915->gt.active_requests); - assert_rpm_wakelock_held(i915); + do { + if (i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED | + I915_WAIT_FOR_IDLE_BOOST, + I915_GEM_IDLE_TIMEOUT) == -ETIME) { + /* XXX hide warning from gem_eio */ + if (i915_modparams.reset) { + dev_err(i915->drm.dev, + "Failed to idle engines, declaring wedged!\n"); + GEM_TRACE_DUMP(); + } - if (i915->gt.awake) - return; + /* + * Forcibly cancel outstanding work and leave + * the gpu quiet. + */ + i915_gem_set_wedged(i915); + result = false; + } + } while (i915_retire_requests(i915) && result); - /* - * It seems that the DMC likes to transition between the DC states a lot - * when there are no connected displays (no active power domains) during - * command submission. - * - * This activity has negative impact on the performance of the chip with - * huge latencies observed in the interrupt handler and elsewhere. - * - * Work around it by grabbing a GT IRQ power domain whilst there is any - * GT activity, preventing any DC state transitions. - */ - i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); - GEM_BUG_ON(!i915->gt.awake); - - i915_globals_unpark(); - - intel_enable_gt_powersave(i915); - i915_update_gfx_val(i915); - if (INTEL_GEN(i915) >= 6) - gen6_rps_busy(i915); - i915_pmu_gt_unparked(i915); - - intel_engines_unpark(i915); - - i915_queue_hangcheck(i915); - - queue_delayed_work(i915->wq, - &i915->gem.retire_work, - round_jiffies_up_relative(HZ)); + GEM_BUG_ON(i915->gt.awake); + return result; } bool i915_gem_load_power_context(struct drm_i915_private *i915) { - /* Force loading the kernel context on all engines */ - if (!switch_to_kernel_context_sync(i915, ALL_ENGINES)) - return false; - - /* - * Immediately park the GPU so that we enable powersaving and - * treat it as idle. The next time we issue a request, we will - * unpark and start using the engine->pinned_default_state, otherwise - * it is in limbo and an early reset may fail. - */ - __i915_gem_park(i915); - - return true; + return switch_to_kernel_context_sync(i915); } void i915_gem_suspend(struct drm_i915_private *i915) { - intel_wakeref_t wakeref; - GEM_TRACE("\n"); - wakeref = intel_runtime_pm_get(i915); + flush_workqueue(i915->wq); mutex_lock(&i915->drm.struct_mutex); @@ -250,10 +156,16 @@ void i915_gem_suspend(struct drm_i915_private *i915) * state. Fortunately, the kernel_context is disposable and we do * not rely on its state. */ - switch_to_kernel_context_sync(i915, i915->gt.active_engines); + switch_to_kernel_context_sync(i915); mutex_unlock(&i915->drm.struct_mutex); - i915_reset_flush(i915); + + /* + * Assert that we successfully flushed all the work and + * reset the GPU back to its idle, low power state. + */ + GEM_BUG_ON(i915->gt.awake); + cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); drain_delayed_work(&i915->gem.retire_work); @@ -263,17 +175,9 @@ void i915_gem_suspend(struct drm_i915_private *i915) */ drain_delayed_work(&i915->gem.idle_work); - flush_workqueue(i915->wq); - - /* - * Assert that we successfully flushed all the work and - * reset the GPU back to its idle, low power state. - */ - GEM_BUG_ON(i915->gt.awake); + i915_gem_drain_freed_objects(i915); intel_uc_suspend(i915); - - intel_runtime_pm_put(i915, wakeref); } void i915_gem_suspend_late(struct drm_i915_private *i915) @@ -362,4 +266,8 @@ void i915_gem_init__pm(struct drm_i915_private *i915) { INIT_DELAYED_WORK(&i915->gem.idle_work, idle_work_handler); INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler); + + i915->gem.pm_notifier.notifier_call = pm_notifier; + blocking_notifier_chain_register(&i915->gt.pm_notifications, + &i915->gem.pm_notifier); } diff --git a/drivers/gpu/drm/i915/i915_gem_pm.h b/drivers/gpu/drm/i915/i915_gem_pm.h index 52f65e3f06b5..6f7d5d11ac3b 100644 --- a/drivers/gpu/drm/i915/i915_gem_pm.h +++ b/drivers/gpu/drm/i915/i915_gem_pm.h @@ -17,9 +17,6 @@ void i915_gem_init__pm(struct drm_i915_private *i915); bool i915_gem_load_power_context(struct drm_i915_private *i915); void i915_gem_resume(struct drm_i915_private *i915); -void i915_gem_unpark(struct drm_i915_private *i915); -void i915_gem_park(struct drm_i915_private *i915); - void i915_gem_idle_work_handler(struct work_struct *work); void i915_gem_suspend(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index b419d0f59275..2ecd0c6a1c94 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -179,8 +179,6 @@ struct i915_gpu_state { struct scatterlist *sgl, *fit; }; -struct i915_gpu_restart; - struct i915_gpu_error { /* For hangcheck timer */ #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ @@ -241,8 +239,6 @@ struct i915_gpu_error { wait_queue_head_t reset_queue; struct srcu_struct reset_backoff_srcu; - - struct i915_gpu_restart *restart; }; struct drm_i915_error_state_buf { diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 705c125bafc6..11c484e679b6 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -430,6 +430,8 @@ void __i915_request_submit(struct i915_request *request) /* Transfer from per-context onto the global per-engine timeline */ move_to_timeline(request, &engine->timeline); + engine->serial++; + trace_i915_request_execute(request); } @@ -1145,7 +1147,6 @@ struct i915_request *__i915_request_commit(struct i915_request *rq) list_add_tail(&rq->ring_link, &ring->request_list); if (list_is_first(&rq->ring_link, &ring->request_list)) list_add(&ring->active_link, &rq->i915->gt.active_rings); - rq->i915->gt.active_engines |= rq->engine->mask; rq->emitted_jiffies = jiffies; /* @@ -1440,21 +1441,20 @@ out: return timeout; } -void i915_retire_requests(struct drm_i915_private *i915) +bool i915_retire_requests(struct drm_i915_private *i915) { struct intel_ring *ring, *tmp; lockdep_assert_held(&i915->drm.struct_mutex); - if (!i915->gt.active_requests) - return; - list_for_each_entry_safe(ring, tmp, &i915->gt.active_rings, active_link) { intel_ring_get(ring); /* last rq holds reference! */ ring_retire_requests(ring); intel_ring_put(ring); } + + return !list_empty(&i915->gt.active_rings); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 36f13b74ec58..1eee7416af31 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -425,6 +425,6 @@ static inline void i915_request_mark_complete(struct i915_request *rq) rq->hwsp_seqno = (u32 *)&rq->fence.seqno; /* decouple from HWSP */ } -void i915_retire_requests(struct drm_i915_private *i915); +bool i915_retire_requests(struct drm_i915_private *i915); #endif /* I915_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 30a8e376d19f..01ea36e3150c 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -481,26 +481,22 @@ void intel_uc_reset_prepare(struct drm_i915_private *i915) intel_uc_sanitize(i915); } -int intel_uc_suspend(struct drm_i915_private *i915) +void intel_uc_suspend(struct drm_i915_private *i915) { struct intel_guc *guc = &i915->guc; + intel_wakeref_t wakeref; int err; - if (!USES_GUC(i915)) - return 0; - if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) - return 0; + return; - err = intel_guc_suspend(guc); - if (err) { - DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err); - return err; + with_intel_runtime_pm(i915, wakeref) { + err = intel_guc_suspend(guc); + if (err) + DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err); + + guc_disable_communication(guc); } - - guc_disable_communication(guc); - - return 0; } int intel_uc_resume(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index c14729786652..c92436b1f1c5 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -39,7 +39,7 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv); int intel_uc_init(struct drm_i915_private *dev_priv); void intel_uc_fini(struct drm_i915_private *dev_priv); void intel_uc_reset_prepare(struct drm_i915_private *i915); -int intel_uc_suspend(struct drm_i915_private *dev_priv); +void intel_uc_suspend(struct drm_i915_private *i915); int intel_uc_resume(struct drm_i915_private *dev_priv); static inline bool intel_uc_is_using_guc(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index 6fd70d326468..0342de369d3e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -16,26 +16,18 @@ static int switch_to_context(struct drm_i915_private *i915, { struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; - int err = 0; - - wakeref = intel_runtime_pm_get(i915); for_each_engine(engine, i915, id) { struct i915_request *rq; rq = i915_request_alloc(engine, ctx); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - break; - } + if (IS_ERR(rq)) + return PTR_ERR(rq); i915_request_add(rq); } - intel_runtime_pm_put(i915, wakeref); - - return err; + return 0; } static void trash_stolen(struct drm_i915_private *i915) @@ -120,7 +112,7 @@ static void pm_resume(struct drm_i915_private *i915) * that runtime-pm just works. */ with_intel_runtime_pm(i915, wakeref) { - intel_engines_sanitize(i915, false); + intel_gt_sanitize(i915, false); i915_gem_sanitize(i915); i915_gem_resume(i915); } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 9d646fa1b74e..71d896bbade2 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -1608,113 +1608,6 @@ __engine_name(struct drm_i915_private *i915, intel_engine_mask_t engines) return "none"; } -static int __igt_switch_to_kernel_context(struct drm_i915_private *i915, - struct i915_gem_context *ctx, - intel_engine_mask_t engines) -{ - struct intel_engine_cs *engine; - intel_engine_mask_t tmp; - int pass; - - GEM_TRACE("Testing %s\n", __engine_name(i915, engines)); - for (pass = 0; pass < 4; pass++) { /* Once busy; once idle; repeat */ - bool from_idle = pass & 1; - int err; - - if (!from_idle) { - for_each_engine_masked(engine, i915, engines, tmp) { - struct i915_request *rq; - - rq = i915_request_alloc(engine, ctx); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - i915_request_add(rq); - } - } - - err = i915_gem_switch_to_kernel_context(i915, - i915->gt.active_engines); - if (err) - return err; - - if (!from_idle) { - err = i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (err) - return err; - } - - if (i915->gt.active_requests) { - pr_err("%d active requests remain after switching to kernel context, pass %d (%s) on %s engine%s\n", - i915->gt.active_requests, - pass, from_idle ? "idle" : "busy", - __engine_name(i915, engines), - is_power_of_2(engines) ? "" : "s"); - return -EINVAL; - } - - /* XXX Bonus points for proving we are the kernel context! */ - - mutex_unlock(&i915->drm.struct_mutex); - drain_delayed_work(&i915->gem.idle_work); - mutex_lock(&i915->drm.struct_mutex); - } - - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - return -EIO; - - return 0; -} - -static int igt_switch_to_kernel_context(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; - struct i915_gem_context *ctx; - enum intel_engine_id id; - intel_wakeref_t wakeref; - int err; - - /* - * A core premise of switching to the kernel context is that - * if an engine is already idling in the kernel context, we - * do not emit another request and wake it up. The other being - * that we do indeed end up idling in the kernel context. - */ - - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(i915); - - ctx = kernel_context(i915); - if (IS_ERR(ctx)) { - mutex_unlock(&i915->drm.struct_mutex); - return PTR_ERR(ctx); - } - - /* First check idling each individual engine */ - for_each_engine(engine, i915, id) { - err = __igt_switch_to_kernel_context(i915, ctx, BIT(id)); - if (err) - goto out_unlock; - } - - /* Now en masse */ - err = __igt_switch_to_kernel_context(i915, ctx, ALL_ENGINES); - if (err) - goto out_unlock; - -out_unlock: - GEM_TRACE_DUMP_ON(err); - - intel_runtime_pm_put(i915, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - - kernel_context_close(ctx); - return err; -} - static void mock_barrier_task(void *data) { unsigned int *counter = data; @@ -1729,7 +1622,6 @@ static int mock_context_barrier(void *arg) struct drm_i915_private *i915 = arg; struct i915_gem_context *ctx; struct i915_request *rq; - intel_wakeref_t wakeref; unsigned int counter; int err; @@ -1772,9 +1664,7 @@ static int mock_context_barrier(void *arg) goto out; } - rq = ERR_PTR(-ENODEV); - with_intel_runtime_pm(i915, wakeref) - rq = i915_request_alloc(i915->engine[RCS0], ctx); + rq = i915_request_alloc(i915->engine[RCS0], ctx); if (IS_ERR(rq)) { pr_err("Request allocation failed!\n"); goto out; @@ -1824,7 +1714,6 @@ unlock: int i915_gem_context_mock_selftests(void) { static const struct i915_subtest tests[] = { - SUBTEST(igt_switch_to_kernel_context), SUBTEST(mock_context_barrier), }; struct drm_i915_private *i915; @@ -1843,7 +1732,6 @@ int i915_gem_context_mock_selftests(void) int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) { static const struct i915_subtest tests[] = { - SUBTEST(igt_switch_to_kernel_context), SUBTEST(live_nop_switch), SUBTEST(igt_ctx_exec), SUBTEST(igt_ctx_readonly), diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 12203d665a4e..088b2aa05dcd 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -24,6 +24,7 @@ #include "../i915_selftest.h" +#include "igt_flush_test.h" #include "mock_gem_device.h" #include "huge_gem_object.h" @@ -505,19 +506,23 @@ static void disable_retire_worker(struct drm_i915_private *i915) { i915_gem_shrinker_unregister(i915); - mutex_lock(&i915->drm.struct_mutex); - if (!i915->gt.active_requests++) { - intel_wakeref_t wakeref; - - with_intel_runtime_pm(i915, wakeref) - i915_gem_unpark(i915); - } - mutex_unlock(&i915->drm.struct_mutex); + intel_gt_pm_get(i915); cancel_delayed_work_sync(&i915->gem.retire_work); cancel_delayed_work_sync(&i915->gem.idle_work); } +static void restore_retire_worker(struct drm_i915_private *i915) +{ + intel_gt_pm_put(i915); + + mutex_lock(&i915->drm.struct_mutex); + igt_flush_test(i915, I915_WAIT_LOCKED); + mutex_unlock(&i915->drm.struct_mutex); + + i915_gem_shrinker_register(i915); +} + static int igt_mmap_offset_exhaustion(void *arg) { struct drm_i915_private *i915 = arg; @@ -615,13 +620,7 @@ static int igt_mmap_offset_exhaustion(void *arg) out: drm_mm_remove_node(&resv); out_park: - mutex_lock(&i915->drm.struct_mutex); - if (--i915->gt.active_requests) - queue_delayed_work(i915->wq, &i915->gem.retire_work, 0); - else - queue_delayed_work(i915->wq, &i915->gem.idle_work, 0); - mutex_unlock(&i915->drm.struct_mutex); - i915_gem_shrinker_register(i915); + restore_retire_worker(i915); return err; err_obj: i915_gem_object_put(obj); diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c index 94aee4071a66..e42f3c58536a 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -11,23 +11,29 @@ int igt_flush_test(struct drm_i915_private *i915, unsigned int flags) { + int ret = i915_terminally_wedged(i915) ? -EIO : 0; + int repeat = !!(flags & I915_WAIT_LOCKED); + cond_resched(); - if (flags & I915_WAIT_LOCKED && - i915_gem_switch_to_kernel_context(i915, i915->gt.active_engines)) { - pr_err("Failed to switch back to kernel context; declaring wedged\n"); - i915_gem_set_wedged(i915); - } + do { + if (i915_gem_wait_for_idle(i915, flags, HZ / 5) == -ETIME) { + pr_err("%pS timed out, cancelling all further testing.\n", + __builtin_return_address(0)); - if (i915_gem_wait_for_idle(i915, flags, HZ / 5) == -ETIME) { - pr_err("%pS timed out, cancelling all further testing.\n", - __builtin_return_address(0)); + GEM_TRACE("%pS timed out.\n", + __builtin_return_address(0)); + GEM_TRACE_DUMP(); - GEM_TRACE("%pS timed out.\n", __builtin_return_address(0)); - GEM_TRACE_DUMP(); + i915_gem_set_wedged(i915); + repeat = 0; + ret = -EIO; + } - i915_gem_set_wedged(i915); - } + /* Ensure we also flush after wedging. */ + if (flags & I915_WAIT_LOCKED) + i915_retire_requests(i915); + } while (repeat--); - return i915_terminally_wedged(i915); + return ret; } diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index fb677b4019a0..c072424c6b7c 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -41,11 +41,10 @@ void mock_device_flush(struct drm_i915_private *i915) lockdep_assert_held(&i915->drm.struct_mutex); - for_each_engine(engine, i915, id) - mock_engine_flush(engine); - - i915_retire_requests(i915); - GEM_BUG_ON(i915->gt.active_requests); + do { + for_each_engine(engine, i915, id) + mock_engine_flush(engine); + } while (i915_retire_requests(i915)); } static void mock_device_release(struct drm_device *dev) @@ -110,10 +109,6 @@ static void mock_retire_work_handler(struct work_struct *work) static void mock_idle_work_handler(struct work_struct *work) { - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), gem.idle_work.work); - - i915->gt.active_engines = 0; } static int pm_domain_resume(struct device *dev) @@ -185,6 +180,8 @@ struct drm_i915_private *mock_gem_device(void) mock_uncore_init(&i915->uncore); i915_gem_init__mm(i915); + intel_gt_pm_init(i915); + atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */ init_waitqueue_head(&i915->gpu_error.wait_queue); init_waitqueue_head(&i915->gpu_error.reset_queue); From 8f2a1057d6ec217aefb8bf0de6996294452a2577 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 25 Apr 2019 06:01:43 +0100 Subject: [PATCH 030/591] drm/i915: Explicitly pin the logical context for execbuf In order to separate the reservation phase of building a request from its emission phase, we need to pull some of the request alloc activities from deep inside i915_request to the surface, GEM_EXECBUFFER. v2: Be frivolous, use a local drm_i915_private. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190425050143.811-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 108 +++++++++++++-------- drivers/gpu/drm/i915/i915_request.c | 9 -- 2 files changed, 69 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 3d672c9edb94..794af8edc6a2 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -34,6 +34,8 @@ #include #include +#include "gt/intel_gt_pm.h" + #include "i915_drv.h" #include "i915_gem_clflush.h" #include "i915_trace.h" @@ -236,7 +238,8 @@ struct i915_execbuffer { unsigned int *flags; struct intel_engine_cs *engine; /** engine to queue the request to */ - struct i915_gem_context *ctx; /** context for building the request */ + struct intel_context *context; /* logical state for the request */ + struct i915_gem_context *gem_context; /** caller's context */ struct i915_address_space *vm; /** GTT and vma for the request */ struct i915_request *request; /** our request to build */ @@ -738,7 +741,7 @@ static int eb_select_context(struct i915_execbuffer *eb) if (unlikely(!ctx)) return -ENOENT; - eb->ctx = ctx; + eb->gem_context = ctx; if (ctx->ppgtt) { eb->vm = &ctx->ppgtt->vm; eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT; @@ -784,7 +787,6 @@ static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring) static int eb_wait_for_ring(const struct i915_execbuffer *eb) { - const struct intel_context *ce; struct i915_request *rq; int ret = 0; @@ -794,11 +796,7 @@ static int eb_wait_for_ring(const struct i915_execbuffer *eb) * keeping all of their resources pinned. */ - ce = intel_context_lookup(eb->ctx, eb->engine); - if (!ce || !ce->ring) /* first use, assume empty! */ - return 0; - - rq = __eb_wait_for_ring(ce->ring); + rq = __eb_wait_for_ring(eb->context->ring); if (rq) { mutex_unlock(&eb->i915->drm.struct_mutex); @@ -817,15 +815,15 @@ static int eb_wait_for_ring(const struct i915_execbuffer *eb) static int eb_lookup_vmas(struct i915_execbuffer *eb) { - struct radix_tree_root *handles_vma = &eb->ctx->handles_vma; + struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma; struct drm_i915_gem_object *obj; unsigned int i, batch; int err; - if (unlikely(i915_gem_context_is_closed(eb->ctx))) + if (unlikely(i915_gem_context_is_closed(eb->gem_context))) return -ENOENT; - if (unlikely(i915_gem_context_is_banned(eb->ctx))) + if (unlikely(i915_gem_context_is_banned(eb->gem_context))) return -EIO; INIT_LIST_HEAD(&eb->relocs); @@ -870,8 +868,8 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) if (!vma->open_count++) i915_vma_reopen(vma); list_add(&lut->obj_link, &obj->lut_list); - list_add(&lut->ctx_link, &eb->ctx->handles_list); - lut->ctx = eb->ctx; + list_add(&lut->ctx_link, &eb->gem_context->handles_list); + lut->ctx = eb->gem_context; lut->handle = handle; add_vma: @@ -1227,7 +1225,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto err_unmap; - rq = i915_request_alloc(eb->engine, eb->ctx); + rq = i915_request_create(eb->context); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_unpin; @@ -2088,31 +2086,65 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = { [I915_EXEC_VEBOX] = VECS0 }; -static struct intel_engine_cs * -eb_select_engine(struct drm_i915_private *dev_priv, +static int eb_pin_context(struct i915_execbuffer *eb, + struct intel_engine_cs *engine) +{ + struct intel_context *ce; + int err; + + /* + * ABI: Before userspace accesses the GPU (e.g. execbuffer), report + * EIO if the GPU is already wedged. + */ + err = i915_terminally_wedged(eb->i915); + if (err) + return err; + + /* + * Pinning the contexts may generate requests in order to acquire + * GGTT space, so do this first before we reserve a seqno for + * ourselves. + */ + ce = intel_context_pin(eb->gem_context, engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + eb->engine = engine; + eb->context = ce; + return 0; +} + +static void eb_unpin_context(struct i915_execbuffer *eb) +{ + intel_context_unpin(eb->context); +} + +static int +eb_select_engine(struct i915_execbuffer *eb, struct drm_file *file, struct drm_i915_gem_execbuffer2 *args) { + struct drm_i915_private *i915 = eb->i915; unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; struct intel_engine_cs *engine; if (user_ring_id > I915_USER_RINGS) { DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id); - return NULL; + return -EINVAL; } if ((user_ring_id != I915_EXEC_BSD) && ((args->flags & I915_EXEC_BSD_MASK) != 0)) { DRM_DEBUG("execbuf with non bsd ring but with invalid " "bsd dispatch flags: %d\n", (int)(args->flags)); - return NULL; + return -EINVAL; } - if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(dev_priv, VCS1)) { + if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(i915, VCS1)) { unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; if (bsd_idx == I915_EXEC_BSD_DEFAULT) { - bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file); + bsd_idx = gen8_dispatch_bsd_engine(i915, file); } else if (bsd_idx >= I915_EXEC_BSD_RING1 && bsd_idx <= I915_EXEC_BSD_RING2) { bsd_idx >>= I915_EXEC_BSD_SHIFT; @@ -2120,20 +2152,20 @@ eb_select_engine(struct drm_i915_private *dev_priv, } else { DRM_DEBUG("execbuf with unknown bsd ring: %u\n", bsd_idx); - return NULL; + return -EINVAL; } - engine = dev_priv->engine[_VCS(bsd_idx)]; + engine = i915->engine[_VCS(bsd_idx)]; } else { - engine = dev_priv->engine[user_ring_map[user_ring_id]]; + engine = i915->engine[user_ring_map[user_ring_id]]; } if (!engine) { DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id); - return NULL; + return -EINVAL; } - return engine; + return eb_pin_context(eb, engine); } static void @@ -2275,7 +2307,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, struct i915_execbuffer eb; struct dma_fence *in_fence = NULL; struct sync_file *out_fence = NULL; - intel_wakeref_t wakeref; int out_fence_fd = -1; int err; @@ -2335,12 +2366,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (unlikely(err)) goto err_destroy; - eb.engine = eb_select_engine(eb.i915, file, args); - if (!eb.engine) { - err = -EINVAL; - goto err_engine; - } - /* * Take a local wakeref for preparing to dispatch the execbuf as * we expect to access the hardware fairly frequently in the @@ -2348,16 +2373,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, * wakeref that we hold until the GPU has been idle for at least * 100ms. */ - wakeref = intel_runtime_pm_get(eb.i915); + intel_gt_pm_get(eb.i915); err = i915_mutex_lock_interruptible(dev); if (err) goto err_rpm; - err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */ + err = eb_select_engine(&eb, file, args); if (unlikely(err)) goto err_unlock; + err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */ + if (unlikely(err)) + goto err_engine; + err = eb_relocate(&eb); if (err) { /* @@ -2441,7 +2470,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, GEM_BUG_ON(eb.reloc_cache.rq); /* Allocate a request for this batch buffer nice and early. */ - eb.request = i915_request_alloc(eb.engine, eb.ctx); + eb.request = i915_request_create(eb.context); if (IS_ERR(eb.request)) { err = PTR_ERR(eb.request); goto err_batch_unpin; @@ -2479,8 +2508,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, trace_i915_request_queue(eb.request, eb.batch_flags); err = eb_submit(&eb); err_request: - i915_request_add(eb.request); add_to_client(eb.request, file); + i915_request_add(eb.request); if (fences) signal_fence_array(&eb, fences); @@ -2502,12 +2531,13 @@ err_batch_unpin: err_vma: if (eb.exec) eb_release_vmas(&eb); +err_engine: + eb_unpin_context(&eb); err_unlock: mutex_unlock(&dev->struct_mutex); err_rpm: - intel_runtime_pm_put(eb.i915, wakeref); -err_engine: - i915_gem_context_put(eb.ctx); + intel_gt_pm_put(eb.i915); + i915_gem_context_put(eb.gem_context); err_destroy: eb_destroy(&eb); err_out_fence: diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 11c484e679b6..5869c37a35e1 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -785,7 +785,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) struct drm_i915_private *i915 = engine->i915; struct intel_context *ce; struct i915_request *rq; - int ret; /* * Preempt contexts are reserved for exclusive use to inject a @@ -794,14 +793,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) */ GEM_BUG_ON(ctx == i915->preempt_context); - /* - * ABI: Before userspace accesses the GPU (e.g. execbuffer), report - * EIO if the GPU is already wedged. - */ - ret = i915_terminally_wedged(i915); - if (ret) - return ERR_PTR(ret); - /* * Pinning the contexts may generate requests in order to acquire * GGTT space, so do this first before we reserve a seqno for From 1f2b4a7edbc3b89c27f661b81a699095d922467c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 25 Apr 2019 06:43:33 +0100 Subject: [PATCH 031/591] drm/i915: Allow multiple user handles to the same VM It was noted that we made the same mistake for VM_ID as for object handles, whereby we ensured that we only allocated a single handle for one ppgtt. This has the unfortunate consequence for userspace that they need to reference count the handles to avoid destroying an active ID. If we allow multiple handles to the same ppgtt, userspace can freely unreference any handle they own without fear of destroying the same handle in use elsewhere. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190425054333.27299-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_context.c | 26 ++++++++----------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 2 -- 2 files changed, 8 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 76ed74e75d82..05496ea7a123 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -772,8 +772,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, if (err < 0) goto err_unlock; - GEM_BUG_ON(err == 0); /* reserved for default/unassigned ppgtt */ - ppgtt->user_handle = err; + GEM_BUG_ON(err == 0); /* reserved for invalid/unassigned ppgtt */ mutex_unlock(&file_priv->vm_idr_lock); @@ -811,10 +810,6 @@ int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data, return err; ppgtt = idr_remove(&file_priv->vm_idr, id); - if (ppgtt) { - GEM_BUG_ON(ppgtt->user_handle != id); - ppgtt->user_handle = 0; - } mutex_unlock(&file_priv->vm_idr_lock); if (!ppgtt) @@ -925,18 +920,15 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, if (ret) goto err_put; - if (!ppgtt->user_handle) { - ret = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL); - GEM_BUG_ON(!ret); - if (ret < 0) - goto err_unlock; + ret = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL); + GEM_BUG_ON(!ret); + if (ret < 0) + goto err_unlock; - ppgtt->user_handle = ret; - i915_ppgtt_get(ppgtt); - } + i915_ppgtt_get(ppgtt); args->size = 0; - args->value = ppgtt->user_handle; + args->value = ret; ret = 0; err_unlock: @@ -1027,10 +1019,8 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, return err; ppgtt = idr_find(&file_priv->vm_idr, args->value); - if (ppgtt) { - GEM_BUG_ON(ppgtt->user_handle != args->value); + if (ppgtt) i915_ppgtt_get(ppgtt); - } mutex_unlock(&file_priv->vm_idr_lock); if (!ppgtt) return -ENOENT; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index f85b75db1f98..2fafa04c45ec 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -397,8 +397,6 @@ struct i915_hw_ppgtt { struct i915_page_directory_pointer pdp; /* GEN8+ */ struct i915_page_directory pd; /* GEN6-7 */ }; - - u32 user_handle; }; struct gen6_hw_ppgtt { From a75d035fedbdecf83f86767aa2e4d05c8c4ffd95 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 26 Apr 2019 09:17:18 +0100 Subject: [PATCH 032/591] drm/i915: Disable preemption and sleeping while using the punit sideband MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While we talk to the punit over its sideband, we need to prevent the cpu from sleeping in order to prevent a potential machine hang. Note that by itself, it appears that pm_qos_update_request (via intel_idle) doesn't provide a sufficient barrier to ensure that all core are indeed awake (out of Cstate) and that the package is awake. To do so, we need to supplement the pm_qos with a manual ping on_each_cpu. v2: Restrict the heavy-weight wakeup to just the ISOF_PORT_PUNIT, there is insufficient evidence to implicate a wider problem atm. Similarly, restrict the w/a to Valleyview, as Cherryview doesn't have an angry cadre of users. The working theory, courtesy of Ville and Hans, is the issue lies within the power delivery and so is likely to be unit and board specific and occurs when both the unit/fw require extra power at the same time as the cpu package is changing its own power state. References: https://bugzilla.kernel.org/show_bug.cgi?id=109051 References: https://bugs.freedesktop.org/show_bug.cgi?id=102657 References: https://bugzilla.kernel.org/show_bug.cgi?id=195255 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Hans de Goede Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190426081725.31217-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 6 + drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_sideband.c | 235 ++++++++++++++++---------- 3 files changed, 155 insertions(+), 87 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 824409ffd03f..aacc8dd6ecfd 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -886,6 +886,9 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv) mutex_init(&dev_priv->backlight_lock); mutex_init(&dev_priv->sb_lock); + pm_qos_add_request(&dev_priv->sb_qos, + PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE); + mutex_init(&dev_priv->av_mutex); mutex_init(&dev_priv->wm.wm_mutex); mutex_init(&dev_priv->pps_mutex); @@ -945,6 +948,9 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv) i915_gem_cleanup_early(dev_priv); i915_workqueues_cleanup(dev_priv); i915_engines_cleanup(dev_priv); + + pm_qos_remove_request(&dev_priv->sb_qos); + mutex_destroy(&dev_priv->sb_lock); } /** diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5c77bf5b735b..662dbd2e3245 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1561,6 +1561,7 @@ struct drm_i915_private { /* Sideband mailbox protection */ struct mutex sb_lock; + struct pm_qos_request sb_qos; /** Cached value of IMR to avoid reads in updating the bitfield */ union { diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index 57de41b1f989..fc8913461622 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -22,6 +22,8 @@ * */ +#include + #include "i915_drv.h" #include "intel_drv.h" @@ -39,19 +41,50 @@ /* Private register write, double-word addressing, non-posted */ #define SB_CRWRDA_NP 0x07 -static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn, - u32 port, u32 opcode, u32 addr, u32 *val) +static void ping(void *info) { - u32 cmd, be = 0xf, bar = 0; - bool is_read = (opcode == SB_MRD_NP || opcode == SB_CRRDDA_NP); +} - cmd = (devfn << IOSF_DEVFN_SHIFT) | (opcode << IOSF_OPCODE_SHIFT) | - (port << IOSF_PORT_SHIFT) | (be << IOSF_BYTE_ENABLES_SHIFT) | - (bar << IOSF_BAR_SHIFT); +static void __vlv_punit_get(struct drm_i915_private *i915) +{ + iosf_mbi_punit_acquire(); - WARN_ON(!mutex_is_locked(&dev_priv->sb_lock)); + /* + * Prevent the cpu from sleeping while we use this sideband, otherwise + * the punit may cause a machine hang. The issue appears to be isolated + * with changing the power state of the CPU package while changing + * the power state via the punit, and we have only observed it + * reliably on 4-core Baytail systems suggesting the issue is in the + * power delivery mechanism and likely to be be board/function + * specific. Hence we presume the workaround needs only be applied + * to the Valleyview P-unit and not all sideband communications. + */ + if (IS_VALLEYVIEW(i915)) { + pm_qos_update_request(&i915->sb_qos, 0); + on_each_cpu(ping, NULL, 1); + } +} - if (intel_wait_for_register(&dev_priv->uncore, +static void __vlv_punit_put(struct drm_i915_private *i915) +{ + if (IS_VALLEYVIEW(i915)) + pm_qos_update_request(&i915->sb_qos, PM_QOS_DEFAULT_VALUE); + + iosf_mbi_punit_release(); +} + +static int vlv_sideband_rw(struct drm_i915_private *i915, + u32 devfn, u32 port, u32 opcode, + u32 addr, u32 *val) +{ + struct intel_uncore *uncore = &i915->uncore; + const bool is_read = (opcode == SB_MRD_NP || opcode == SB_CRRDDA_NP); + int err; + + lockdep_assert_held(&i915->sb_lock); + + /* Flush the previous comms, just in case it failed last time. */ + if (intel_wait_for_register(uncore, VLV_IOSF_DOORBELL_REQ, IOSF_SB_BUSY, 0, 5)) { DRM_DEBUG_DRIVER("IOSF sideband idle wait (%s) timed out\n", @@ -59,131 +92,156 @@ static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn, return -EAGAIN; } - I915_WRITE(VLV_IOSF_ADDR, addr); - I915_WRITE(VLV_IOSF_DATA, is_read ? 0 : *val); - I915_WRITE(VLV_IOSF_DOORBELL_REQ, cmd); + preempt_disable(); - if (intel_wait_for_register(&dev_priv->uncore, - VLV_IOSF_DOORBELL_REQ, IOSF_SB_BUSY, 0, - 5)) { + intel_uncore_write_fw(uncore, VLV_IOSF_ADDR, addr); + intel_uncore_write_fw(uncore, VLV_IOSF_DATA, is_read ? 0 : *val); + intel_uncore_write_fw(uncore, VLV_IOSF_DOORBELL_REQ, + (devfn << IOSF_DEVFN_SHIFT) | + (opcode << IOSF_OPCODE_SHIFT) | + (port << IOSF_PORT_SHIFT) | + (0xf << IOSF_BYTE_ENABLES_SHIFT) | + (0 << IOSF_BAR_SHIFT) | + IOSF_SB_BUSY); + + if (__intel_wait_for_register_fw(uncore, + VLV_IOSF_DOORBELL_REQ, IOSF_SB_BUSY, 0, + 10000, 0, NULL) == 0) { + if (is_read) + *val = intel_uncore_read_fw(uncore, VLV_IOSF_DATA); + err = 0; + } else { DRM_DEBUG_DRIVER("IOSF sideband finish wait (%s) timed out\n", is_read ? "read" : "write"); - return -ETIMEDOUT; + err = -ETIMEDOUT; } - if (is_read) - *val = I915_READ(VLV_IOSF_DATA); - - return 0; -} - -u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr) -{ - u32 val = 0; - - WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); - - mutex_lock(&dev_priv->sb_lock); - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, - SB_CRRDDA_NP, addr, &val); - mutex_unlock(&dev_priv->sb_lock); - - return val; -} - -int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val) -{ - int err; - - WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); - - mutex_lock(&dev_priv->sb_lock); - err = vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, - SB_CRWRDA_NP, addr, &val); - mutex_unlock(&dev_priv->sb_lock); + preempt_enable(); return err; } -u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg) +u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr) { u32 val = 0; - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_BUNIT, + WARN_ON(!mutex_is_locked(&i915->pcu_lock)); + + mutex_lock(&i915->sb_lock); + __vlv_punit_get(i915); + + vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, + SB_CRRDDA_NP, addr, &val); + + __vlv_punit_put(i915); + mutex_unlock(&i915->sb_lock); + + return val; +} + +int vlv_punit_write(struct drm_i915_private *i915, u32 addr, u32 val) +{ + int err; + + WARN_ON(!mutex_is_locked(&i915->pcu_lock)); + + mutex_lock(&i915->sb_lock); + __vlv_punit_get(i915); + + err = vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, + SB_CRWRDA_NP, addr, &val); + + __vlv_punit_put(i915); + mutex_unlock(&i915->sb_lock); + + return err; +} + +u32 vlv_bunit_read(struct drm_i915_private *i915, u32 reg) +{ + u32 val = 0; + + vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_BUNIT, SB_CRRDDA_NP, reg, &val); return val; } -void vlv_bunit_write(struct drm_i915_private *dev_priv, u32 reg, u32 val) +void vlv_bunit_write(struct drm_i915_private *i915, u32 reg, u32 val) { - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_BUNIT, + vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_BUNIT, SB_CRWRDA_NP, reg, &val); } -u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr) +u32 vlv_nc_read(struct drm_i915_private *i915, u8 addr) { u32 val = 0; - WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); + WARN_ON(!mutex_is_locked(&i915->pcu_lock)); - mutex_lock(&dev_priv->sb_lock); - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_NC, + mutex_lock(&i915->sb_lock); + vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_NC, SB_CRRDDA_NP, addr, &val); - mutex_unlock(&dev_priv->sb_lock); + mutex_unlock(&i915->sb_lock); return val; } -u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg) +u32 vlv_iosf_sb_read(struct drm_i915_private *i915, u8 port, u32 reg) { u32 val = 0; - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), port, + + vlv_sideband_rw(i915, PCI_DEVFN(0, 0), port, SB_CRRDDA_NP, reg, &val); + return val; } -void vlv_iosf_sb_write(struct drm_i915_private *dev_priv, +void vlv_iosf_sb_write(struct drm_i915_private *i915, u8 port, u32 reg, u32 val) { - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), port, + vlv_sideband_rw(i915, PCI_DEVFN(0, 0), port, SB_CRWRDA_NP, reg, &val); } -u32 vlv_cck_read(struct drm_i915_private *dev_priv, u32 reg) +u32 vlv_cck_read(struct drm_i915_private *i915, u32 reg) { u32 val = 0; - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_CCK, + + vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_CCK, SB_CRRDDA_NP, reg, &val); + return val; } -void vlv_cck_write(struct drm_i915_private *dev_priv, u32 reg, u32 val) +void vlv_cck_write(struct drm_i915_private *i915, u32 reg, u32 val) { - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_CCK, + vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_CCK, SB_CRWRDA_NP, reg, &val); } -u32 vlv_ccu_read(struct drm_i915_private *dev_priv, u32 reg) +u32 vlv_ccu_read(struct drm_i915_private *i915, u32 reg) { u32 val = 0; - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_CCU, + + vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_CCU, SB_CRRDDA_NP, reg, &val); + return val; } -void vlv_ccu_write(struct drm_i915_private *dev_priv, u32 reg, u32 val) +void vlv_ccu_write(struct drm_i915_private *i915, u32 reg, u32 val) { - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_CCU, + vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_CCU, SB_CRWRDA_NP, reg, &val); } -u32 vlv_dpio_read(struct drm_i915_private *dev_priv, enum pipe pipe, int reg) +u32 vlv_dpio_read(struct drm_i915_private *i915, enum pipe pipe, int reg) { + int port = i915->dpio_phy_iosf_port[DPIO_PHY(pipe)]; u32 val = 0; - vlv_sideband_rw(dev_priv, DPIO_DEVFN, DPIO_PHY_IOSF_PORT(DPIO_PHY(pipe)), - SB_MRD_NP, reg, &val); + vlv_sideband_rw(i915, DPIO_DEVFN, port, SB_MRD_NP, reg, &val); /* * FIXME: There might be some registers where all 1's is a valid value, @@ -195,10 +253,27 @@ u32 vlv_dpio_read(struct drm_i915_private *dev_priv, enum pipe pipe, int reg) return val; } -void vlv_dpio_write(struct drm_i915_private *dev_priv, enum pipe pipe, int reg, u32 val) +void vlv_dpio_write(struct drm_i915_private *i915, + enum pipe pipe, int reg, u32 val) { - vlv_sideband_rw(dev_priv, DPIO_DEVFN, DPIO_PHY_IOSF_PORT(DPIO_PHY(pipe)), - SB_MWR_NP, reg, &val); + int port = i915->dpio_phy_iosf_port[DPIO_PHY(pipe)]; + + vlv_sideband_rw(i915, DPIO_DEVFN, port, SB_MWR_NP, reg, &val); +} + +u32 vlv_flisdsi_read(struct drm_i915_private *i915, u32 reg) +{ + u32 val = 0; + + vlv_sideband_rw(i915, DPIO_DEVFN, IOSF_PORT_FLISDSI, SB_CRRDDA_NP, + reg, &val); + return val; +} + +void vlv_flisdsi_write(struct drm_i915_private *i915, u32 reg, u32 val) +{ + vlv_sideband_rw(i915, DPIO_DEVFN, IOSF_PORT_FLISDSI, SB_CRWRDA_NP, + reg, &val); } /* SBI access */ @@ -279,17 +354,3 @@ void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value, return; } } - -u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg) -{ - u32 val = 0; - vlv_sideband_rw(dev_priv, DPIO_DEVFN, IOSF_PORT_FLISDSI, SB_CRRDDA_NP, - reg, &val); - return val; -} - -void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg, u32 val) -{ - vlv_sideband_rw(dev_priv, DPIO_DEVFN, IOSF_PORT_FLISDSI, SB_CRWRDA_NP, - reg, &val); -} From 221c78623ea5a1b94e1d53443ccf78fae0bc5982 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 26 Apr 2019 09:17:19 +0100 Subject: [PATCH 033/591] drm/i915: Lift acquiring the vlv punit magic to a common sb-get MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As we now employ a very heavy pm_qos around the punit access, we want to minimise the number of synchronous requests by performing one for the whole punit sequence rather than around individual accesses. The sideband lock is used for this, so push the pm_qos into the sideband lock acquisition and release, moving it from the lowlevel punit rw routine to the callers. In the first step, we move the punit magic into the common sideband lock so that we can acquire a bunch of ports simultaneously, and if need be extend the workaround protection later. Signed-off-by: Chris Wilson Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190426081725.31217-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 124 +++++++++++++++++++++--- drivers/gpu/drm/i915/intel_cdclk.c | 6 +- drivers/gpu/drm/i915/intel_display.c | 37 +++---- drivers/gpu/drm/i915/intel_dp.c | 4 +- drivers/gpu/drm/i915/intel_dpio_phy.c | 37 +++---- drivers/gpu/drm/i915/intel_dsi_vbt.c | 8 +- drivers/gpu/drm/i915/intel_hdmi.c | 4 +- drivers/gpu/drm/i915/intel_pm.c | 4 +- drivers/gpu/drm/i915/intel_runtime_pm.c | 8 +- drivers/gpu/drm/i915/intel_sideband.c | 45 ++++++--- drivers/gpu/drm/i915/vlv_dsi.c | 8 +- drivers/gpu/drm/i915/vlv_dsi_pll.c | 14 +-- 12 files changed, 206 insertions(+), 93 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 662dbd2e3245..b2215fb7f562 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3442,25 +3442,119 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, u32 reply_mask, u32 reply, int timeout_base_ms); /* intel_sideband.c */ -u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr); -int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val); -u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr); -u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg); -void vlv_iosf_sb_write(struct drm_i915_private *dev_priv, u8 port, u32 reg, u32 val); -u32 vlv_cck_read(struct drm_i915_private *dev_priv, u32 reg); -void vlv_cck_write(struct drm_i915_private *dev_priv, u32 reg, u32 val); -u32 vlv_ccu_read(struct drm_i915_private *dev_priv, u32 reg); -void vlv_ccu_write(struct drm_i915_private *dev_priv, u32 reg, u32 val); -u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg); -void vlv_bunit_write(struct drm_i915_private *dev_priv, u32 reg, u32 val); -u32 vlv_dpio_read(struct drm_i915_private *dev_priv, enum pipe pipe, int reg); -void vlv_dpio_write(struct drm_i915_private *dev_priv, enum pipe pipe, int reg, u32 val); + +enum { + VLV_IOSF_SB_BUNIT, + VLV_IOSF_SB_CCK, + VLV_IOSF_SB_CCU, + VLV_IOSF_SB_DPIO, + VLV_IOSF_SB_FLISDSI, + VLV_IOSF_SB_GPIO, + VLV_IOSF_SB_NC, + VLV_IOSF_SB_PUNIT, +}; + +void vlv_iosf_sb_get(struct drm_i915_private *i915, unsigned long ports); +u32 vlv_iosf_sb_read(struct drm_i915_private *i915, u8 port, u32 reg); +void vlv_iosf_sb_write(struct drm_i915_private *i915, + u8 port, u32 reg, u32 val); +void vlv_iosf_sb_put(struct drm_i915_private *i915, unsigned long ports); + +static inline void vlv_bunit_get(struct drm_i915_private *i915) +{ + vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_BUNIT)); +} + +u32 vlv_bunit_read(struct drm_i915_private *i915, u32 reg); +void vlv_bunit_write(struct drm_i915_private *i915, u32 reg, u32 val); + +static inline void vlv_bunit_put(struct drm_i915_private *i915) +{ + vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_BUNIT)); +} + +static inline void vlv_cck_get(struct drm_i915_private *i915) +{ + vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_CCK)); +} + +u32 vlv_cck_read(struct drm_i915_private *i915, u32 reg); +void vlv_cck_write(struct drm_i915_private *i915, u32 reg, u32 val); + +static inline void vlv_cck_put(struct drm_i915_private *i915) +{ + vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_CCK)); +} + +static inline void vlv_ccu_get(struct drm_i915_private *i915) +{ + vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_CCU)); +} + +u32 vlv_ccu_read(struct drm_i915_private *i915, u32 reg); +void vlv_ccu_write(struct drm_i915_private *i915, u32 reg, u32 val); + +static inline void vlv_ccu_put(struct drm_i915_private *i915) +{ + vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_CCU)); +} + +static inline void vlv_dpio_get(struct drm_i915_private *i915) +{ + vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_DPIO)); +} + +u32 vlv_dpio_read(struct drm_i915_private *i915, enum pipe pipe, int reg); +void vlv_dpio_write(struct drm_i915_private *i915, + enum pipe pipe, int reg, u32 val); + +static inline void vlv_dpio_put(struct drm_i915_private *i915) +{ + vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_DPIO)); +} + +static inline void vlv_flisdsi_get(struct drm_i915_private *i915) +{ + vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_FLISDSI)); +} + +u32 vlv_flisdsi_read(struct drm_i915_private *i915, u32 reg); +void vlv_flisdsi_write(struct drm_i915_private *i915, u32 reg, u32 val); + +static inline void vlv_flisdsi_put(struct drm_i915_private *i915) +{ + vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_FLISDSI)); +} + +static inline void vlv_nc_get(struct drm_i915_private *i915) +{ + vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_NC)); +} + +u32 vlv_nc_read(struct drm_i915_private *i915, u8 addr); + +static inline void vlv_nc_put(struct drm_i915_private *i915) +{ + vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_NC)); +} + +static inline void vlv_punit_get(struct drm_i915_private *i915) +{ + vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_PUNIT)); +} + +u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr); +int vlv_punit_write(struct drm_i915_private *i915, u32 addr, u32 val); + +static inline void vlv_punit_put(struct drm_i915_private *i915) +{ + vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_PUNIT)); +} + u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg, enum intel_sbi_destination destination); void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value, enum intel_sbi_destination destination); -u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg); -void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg, u32 val); /* intel_dpio_phy.c */ void bxt_port_to_phy_channel(struct drm_i915_private *dev_priv, enum port port, diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index ae40a8679314..5845d0a37599 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -557,7 +557,8 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, } mutex_unlock(&dev_priv->pcu_lock); - mutex_lock(&dev_priv->sb_lock); + vlv_iosf_sb_get(dev_priv, + BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_BUNIT)); if (cdclk == 400000) { u32 divider; @@ -591,7 +592,8 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, val |= 3000 / 250; /* 3.0 usec */ vlv_bunit_write(dev_priv, BUNIT_REG_BISOC, val); - mutex_unlock(&dev_priv->sb_lock); + vlv_iosf_sb_put(dev_priv, + BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_BUNIT)); intel_update_cdclk(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c3d1d38ccf4d..297efab33b44 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -152,10 +152,10 @@ int vlv_get_hpll_vco(struct drm_i915_private *dev_priv) int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 }; /* Obtain SKU information */ - mutex_lock(&dev_priv->sb_lock); + vlv_cck_get(dev_priv); hpll_freq = vlv_cck_read(dev_priv, CCK_FUSE_REG) & CCK_FUSE_HPLL_FREQ_MASK; - mutex_unlock(&dev_priv->sb_lock); + vlv_cck_put(dev_priv); return vco_freq[hpll_freq] * 1000; } @@ -166,9 +166,9 @@ int vlv_get_cck_clock(struct drm_i915_private *dev_priv, u32 val; int divider; - mutex_lock(&dev_priv->sb_lock); + vlv_cck_get(dev_priv); val = vlv_cck_read(dev_priv, reg); - mutex_unlock(&dev_priv->sb_lock); + vlv_cck_put(dev_priv); divider = val & CCK_FREQUENCY_VALUES; @@ -1093,9 +1093,9 @@ void assert_dsi_pll(struct drm_i915_private *dev_priv, bool state) u32 val; bool cur_state; - mutex_lock(&dev_priv->sb_lock); + vlv_cck_get(dev_priv); val = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL); - mutex_unlock(&dev_priv->sb_lock); + vlv_cck_put(dev_priv); cur_state = val & DSI_PLL_VCO_EN; I915_STATE_WARN(cur_state != state, @@ -1405,14 +1405,14 @@ static void _chv_enable_pll(struct intel_crtc *crtc, enum dpio_channel port = vlv_pipe_to_channel(pipe); u32 tmp; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* Enable back the 10bit clock to display controller */ tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port)); tmp |= DPIO_DCLKP_EN; vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port), tmp); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); /* * Need to wait > 100ns between dclkp clock enable bit and PLL enable. @@ -1569,14 +1569,14 @@ static void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe) I915_WRITE(DPLL(pipe), val); POSTING_READ(DPLL(pipe)); - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* Disable 10bit clock to display controller */ val = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port)); val &= ~DPIO_DCLKP_EN; vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port), val); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); } void vlv_wait_port_ready(struct drm_i915_private *dev_priv, @@ -7276,7 +7276,7 @@ static void vlv_prepare_pll(struct intel_crtc *crtc, if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0) return; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); bestn = pipe_config->dpll.n; bestm1 = pipe_config->dpll.m1; @@ -7353,7 +7353,8 @@ static void vlv_prepare_pll(struct intel_crtc *crtc, vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW7(pipe), coreclk); vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW11(pipe), 0x87871000); - mutex_unlock(&dev_priv->sb_lock); + + vlv_dpio_put(dev_priv); } static void chv_prepare_pll(struct intel_crtc *crtc, @@ -7386,7 +7387,7 @@ static void chv_prepare_pll(struct intel_crtc *crtc, dpio_val = 0; loopfilter = 0; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* p1 and p2 divider */ vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW13(port), @@ -7458,7 +7459,7 @@ static void chv_prepare_pll(struct intel_crtc *crtc, vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port)) | DPIO_AFC_RECAL); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); } /** @@ -8084,9 +8085,9 @@ static void vlv_crtc_clock_get(struct intel_crtc *crtc, if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0) return; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); mdiv = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW3(pipe)); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); clock.m1 = (mdiv >> DPIO_M1DIV_SHIFT) & 7; clock.m2 = mdiv & DPIO_M2DIV_MASK; @@ -8195,13 +8196,13 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc, if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0) return; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); cmn_dw13 = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW13(port)); pll_dw0 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW0(port)); pll_dw1 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW1(port)); pll_dw2 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW2(port)); pll_dw3 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW3(port)); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); clock.m1 = (pll_dw1 & 0x7) == DPIO_CHV_M1_DIV_BY_2 ? 2 : 0; clock.m2 = (pll_dw0 & 0xff) << 22; diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 560274d1c50b..4fc25dcc97d4 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3148,12 +3148,12 @@ static void chv_post_disable_dp(struct intel_encoder *encoder, intel_dp_link_down(encoder, old_crtc_state); - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* Assert data lane reset */ chv_data_lane_soft_reset(encoder, old_crtc_state, true); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); } static void diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index ab4ac7158b79..c784f3daaf51 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -648,7 +648,7 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder, u32 val; int i; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* Clear calc init */ val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch)); @@ -729,8 +729,7 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder, vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW10(ch), val); } - mutex_unlock(&dev_priv->sb_lock); - + vlv_dpio_put(dev_priv); } void chv_data_lane_soft_reset(struct intel_encoder *encoder, @@ -800,7 +799,7 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder, chv_phy_powergate_lanes(encoder, true, lane_mask); - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* Assert data lane reset */ chv_data_lane_soft_reset(encoder, crtc_state, true); @@ -855,7 +854,7 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder, val |= CHV_CMN_USEDCLKCHANNEL; vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW19(ch), val); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); } void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, @@ -870,7 +869,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, int data, i, stagger; u32 val; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* allow hardware to manage TX FIFO reset source */ val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW11(ch)); @@ -935,7 +934,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, /* Deassert data lane reset */ chv_data_lane_soft_reset(encoder, crtc_state, false); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); } void chv_phy_release_cl2_override(struct intel_encoder *encoder) @@ -956,7 +955,7 @@ void chv_phy_post_pll_disable(struct intel_encoder *encoder, enum pipe pipe = to_intel_crtc(old_crtc_state->base.crtc)->pipe; u32 val; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* disable left/right clock distribution */ if (pipe != PIPE_B) { @@ -969,7 +968,7 @@ void chv_phy_post_pll_disable(struct intel_encoder *encoder, vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW1_CH1, val); } - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); /* * Leave the power down bit cleared for at least one @@ -993,7 +992,8 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder, enum dpio_channel port = vlv_dport_to_channel(dport); enum pipe pipe = intel_crtc->pipe; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); + vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), 0x00000000); vlv_dpio_write(dev_priv, pipe, VLV_TX_DW4(port), demph_reg_value); vlv_dpio_write(dev_priv, pipe, VLV_TX_DW2(port), @@ -1006,7 +1006,8 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder, vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW11(port), 0x00030000); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW9(port), preemph_reg_value); vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), DPIO_TX_OCALINIT_EN); - mutex_unlock(&dev_priv->sb_lock); + + vlv_dpio_put(dev_priv); } void vlv_phy_pre_pll_enable(struct intel_encoder *encoder, @@ -1019,7 +1020,8 @@ void vlv_phy_pre_pll_enable(struct intel_encoder *encoder, enum pipe pipe = crtc->pipe; /* Program Tx lane resets to default */ - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); + vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port), DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); @@ -1033,7 +1035,8 @@ void vlv_phy_pre_pll_enable(struct intel_encoder *encoder, vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW12(port), 0x00750f00); vlv_dpio_write(dev_priv, pipe, VLV_TX_DW11(port), 0x00001500); vlv_dpio_write(dev_priv, pipe, VLV_TX_DW14(port), 0x40400000); - mutex_unlock(&dev_priv->sb_lock); + + vlv_dpio_put(dev_priv); } void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder, @@ -1047,7 +1050,7 @@ void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder, enum pipe pipe = crtc->pipe; u32 val; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* Enable clock channels for this port */ val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW8(port)); @@ -1063,7 +1066,7 @@ void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder, vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW14(port), 0x00760018); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW23(port), 0x00400888); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); } void vlv_phy_reset_lanes(struct intel_encoder *encoder, @@ -1075,8 +1078,8 @@ void vlv_phy_reset_lanes(struct intel_encoder *encoder, enum dpio_channel port = vlv_dport_to_channel(dport); enum pipe pipe = crtc->pipe; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port), 0x00000000); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW1(port), 0x00e00060); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); } diff --git a/drivers/gpu/drm/i915/intel_dsi_vbt.c b/drivers/gpu/drm/i915/intel_dsi_vbt.c index 3074448446bc..2304488f2d35 100644 --- a/drivers/gpu/drm/i915/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c @@ -248,7 +248,7 @@ static void vlv_exec_gpio(struct drm_i915_private *dev_priv, pconf0 = VLV_GPIO_PCONF0(map->base_offset); padval = VLV_GPIO_PAD_VAL(map->base_offset); - mutex_lock(&dev_priv->sb_lock); + vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_GPIO)); if (!map->init) { /* FIXME: remove constant below */ vlv_iosf_sb_write(dev_priv, port, pconf0, 0x2000CC00); @@ -257,7 +257,7 @@ static void vlv_exec_gpio(struct drm_i915_private *dev_priv, tmp = 0x4 | value; vlv_iosf_sb_write(dev_priv, port, padval, tmp); - mutex_unlock(&dev_priv->sb_lock); + vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_GPIO)); } static void chv_exec_gpio(struct drm_i915_private *dev_priv, @@ -303,12 +303,12 @@ static void chv_exec_gpio(struct drm_i915_private *dev_priv, cfg0 = CHV_GPIO_PAD_CFG0(family_num, gpio_index); cfg1 = CHV_GPIO_PAD_CFG1(family_num, gpio_index); - mutex_lock(&dev_priv->sb_lock); + vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_GPIO)); vlv_iosf_sb_write(dev_priv, port, cfg1, 0); vlv_iosf_sb_write(dev_priv, port, cfg0, CHV_GPIO_GPIOEN | CHV_GPIO_GPIOCFG_GPO | CHV_GPIO_GPIOTXSTATE(value)); - mutex_unlock(&dev_priv->sb_lock); + vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_GPIO)); } static void bxt_exec_gpio(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index e1005d7b75fd..8b72365f9309 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -2630,12 +2630,12 @@ static void chv_hdmi_post_disable(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* Assert data lane reset */ chv_data_lane_soft_reset(encoder, old_crtc_state, true); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); } static void chv_hdmi_pre_enable(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7aa9a8c12b54..6c2f416b95a6 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7803,9 +7803,9 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) vlv_init_gpll_ref_freq(dev_priv); - mutex_lock(&dev_priv->sb_lock); + vlv_cck_get(dev_priv); val = vlv_cck_read(dev_priv, CCK_FUSE_REG); - mutex_unlock(&dev_priv->sb_lock); + vlv_cck_put(dev_priv); switch ((val >> 2) & 0x7) { case 3: diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index d4f4262d0fee..9c1294c29566 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -1569,7 +1569,7 @@ static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, 1)) DRM_ERROR("Display PHY %d is not power up\n", phy); - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* Enable dynamic power down */ tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW28); @@ -1592,7 +1592,7 @@ static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW30, tmp); } - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); dev_priv->chv_phy_control |= PHY_COM_LANE_RESET_DEASSERT(phy); I915_WRITE(DISPLAY_PHY_CONTROL, dev_priv->chv_phy_control); @@ -1655,9 +1655,9 @@ static void assert_chv_phy_powergate(struct drm_i915_private *dev_priv, enum dpi else reg = _CHV_CMN_DW6_CH1; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); val = vlv_dpio_read(dev_priv, pipe, reg); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); /* * This assumes !override is only used when the port is disabled. diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index fc8913461622..b2fc605e2e29 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -73,6 +73,22 @@ static void __vlv_punit_put(struct drm_i915_private *i915) iosf_mbi_punit_release(); } +void vlv_iosf_sb_get(struct drm_i915_private *i915, unsigned long ports) +{ + if (ports & BIT(VLV_IOSF_SB_PUNIT)) + __vlv_punit_get(i915); + + mutex_lock(&i915->sb_lock); +} + +void vlv_iosf_sb_put(struct drm_i915_private *i915, unsigned long ports) +{ + mutex_unlock(&i915->sb_lock); + + if (ports & BIT(VLV_IOSF_SB_PUNIT)) + __vlv_punit_put(i915); +} + static int vlv_sideband_rw(struct drm_i915_private *i915, u32 devfn, u32 port, u32 opcode, u32 addr, u32 *val) @@ -82,6 +98,8 @@ static int vlv_sideband_rw(struct drm_i915_private *i915, int err; lockdep_assert_held(&i915->sb_lock); + if (port == IOSF_PORT_PUNIT) + iosf_mbi_assert_punit_acquired(); /* Flush the previous comms, just in case it failed last time. */ if (intel_wait_for_register(uncore, @@ -125,16 +143,14 @@ u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr) { u32 val = 0; - WARN_ON(!mutex_is_locked(&i915->pcu_lock)); + lockdep_assert_held(&i915->pcu_lock); - mutex_lock(&i915->sb_lock); - __vlv_punit_get(i915); + vlv_punit_get(i915); vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, SB_CRRDDA_NP, addr, &val); - __vlv_punit_put(i915); - mutex_unlock(&i915->sb_lock); + vlv_punit_put(i915); return val; } @@ -143,16 +159,14 @@ int vlv_punit_write(struct drm_i915_private *i915, u32 addr, u32 val) { int err; - WARN_ON(!mutex_is_locked(&i915->pcu_lock)); + lockdep_assert_held(&i915->pcu_lock); - mutex_lock(&i915->sb_lock); - __vlv_punit_get(i915); + vlv_punit_get(i915); err = vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, SB_CRWRDA_NP, addr, &val); - __vlv_punit_put(i915); - mutex_unlock(&i915->sb_lock); + vlv_punit_put(i915); return err; } @@ -177,12 +191,10 @@ u32 vlv_nc_read(struct drm_i915_private *i915, u8 addr) { u32 val = 0; - WARN_ON(!mutex_is_locked(&i915->pcu_lock)); - - mutex_lock(&i915->sb_lock); + vlv_nc_get(i915); vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_NC, SB_CRRDDA_NP, addr, &val); - mutex_unlock(&i915->sb_lock); + vlv_nc_put(i915); return val; } @@ -281,7 +293,8 @@ u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg, enum intel_sbi_destination destination) { u32 value = 0; - WARN_ON(!mutex_is_locked(&dev_priv->sb_lock)); + + lockdep_assert_held(&dev_priv->sb_lock); if (intel_wait_for_register(&dev_priv->uncore, SBI_CTL_STAT, SBI_BUSY, 0, @@ -321,7 +334,7 @@ void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value, { u32 tmp; - WARN_ON(!mutex_is_locked(&dev_priv->sb_lock)); + lockdep_assert_held(&dev_priv->sb_lock); if (intel_wait_for_register(&dev_priv->uncore, SBI_CTL_STAT, SBI_BUSY, 0, diff --git a/drivers/gpu/drm/i915/vlv_dsi.c b/drivers/gpu/drm/i915/vlv_dsi.c index e0b1ec821960..dc1839bfde3e 100644 --- a/drivers/gpu/drm/i915/vlv_dsi.c +++ b/drivers/gpu/drm/i915/vlv_dsi.c @@ -248,7 +248,7 @@ static int dpi_send_cmd(struct intel_dsi *intel_dsi, u32 cmd, bool hs, static void band_gap_reset(struct drm_i915_private *dev_priv) { - mutex_lock(&dev_priv->sb_lock); + vlv_flisdsi_get(dev_priv); vlv_flisdsi_write(dev_priv, 0x08, 0x0001); vlv_flisdsi_write(dev_priv, 0x0F, 0x0005); @@ -257,7 +257,7 @@ static void band_gap_reset(struct drm_i915_private *dev_priv) vlv_flisdsi_write(dev_priv, 0x0F, 0x0000); vlv_flisdsi_write(dev_priv, 0x08, 0x0000); - mutex_unlock(&dev_priv->sb_lock); + vlv_flisdsi_put(dev_priv); } static int bdw_get_pipemisc_bpp(struct intel_crtc *crtc) @@ -515,11 +515,11 @@ static void vlv_dsi_device_ready(struct intel_encoder *encoder) DRM_DEBUG_KMS("\n"); - mutex_lock(&dev_priv->sb_lock); + vlv_flisdsi_get(dev_priv); /* program rcomp for compliance, reduce from 50 ohms to 45 ohms * needed everytime after power gate */ vlv_flisdsi_write(dev_priv, 0x04, 0x0004); - mutex_unlock(&dev_priv->sb_lock); + vlv_flisdsi_put(dev_priv); /* bandgap reset is needed after everytime we do power gate */ band_gap_reset(dev_priv); diff --git a/drivers/gpu/drm/i915/vlv_dsi_pll.c b/drivers/gpu/drm/i915/vlv_dsi_pll.c index 5e7b1fb2db5d..25b811174f5c 100644 --- a/drivers/gpu/drm/i915/vlv_dsi_pll.c +++ b/drivers/gpu/drm/i915/vlv_dsi_pll.c @@ -149,7 +149,7 @@ void vlv_dsi_pll_enable(struct intel_encoder *encoder, DRM_DEBUG_KMS("\n"); - mutex_lock(&dev_priv->sb_lock); + vlv_cck_get(dev_priv); vlv_cck_write(dev_priv, CCK_REG_DSI_PLL_CONTROL, 0); vlv_cck_write(dev_priv, CCK_REG_DSI_PLL_DIVIDER, config->dsi_pll.div); @@ -166,11 +166,11 @@ void vlv_dsi_pll_enable(struct intel_encoder *encoder, if (wait_for(vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL) & DSI_PLL_LOCK, 20)) { - mutex_unlock(&dev_priv->sb_lock); + vlv_cck_put(dev_priv); DRM_ERROR("DSI PLL lock failed\n"); return; } - mutex_unlock(&dev_priv->sb_lock); + vlv_cck_put(dev_priv); DRM_DEBUG_KMS("DSI PLL locked\n"); } @@ -182,14 +182,14 @@ void vlv_dsi_pll_disable(struct intel_encoder *encoder) DRM_DEBUG_KMS("\n"); - mutex_lock(&dev_priv->sb_lock); + vlv_cck_get(dev_priv); tmp = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL); tmp &= ~DSI_PLL_VCO_EN; tmp |= DSI_PLL_LDO_GATE; vlv_cck_write(dev_priv, CCK_REG_DSI_PLL_CONTROL, tmp); - mutex_unlock(&dev_priv->sb_lock); + vlv_cck_put(dev_priv); } bool bxt_dsi_pll_is_enabled(struct drm_i915_private *dev_priv) @@ -266,10 +266,10 @@ u32 vlv_dsi_get_pclk(struct intel_encoder *encoder, DRM_DEBUG_KMS("\n"); - mutex_lock(&dev_priv->sb_lock); + vlv_cck_get(dev_priv); pll_ctl = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL); pll_div = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_DIVIDER); - mutex_unlock(&dev_priv->sb_lock); + vlv_cck_put(dev_priv); config->dsi_pll.ctrl = pll_ctl & ~DSI_PLL_LOCK; config->dsi_pll.div = pll_div; From 337fa6e04d40216e9f462b23b86d9e62f93c3d48 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 26 Apr 2019 09:17:20 +0100 Subject: [PATCH 034/591] drm/i915: Lift sideband locking for vlv_punit_(read|write) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lift the sideband acquisition for vlv_punit_read and vlv_punit_write into their callers, so that we can lock the sideband once for a sequence of operations, rather than perform the heavyweight acquisition on each request. Signed-off-by: Chris Wilson Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190426081725.31217-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 5 +++ drivers/gpu/drm/i915/i915_sysfs.c | 14 ++++---- drivers/gpu/drm/i915/intel_cdclk.c | 23 ++++++++++--- drivers/gpu/drm/i915/intel_display.c | 16 +++++---- drivers/gpu/drm/i915/intel_pm.c | 46 ++++++++++++++++++++----- drivers/gpu/drm/i915/intel_runtime_pm.c | 10 ++++++ drivers/gpu/drm/i915/intel_sideband.c | 18 ++-------- 7 files changed, 89 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 6972f9b6ae83..f8472db460bb 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1057,7 +1057,10 @@ static int i915_frequency_info(struct seq_file *m, void *unused) yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE)); + vlv_punit_get(dev_priv); freq_sts = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); + vlv_punit_put(dev_priv); + seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts); seq_printf(m, "DDR freq: %d MHz\n", dev_priv->mem_freq); @@ -2030,8 +2033,10 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) with_intel_runtime_pm_if_in_use(dev_priv, wakeref) { if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { mutex_lock(&dev_priv->pcu_lock); + vlv_punit_get(dev_priv); act_freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); + vlv_punit_put(dev_priv); act_freq = (act_freq >> 8) & 0xff; mutex_unlock(&dev_priv->pcu_lock); } else { diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 41313005af42..bfabb3de4808 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -259,25 +259,25 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); intel_wakeref_t wakeref; - int ret; + u32 freq; wakeref = intel_runtime_pm_get(dev_priv); mutex_lock(&dev_priv->pcu_lock); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - u32 freq; + vlv_punit_get(dev_priv); freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); - ret = intel_gpu_freq(dev_priv, (freq >> 8) & 0xff); + vlv_punit_put(dev_priv); + + freq = (freq >> 8) & 0xff; } else { - ret = intel_gpu_freq(dev_priv, - intel_get_cagf(dev_priv, - I915_READ(GEN6_RPSTAT1))); + freq = intel_get_cagf(dev_priv, I915_READ(GEN6_RPSTAT1)); } mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv, wakeref); - return snprintf(buf, PAGE_SIZE, "%d\n", ret); + return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, freq)); } static ssize_t gt_cur_freq_mhz_show(struct device *kdev, diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 5845d0a37599..9dd22203a7e8 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -464,13 +464,19 @@ static void vlv_get_cdclk(struct drm_i915_private *dev_priv, { u32 val; + mutex_lock(&dev_priv->pcu_lock); + vlv_iosf_sb_get(dev_priv, + BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_PUNIT)); + cdclk_state->vco = vlv_get_hpll_vco(dev_priv); cdclk_state->cdclk = vlv_get_cck_clock(dev_priv, "cdclk", CCK_DISPLAY_CLOCK_CONTROL, cdclk_state->vco); - mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM); + + vlv_iosf_sb_put(dev_priv, + BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_PUNIT)); mutex_unlock(&dev_priv->pcu_lock); if (IS_VALLEYVIEW(dev_priv)) @@ -545,6 +551,11 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, */ wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); + vlv_iosf_sb_get(dev_priv, + BIT(VLV_IOSF_SB_CCK) | + BIT(VLV_IOSF_SB_BUNIT) | + BIT(VLV_IOSF_SB_PUNIT)); + mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM); val &= ~DSPFREQGUAR_MASK; @@ -557,9 +568,6 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, } mutex_unlock(&dev_priv->pcu_lock); - vlv_iosf_sb_get(dev_priv, - BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_BUNIT)); - if (cdclk == 400000) { u32 divider; @@ -593,7 +601,9 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, vlv_bunit_write(dev_priv, BUNIT_REG_BISOC, val); vlv_iosf_sb_put(dev_priv, - BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_BUNIT)); + BIT(VLV_IOSF_SB_CCK) | + BIT(VLV_IOSF_SB_BUNIT) | + BIT(VLV_IOSF_SB_PUNIT)); intel_update_cdclk(dev_priv); @@ -630,6 +640,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); mutex_lock(&dev_priv->pcu_lock); + vlv_punit_get(dev_priv); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM); val &= ~DSPFREQGUAR_MASK_CHV; val |= (cmd << DSPFREQGUAR_SHIFT_CHV); @@ -639,6 +650,8 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, 50)) { DRM_ERROR("timed out waiting for CDclk change\n"); } + + vlv_punit_put(dev_priv); mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 297efab33b44..8eef0e732ac9 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -152,10 +152,8 @@ int vlv_get_hpll_vco(struct drm_i915_private *dev_priv) int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 }; /* Obtain SKU information */ - vlv_cck_get(dev_priv); hpll_freq = vlv_cck_read(dev_priv, CCK_FUSE_REG) & CCK_FUSE_HPLL_FREQ_MASK; - vlv_cck_put(dev_priv); return vco_freq[hpll_freq] * 1000; } @@ -166,10 +164,7 @@ int vlv_get_cck_clock(struct drm_i915_private *dev_priv, u32 val; int divider; - vlv_cck_get(dev_priv); val = vlv_cck_read(dev_priv, reg); - vlv_cck_put(dev_priv); - divider = val & CCK_FREQUENCY_VALUES; WARN((val & CCK_FREQUENCY_STATUS) != @@ -182,11 +177,18 @@ int vlv_get_cck_clock(struct drm_i915_private *dev_priv, int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, const char *name, u32 reg) { + int hpll; + + vlv_cck_get(dev_priv); + if (dev_priv->hpll_freq == 0) dev_priv->hpll_freq = vlv_get_hpll_vco(dev_priv); - return vlv_get_cck_clock(dev_priv, name, reg, - dev_priv->hpll_freq); + hpll = vlv_get_cck_clock(dev_priv, name, reg, dev_priv->hpll_freq); + + vlv_cck_put(dev_priv); + + return hpll; } static void intel_update_czclk(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6c2f416b95a6..9db39ea9bd83 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -318,6 +318,7 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable) u32 val; mutex_lock(&dev_priv->pcu_lock); + vlv_punit_get(dev_priv); val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); if (enable) @@ -332,6 +333,7 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable) FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) DRM_ERROR("timed out waiting for Punit DDR DVFS request\n"); + vlv_punit_put(dev_priv); mutex_unlock(&dev_priv->pcu_lock); } @@ -340,6 +342,7 @@ static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable) u32 val; mutex_lock(&dev_priv->pcu_lock); + vlv_punit_get(dev_priv); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM); if (enable) @@ -348,6 +351,7 @@ static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable) val &= ~DSP_MAXFIFO_PM5_ENABLE; vlv_punit_write(dev_priv, PUNIT_REG_DSPSSPM, val); + vlv_punit_put(dev_priv); mutex_unlock(&dev_priv->pcu_lock); } @@ -6140,6 +6144,7 @@ void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv) if (IS_CHERRYVIEW(dev_priv)) { mutex_lock(&dev_priv->pcu_lock); + vlv_punit_get(dev_priv); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM); if (val & DSP_MAXFIFO_PM5_ENABLE) @@ -6169,6 +6174,7 @@ void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv) wm->level = VLV_WM_LEVEL_DDR_DVFS; } + vlv_punit_put(dev_priv); mutex_unlock(&dev_priv->pcu_lock); } @@ -6743,7 +6749,9 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); if (val != dev_priv->gt_pm.rps.cur_freq) { + vlv_punit_get(dev_priv); err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); + vlv_punit_put(dev_priv); if (err) return err; @@ -7755,6 +7763,11 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) valleyview_setup_pctx(dev_priv); + vlv_iosf_sb_get(dev_priv, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); + vlv_init_gpll_ref_freq(dev_priv); val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); @@ -7792,6 +7805,11 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", intel_gpu_freq(dev_priv, rps->min_freq), rps->min_freq); + + vlv_iosf_sb_put(dev_priv, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); } static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) @@ -7801,11 +7819,14 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) cherryview_setup_pctx(dev_priv); + vlv_iosf_sb_get(dev_priv, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); + vlv_init_gpll_ref_freq(dev_priv); - vlv_cck_get(dev_priv); val = vlv_cck_read(dev_priv, CCK_FUSE_REG); - vlv_cck_put(dev_priv); switch ((val >> 2) & 0x7) { case 3: @@ -7838,6 +7859,11 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) intel_gpu_freq(dev_priv, rps->min_freq), rps->min_freq); + vlv_iosf_sb_put(dev_priv, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); + WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq | rps->min_freq) & 1, "Odd GPU freq values\n"); @@ -7925,13 +7951,15 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) GEN6_RP_DOWN_IDLE_AVG); /* Setting Fixed Bias */ - val = VLV_OVERRIDE_EN | - VLV_SOC_TDP_EN | - CHV_BIAS_CPU_50_SOC_50; + vlv_punit_get(dev_priv); + + val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50; vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); + vlv_punit_put(dev_priv); + /* RPS code assumes GPLL is used */ WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); @@ -8008,14 +8036,16 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv) GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_CONT); + vlv_punit_get(dev_priv); + /* Setting Fixed Bias */ - val = VLV_OVERRIDE_EN | - VLV_SOC_TDP_EN | - VLV_BIAS_CPU_125_SOC_875; + val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875; vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); + vlv_punit_put(dev_priv); + /* RPS code assumes GPLL is used */ WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 9c1294c29566..ac8bc5baef40 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -1212,6 +1212,7 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv, PUNIT_PWRGT_PWR_GATE(pw_idx); mutex_lock(&dev_priv->pcu_lock); + vlv_punit_get(dev_priv); #define COND \ ((vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask) == state) @@ -1232,6 +1233,7 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv, #undef COND out: + vlv_punit_put(dev_priv); mutex_unlock(&dev_priv->pcu_lock); } @@ -1260,6 +1262,7 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, ctrl = PUNIT_PWRGT_PWR_ON(pw_idx); mutex_lock(&dev_priv->pcu_lock); + vlv_punit_get(dev_priv); state = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask; /* @@ -1278,6 +1281,7 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL) & mask; WARN_ON(ctrl != state); + vlv_punit_put(dev_priv); mutex_unlock(&dev_priv->pcu_lock); return enabled; @@ -1765,6 +1769,7 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, u32 state, ctrl; mutex_lock(&dev_priv->pcu_lock); + vlv_punit_get(dev_priv); state = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM) & DP_SSS_MASK(pipe); /* @@ -1781,6 +1786,7 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM) & DP_SSC_MASK(pipe); WARN_ON(ctrl << 16 != state); + vlv_punit_put(dev_priv); mutex_unlock(&dev_priv->pcu_lock); return enabled; @@ -1797,6 +1803,7 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe); mutex_lock(&dev_priv->pcu_lock); + vlv_punit_get(dev_priv); #define COND \ ((vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM) & DP_SSS_MASK(pipe)) == state) @@ -1817,6 +1824,7 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, #undef COND out: + vlv_punit_put(dev_priv); mutex_unlock(&dev_priv->pcu_lock); } @@ -4012,7 +4020,9 @@ static bool vlv_punit_is_power_gated(struct drm_i915_private *dev_priv, u32 reg0 bool ret; mutex_lock(&dev_priv->pcu_lock); + vlv_punit_get(dev_priv); ret = (vlv_punit_read(dev_priv, reg0) & SSPM0_SSC_MASK) == SSPM0_SSC_PWR_GATE; + vlv_punit_put(dev_priv); mutex_unlock(&dev_priv->pcu_lock); return ret; diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index b2fc605e2e29..7c33925f52f9 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -145,30 +145,18 @@ u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr) lockdep_assert_held(&i915->pcu_lock); - vlv_punit_get(i915); - vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, SB_CRRDDA_NP, addr, &val); - vlv_punit_put(i915); - return val; } int vlv_punit_write(struct drm_i915_private *i915, u32 addr, u32 val) { - int err; - lockdep_assert_held(&i915->pcu_lock); - vlv_punit_get(i915); - - err = vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, - SB_CRWRDA_NP, addr, &val); - - vlv_punit_put(i915); - - return err; + return vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, + SB_CRWRDA_NP, addr, &val); } u32 vlv_bunit_read(struct drm_i915_private *i915, u32 reg) @@ -191,10 +179,8 @@ u32 vlv_nc_read(struct drm_i915_private *i915, u8 addr) { u32 val = 0; - vlv_nc_get(i915); vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_NC, SB_CRRDDA_NP, addr, &val); - vlv_nc_put(i915); return val; } From ebb5eb7d731cc39e29661e0eb9dfe61242817663 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 26 Apr 2019 09:17:21 +0100 Subject: [PATCH 035/591] drm/i915: Replace pcu_lock with sb_lock We now have two locks for sideband access. The general one covering sideband access across all generation, sb_lock, and a specific one covering sideband access via the punit on vlv/chv. After lifting the sb_lock around the punit into the callers, the pcu_lock is now redudant and can be separated from its other use to regulate RPS (essentially giving RPS a lock all of its own). v2: Extract a couple of minor bug fixes. Signed-off-by: Chris Wilson Reviewed-by: Sagar Arun Kamble Link: https://patchwork.freedesktop.org/patch/msgid/20190426081725.31217-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 25 +---- drivers/gpu/drm/i915/i915_drv.h | 10 +- drivers/gpu/drm/i915/i915_irq.c | 4 +- drivers/gpu/drm/i915/i915_sysfs.c | 32 +++--- drivers/gpu/drm/i915/intel_cdclk.c | 28 ----- drivers/gpu/drm/i915/intel_display.c | 6 -- drivers/gpu/drm/i915/intel_hdcp.c | 2 - drivers/gpu/drm/i915/intel_pm.c | 136 +++++++++++------------- drivers/gpu/drm/i915/intel_runtime_pm.c | 10 -- drivers/gpu/drm/i915/intel_sideband.c | 4 - 10 files changed, 85 insertions(+), 172 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index f8472db460bb..9545556898e6 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1046,8 +1046,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused) } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { u32 rpmodectl, freq_sts; - mutex_lock(&dev_priv->pcu_lock); - rpmodectl = I915_READ(GEN6_RP_CONTROL); seq_printf(m, "Video Turbo Mode: %s\n", yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); @@ -1082,7 +1080,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "efficient (RPe) frequency: %d MHz\n", intel_gpu_freq(dev_priv, rps->efficient_freq)); - mutex_unlock(&dev_priv->pcu_lock); } else if (INTEL_GEN(dev_priv) >= 6) { u32 rp_state_limits; u32 gt_perf_status; @@ -1487,12 +1484,9 @@ static int gen6_drpc_info(struct seq_file *m) gen9_powergate_status = I915_READ(GEN9_PWRGT_DOMAIN_STATUS); } - if (INTEL_GEN(dev_priv) <= 7) { - mutex_lock(&dev_priv->pcu_lock); + if (INTEL_GEN(dev_priv) <= 7) sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); - mutex_unlock(&dev_priv->pcu_lock); - } seq_printf(m, "RC1e Enabled: %s\n", yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE)); @@ -1756,17 +1750,10 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) unsigned int max_gpu_freq, min_gpu_freq; intel_wakeref_t wakeref; int gpu_freq, ia_freq; - int ret; if (!HAS_LLC(dev_priv)) return -ENODEV; - wakeref = intel_runtime_pm_get(dev_priv); - - ret = mutex_lock_interruptible(&dev_priv->pcu_lock); - if (ret) - goto out; - min_gpu_freq = rps->min_freq; max_gpu_freq = rps->max_freq; if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { @@ -1777,6 +1764,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n"); + wakeref = intel_runtime_pm_get(dev_priv); for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) { ia_freq = gpu_freq; sandybridge_pcode_read(dev_priv, @@ -1790,12 +1778,9 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) ((ia_freq >> 0) & 0xff) * 100, ((ia_freq >> 8) & 0xff) * 100); } - - mutex_unlock(&dev_priv->pcu_lock); - -out: intel_runtime_pm_put(dev_priv, wakeref); - return ret; + + return 0; } static int i915_opregion(struct seq_file *m, void *unused) @@ -2032,13 +2017,11 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) with_intel_runtime_pm_if_in_use(dev_priv, wakeref) { if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - mutex_lock(&dev_priv->pcu_lock); vlv_punit_get(dev_priv); act_freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); vlv_punit_put(dev_priv); act_freq = (act_freq >> 8) & 0xff; - mutex_unlock(&dev_priv->pcu_lock); } else { act_freq = intel_get_cagf(dev_priv, I915_READ(GEN6_RPSTAT1)); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b2215fb7f562..ca1b35d8faca 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -648,6 +648,8 @@ struct intel_rps_ei { }; struct intel_rps { + struct mutex lock; /* protects enabling and the worker */ + /* * work, interrupts_enabled and pm_iir are protected by * dev_priv->irq_lock @@ -1710,14 +1712,6 @@ struct drm_i915_private { */ u32 edram_size_mb; - /* - * Protects RPS/RC6 register access and PCU communication. - * Must be taken after struct_mutex if nested. Note that - * this lock may be held for long periods of time when - * talking to hw - so only take it when talking to hw! - */ - struct mutex pcu_lock; - /* gen6+ GT PM state */ struct intel_gen6_power_mgmt gt_pm; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index b92cfd69134b..15f5415a0aa2 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1301,7 +1301,7 @@ static void gen6_pm_rps_work(struct work_struct *work) if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost) goto out; - mutex_lock(&dev_priv->pcu_lock); + mutex_lock(&rps->lock); pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir); @@ -1367,7 +1367,7 @@ static void gen6_pm_rps_work(struct work_struct *work) rps->last_adj = 0; } - mutex_unlock(&dev_priv->pcu_lock); + mutex_unlock(&rps->lock); out: /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */ diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index bfabb3de4808..0952d6a70e1f 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -263,7 +263,6 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, wakeref = intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->pcu_lock); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { vlv_punit_get(dev_priv); freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); @@ -273,7 +272,6 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, } else { freq = intel_get_cagf(dev_priv, I915_READ(GEN6_RPSTAT1)); } - mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv, wakeref); @@ -318,12 +316,12 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, if (val < rps->min_freq || val > rps->max_freq) return -EINVAL; - mutex_lock(&dev_priv->pcu_lock); + mutex_lock(&rps->lock); if (val != rps->boost_freq) { rps->boost_freq = val; boost = atomic_read(&rps->num_waiters); } - mutex_unlock(&dev_priv->pcu_lock); + mutex_unlock(&rps->lock); if (boost) schedule_work(&rps->work); @@ -364,17 +362,14 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, return ret; wakeref = intel_runtime_pm_get(dev_priv); - - mutex_lock(&dev_priv->pcu_lock); + mutex_lock(&rps->lock); val = intel_freq_opcode(dev_priv, val); - if (val < rps->min_freq || val > rps->max_freq || val < rps->min_freq_softlimit) { - mutex_unlock(&dev_priv->pcu_lock); - intel_runtime_pm_put(dev_priv, wakeref); - return -EINVAL; + ret = -EINVAL; + goto unlock; } if (val > rps->rp0_freq) @@ -392,8 +387,8 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, * frequency request may be unchanged. */ ret = intel_set_rps(dev_priv, val); - mutex_unlock(&dev_priv->pcu_lock); - +unlock: + mutex_unlock(&rps->lock); intel_runtime_pm_put(dev_priv, wakeref); return ret ?: count; @@ -423,17 +418,14 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, return ret; wakeref = intel_runtime_pm_get(dev_priv); - - mutex_lock(&dev_priv->pcu_lock); + mutex_lock(&rps->lock); val = intel_freq_opcode(dev_priv, val); - if (val < rps->min_freq || val > rps->max_freq || val > rps->max_freq_softlimit) { - mutex_unlock(&dev_priv->pcu_lock); - intel_runtime_pm_put(dev_priv, wakeref); - return -EINVAL; + ret = -EINVAL; + goto unlock; } rps->min_freq_softlimit = val; @@ -447,8 +439,8 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, * frequency request may be unchanged. */ ret = intel_set_rps(dev_priv, val); - mutex_unlock(&dev_priv->pcu_lock); - +unlock: + mutex_unlock(&rps->lock); intel_runtime_pm_put(dev_priv, wakeref); return ret ?: count; diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 9dd22203a7e8..2bc5d3227a24 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -464,7 +464,6 @@ static void vlv_get_cdclk(struct drm_i915_private *dev_priv, { u32 val; - mutex_lock(&dev_priv->pcu_lock); vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_PUNIT)); @@ -477,7 +476,6 @@ static void vlv_get_cdclk(struct drm_i915_private *dev_priv, vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_PUNIT)); - mutex_unlock(&dev_priv->pcu_lock); if (IS_VALLEYVIEW(dev_priv)) cdclk_state->voltage_level = (val & DSPFREQGUAR_MASK) >> @@ -556,7 +554,6 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, BIT(VLV_IOSF_SB_BUNIT) | BIT(VLV_IOSF_SB_PUNIT)); - mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM); val &= ~DSPFREQGUAR_MASK; val |= (cmd << DSPFREQGUAR_SHIFT); @@ -566,7 +563,6 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, 50)) { DRM_ERROR("timed out waiting for CDclk change\n"); } - mutex_unlock(&dev_priv->pcu_lock); if (cdclk == 400000) { u32 divider; @@ -639,7 +635,6 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, */ wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); - mutex_lock(&dev_priv->pcu_lock); vlv_punit_get(dev_priv); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM); val &= ~DSPFREQGUAR_MASK_CHV; @@ -652,7 +647,6 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, } vlv_punit_put(dev_priv); - mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); @@ -731,10 +725,8 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, "trying to change cdclk frequency with cdclk not enabled\n")) return; - mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write(dev_priv, BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ, 0x0); - mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("failed to inform pcode about cdclk change\n"); return; @@ -783,10 +775,8 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) DRM_ERROR("Switching back to LCPLL failed\n"); - mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, cdclk_state->voltage_level); - mutex_unlock(&dev_priv->pcu_lock); I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1); @@ -1025,12 +1015,10 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, */ WARN_ON_ONCE(IS_SKYLAKE(dev_priv) && vco == 8640000); - mutex_lock(&dev_priv->pcu_lock); ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, SKL_CDCLK_PREPARE_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, 3); - mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", ret); @@ -1094,10 +1082,8 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, POSTING_READ(CDCLK_CTL); /* inform PCU of the change */ - mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, cdclk_state->voltage_level); - mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); } @@ -1394,12 +1380,9 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, * requires us to wait up to 150usec, but that leads to timeouts; * the 2ms used here is based on experiment. */ - mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write_timeout(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, 0x80000000, 150, 2); - mutex_unlock(&dev_priv->pcu_lock); - if (ret) { DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n", ret, cdclk); @@ -1429,7 +1412,6 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, if (pipe != INVALID_PIPE) intel_wait_for_vblank(dev_priv, pipe); - mutex_lock(&dev_priv->pcu_lock); /* * The timeout isn't specified, the 2ms used here is based on * experiment. @@ -1439,8 +1421,6 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, ret = sandybridge_pcode_write_timeout(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, cdclk_state->voltage_level, 150, 2); - mutex_unlock(&dev_priv->pcu_lock); - if (ret) { DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n", ret, cdclk); @@ -1663,12 +1643,10 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, u32 val, divider; int ret; - mutex_lock(&dev_priv->pcu_lock); ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, SKL_CDCLK_PREPARE_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, 3); - mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", ret); @@ -1707,10 +1685,8 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, intel_wait_for_vblank(dev_priv, pipe); /* inform PCU of the change */ - mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, cdclk_state->voltage_level); - mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); @@ -1849,12 +1825,10 @@ static void icl_set_cdclk(struct drm_i915_private *dev_priv, unsigned int vco = cdclk_state->vco; int ret; - mutex_lock(&dev_priv->pcu_lock); ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, SKL_CDCLK_PREPARE_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, 3); - mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", ret); @@ -1876,10 +1850,8 @@ static void icl_set_cdclk(struct drm_i915_private *dev_priv, I915_WRITE(CDCLK_CTL, ICL_CDCLK_CD2X_PIPE_NONE | skl_cdclk_decimal(cdclk)); - mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, cdclk_state->voltage_level); - mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8eef0e732ac9..3cd8273b7186 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5331,10 +5331,8 @@ void hsw_enable_ips(const struct intel_crtc_state *crtc_state) WARN_ON(!(crtc_state->active_planes & ~BIT(PLANE_CURSOR))); if (IS_BROADWELL(dev_priv)) { - mutex_lock(&dev_priv->pcu_lock); WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, IPS_ENABLE | IPS_PCODE_CONTROL)); - mutex_unlock(&dev_priv->pcu_lock); /* Quoting Art Runyan: "its not safe to expect any particular * value in IPS_CTL bit 31 after enabling IPS through the * mailbox." Moreover, the mailbox may return a bogus state, @@ -5364,9 +5362,7 @@ void hsw_disable_ips(const struct intel_crtc_state *crtc_state) return; if (IS_BROADWELL(dev_priv)) { - mutex_lock(&dev_priv->pcu_lock); WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0)); - mutex_unlock(&dev_priv->pcu_lock); /* * Wait for PCODE to finish disabling IPS. The BSpec specified * 42ms timeout value leads to occasional timeouts so use 100ms @@ -9506,11 +9502,9 @@ static u32 hsw_read_dcomp(struct drm_i915_private *dev_priv) static void hsw_write_dcomp(struct drm_i915_private *dev_priv, u32 val) { if (IS_HASWELL(dev_priv)) { - mutex_lock(&dev_priv->pcu_lock); if (sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_D_COMP, val)) DRM_DEBUG_KMS("Failed to write to D_COMP\n"); - mutex_unlock(&dev_priv->pcu_lock); } else { I915_WRITE(D_COMP_BDW, val); POSTING_READ(D_COMP_BDW); diff --git a/drivers/gpu/drm/i915/intel_hdcp.c b/drivers/gpu/drm/i915/intel_hdcp.c index 99b007169c49..2476e867981d 100644 --- a/drivers/gpu/drm/i915/intel_hdcp.c +++ b/drivers/gpu/drm/i915/intel_hdcp.c @@ -213,10 +213,8 @@ static int intel_hdcp_load_keys(struct drm_i915_private *dev_priv) * from other platforms. So GEN9_BC uses the GT Driver Mailbox i/f. */ if (IS_GEN9_BC(dev_priv)) { - mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write(dev_priv, SKL_PCODE_LOAD_HDCP_KEYS, 1); - mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("Failed to initiate HDCP key load (%d)\n", ret); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 9db39ea9bd83..5d4a793a1988 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -317,7 +317,6 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable) { u32 val; - mutex_lock(&dev_priv->pcu_lock); vlv_punit_get(dev_priv); val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); @@ -334,14 +333,12 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable) DRM_ERROR("timed out waiting for Punit DDR DVFS request\n"); vlv_punit_put(dev_priv); - mutex_unlock(&dev_priv->pcu_lock); } static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable) { u32 val; - mutex_lock(&dev_priv->pcu_lock); vlv_punit_get(dev_priv); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM); @@ -352,7 +349,6 @@ static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable) vlv_punit_write(dev_priv, PUNIT_REG_DSPSSPM, val); vlv_punit_put(dev_priv); - mutex_unlock(&dev_priv->pcu_lock); } #define FW_WM(value, plane) \ @@ -2821,11 +2817,9 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, /* read the first set of memory latencies[0:3] */ val = 0; /* data0 to be programmed to 0 for first set */ - mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_READ_MEM_LATENCY, &val); - mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("SKL Mailbox read error = %d\n", ret); @@ -2842,11 +2836,9 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, /* read the second set of memory latencies[4:7] */ val = 1; /* data0 to be programmed to 1 for second set */ - mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_READ_MEM_LATENCY, &val); - mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("SKL Mailbox read error = %d\n", ret); return; @@ -3681,13 +3673,10 @@ intel_enable_sagv(struct drm_i915_private *dev_priv) return 0; DRM_DEBUG_KMS("Enabling SAGV\n"); - mutex_lock(&dev_priv->pcu_lock); - ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL, GEN9_SAGV_ENABLE); /* We don't need to wait for SAGV when enabling */ - mutex_unlock(&dev_priv->pcu_lock); /* * Some skl systems, pre-release machines in particular, @@ -3718,15 +3707,11 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) return 0; DRM_DEBUG_KMS("Disabling SAGV\n"); - mutex_lock(&dev_priv->pcu_lock); - /* bspec says to keep retrying for at least 1 ms */ ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL, GEN9_SAGV_DISABLE, GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED, 1); - mutex_unlock(&dev_priv->pcu_lock); - /* * Some skl systems, pre-release machines in particular, * don't actually have SAGV. @@ -6143,7 +6128,6 @@ void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv) wm->level = VLV_WM_LEVEL_PM2; if (IS_CHERRYVIEW(dev_priv)) { - mutex_lock(&dev_priv->pcu_lock); vlv_punit_get(dev_priv); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM); @@ -6175,7 +6159,6 @@ void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv) } vlv_punit_put(dev_priv); - mutex_unlock(&dev_priv->pcu_lock); } for_each_intel_crtc(&dev_priv->drm, crtc) { @@ -6804,7 +6787,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; - mutex_lock(&dev_priv->pcu_lock); + mutex_lock(&rps->lock); if (rps->enabled) { u8 freq; @@ -6827,7 +6810,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv) rps->max_freq_softlimit))) DRM_DEBUG_DRIVER("Failed to set idle frequency\n"); } - mutex_unlock(&dev_priv->pcu_lock); + mutex_unlock(&rps->lock); } void gen6_rps_idle(struct drm_i915_private *dev_priv) @@ -6841,7 +6824,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) */ gen6_disable_rps_interrupts(dev_priv); - mutex_lock(&dev_priv->pcu_lock); + mutex_lock(&rps->lock); if (rps->enabled) { if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) vlv_set_rps_idle(dev_priv); @@ -6851,7 +6834,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0)); } - mutex_unlock(&dev_priv->pcu_lock); + mutex_unlock(&rps->lock); } void gen6_rps_boost(struct i915_request *rq) @@ -6891,7 +6874,7 @@ int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) struct intel_rps *rps = &dev_priv->gt_pm.rps; int err; - lockdep_assert_held(&dev_priv->pcu_lock); + lockdep_assert_held(&rps->lock); GEM_BUG_ON(val > rps->max_freq); GEM_BUG_ON(val < rps->min_freq); @@ -7464,7 +7447,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) unsigned int max_gpu_freq, min_gpu_freq; struct cpufreq_policy *policy; - WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); + lockdep_assert_held(&rps->lock); if (rps->max_freq <= rps->min_freq) return; @@ -8549,8 +8532,6 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) pm_runtime_get(&dev_priv->drm.pdev->dev); } - mutex_lock(&dev_priv->pcu_lock); - /* Initialize RPS limits (for userspace) */ if (IS_CHERRYVIEW(dev_priv)) cherryview_init_gt_powersave(dev_priv); @@ -8581,8 +8562,6 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) rps->boost_freq = rps->max_freq; rps->idle_freq = rps->min_freq; rps->cur_freq = rps->idle_freq; - - mutex_unlock(&dev_priv->pcu_lock); } void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) @@ -8608,7 +8587,7 @@ void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) static inline void intel_disable_llc_pstate(struct drm_i915_private *i915) { - lockdep_assert_held(&i915->pcu_lock); + lockdep_assert_held(&i915->gt_pm.rps.lock); if (!i915->gt_pm.llc_pstate.enabled) return; @@ -8620,7 +8599,7 @@ static inline void intel_disable_llc_pstate(struct drm_i915_private *i915) static void intel_disable_rc6(struct drm_i915_private *dev_priv) { - lockdep_assert_held(&dev_priv->pcu_lock); + lockdep_assert_held(&dev_priv->gt_pm.rps.lock); if (!dev_priv->gt_pm.rc6.enabled) return; @@ -8639,7 +8618,7 @@ static void intel_disable_rc6(struct drm_i915_private *dev_priv) static void intel_disable_rps(struct drm_i915_private *dev_priv) { - lockdep_assert_held(&dev_priv->pcu_lock); + lockdep_assert_held(&dev_priv->gt_pm.rps.lock); if (!dev_priv->gt_pm.rps.enabled) return; @@ -8660,19 +8639,19 @@ static void intel_disable_rps(struct drm_i915_private *dev_priv) void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) { - mutex_lock(&dev_priv->pcu_lock); + mutex_lock(&dev_priv->gt_pm.rps.lock); intel_disable_rc6(dev_priv); intel_disable_rps(dev_priv); if (HAS_LLC(dev_priv)) intel_disable_llc_pstate(dev_priv); - mutex_unlock(&dev_priv->pcu_lock); + mutex_unlock(&dev_priv->gt_pm.rps.lock); } static inline void intel_enable_llc_pstate(struct drm_i915_private *i915) { - lockdep_assert_held(&i915->pcu_lock); + lockdep_assert_held(&i915->gt_pm.rps.lock); if (i915->gt_pm.llc_pstate.enabled) return; @@ -8684,7 +8663,7 @@ static inline void intel_enable_llc_pstate(struct drm_i915_private *i915) static void intel_enable_rc6(struct drm_i915_private *dev_priv) { - lockdep_assert_held(&dev_priv->pcu_lock); + lockdep_assert_held(&dev_priv->gt_pm.rps.lock); if (dev_priv->gt_pm.rc6.enabled) return; @@ -8709,7 +8688,7 @@ static void intel_enable_rps(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; - lockdep_assert_held(&dev_priv->pcu_lock); + lockdep_assert_held(&rps->lock); if (rps->enabled) return; @@ -8744,7 +8723,7 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) if (intel_vgpu_active(dev_priv)) return; - mutex_lock(&dev_priv->pcu_lock); + mutex_lock(&dev_priv->gt_pm.rps.lock); if (HAS_RC6(dev_priv)) intel_enable_rc6(dev_priv); @@ -8753,7 +8732,7 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) if (HAS_LLC(dev_priv)) intel_enable_llc_pstate(dev_priv); - mutex_unlock(&dev_priv->pcu_lock); + mutex_unlock(&dev_priv->gt_pm.rps.lock); } static void ibx_init_clock_gating(struct drm_i915_private *dev_priv) @@ -9769,22 +9748,20 @@ static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv) } } -int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val) +static int +__sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val) { int status; - WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); + lockdep_assert_held(&dev_priv->sb_lock); /* GEN6_PCODE_* are outside of the forcewake domain, we can * use te fw I915_READ variants to reduce the amount of work * required when reading/writing. */ - if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) { - DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps\n", - mbox, __builtin_return_address(0)); + if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) return -EAGAIN; - } I915_WRITE_FW(GEN6_PCODE_DATA, *val); I915_WRITE_FW(GEN6_PCODE_DATA1, 0); @@ -9792,11 +9769,8 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val if (__intel_wait_for_register_fw(&dev_priv->uncore, GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, - 500, 0, NULL)) { - DRM_ERROR("timeout waiting for pcode read (from mbox %x) to finish for %ps\n", - mbox, __builtin_return_address(0)); + 500, 0, NULL)) return -ETIMEDOUT; - } *val = I915_READ_FW(GEN6_PCODE_DATA); I915_WRITE_FW(GEN6_PCODE_DATA, 0); @@ -9806,33 +9780,40 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val else status = gen6_check_mailbox_status(dev_priv); - if (status) { - DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n", - mbox, __builtin_return_address(0), status); - return status; - } - - return 0; + return status; } -int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, - u32 mbox, u32 val, - int fast_timeout_us, int slow_timeout_ms) +int +sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val) { int status; - WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); + mutex_lock(&dev_priv->sb_lock); + status = __sandybridge_pcode_read(dev_priv, mbox, val); + mutex_unlock(&dev_priv->sb_lock); + + if (status) { + DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n", + mbox, __builtin_return_address(0), status); + } + + return status; +} + +static int __sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, + u32 mbox, u32 val, + int fast_timeout_us, + int slow_timeout_ms) +{ + int status; /* GEN6_PCODE_* are outside of the forcewake domain, we can * use te fw I915_READ variants to reduce the amount of work * required when reading/writing. */ - if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) { - DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps\n", - val, mbox, __builtin_return_address(0)); + if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) return -EAGAIN; - } I915_WRITE_FW(GEN6_PCODE_DATA, val); I915_WRITE_FW(GEN6_PCODE_DATA1, 0); @@ -9841,11 +9822,8 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, if (__intel_wait_for_register_fw(&dev_priv->uncore, GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, fast_timeout_us, slow_timeout_ms, - NULL)) { - DRM_ERROR("timeout waiting for pcode write of 0x%08x to mbox %x to finish for %ps\n", - val, mbox, __builtin_return_address(0)); + NULL)) return -ETIMEDOUT; - } I915_WRITE_FW(GEN6_PCODE_DATA, 0); @@ -9854,13 +9832,28 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, else status = gen6_check_mailbox_status(dev_priv); + return status; +} + +int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, + u32 mbox, u32 val, + int fast_timeout_us, + int slow_timeout_ms) +{ + int status; + + mutex_lock(&dev_priv->sb_lock); + status = __sandybridge_pcode_write_timeout(dev_priv, mbox, val, + fast_timeout_us, + slow_timeout_ms); + mutex_unlock(&dev_priv->sb_lock); + if (status) { DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n", val, mbox, __builtin_return_address(0), status); - return status; } - return 0; + return status; } static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox, @@ -9869,7 +9862,7 @@ static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox, { u32 val = request; - *status = sandybridge_pcode_read(dev_priv, mbox, &val); + *status = __sandybridge_pcode_read(dev_priv, mbox, &val); return *status || ((val & reply_mask) == reply); } @@ -9899,7 +9892,7 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, u32 status; int ret; - WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); + mutex_lock(&dev_priv->sb_lock); #define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \ &status) @@ -9935,6 +9928,7 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, preempt_enable(); out: + mutex_unlock(&dev_priv->sb_lock); return ret ? ret : status; #undef COND } @@ -10004,7 +9998,7 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val) void intel_pm_setup(struct drm_i915_private *dev_priv) { - mutex_init(&dev_priv->pcu_lock); + mutex_init(&dev_priv->gt_pm.rps.lock); mutex_init(&dev_priv->gt_pm.rps.power.mutex); atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index ac8bc5baef40..a80ff35f6c81 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -1211,7 +1211,6 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv, state = enable ? PUNIT_PWRGT_PWR_ON(pw_idx) : PUNIT_PWRGT_PWR_GATE(pw_idx); - mutex_lock(&dev_priv->pcu_lock); vlv_punit_get(dev_priv); #define COND \ @@ -1234,7 +1233,6 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv, out: vlv_punit_put(dev_priv); - mutex_unlock(&dev_priv->pcu_lock); } static void vlv_power_well_enable(struct drm_i915_private *dev_priv, @@ -1261,7 +1259,6 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, mask = PUNIT_PWRGT_MASK(pw_idx); ctrl = PUNIT_PWRGT_PWR_ON(pw_idx); - mutex_lock(&dev_priv->pcu_lock); vlv_punit_get(dev_priv); state = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask; @@ -1282,7 +1279,6 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, WARN_ON(ctrl != state); vlv_punit_put(dev_priv); - mutex_unlock(&dev_priv->pcu_lock); return enabled; } @@ -1768,7 +1764,6 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, bool enabled; u32 state, ctrl; - mutex_lock(&dev_priv->pcu_lock); vlv_punit_get(dev_priv); state = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM) & DP_SSS_MASK(pipe); @@ -1787,7 +1782,6 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, WARN_ON(ctrl << 16 != state); vlv_punit_put(dev_priv); - mutex_unlock(&dev_priv->pcu_lock); return enabled; } @@ -1802,7 +1796,6 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe); - mutex_lock(&dev_priv->pcu_lock); vlv_punit_get(dev_priv); #define COND \ @@ -1825,7 +1818,6 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, out: vlv_punit_put(dev_priv); - mutex_unlock(&dev_priv->pcu_lock); } static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv, @@ -4019,11 +4011,9 @@ static bool vlv_punit_is_power_gated(struct drm_i915_private *dev_priv, u32 reg0 { bool ret; - mutex_lock(&dev_priv->pcu_lock); vlv_punit_get(dev_priv); ret = (vlv_punit_read(dev_priv, reg0) & SSPM0_SSC_MASK) == SSPM0_SSC_PWR_GATE; vlv_punit_put(dev_priv); - mutex_unlock(&dev_priv->pcu_lock); return ret; } diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index 7c33925f52f9..457b8cad5494 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -143,8 +143,6 @@ u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr) { u32 val = 0; - lockdep_assert_held(&i915->pcu_lock); - vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, SB_CRRDDA_NP, addr, &val); @@ -153,8 +151,6 @@ u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr) int vlv_punit_write(struct drm_i915_private *i915, u32 addr, u32 val) { - lockdep_assert_held(&i915->pcu_lock); - return vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, SB_CRWRDA_NP, addr, &val); } From 56c5098ffcf8e655ac4e8f0634e44f1cea988590 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 26 Apr 2019 09:17:22 +0100 Subject: [PATCH 036/591] drm/i915: Separate sideband declarations to intel_sideband.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split the sideback declarations out of the ginormous i915_drv.h Signed-off-by: Chris Wilson Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190426081725.31217-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile.header-test | 1 + drivers/gpu/drm/i915/i915_debugfs.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 120 -------------------- drivers/gpu/drm/i915/i915_sysfs.c | 2 + drivers/gpu/drm/i915/intel_cdclk.c | 1 + drivers/gpu/drm/i915/intel_display.c | 1 + drivers/gpu/drm/i915/intel_dp.c | 2 + drivers/gpu/drm/i915/intel_dpio_phy.c | 1 + drivers/gpu/drm/i915/intel_dsi_vbt.c | 13 ++- drivers/gpu/drm/i915/intel_hdmi.c | 1 + drivers/gpu/drm/i915/intel_pm.c | 1 + drivers/gpu/drm/i915/intel_runtime_pm.c | 1 + drivers/gpu/drm/i915/intel_sideband.c | 2 + drivers/gpu/drm/i915/intel_sideband.h | 130 ++++++++++++++++++++++ drivers/gpu/drm/i915/vlv_dsi.c | 2 +- drivers/gpu/drm/i915/vlv_dsi_pll.c | 4 +- 16 files changed, 157 insertions(+), 126 deletions(-) create mode 100644 drivers/gpu/drm/i915/intel_sideband.h diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test index 702e3a7ade4c..325071da0ff7 100644 --- a/drivers/gpu/drm/i915/Makefile.header-test +++ b/drivers/gpu/drm/i915/Makefile.header-test @@ -30,6 +30,7 @@ header_test := \ intel_pipe_crc.h \ intel_pm.h \ intel_psr.h \ + intel_sideband.h \ intel_sdvo.h \ intel_sprite.h \ intel_tv.h \ diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9545556898e6..28e69e5ae9b3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -42,6 +42,7 @@ #include "intel_hdmi.h" #include "intel_pm.h" #include "intel_psr.h" +#include "intel_sideband.h" static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) { diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ca1b35d8faca..c18b28271bfd 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -541,11 +541,6 @@ enum intel_pch { PCH_ICP, /* Ice Lake PCH */ }; -enum intel_sbi_destination { - SBI_ICLK, - SBI_MPHY, -}; - #define QUIRK_LVDS_SSC_DISABLE (1<<1) #define QUIRK_INVERT_BRIGHTNESS (1<<2) #define QUIRK_BACKLIGHT_PRESENT (1<<3) @@ -3435,121 +3430,6 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, u32 mbox, int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, u32 reply_mask, u32 reply, int timeout_base_ms); -/* intel_sideband.c */ - -enum { - VLV_IOSF_SB_BUNIT, - VLV_IOSF_SB_CCK, - VLV_IOSF_SB_CCU, - VLV_IOSF_SB_DPIO, - VLV_IOSF_SB_FLISDSI, - VLV_IOSF_SB_GPIO, - VLV_IOSF_SB_NC, - VLV_IOSF_SB_PUNIT, -}; - -void vlv_iosf_sb_get(struct drm_i915_private *i915, unsigned long ports); -u32 vlv_iosf_sb_read(struct drm_i915_private *i915, u8 port, u32 reg); -void vlv_iosf_sb_write(struct drm_i915_private *i915, - u8 port, u32 reg, u32 val); -void vlv_iosf_sb_put(struct drm_i915_private *i915, unsigned long ports); - -static inline void vlv_bunit_get(struct drm_i915_private *i915) -{ - vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_BUNIT)); -} - -u32 vlv_bunit_read(struct drm_i915_private *i915, u32 reg); -void vlv_bunit_write(struct drm_i915_private *i915, u32 reg, u32 val); - -static inline void vlv_bunit_put(struct drm_i915_private *i915) -{ - vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_BUNIT)); -} - -static inline void vlv_cck_get(struct drm_i915_private *i915) -{ - vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_CCK)); -} - -u32 vlv_cck_read(struct drm_i915_private *i915, u32 reg); -void vlv_cck_write(struct drm_i915_private *i915, u32 reg, u32 val); - -static inline void vlv_cck_put(struct drm_i915_private *i915) -{ - vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_CCK)); -} - -static inline void vlv_ccu_get(struct drm_i915_private *i915) -{ - vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_CCU)); -} - -u32 vlv_ccu_read(struct drm_i915_private *i915, u32 reg); -void vlv_ccu_write(struct drm_i915_private *i915, u32 reg, u32 val); - -static inline void vlv_ccu_put(struct drm_i915_private *i915) -{ - vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_CCU)); -} - -static inline void vlv_dpio_get(struct drm_i915_private *i915) -{ - vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_DPIO)); -} - -u32 vlv_dpio_read(struct drm_i915_private *i915, enum pipe pipe, int reg); -void vlv_dpio_write(struct drm_i915_private *i915, - enum pipe pipe, int reg, u32 val); - -static inline void vlv_dpio_put(struct drm_i915_private *i915) -{ - vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_DPIO)); -} - -static inline void vlv_flisdsi_get(struct drm_i915_private *i915) -{ - vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_FLISDSI)); -} - -u32 vlv_flisdsi_read(struct drm_i915_private *i915, u32 reg); -void vlv_flisdsi_write(struct drm_i915_private *i915, u32 reg, u32 val); - -static inline void vlv_flisdsi_put(struct drm_i915_private *i915) -{ - vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_FLISDSI)); -} - -static inline void vlv_nc_get(struct drm_i915_private *i915) -{ - vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_NC)); -} - -u32 vlv_nc_read(struct drm_i915_private *i915, u8 addr); - -static inline void vlv_nc_put(struct drm_i915_private *i915) -{ - vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_NC)); -} - -static inline void vlv_punit_get(struct drm_i915_private *i915) -{ - vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_PUNIT)); -} - -u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr); -int vlv_punit_write(struct drm_i915_private *i915, u32 addr, u32 val); - -static inline void vlv_punit_put(struct drm_i915_private *i915) -{ - vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_PUNIT)); -} - -u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg, - enum intel_sbi_destination destination); -void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value, - enum intel_sbi_destination destination); - /* intel_dpio_phy.c */ void bxt_port_to_phy_channel(struct drm_i915_private *dev_priv, enum port port, enum dpio_phy *phy, enum dpio_channel *ch); diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 0952d6a70e1f..9bb3a15e4683 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -29,7 +29,9 @@ #include #include #include + #include "intel_drv.h" +#include "intel_sideband.h" #include "i915_drv.h" static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 2bc5d3227a24..cf9c916e8d49 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -23,6 +23,7 @@ #include "intel_cdclk.h" #include "intel_drv.h" +#include "intel_sideband.h" /** * DOC: CDCLK / RAWCLK diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3cd8273b7186..2e2ed2c2b482 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -66,6 +66,7 @@ #include "intel_pm.h" #include "intel_psr.h" #include "intel_sdvo.h" +#include "intel_sideband.h" #include "intel_sprite.h" #include "intel_tv.h" diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 4fc25dcc97d4..08d92570f17f 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -31,6 +31,7 @@ #include #include #include + #include #include @@ -53,6 +54,7 @@ #include "intel_lvds.h" #include "intel_panel.h" #include "intel_psr.h" +#include "intel_sideband.h" #define DP_DPRX_ESI_LEN 14 diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index c784f3daaf51..d80887b5e234 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -23,6 +23,7 @@ #include "intel_dp.h" #include "intel_drv.h" +#include "intel_sideband.h" /** * DOC: DPIO diff --git a/drivers/gpu/drm/i915/intel_dsi_vbt.c b/drivers/gpu/drm/i915/intel_dsi_vbt.c index 2304488f2d35..fbed9064ac7e 100644 --- a/drivers/gpu/drm/i915/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c @@ -24,18 +24,23 @@ * */ -#include -#include -#include #include #include #include -#include