From 13ed13a4dcbf0b664acbf9e6f98ec7851cc59862 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 21 Oct 2019 09:02:11 +0100 Subject: [PATCH 001/154] drm/i915: Don't set queue_priority_hint if we don't kick the submission If we change the priority of the active context, then it has no impact on the decision of whether to preempt the active context -- we don't preempt the context with itself. In this situation, we elide the tasklet rescheduling and should *not* be marking up the queue_priority_hint as that may mask a later submission where we decide we don't have to kick the tasklet as a higher priority submission is pending (spoiler alert, it was not). Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20191021080226.537-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_scheduler.c | 37 ++++++++++++++++----------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 0ca40f6bf08c..d2edb527dcb8 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -189,22 +189,34 @@ static inline bool need_preempt(int prio, int active) return prio >= max(I915_PRIORITY_NORMAL, active); } -static void kick_submission(struct intel_engine_cs *engine, int prio) +static void kick_submission(struct intel_engine_cs *engine, + const struct i915_request *rq, + int prio) { - const struct i915_request *inflight = - execlists_active(&engine->execlists); + const struct i915_request *inflight; + + /* + * We only need to kick the tasklet once for the high priority + * new context we add into the queue. + */ + if (prio <= engine->execlists.queue_priority_hint) + return; + + /* Nothing currently active? We're overdue for a submission! */ + inflight = execlists_active(&engine->execlists); + if (!inflight) + return; /* * If we are already the currently executing context, don't - * bother evaluating if we should preempt ourselves, or if - * we expect nothing to change as a result of running the - * tasklet, i.e. we have not change the priority queue - * sufficiently to oust the running context. + * bother evaluating if we should preempt ourselves. */ - if (!inflight || !need_preempt(prio, rq_prio(inflight))) + if (inflight->hw_context == rq->hw_context) return; - tasklet_hi_schedule(&engine->execlists.tasklet); + engine->execlists.queue_priority_hint = prio; + if (need_preempt(prio, rq_prio(inflight))) + tasklet_hi_schedule(&engine->execlists.tasklet); } static void __i915_schedule(struct i915_sched_node *node, @@ -330,13 +342,8 @@ static void __i915_schedule(struct i915_sched_node *node, list_move_tail(&node->link, cache.priolist); } - if (prio <= engine->execlists.queue_priority_hint) - continue; - - engine->execlists.queue_priority_hint = prio; - /* Defer (tasklet) submission until after all of our updates. */ - kick_submission(engine, prio); + kick_submission(engine, node_to_request(node), prio); } spin_unlock(&engine->active.lock); From 8f4b1068e7fc3df1a77ac8151767e56b208cc87f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 18 Jul 2019 17:43:40 +0300 Subject: [PATCH 002/154] drm/i915: Check some transcoder timing minimum limits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On ILK+ the documented min hdisplay is 64, min hblank is 32, and min vblank is 5. On earlier platforms min hblank is also 32, and min vblank is 3. Make sure the mode satisfies those limits. There are further limits for HDMI and pfit use cases, but we'll check for those in a more specific location. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190718144340.1114-2-ville.syrjala@linux.intel.com Reviewed-by: Manasi Navare --- drivers/gpu/drm/i915/display/intel_display.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 2912abd85148..236fdf122e47 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -16294,6 +16294,21 @@ intel_mode_valid(struct drm_device *dev, mode->vtotal > vtotal_max) return MODE_V_ILLEGAL; + if (INTEL_GEN(dev_priv) >= 5) { + if (mode->hdisplay < 64 || + mode->htotal - mode->hdisplay < 32) + return MODE_H_ILLEGAL; + + if (mode->vtotal - mode->vdisplay < 5) + return MODE_V_ILLEGAL; + } else { + if (mode->htotal - mode->hdisplay < 32) + return MODE_H_ILLEGAL; + + if (mode->vtotal - mode->vdisplay < 3) + return MODE_V_ILLEGAL; + } + return MODE_OK; } From 928da10c0ca2aee50099d63f20feeec71344ae67 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 21 Oct 2019 17:21:46 +0100 Subject: [PATCH 003/154] drm/i915/selftests: Use all physical engines for i915_active i915_active must track over any engine, so expand the selftest to iterate over all uabi engines. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191021162146.1686-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_active.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c index 268192b5613b..96513a7d4739 100644 --- a/drivers/gpu/drm/i915/selftests/i915_active.c +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -79,7 +79,6 @@ __live_active_setup(struct drm_i915_private *i915) struct intel_engine_cs *engine; struct i915_sw_fence *submit; struct live_active *active; - enum intel_engine_id id; unsigned int count = 0; int err = 0; @@ -97,7 +96,7 @@ __live_active_setup(struct drm_i915_private *i915) if (err) goto out; - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { struct i915_request *rq; rq = i915_request_create(engine->kernel_context); From b5e8e954eb672e7128e50589c43817b0ffce6f75 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 21 Oct 2019 18:43:39 +0100 Subject: [PATCH 004/154] drm/i915/gt: Introduce barrier pulses along engines To flush idle barriers, and even inflight requests, we want to send a preemptive 'pulse' along an engine. We use a no-op request along the pinned kernel_context at high priority so that it should run or else kick off the stuck requests. We can use this to ensure idle barriers are immediately flushed, as part of a context cancellation mechanism, or as part of a heartbeat mechanism to detect and reset a stuck GPU. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191021174339.5389-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 3 +- .../gpu/drm/i915/gt/intel_engine_heartbeat.c | 77 +++++++++ .../gpu/drm/i915/gt/intel_engine_heartbeat.h | 15 ++ drivers/gpu/drm/i915/gt/intel_engine_pm.c | 2 +- .../drm/i915/gt/selftest_engine_heartbeat.c | 159 ++++++++++++++++++ drivers/gpu/drm/i915/i915_active.c | 1 + drivers/gpu/drm/i915/i915_priolist_types.h | 1 + .../drm/i915/selftests/i915_live_selftests.h | 1 + 8 files changed, 257 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h create mode 100644 drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index a16a2daef977..2fd4bed188e5 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -78,8 +78,9 @@ gt-y += \ gt/intel_breadcrumbs.o \ gt/intel_context.o \ gt/intel_engine_cs.o \ - gt/intel_engine_pool.o \ + gt/intel_engine_heartbeat.o \ gt/intel_engine_pm.o \ + gt/intel_engine_pool.o \ gt/intel_engine_user.o \ gt/intel_gt.o \ gt/intel_gt_irq.o \ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c new file mode 100644 index 000000000000..4b9ab7813d54 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -0,0 +1,77 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_request.h" + +#include "intel_context.h" +#include "intel_engine_heartbeat.h" +#include "intel_engine_pm.h" +#include "intel_engine.h" +#include "intel_gt.h" + +static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq) +{ + engine->wakeref_serial = READ_ONCE(engine->serial) + 1; + i915_request_add_active_barriers(rq); +} + +int intel_engine_pulse(struct intel_engine_cs *engine) +{ + struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER }; + struct intel_context *ce = engine->kernel_context; + struct i915_request *rq; + int err = 0; + + if (!intel_engine_has_preemption(engine)) + return -ENODEV; + + if (!intel_engine_pm_get_if_awake(engine)) + return 0; + + if (mutex_lock_interruptible(&ce->timeline->mutex)) + goto out_rpm; + + intel_context_enter(ce); + rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN); + intel_context_exit(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_unlock; + } + + rq->flags |= I915_REQUEST_SENTINEL; + idle_pulse(engine, rq); + + __i915_request_commit(rq); + __i915_request_queue(rq, &attr); + +out_unlock: + mutex_unlock(&ce->timeline->mutex); +out_rpm: + intel_engine_pm_put(engine); + return err; +} + +int intel_engine_flush_barriers(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + if (llist_empty(&engine->barrier_tasks)) + return 0; + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + idle_pulse(engine, rq); + i915_request_add(rq); + + return 0; +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_engine_heartbeat.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h new file mode 100644 index 000000000000..b334e5aaf78d --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h @@ -0,0 +1,15 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_ENGINE_HEARTBEAT_H +#define INTEL_ENGINE_HEARTBEAT_H + +struct intel_engine_cs; + +int intel_engine_pulse(struct intel_engine_cs *engine); +int intel_engine_flush_barriers(struct intel_engine_cs *engine); + +#endif /* INTEL_ENGINE_HEARTBEAT_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 67eb6183648a..7d76611d9df1 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -111,7 +111,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) i915_request_add_active_barriers(rq); /* Install ourselves as a preemption barrier */ - rq->sched.attr.priority = I915_PRIORITY_UNPREEMPTABLE; + rq->sched.attr.priority = I915_PRIORITY_BARRIER; __i915_request_commit(rq); /* Release our exclusive hold on the engine */ diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c new file mode 100644 index 000000000000..1f5ab59ad6e7 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -0,0 +1,159 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include "i915_drv.h" + +#include "intel_gt_requests.h" +#include "i915_selftest.h" + +struct pulse { + struct i915_active active; + struct kref kref; +}; + +static int pulse_active(struct i915_active *active) +{ + kref_get(&container_of(active, struct pulse, active)->kref); + return 0; +} + +static void pulse_free(struct kref *kref) +{ + kfree(container_of(kref, struct pulse, kref)); +} + +static void pulse_put(struct pulse *p) +{ + kref_put(&p->kref, pulse_free); +} + +static void pulse_retire(struct i915_active *active) +{ + pulse_put(container_of(active, struct pulse, active)); +} + +static struct pulse *pulse_create(void) +{ + struct pulse *p; + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return p; + + kref_init(&p->kref); + i915_active_init(&p->active, pulse_active, pulse_retire); + + return p; +} + +static int __live_idle_pulse(struct intel_engine_cs *engine, + int (*fn)(struct intel_engine_cs *cs)) +{ + struct pulse *p; + int err; + + p = pulse_create(); + if (!p) + return -ENOMEM; + + err = i915_active_acquire(&p->active); + if (err) + goto out; + + err = i915_active_acquire_preallocate_barrier(&p->active, engine); + if (err) { + i915_active_release(&p->active); + goto out; + } + + i915_active_acquire_barrier(&p->active); + i915_active_release(&p->active); + + GEM_BUG_ON(i915_active_is_idle(&p->active)); + + err = fn(engine); + if (err) + goto out; + + if (intel_gt_retire_requests_timeout(engine->gt, HZ / 5)) { + err = -ETIME; + goto out; + } + + if (!i915_active_is_idle(&p->active)) { + pr_err("%s: heartbeat pulse did not flush idle tasks\n", + engine->name); + err = -EINVAL; + goto out; + } + +out: + pulse_put(p); + return err; +} + +static int live_idle_flush(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* Check that we can flush the idle barriers */ + + for_each_engine(engine, gt, id) { + intel_engine_pm_get(engine); + err = __live_idle_pulse(engine, intel_engine_flush_barriers); + intel_engine_pm_put(engine); + if (err) + break; + } + + return err; +} + +static int live_idle_pulse(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* Check that heartbeat pulses flush the idle barriers */ + + for_each_engine(engine, gt, id) { + intel_engine_pm_get(engine); + err = __live_idle_pulse(engine, intel_engine_pulse); + intel_engine_pm_put(engine); + if (err && err != -ENODEV) + break; + + err = 0; + } + + return err; +} + +int intel_heartbeat_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_idle_flush), + SUBTEST(live_idle_pulse), + }; + int saved_hangcheck; + int err; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + saved_hangcheck = i915_modparams.enable_hangcheck; + i915_modparams.enable_hangcheck = INT_MAX; + + err = intel_gt_live_subtests(tests, &i915->gt); + + i915_modparams.enable_hangcheck = saved_hangcheck; + return err; +} diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 7927b1a0c7a6..07d39f22a2c3 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -595,6 +595,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, struct llist_node *pos, *next; int err; + GEM_BUG_ON(i915_active_is_idle(ref)); GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers)); /* diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h index 21037a2e2038..ae8bb3cb627e 100644 --- a/drivers/gpu/drm/i915/i915_priolist_types.h +++ b/drivers/gpu/drm/i915/i915_priolist_types.h @@ -39,6 +39,7 @@ enum { * active request. */ #define I915_PRIORITY_UNPREEMPTABLE INT_MAX +#define I915_PRIORITY_BARRIER INT_MAX #define __NO_PREEMPTION (I915_PRIORITY_WAIT) diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 6daf6599ec79..00a063730bc3 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -17,6 +17,7 @@ selftest(gt_timelines, intel_timeline_live_selftests) selftest(gt_contexts, intel_context_live_selftests) selftest(gt_lrc, intel_lrc_live_selftests) selftest(gt_pm, intel_gt_pm_live_selftests) +selftest(gt_heartbeat, intel_heartbeat_live_selftests) selftest(requests, i915_request_live_selftests) selftest(active, i915_active_live_selftests) selftest(objects, i915_gem_object_live_selftests) From 71e51ca8dcc1155c914f285e4a71a5586f31a597 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 21 Oct 2019 19:32:35 +0100 Subject: [PATCH 005/154] drm/i915: Lift i915_vma_parked() onto the gt Currently even though i915_vma_parked() operates on a per-gt struct, it is called from a global pm notify. This oddity was only because the long term plan is to decouple the vma cache from the pm notification, but right now the oddity stands out like a sore thumb! Suggested-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191021183236.21790-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 1 - drivers/gpu/drm/i915/gt/intel_gt_pm.c | 1 + drivers/gpu/drm/i915/i915_vma.c | 32 +++++++++++++------------- drivers/gpu/drm/i915/i915_vma.h | 2 +- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 7987b54fb1f5..2aa7e9be088f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -23,7 +23,6 @@ static int pm_notifier(struct notifier_block *nb, break; case INTEL_GT_PARK: - i915_vma_parked(i915); break; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index b866d5b1eee0..fde5112b6650 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -71,6 +71,7 @@ static int __gt_park(struct intel_wakeref *wf) pm_notify(gt, INTEL_GT_PARK); intel_gt_park_requests(gt); + i915_vma_parked(gt); i915_pmu_gt_parked(i915); if (INTEL_GEN(i915) >= 6) gen6_rps_idle(i915); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index e90c4d0af8fd..d733bcf262f0 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -106,7 +106,7 @@ vma_create(struct drm_i915_gem_object *obj, struct rb_node *rb, **p; /* The aliasing_ppgtt should never be used directly! */ - GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm); + GEM_BUG_ON(vm == &vm->gt->ggtt->alias->vm); vma = i915_vma_alloc(); if (vma == NULL) @@ -412,7 +412,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) int err; /* Access through the GTT requires the device to be awake. */ - assert_rpm_wakelock_held(&vma->vm->i915->runtime_pm); + assert_rpm_wakelock_held(vma->vm->gt->uncore->rpm); if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { err = -ENODEV; goto err; @@ -945,7 +945,7 @@ err_pages: void i915_vma_close(struct i915_vma *vma) { - struct drm_i915_private *i915 = vma->vm->i915; + struct intel_gt *gt = vma->vm->gt; unsigned long flags; GEM_BUG_ON(i915_vma_is_closed(vma)); @@ -962,18 +962,18 @@ void i915_vma_close(struct i915_vma *vma) * causing us to rebind the VMA once more. This ends up being a lot * of wasted work for the steady state. */ - spin_lock_irqsave(&i915->gt.closed_lock, flags); - list_add(&vma->closed_link, &i915->gt.closed_vma); - spin_unlock_irqrestore(&i915->gt.closed_lock, flags); + spin_lock_irqsave(>->closed_lock, flags); + list_add(&vma->closed_link, >->closed_vma); + spin_unlock_irqrestore(>->closed_lock, flags); } static void __i915_vma_remove_closed(struct i915_vma *vma) { - struct drm_i915_private *i915 = vma->vm->i915; + struct intel_gt *gt = vma->vm->gt; - spin_lock_irq(&i915->gt.closed_lock); + spin_lock_irq(>->closed_lock); list_del_init(&vma->closed_link); - spin_unlock_irq(&i915->gt.closed_lock); + spin_unlock_irq(>->closed_lock); } void i915_vma_reopen(struct i915_vma *vma) @@ -1009,12 +1009,12 @@ void i915_vma_destroy(struct i915_vma *vma) i915_vma_free(vma); } -void i915_vma_parked(struct drm_i915_private *i915) +void i915_vma_parked(struct intel_gt *gt) { struct i915_vma *vma, *next; - spin_lock_irq(&i915->gt.closed_lock); - list_for_each_entry_safe(vma, next, &i915->gt.closed_vma, closed_link) { + spin_lock_irq(>->closed_lock); + list_for_each_entry_safe(vma, next, >->closed_vma, closed_link) { struct drm_i915_gem_object *obj = vma->obj; struct i915_address_space *vm = vma->vm; @@ -1028,7 +1028,7 @@ void i915_vma_parked(struct drm_i915_private *i915) obj = NULL; } - spin_unlock_irq(&i915->gt.closed_lock); + spin_unlock_irq(>->closed_lock); if (obj) { i915_vma_destroy(vma); @@ -1038,11 +1038,11 @@ void i915_vma_parked(struct drm_i915_private *i915) i915_vm_close(vm); /* Restart after dropping lock */ - spin_lock_irq(&i915->gt.closed_lock); - next = list_first_entry(&i915->gt.closed_vma, + spin_lock_irq(>->closed_lock); + next = list_first_entry(>->closed_vma, typeof(*next), closed_link); } - spin_unlock_irq(&i915->gt.closed_lock); + spin_unlock_irq(>->closed_lock); } static void __i915_vma_iounmap(struct i915_vma *vma) diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 858908e3d1cc..465932813bc5 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -462,7 +462,7 @@ i915_vma_unpin_fence(struct i915_vma *vma) __i915_vma_unpin_fence(vma); } -void i915_vma_parked(struct drm_i915_private *i915); +void i915_vma_parked(struct intel_gt *gt); #define for_each_until(cond) if (cond) break; else From 18f3b2727fc324db1a1787704d97b8f20a5fb1cb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 21 Oct 2019 19:32:36 +0100 Subject: [PATCH 006/154] drm/i915: Remove pm park/unpark notifications With the last user, i915_vma_parked(), retired, there are no more users of the per-gt pm notifications and we can remove the unused infrastructure. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191021183236.21790-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 25 ------------------------ drivers/gpu/drm/i915/gem/i915_gem_pm.h | 2 -- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 10 ---------- drivers/gpu/drm/i915/gt/intel_gt_pm.h | 5 ----- drivers/gpu/drm/i915/gt/intel_gt_types.h | 2 -- drivers/gpu/drm/i915/i915_gem.c | 1 - 6 files changed, 45 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 2aa7e9be088f..ee3279c76566 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -11,24 +11,6 @@ #include "i915_drv.h" -static int pm_notifier(struct notifier_block *nb, - unsigned long action, - void *data) -{ - struct drm_i915_private *i915 = - container_of(nb, typeof(*i915), gem.pm_notifier); - - switch (action) { - case INTEL_GT_UNPARK: - break; - - case INTEL_GT_PARK: - break; - } - - return NOTIFY_OK; -} - static bool switch_to_kernel_context_sync(struct intel_gt *gt) { bool result = !intel_gt_is_wedged(gt); @@ -206,10 +188,3 @@ err_wedged: } goto out_unlock; } - -void i915_gem_init__pm(struct drm_i915_private *i915) -{ - i915->gem.pm_notifier.notifier_call = pm_notifier; - blocking_notifier_chain_register(&i915->gt.pm_notifications, - &i915->gem.pm_notifier); -} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.h b/drivers/gpu/drm/i915/gem/i915_gem_pm.h index 6f7d5d11ac3b..a017572778d5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.h @@ -12,8 +12,6 @@ struct drm_i915_private; struct work_struct; -void i915_gem_init__pm(struct drm_i915_private *i915); - bool i915_gem_load_power_context(struct drm_i915_private *i915); void i915_gem_resume(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index fde5112b6650..427aded512f2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -16,11 +16,6 @@ #include "intel_rc6.h" #include "intel_wakeref.h" -static void pm_notify(struct intel_gt *gt, int state) -{ - blocking_notifier_call_chain(>->pm_notifications, state, gt->i915); -} - static int __gt_unpark(struct intel_wakeref *wf) { struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref); @@ -55,8 +50,6 @@ static int __gt_unpark(struct intel_wakeref *wf) intel_gt_queue_hangcheck(gt); intel_gt_unpark_requests(gt); - pm_notify(gt, INTEL_GT_UNPARK); - return 0; } @@ -68,7 +61,6 @@ static int __gt_park(struct intel_wakeref *wf) GEM_TRACE("\n"); - pm_notify(gt, INTEL_GT_PARK); intel_gt_park_requests(gt); i915_vma_parked(gt); @@ -96,8 +88,6 @@ static const struct intel_wakeref_ops wf_ops = { void intel_gt_pm_init_early(struct intel_gt *gt) { intel_wakeref_init(>->wakeref, gt->uncore->rpm, &wf_ops); - - BLOCKING_INIT_NOTIFIER_HEAD(>->pm_notifications); } void intel_gt_pm_init(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index 997770d3a968..0ed87da4bb68 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -12,11 +12,6 @@ #include "intel_gt_types.h" #include "intel_wakeref.h" -enum { - INTEL_GT_UNPARK, - INTEL_GT_PARK, -}; - static inline bool intel_gt_pm_is_awake(const struct intel_gt *gt) { return intel_wakeref_is_active(>->wakeref); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index ae4aaf75ac78..980973e66e7f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -83,8 +83,6 @@ struct intel_gt { struct intel_llc llc; struct intel_rc6 rc6; - struct blocking_notifier_head pm_notifications; - ktime_t last_init_time; struct i915_vma *scratch; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index dd0a3271b4e2..81eecbdf9685 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1432,7 +1432,6 @@ static void i915_gem_init__mm(struct drm_i915_private *i915) void i915_gem_init_early(struct drm_i915_private *dev_priv) { i915_gem_init__mm(dev_priv); - i915_gem_init__pm(dev_priv); spin_lock_init(&dev_priv->fb_tracking.lock); } From aa9eb0caaa0352820607076fc9b755f84fe71f22 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 22 Oct 2019 10:58:51 +0100 Subject: [PATCH 007/154] drm/i915/selftests: Set vm->gt backpointer for mock_ppgtt Add the backpointer to ppgtt and i915->gt so that we can traverse across the device hierarchy. Reported-by: Matthew Auld Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191022095851.23442-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 1 + drivers/gpu/drm/i915/selftests/mock_gtt.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index cb8c3a501cc7..bce1c855cf2f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -181,6 +181,7 @@ struct drm_i915_private *mock_gem_device(void) intel_timelines_init(i915); mock_init_ggtt(i915, &i915->ggtt); + i915->gt.ggtt = &i915->ggtt; mkwrite_device_info(i915)->engine_mask = BIT(0); diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index 173f2d4dbd14..9ec93dc27fb5 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -63,6 +63,7 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name) if (!ppgtt) return NULL; + ppgtt->vm.gt = &i915->gt; ppgtt->vm.i915 = i915; ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE); ppgtt->vm.file = ERR_PTR(-ENODEV); From adcb52649498d1727da1f9137d3c611dedb0214c Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 22 Oct 2019 10:47:15 +0100 Subject: [PATCH 008/154] drm/i915: Pass intel_gt to intel_engines_init_mmio Engines belong to the GT so make it indicative in the API. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191022094726.3001-2-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_engine.h | 4 +++- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 9 +++++---- drivers/gpu/drm/i915/i915_drv.c | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 93ea367fe624..638b19544acd 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -20,6 +20,8 @@ struct drm_printer; +struct intel_gt; + /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, * but keeps the logic simple. Indeed, the whole purpose of this macro is just * to give some inclination as to some of the magic values used in the various @@ -322,7 +324,7 @@ __intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) return (head - tail - CACHELINE_BYTES) & (size - 1); } -int intel_engines_init_mmio(struct drm_i915_private *i915); +int intel_engines_init_mmio(struct intel_gt *gt); int intel_engines_setup(struct drm_i915_private *i915); int intel_engines_init(struct drm_i915_private *i915); void intel_engines_cleanup(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 051734c9b733..735037f11cc5 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -396,12 +396,13 @@ void intel_engines_cleanup(struct drm_i915_private *i915) /** * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers - * @i915: the i915 device + * @gt: pointer to struct intel_gt * * Return: non-zero if the initialization failed. */ -int intel_engines_init_mmio(struct drm_i915_private *i915) +int intel_engines_init_mmio(struct intel_gt *gt) { + struct drm_i915_private *i915 = gt->i915; struct intel_device_info *device_info = mkwrite_device_info(i915); const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask; unsigned int mask = 0; @@ -419,7 +420,7 @@ int intel_engines_init_mmio(struct drm_i915_private *i915) if (!HAS_ENGINE(i915, i)) continue; - err = intel_engine_setup(&i915->gt, i); + err = intel_engine_setup(gt, i); if (err) goto cleanup; @@ -436,7 +437,7 @@ int intel_engines_init_mmio(struct drm_i915_private *i915) RUNTIME_INFO(i915)->num_engines = hweight32(mask); - intel_gt_check_and_clear_faults(&i915->gt); + intel_gt_check_and_clear_faults(gt); intel_setup_engine_capabilities(i915); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 157ed22052a2..c4c7caa73f23 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -598,7 +598,7 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv) intel_uc_init_mmio(&dev_priv->gt.uc); - ret = intel_engines_init_mmio(dev_priv); + ret = intel_engines_init_mmio(&dev_priv->gt); if (ret) goto err_uncore; From 3ea951c693a288445ddb720b52fad59b06fe1042 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 22 Oct 2019 10:47:16 +0100 Subject: [PATCH 009/154] drm/i915: Pass intel_gt to intel_setup_engine_capabilities Engines belong to the GT so make it indicative in the API. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191022094726.3001-3-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 735037f11cc5..a46ba24ba2e1 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -370,12 +370,12 @@ static void __setup_engine_capabilities(struct intel_engine_cs *engine) } } -static void intel_setup_engine_capabilities(struct drm_i915_private *i915) +static void intel_setup_engine_capabilities(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) + for_each_engine(engine, gt, id) __setup_engine_capabilities(engine); } @@ -439,7 +439,7 @@ int intel_engines_init_mmio(struct intel_gt *gt) intel_gt_check_and_clear_faults(gt); - intel_setup_engine_capabilities(i915); + intel_setup_engine_capabilities(gt); return 0; From b0258bf24252a487a03dfc94aeea72940ad3fe52 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 22 Oct 2019 10:47:17 +0100 Subject: [PATCH 010/154] drm/i915: Pass intel_gt to intel_engines_cleanup Engines belong to the GT so make it indicative in the API. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191022094726.3001-4-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_engine.h | 2 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 15 ++++++++------- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 4 ++-- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 638b19544acd..8675d8ea8c5a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -327,7 +327,7 @@ __intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) int intel_engines_init_mmio(struct intel_gt *gt); int intel_engines_setup(struct drm_i915_private *i915); int intel_engines_init(struct drm_i915_private *i915); -void intel_engines_cleanup(struct drm_i915_private *i915); +void intel_engines_cleanup(struct intel_gt *gt); int intel_engine_init_common(struct intel_engine_cs *engine); void intel_engine_cleanup_common(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index a46ba24ba2e1..ce7cab474e8c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -381,16 +381,17 @@ static void intel_setup_engine_capabilities(struct intel_gt *gt) /** * intel_engines_cleanup() - free the resources allocated for Command Streamers - * @i915: the i915 devic + * @gt: pointer to struct intel_gt */ -void intel_engines_cleanup(struct drm_i915_private *i915) +void intel_engines_cleanup(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { engine->destroy(engine); - i915->engine[id] = NULL; + gt->engine[id] = NULL; + gt->i915->engine[id] = NULL; } } @@ -444,7 +445,7 @@ int intel_engines_init_mmio(struct intel_gt *gt) return 0; cleanup: - intel_engines_cleanup(i915); + intel_engines_cleanup(gt); return err; } @@ -475,7 +476,7 @@ int intel_engines_init(struct drm_i915_private *i915) return 0; cleanup: - intel_engines_cleanup(i915); + intel_engines_cleanup(&i915->gt); return err; } @@ -659,7 +660,7 @@ int intel_engines_setup(struct drm_i915_private *i915) return 0; cleanup: - intel_engines_cleanup(i915); + intel_engines_cleanup(&i915->gt); return err; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c4c7caa73f23..5138d1eed306 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -621,7 +621,7 @@ err_bridge: */ static void i915_driver_mmio_release(struct drm_i915_private *dev_priv) { - intel_engines_cleanup(dev_priv); + intel_engines_cleanup(&dev_priv->gt); intel_teardown_mchbar(dev_priv); intel_uncore_fini_mmio(&dev_priv->uncore); pci_dev_put(dev_priv->bridge_dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 81eecbdf9685..fcc35912e42a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1328,7 +1328,7 @@ err_init_hw: err_uc_init: if (ret != -EIO) { intel_uc_fini(&dev_priv->gt.uc); - intel_engines_cleanup(dev_priv); + intel_engines_cleanup(&dev_priv->gt); } err_context: if (ret != -EIO) @@ -1397,7 +1397,7 @@ void i915_gem_driver_remove(struct drm_i915_private *dev_priv) void i915_gem_driver_release(struct drm_i915_private *dev_priv) { - intel_engines_cleanup(dev_priv); + intel_engines_cleanup(&dev_priv->gt); i915_gem_driver_release__contexts(dev_priv); intel_gt_driver_release(&dev_priv->gt); From 78f606033b6e48dbe382272ae0a18e2045a856b7 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 22 Oct 2019 10:47:18 +0100 Subject: [PATCH 011/154] drm/i915: Pass intel_gt to intel_engines_setup Engines belong to the GT so make it indicative in the API. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191022094726.3001-5-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_engine.h | 2 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 10 +++++----- drivers/gpu/drm/i915/i915_gem.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 8675d8ea8c5a..a947bbb60a22 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -325,7 +325,7 @@ __intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) } int intel_engines_init_mmio(struct intel_gt *gt); -int intel_engines_setup(struct drm_i915_private *i915); +int intel_engines_setup(struct intel_gt *gt); int intel_engines_init(struct drm_i915_private *i915); void intel_engines_cleanup(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index ce7cab474e8c..af64f9079b0f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -623,26 +623,26 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine) /** * intel_engines_setup- setup engine state not requiring hw access - * @i915: Device to setup. + * @gt: pointer to struct intel_gt * * Initializes engine structure members shared between legacy and execlists * submission modes which do not require hardware access. * * Typically done early in the submission mode specific engine setup stage. */ -int intel_engines_setup(struct drm_i915_private *i915) +int intel_engines_setup(struct intel_gt *gt) { int (*setup)(struct intel_engine_cs *engine); struct intel_engine_cs *engine; enum intel_engine_id id; int err; - if (HAS_EXECLISTS(i915)) + if (HAS_EXECLISTS(gt->i915)) setup = intel_execlists_submission_setup; else setup = intel_ring_submission_setup; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { err = intel_engine_setup_common(engine); if (err) goto cleanup; @@ -660,7 +660,7 @@ int intel_engines_setup(struct drm_i915_private *i915) return 0; cleanup: - intel_engines_cleanup(&i915->gt); + intel_engines_cleanup(gt); return err; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fcc35912e42a..fb45c7ecc607 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1249,7 +1249,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) intel_gt_init(&dev_priv->gt); - ret = intel_engines_setup(dev_priv); + ret = intel_engines_setup(&dev_priv->gt); if (ret) { GEM_BUG_ON(ret == -EIO); goto err_unlock; From 7841fcbdfb4340819c889b67662f6dd8d5685c8d Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 22 Oct 2019 10:47:19 +0100 Subject: [PATCH 012/154] drm/i915: Pass intel_gt to intel_engines_init Engines belong to the GT so make it indicative in the API. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191022094726.3001-6-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_engine.h | 2 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 10 +++++----- drivers/gpu/drm/i915/i915_gem.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index a947bbb60a22..c2d9d67c63d9 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -326,7 +326,7 @@ __intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) int intel_engines_init_mmio(struct intel_gt *gt); int intel_engines_setup(struct intel_gt *gt); -int intel_engines_init(struct drm_i915_private *i915); +int intel_engines_init(struct intel_gt *gt); void intel_engines_cleanup(struct intel_gt *gt); int intel_engine_init_common(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index af64f9079b0f..0e20713603ec 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -451,23 +451,23 @@ cleanup: /** * intel_engines_init() - init the Engine Command Streamers - * @i915: i915 device private + * @gt: pointer to struct intel_gt * * Return: non-zero if the initialization failed. */ -int intel_engines_init(struct drm_i915_private *i915) +int intel_engines_init(struct intel_gt *gt) { int (*init)(struct intel_engine_cs *engine); struct intel_engine_cs *engine; enum intel_engine_id id; int err; - if (HAS_EXECLISTS(i915)) + if (HAS_EXECLISTS(gt->i915)) init = intel_execlists_submission_init; else init = intel_ring_submission_init; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { err = init(engine); if (err) goto cleanup; @@ -476,7 +476,7 @@ int intel_engines_init(struct drm_i915_private *i915) return 0; cleanup: - intel_engines_cleanup(&i915->gt); + intel_engines_cleanup(gt); return err; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fb45c7ecc607..34273269f8f9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1261,7 +1261,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) goto err_scratch; } - ret = intel_engines_init(dev_priv); + ret = intel_engines_init(&dev_priv->gt); if (ret) { GEM_BUG_ON(ret == -EIO); goto err_context; From 7f63aa23526a8a0e17dd98205e52af78e3a660c3 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 22 Oct 2019 10:47:20 +0100 Subject: [PATCH 013/154] drm/i915: Pass intel_gt to intel_engines_verify_workarounds Engines belong to the GT so make it indicative in the API. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191022094726.3001-7-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_gem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 34273269f8f9..e86f0a4e4c6a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1198,7 +1198,7 @@ out: return err; } -static int intel_engines_verify_workarounds(struct drm_i915_private *i915) +static int intel_engines_verify_workarounds(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -1207,7 +1207,7 @@ static int intel_engines_verify_workarounds(struct drm_i915_private *i915) if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) return 0; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { if (intel_engine_verify_workarounds(engine, "load")) err = -EIO; } @@ -1291,7 +1291,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) */ intel_init_clock_gating(dev_priv); - ret = intel_engines_verify_workarounds(dev_priv); + ret = intel_engines_verify_workarounds(&dev_priv->gt); if (ret) goto err_gt; From 8726a2a4df4db2bc7e5ac3f9b5c582bbd384ce6b Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 22 Oct 2019 10:47:21 +0100 Subject: [PATCH 014/154] drm/i915: Split drop caches into GT and i915 parts Just compartmentalizes code a bit more. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191022094726.3001-8-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index ada57eee914a..16211430eb78 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3648,17 +3648,11 @@ i915_drop_caches_get(void *data, u64 *val) return 0; } - static int -i915_drop_caches_set(void *data, u64 val) +gt_drop_caches(struct intel_gt *gt, u64 val) { - struct drm_i915_private *i915 = data; - struct intel_gt *gt = &i915->gt; int ret; - DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n", - val, val & DROP_ALL); - if (val & DROP_RESET_ACTIVE && wait_for(intel_engines_are_idle(gt), I915_IDLE_ENGINES_TIMEOUT)) intel_gt_set_wedged(gt); @@ -3681,6 +3675,22 @@ i915_drop_caches_set(void *data, u64 val) if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(gt)) intel_gt_handle_error(gt, ALL_ENGINES, 0, NULL); + return 0; +} + +static int +i915_drop_caches_set(void *data, u64 val) +{ + struct drm_i915_private *i915 = data; + int ret; + + DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n", + val, val & DROP_ALL); + + ret = gt_drop_caches(&i915->gt, val); + if (ret) + return ret; + fs_reclaim_acquire(GFP_KERNEL); if (val & DROP_BOUND) i915_gem_shrink(i915, LONG_MAX, NULL, I915_SHRINK_BOUND); From 2271a223e06b59182cfb0a840395903d87288d66 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 22 Oct 2019 10:47:22 +0100 Subject: [PATCH 015/154] drm/i915/selftests: Convert eviction selftests to gt/ggtt Convert the test code to work directly on what it needs rather than going through the top-level i915. This enables another natural usage for for_each_engine(.., gt, ..). Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191022094726.3001-9-tvrtko.ursulin@linux.intel.com --- .../gpu/drm/i915/selftests/i915_gem_evict.c | 100 +++++++++--------- 1 file changed, 51 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 0af9a58d011d..42e948144f1b 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -43,8 +43,7 @@ static void quirk_add(struct drm_i915_gem_object *obj, list_add(&obj->st_link, objects); } -static int populate_ggtt(struct drm_i915_private *i915, - struct list_head *objects) +static int populate_ggtt(struct i915_ggtt *ggtt, struct list_head *objects) { unsigned long unbound, bound, count; struct drm_i915_gem_object *obj; @@ -53,7 +52,8 @@ static int populate_ggtt(struct drm_i915_private *i915, do { struct i915_vma *vma; - obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + obj = i915_gem_object_create_internal(ggtt->vm.i915, + I915_GTT_PAGE_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -70,7 +70,7 @@ static int populate_ggtt(struct drm_i915_private *i915, count++; } while (1); pr_debug("Filled GGTT with %lu pages [%llu total]\n", - count, i915->ggtt.vm.total / PAGE_SIZE); + count, ggtt->vm.total / PAGE_SIZE); bound = 0; unbound = 0; @@ -96,7 +96,7 @@ static int populate_ggtt(struct drm_i915_private *i915, return -EINVAL; } - if (list_empty(&i915->ggtt.vm.bound_list)) { + if (list_empty(&ggtt->vm.bound_list)) { pr_err("No objects on the GGTT inactive list!\n"); return -EINVAL; } @@ -104,17 +104,16 @@ static int populate_ggtt(struct drm_i915_private *i915, return 0; } -static void unpin_ggtt(struct drm_i915_private *i915) +static void unpin_ggtt(struct i915_ggtt *ggtt) { struct i915_vma *vma; - list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link) + list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) if (vma->obj->mm.quirked) i915_vma_unpin(vma); } -static void cleanup_objects(struct drm_i915_private *i915, - struct list_head *list) +static void cleanup_objects(struct i915_ggtt *ggtt, struct list_head *list) { struct drm_i915_gem_object *obj, *on; @@ -124,19 +123,19 @@ static void cleanup_objects(struct drm_i915_private *i915, i915_gem_object_put(obj); } - i915_gem_drain_freed_objects(i915); + i915_gem_drain_freed_objects(ggtt->vm.i915); } static int igt_evict_something(void *arg) { - struct drm_i915_private *i915 = arg; - struct i915_ggtt *ggtt = &i915->ggtt; + struct intel_gt *gt = arg; + struct i915_ggtt *ggtt = gt->ggtt; LIST_HEAD(objects); int err; /* Fill the GGTT with pinned objects and try to evict one. */ - err = populate_ggtt(i915, &objects); + err = populate_ggtt(ggtt, &objects); if (err) goto cleanup; @@ -153,7 +152,7 @@ static int igt_evict_something(void *arg) goto cleanup; } - unpin_ggtt(i915); + unpin_ggtt(ggtt); /* Everything is unpinned, we should be able to evict something */ mutex_lock(&ggtt->vm.mutex); @@ -169,13 +168,14 @@ static int igt_evict_something(void *arg) } cleanup: - cleanup_objects(i915, &objects); + cleanup_objects(ggtt, &objects); return err; } static int igt_overcommit(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; + struct i915_ggtt *ggtt = gt->ggtt; struct drm_i915_gem_object *obj; struct i915_vma *vma; LIST_HEAD(objects); @@ -185,11 +185,11 @@ static int igt_overcommit(void *arg) * We expect it to fail. */ - err = populate_ggtt(i915, &objects); + err = populate_ggtt(ggtt, &objects); if (err) goto cleanup; - obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + obj = i915_gem_object_create_internal(gt->i915, I915_GTT_PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto cleanup; @@ -205,14 +205,14 @@ static int igt_overcommit(void *arg) } cleanup: - cleanup_objects(i915, &objects); + cleanup_objects(ggtt, &objects); return err; } static int igt_evict_for_vma(void *arg) { - struct drm_i915_private *i915 = arg; - struct i915_ggtt *ggtt = &i915->ggtt; + struct intel_gt *gt = arg; + struct i915_ggtt *ggtt = gt->ggtt; struct drm_mm_node target = { .start = 0, .size = 4096, @@ -222,7 +222,7 @@ static int igt_evict_for_vma(void *arg) /* Fill the GGTT with pinned objects and try to evict a range. */ - err = populate_ggtt(i915, &objects); + err = populate_ggtt(ggtt, &objects); if (err) goto cleanup; @@ -236,7 +236,7 @@ static int igt_evict_for_vma(void *arg) goto cleanup; } - unpin_ggtt(i915); + unpin_ggtt(ggtt); /* Everything is unpinned, we should be able to evict the node */ mutex_lock(&ggtt->vm.mutex); @@ -249,7 +249,7 @@ static int igt_evict_for_vma(void *arg) } cleanup: - cleanup_objects(i915, &objects); + cleanup_objects(ggtt, &objects); return err; } @@ -262,8 +262,8 @@ static void mock_color_adjust(const struct drm_mm_node *node, static int igt_evict_for_cache_color(void *arg) { - struct drm_i915_private *i915 = arg; - struct i915_ggtt *ggtt = &i915->ggtt; + struct intel_gt *gt = arg; + struct i915_ggtt *ggtt = gt->ggtt; const unsigned long flags = PIN_OFFSET_FIXED; struct drm_mm_node target = { .start = I915_GTT_PAGE_SIZE * 2, @@ -284,7 +284,7 @@ static int igt_evict_for_cache_color(void *arg) ggtt->vm.mm.color_adjust = mock_color_adjust; GEM_BUG_ON(!i915_vm_has_cache_coloring(&ggtt->vm)); - obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + obj = i915_gem_object_create_internal(gt->i915, I915_GTT_PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto cleanup; @@ -300,7 +300,7 @@ static int igt_evict_for_cache_color(void *arg) goto cleanup; } - obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + obj = i915_gem_object_create_internal(gt->i915, I915_GTT_PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto cleanup; @@ -345,22 +345,22 @@ static int igt_evict_for_cache_color(void *arg) err = 0; cleanup: - unpin_ggtt(i915); - cleanup_objects(i915, &objects); + unpin_ggtt(ggtt); + cleanup_objects(ggtt, &objects); ggtt->vm.mm.color_adjust = NULL; return err; } static int igt_evict_vm(void *arg) { - struct drm_i915_private *i915 = arg; - struct i915_ggtt *ggtt = &i915->ggtt; + struct intel_gt *gt = arg; + struct i915_ggtt *ggtt = gt->ggtt; LIST_HEAD(objects); int err; /* Fill the GGTT with pinned objects and try to evict everything. */ - err = populate_ggtt(i915, &objects); + err = populate_ggtt(ggtt, &objects); if (err) goto cleanup; @@ -374,7 +374,7 @@ static int igt_evict_vm(void *arg) goto cleanup; } - unpin_ggtt(i915); + unpin_ggtt(ggtt); mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_vm(&ggtt->vm); @@ -386,14 +386,16 @@ static int igt_evict_vm(void *arg) } cleanup: - cleanup_objects(i915, &objects); + cleanup_objects(ggtt, &objects); return err; } static int igt_evict_contexts(void *arg) { const u64 PRETEND_GGTT_SIZE = 16ull << 20; - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; + struct i915_ggtt *ggtt = gt->ggtt; + struct drm_i915_private *i915 = gt->i915; struct intel_engine_cs *engine; enum intel_engine_id id; struct reserved { @@ -423,10 +425,10 @@ static int igt_evict_contexts(void *arg) /* Reserve a block so that we know we have enough to fit a few rq */ memset(&hole, 0, sizeof(hole)); - mutex_lock(&i915->ggtt.vm.mutex); - err = i915_gem_gtt_insert(&i915->ggtt.vm, &hole, + mutex_lock(&ggtt->vm.mutex); + err = i915_gem_gtt_insert(&ggtt->vm, &hole, PRETEND_GGTT_SIZE, 0, I915_COLOR_UNEVICTABLE, - 0, i915->ggtt.vm.total, + 0, ggtt->vm.total, PIN_NOEVICT); if (err) goto out_locked; @@ -436,17 +438,17 @@ static int igt_evict_contexts(void *arg) do { struct reserved *r; - mutex_unlock(&i915->ggtt.vm.mutex); + mutex_unlock(&ggtt->vm.mutex); r = kcalloc(1, sizeof(*r), GFP_KERNEL); - mutex_lock(&i915->ggtt.vm.mutex); + mutex_lock(&ggtt->vm.mutex); if (!r) { err = -ENOMEM; goto out_locked; } - if (i915_gem_gtt_insert(&i915->ggtt.vm, &r->node, + if (i915_gem_gtt_insert(&ggtt->vm, &r->node, 1ul << 20, 0, I915_COLOR_UNEVICTABLE, - 0, i915->ggtt.vm.total, + 0, ggtt->vm.total, PIN_NOEVICT)) { kfree(r); break; @@ -458,11 +460,11 @@ static int igt_evict_contexts(void *arg) count++; } while (1); drm_mm_remove_node(&hole); - mutex_unlock(&i915->ggtt.vm.mutex); + mutex_unlock(&ggtt->vm.mutex); pr_info("Filled GGTT with %lu 1MiB nodes\n", count); /* Overfill the GGTT with context objects and so try to evict one. */ - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct i915_sw_fence fence; struct drm_file *file; @@ -518,7 +520,7 @@ static int igt_evict_contexts(void *arg) break; } - mutex_lock(&i915->ggtt.vm.mutex); + mutex_lock(&ggtt->vm.mutex); out_locked: if (igt_flush_test(i915)) err = -EIO; @@ -532,7 +534,7 @@ out_locked: } if (drm_mm_node_allocated(&hole)) drm_mm_remove_node(&hole); - mutex_unlock(&i915->ggtt.vm.mutex); + mutex_unlock(&ggtt->vm.mutex); intel_runtime_pm_put(&i915->runtime_pm, wakeref); return err; @@ -556,7 +558,7 @@ int i915_gem_evict_mock_selftests(void) return -ENOMEM; with_intel_runtime_pm(&i915->runtime_pm, wakeref) - err = i915_subtests(tests, i915); + err = i915_subtests(tests, &i915->gt); drm_dev_put(&i915->drm); return err; @@ -571,5 +573,5 @@ int i915_gem_evict_live_selftests(struct drm_i915_private *i915) if (intel_gt_is_wedged(&i915->gt)) return 0; - return i915_subtests(tests, i915); + return intel_gt_live_subtests(tests, &i915->gt); } From 6457099ac59e7aa43d0a3fc8f9d5cff508981295 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 22 Oct 2019 10:47:23 +0100 Subject: [PATCH 016/154] drm/i915/selftests: Use GT engines in mock_gem_device Just freeing up two more call sites from passing in i915 to for_each_engine. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191022094726.3001-10-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index bce1c855cf2f..c2f5775b6d58 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -40,14 +40,14 @@ void mock_device_flush(struct drm_i915_private *i915) { + struct intel_gt *gt = &i915->gt; struct intel_engine_cs *engine; enum intel_engine_id id; do { - for_each_engine(engine, i915, id) + for_each_engine(engine, gt, id) mock_engine_flush(engine); - } while (intel_gt_retire_requests_timeout(&i915->gt, - MAX_SCHEDULE_TIMEOUT)); + } while (intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT)); } static void mock_device_release(struct drm_device *dev) @@ -60,7 +60,7 @@ static void mock_device_release(struct drm_device *dev) i915_gem_drain_workqueue(i915); - for_each_engine(engine, i915, id) + for_each_engine(engine, &i915->gt, id) mock_engine_free(engine); i915_gem_driver_release__contexts(i915); From d1a03ee7e9d024e0aa140b7959e2a5232cab392a Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 22 Oct 2019 10:47:24 +0100 Subject: [PATCH 017/154] drm/i915/selftests: Use GT engines in igt_live_test Frees up two call sites from passing i915 to for_each_engine. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191022094726.3001-11-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/selftests/igt_live_test.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index 810b60100c2c..c130010a7033 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -16,6 +16,7 @@ int igt_live_test_begin(struct igt_live_test *t, const char *func, const char *name) { + struct intel_gt *gt = &i915->gt; struct intel_engine_cs *engine; enum intel_engine_id id; int err; @@ -24,7 +25,7 @@ int igt_live_test_begin(struct igt_live_test *t, t->func = func; t->name = name; - err = intel_gt_wait_for_idle(&i915->gt, MAX_SCHEDULE_TIMEOUT); + err = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); if (err) { pr_err("%s(%s): failed to idle before, with err=%d!", func, name, err); @@ -33,7 +34,7 @@ int igt_live_test_begin(struct igt_live_test *t, t->reset_global = i915_reset_count(&i915->gpu_error); - for_each_engine(engine, i915, id) + for_each_engine(engine, gt, id) t->reset_engine[id] = i915_reset_engine_count(&i915->gpu_error, engine); @@ -56,7 +57,7 @@ int igt_live_test_end(struct igt_live_test *t) return -EIO; } - for_each_engine(engine, i915, id) { + for_each_engine(engine, &i915->gt, id) { if (t->reset_engine[id] == i915_reset_engine_count(&i915->gpu_error, engine)) continue; From 51757cf4d7e6e1e35c25128b085fd1dabebdf4e8 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 22 Oct 2019 10:47:26 +0100 Subject: [PATCH 018/154] drm/i915/selftests: Use for_each_uabi_engine in contex selftests Contexts are not testing physical engines so it makes sense to use the uabi iterator. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191022094726.3001-13-tvrtko.ursulin@linux.intel.com --- .../gpu/drm/i915/gem/selftests/i915_gem_context.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index e5c235051ae5..8f72f173db03 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -32,7 +32,6 @@ static int live_nop_switch(void *arg) struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; struct i915_gem_context **ctx; - enum intel_engine_id id; struct igt_live_test t; struct drm_file *file; unsigned long n; @@ -67,7 +66,7 @@ static int live_nop_switch(void *arg) } } - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { struct i915_request *rq; unsigned long end_time, prime; ktime_t times[2] = {}; @@ -583,7 +582,6 @@ static int igt_ctx_exec(void *arg) { struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; - enum intel_engine_id id; int err = -ENODEV; /* @@ -595,7 +593,7 @@ static int igt_ctx_exec(void *arg) if (!DRIVER_CAPS(i915)->has_logical_contexts) return 0; - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { struct drm_i915_gem_object *obj = NULL; unsigned long ncontexts, ndwords, dw; struct i915_request *tq[5] = {}; @@ -711,7 +709,6 @@ static int igt_shared_ctx_exec(void *arg) struct i915_request *tq[5] = {}; struct i915_gem_context *parent; struct intel_engine_cs *engine; - enum intel_engine_id id; struct igt_live_test t; struct drm_file *file; int err = 0; @@ -743,7 +740,7 @@ static int igt_shared_ctx_exec(void *arg) if (err) goto out_file; - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { unsigned long ncontexts, ndwords, dw; struct drm_i915_gem_object *obj = NULL; IGT_TIMEOUT(end_time); @@ -1651,7 +1648,6 @@ static int igt_vm_isolation(void *arg) struct drm_file *file; I915_RND_STATE(prng); unsigned long count; - unsigned int id; u64 vm_total; int err; @@ -1692,7 +1688,7 @@ static int igt_vm_isolation(void *arg) vm_total -= I915_GTT_PAGE_SIZE; count = 0; - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { IGT_TIMEOUT(end_time); unsigned long this = 0; From e948761f5b024284dc423172e9aca0672f3dec5b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 22 Oct 2019 11:17:04 +0100 Subject: [PATCH 019/154] drm/i915/selftests: Make the mman object busy everywhere Loop over all engines, issuing a request for the object on each in order to make sure we leave no stone unturned when creating an active ref. The purpose is to make sure that we can reap a zombie object (one that is only alive due to an active reference on the GPU) no matter where that active reference emanates from. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191022101704.5618-1-chris@chris-wilson.co.uk --- .../drm/i915/gem/selftests/i915_gem_mman.c | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 65d4dbf91999..d45a93928ff5 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -515,20 +515,19 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); struct intel_engine_cs *engine; - enum intel_engine_id id; - struct i915_vma *vma; - int err; - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - return err; - - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { struct i915_request *rq; + struct i915_vma *vma; + int err; + + vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return err; rq = i915_request_create(engine->kernel_context); if (IS_ERR(rq)) { @@ -544,12 +543,13 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) i915_vma_unlock(vma); i915_request_add(rq); + i915_vma_unpin(vma); + if (err) + return err; } - i915_vma_unpin(vma); i915_gem_object_put(obj); /* leave it only alive via its active ref */ - - return err; + return 0; } static bool assert_mmap_offset(struct drm_i915_private *i915, From 0587152bf9a0d7ebfd7fcb401068a742027adb2a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 22 Oct 2019 13:28:45 +0100 Subject: [PATCH 020/154] drm/i915: Drop assertion that ce->pin_mutex guards state updates The actual conditions are that we know the GPU is not accessing the context, and we hold a pin on the context image to allow CPU access. We used a fake lock on ce->pin_mutex so that we could try and use lockdep to assert that access is serialised, but the various different hardirq/softirq contexts where we need to *fake* holding the pin_mutex are causing more trouble. Still it would be nice if we did have a way to reassure ourselves that the direct update to the context image is serialised with GPU execution. In the meantime, stop lockdep complaining about false irq inversions. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111923 Signed-off-by: Chris Wilson Acked-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20191022122845.25038-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 2 -- drivers/gpu/drm/i915/gt/intel_lrc.c | 16 ---------------- drivers/gpu/drm/i915/gt/selftest_lrc.c | 5 ----- drivers/gpu/drm/i915/i915_perf.c | 1 - 4 files changed, 24 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 427aded512f2..06e73d56cfcf 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -176,9 +176,7 @@ int intel_gt_resume(struct intel_gt *gt) ce = engine->kernel_context; if (ce) { GEM_BUG_ON(!intel_context_is_pinned(ce)); - mutex_acquire(&ce->pin_mutex.dep_map, 0, 0, _THIS_IP_); ce->ops->reset(ce); - mutex_release(&ce->pin_mutex.dep_map, 0, _THIS_IP_); } engine->serial++; /* kernel context lost */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index d0088d020220..f9f3e985bb79 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -235,16 +235,6 @@ static void execlists_init_reg_state(u32 *reg_state, const struct intel_ring *ring, bool close); -static void __context_pin_acquire(struct intel_context *ce) -{ - mutex_acquire(&ce->pin_mutex.dep_map, 2, 0, _RET_IP_); -} - -static void __context_pin_release(struct intel_context *ce) -{ - mutex_release(&ce->pin_mutex.dep_map, 0, _RET_IP_); -} - static void mark_eio(struct i915_request *rq) { if (i915_request_completed(rq)) @@ -2792,9 +2782,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) ce = rq->hw_context; GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); - /* Proclaim we have exclusive access to the context image! */ - __context_pin_acquire(ce); - rq = active_request(rq); if (!rq) { /* Idle context; tidy up the ring so we can restart afresh */ @@ -2860,7 +2847,6 @@ out_replay: __execlists_reset_reg_state(ce, engine); __execlists_update_reg_state(ce, engine); ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ - __context_pin_release(ce); unwind: /* Push back any incomplete requests for replay after the reset. */ @@ -4502,7 +4488,6 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, bool scrub) { GEM_BUG_ON(!intel_context_is_pinned(ce)); - __context_pin_acquire(ce); /* * We want a simple context + ring to execute the breadcrumb update. @@ -4528,7 +4513,6 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, intel_ring_update_space(ce->ring); __execlists_update_reg_state(ce, engine); - __context_pin_release(ce); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 5dc679781a08..7516d1c90925 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -168,12 +168,7 @@ static int live_unlite_restore(struct intel_gt *gt, int prio) } GEM_BUG_ON(!ce[1]->ring->size); intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2); - - local_irq_disable(); /* appease lockdep */ - __context_pin_acquire(ce[1]); __execlists_update_reg_state(ce[1], engine); - __context_pin_release(ce[1]); - local_irq_enable(); rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); if (IS_ERR(rq[0])) { diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index d2ac51fe4f04..3130b0c7ed83 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2615,7 +2615,6 @@ void i915_oa_init_reg_state(const struct intel_context *ce, struct i915_perf_stream *stream; /* perf.exclusive_stream serialised by gen8_configure_all_contexts() */ - lockdep_assert_held(&ce->pin_mutex); if (engine->class != RENDER_CLASS) return; From a201b00e5248bb6882ff7ed9e488c999019e729a Mon Sep 17 00:00:00 2001 From: James Ausmus Date: Thu, 17 Oct 2019 12:42:03 -0700 Subject: [PATCH 021/154] drm/i915/aml: Allow SPT PCH for all AML devices Even the AML devices that behave like CFLs can be paired with an SPT PCH. Allow this to happen without blowing up dmesg. BSpec: 33665 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=112013 Cc: Quanxian Wang Cc: Rodrigo Vivi Signed-off-by: James Ausmus Reviewed-by: Lyude Paul Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20191017194203.9645-1-james.ausmus@intel.com --- drivers/gpu/drm/i915/intel_pch.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c index 1035d3d46fd8..bb1cb6f12a50 100644 --- a/drivers/gpu/drm/i915/intel_pch.c +++ b/drivers/gpu/drm/i915/intel_pch.c @@ -52,7 +52,8 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) return PCH_SPT; case INTEL_PCH_SPT_LP_DEVICE_ID_TYPE: DRM_DEBUG_KMS("Found SunrisePoint LP PCH\n"); - WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)); + WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv) && + !IS_COFFEELAKE(dev_priv)); return PCH_SPT; case INTEL_PCH_KBP_DEVICE_ID_TYPE: DRM_DEBUG_KMS("Found Kaby Lake PCH (KBP)\n"); From 7867d709959927e5df04a34f98880e5e394b411d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 22 Oct 2019 15:45:01 +0100 Subject: [PATCH 022/154] drm/i915/gem: Distinguish each object type Separate each object class into a separate lock type to avoid lockdep cross-contamination between paths (i.e. userptr!). Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191022144501.26486-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 3 ++- drivers/gpu/drm/i915/gem/i915_gem_internal.c | 3 ++- drivers/gpu/drm/i915/gem/i915_gem_object.c | 5 +++-- drivers/gpu/drm/i915/gem/i915_gem_object.h | 3 ++- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 3 ++- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 3 ++- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 3 ++- drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c | 3 ++- drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 8 +++++--- drivers/gpu/drm/i915/gvt/dmabuf.c | 3 ++- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 3 ++- drivers/gpu/drm/i915/selftests/mock_region.c | 3 ++- 12 files changed, 28 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 96ce95c8ac5a..eaea49d08eb5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -256,6 +256,7 @@ static const struct drm_i915_gem_object_ops i915_gem_object_dmabuf_ops = { struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf) { + static struct lock_class_key lock_class; struct dma_buf_attachment *attach; struct drm_i915_gem_object *obj; int ret; @@ -287,7 +288,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, } drm_gem_private_object_init(dev, &obj->base, dma_buf->size); - i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops); + i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops, &lock_class); obj->base.import_attach = attach; obj->base.resv = dma_buf->resv; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index 5ae694c24df4..9cfb0e41ff06 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -164,6 +164,7 @@ struct drm_i915_gem_object * i915_gem_object_create_internal(struct drm_i915_private *i915, phys_addr_t size) { + static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; unsigned int cache_level; @@ -178,7 +179,7 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &i915_gem_object_internal_ops); + i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class); /* * Mark the object as volatile, such that the pages are marked as diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index dbf9be9a79f4..a50296cce0d8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -47,9 +47,10 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj) } void i915_gem_object_init(struct drm_i915_gem_object *obj, - const struct drm_i915_gem_object_ops *ops) + const struct drm_i915_gem_object_ops *ops, + struct lock_class_key *key) { - mutex_init(&obj->mm.lock); + __mutex_init(&obj->mm.lock, "obj->mm.lock", key); spin_lock_init(&obj->vma.lock); INIT_LIST_HEAD(&obj->vma.list); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 85921796851f..aead7e6725f9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -23,7 +23,8 @@ struct drm_i915_gem_object *i915_gem_object_alloc(void); void i915_gem_object_free(struct drm_i915_gem_object *obj); void i915_gem_object_init(struct drm_i915_gem_object *obj, - const struct drm_i915_gem_object_ops *ops); + const struct drm_i915_gem_object_ops *ops, + struct lock_class_key *key); struct drm_i915_gem_object * i915_gem_object_create_shmem(struct drm_i915_private *i915, resource_size_t size); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index be68b76e13b3..4d69c3fc3439 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -465,6 +465,7 @@ create_shmem(struct intel_memory_region *mem, resource_size_t size, unsigned int flags) { + static struct lock_class_key lock_class; struct drm_i915_private *i915 = mem->i915; struct drm_i915_gem_object *obj; struct address_space *mapping; @@ -491,7 +492,7 @@ create_shmem(struct intel_memory_region *mem, mapping_set_gfp_mask(mapping, mask); GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); - i915_gem_object_init(obj, &i915_gem_shmem_ops); + i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class); obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 57cd8bc2657c..a2d49c04e6a4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -556,6 +556,7 @@ __i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, struct drm_mm_node *stolen, struct intel_memory_region *mem) { + static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; unsigned int cache_level; int err = -ENOMEM; @@ -565,7 +566,7 @@ __i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, goto err; drm_gem_private_object_init(&dev_priv->drm, &obj->base, stolen->size); - i915_gem_object_init(obj, &i915_gem_object_stolen_ops); + i915_gem_object_init(obj, &i915_gem_object_stolen_ops, &lock_class); obj->stolen = stolen; obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 4f970474013f..1e045c337044 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -725,6 +725,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + static struct lock_class_key lock_class; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_userptr *args = data; struct drm_i915_gem_object *obj; @@ -769,7 +770,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, return -ENOMEM; drm_gem_private_object_init(dev, &obj->base, args->user_size); - i915_gem_object_init(obj, &i915_gem_userptr_ops); + i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class); obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c index 3c5d17b2b670..892d12db6c49 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c @@ -96,6 +96,7 @@ huge_gem_object(struct drm_i915_private *i915, phys_addr_t phys_size, dma_addr_t dma_size) { + static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; unsigned int cache_level; @@ -111,7 +112,7 @@ huge_gem_object(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, dma_size); - i915_gem_object_init(obj, &huge_ops); + i915_gem_object_init(obj, &huge_ops, &lock_class); obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index f27772f6779a..dac8344507c1 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -149,6 +149,7 @@ huge_pages_object(struct drm_i915_private *i915, u64 size, unsigned int page_mask) { + static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; GEM_BUG_ON(!size); @@ -165,7 +166,7 @@ huge_pages_object(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &huge_page_ops); + i915_gem_object_init(obj, &huge_page_ops, &lock_class); i915_gem_object_set_volatile(obj); @@ -295,6 +296,7 @@ static const struct drm_i915_gem_object_ops fake_ops_single = { static struct drm_i915_gem_object * fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) { + static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; GEM_BUG_ON(!size); @@ -313,9 +315,9 @@ fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) drm_gem_private_object_init(&i915->drm, &obj->base, size); if (single) - i915_gem_object_init(obj, &fake_ops_single); + i915_gem_object_init(obj, &fake_ops_single, &lock_class); else - i915_gem_object_init(obj, &fake_ops); + i915_gem_object_init(obj, &fake_ops, &lock_class); i915_gem_object_set_volatile(obj); diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 13044c027f27..a816aef6142b 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -152,6 +152,7 @@ static const struct drm_i915_gem_object_ops intel_vgpu_gem_ops = { static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev, struct intel_vgpu_fb_info *info) { + static struct lock_class_key lock_class; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj; @@ -161,7 +162,7 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev, drm_gem_private_object_init(dev, &obj->base, roundup(info->size, PAGE_SIZE)); - i915_gem_object_init(obj, &intel_vgpu_gem_ops); + i915_gem_object_init(obj, &intel_vgpu_gem_ops, &lock_class); obj->read_domains = I915_GEM_DOMAIN_GTT; obj->write_domain = 0; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index ebe735df6504..a1cb072e4a1b 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -104,6 +104,7 @@ static const struct drm_i915_gem_object_ops fake_ops = { static struct drm_i915_gem_object * fake_dma_object(struct drm_i915_private *i915, u64 size) { + static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; GEM_BUG_ON(!size); @@ -117,7 +118,7 @@ fake_dma_object(struct drm_i915_private *i915, u64 size) goto err; drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &fake_ops); + i915_gem_object_init(obj, &fake_ops, &lock_class); i915_gem_object_set_volatile(obj); diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index 7b0c99ddc2d5..b2ad41c27e67 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -19,6 +19,7 @@ mock_object_create(struct intel_memory_region *mem, resource_size_t size, unsigned int flags) { + static struct lock_class_key lock_class; struct drm_i915_private *i915 = mem->i915; struct drm_i915_gem_object *obj; @@ -30,7 +31,7 @@ mock_object_create(struct intel_memory_region *mem, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &mock_region_obj_ops); + i915_gem_object_init(obj, &mock_region_obj_ops, &lock_class); obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; From ae2e28b02630072134392e75b1062eef69aeeff8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 22 Oct 2019 15:19:35 +0100 Subject: [PATCH 023/154] drm/i915: Teach record_defaults to operate on the intel_gt Again we wish to operate on the engines, which are owned by the intel_gt. As such it is easier, and much more consistent, to pass the intel_gt parameter. v2: Unexport i915_gem_load_power_context() Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191022141935.15733-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 7 +------ drivers/gpu/drm/i915/gem/i915_gem_pm.h | 1 - drivers/gpu/drm/i915/i915_gem.c | 14 ++++++++------ drivers/gpu/drm/i915/selftests/i915_selftest.c | 13 +++++++++++-- 4 files changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index ee3279c76566..9bc0cf3139e3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -37,11 +37,6 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt) return result; } -bool i915_gem_load_power_context(struct drm_i915_private *i915) -{ - return switch_to_kernel_context_sync(&i915->gt); -} - static void user_forcewake(struct intel_gt *gt, bool suspend) { int count = atomic_read(>->user_wakeref); @@ -171,7 +166,7 @@ void i915_gem_resume(struct drm_i915_private *i915) intel_uc_resume(&i915->gt.uc); /* Always reload a context for powersaving. */ - if (!i915_gem_load_power_context(i915)) + if (!switch_to_kernel_context_sync(&i915->gt)) goto err_wedged; user_forcewake(&i915->gt, false); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.h b/drivers/gpu/drm/i915/gem/i915_gem_pm.h index a017572778d5..26b78dbdc225 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.h @@ -12,7 +12,6 @@ struct drm_i915_private; struct work_struct; -bool i915_gem_load_power_context(struct drm_i915_private *i915); void i915_gem_resume(struct drm_i915_private *i915); void i915_gem_idle_work_handler(struct work_struct *work); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e86f0a4e4c6a..319e96d833fa 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -48,6 +48,7 @@ #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_gt_requests.h" #include "gt/intel_mocs.h" #include "gt/intel_reset.h" #include "gt/intel_renderstate.h" @@ -1069,7 +1070,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915) intel_runtime_pm_put(&i915->runtime_pm, wakeref); } -static int __intel_engines_record_defaults(struct drm_i915_private *i915) +static int __intel_engines_record_defaults(struct intel_gt *gt) { struct i915_request *requests[I915_NUM_ENGINES] = {}; struct intel_engine_cs *engine; @@ -1085,7 +1086,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) * from the same default HW values. */ - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct intel_context *ce; struct i915_request *rq; @@ -1093,7 +1094,8 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) GEM_BUG_ON(!engine->kernel_context); engine->serial++; /* force the kernel context switch */ - ce = intel_context_create(i915->kernel_context, engine); + ce = intel_context_create(engine->kernel_context->gem_context, + engine); if (IS_ERR(ce)) { err = PTR_ERR(ce); goto out; @@ -1122,7 +1124,7 @@ err_rq: } /* Flush the default context image to memory, and enable powersaving. */ - if (!i915_gem_load_power_context(i915)) { + if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { err = -EIO; goto out; } @@ -1181,7 +1183,7 @@ out: * this is by declaring ourselves wedged. */ if (err) - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); for (id = 0; id < ARRAY_SIZE(requests); id++) { struct intel_context *ce; @@ -1295,7 +1297,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) if (ret) goto err_gt; - ret = __intel_engines_record_defaults(dev_priv); + ret = __intel_engines_record_defaults(&dev_priv->gt); if (ret) goto err_gt; diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index 825a8286cbe8..92c9193cdc85 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -23,8 +23,9 @@ #include -#include "../i915_drv.h" -#include "../i915_selftest.h" +#include "gt/intel_gt_pm.h" +#include "i915_drv.h" +#include "i915_selftest.h" #include "igt_flush_test.h" @@ -256,6 +257,10 @@ int __i915_live_setup(void *data) { struct drm_i915_private *i915 = data; + /* The selftests expect an idle system */ + if (intel_gt_pm_wait_for_idle(&i915->gt)) + return -EIO; + return intel_gt_terminally_wedged(&i915->gt); } @@ -275,6 +280,10 @@ int __intel_gt_live_setup(void *data) { struct intel_gt *gt = data; + /* The selftests expect an idle system */ + if (intel_gt_pm_wait_for_idle(gt)) + return -EIO; + return intel_gt_terminally_wedged(gt); } From c31c9e82ee8ae032b0ed495ae8b8207a6734452b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 22 Oct 2019 14:02:21 +0100 Subject: [PATCH 024/154] drm/i915/selftests: Teach switch_to_context() to use the context The context details which engines to use, so use the ctx->engines[] to generate the requests to cause the context switch. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191022130221.20644-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index 97f89f744ee2..84e7ca778b7b 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -15,16 +15,15 @@ #include "igt_flush_test.h" #include "mock_drm.h" -static int switch_to_context(struct drm_i915_private *i915, - struct i915_gem_context *ctx) +static int switch_to_context(struct i915_gem_context *ctx) { - struct intel_engine_cs *engine; - enum intel_engine_id id; + struct i915_gem_engines_iter it; + struct intel_context *ce; - for_each_engine(engine, i915, id) { + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { struct i915_request *rq; - rq = igt_request_alloc(ctx, engine); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) return PTR_ERR(rq); @@ -140,7 +139,7 @@ static int igt_gem_suspend(void *arg) err = -ENOMEM; ctx = live_context(i915, file); if (!IS_ERR(ctx)) - err = switch_to_context(i915, ctx); + err = switch_to_context(ctx); if (err) goto out; @@ -155,7 +154,7 @@ static int igt_gem_suspend(void *arg) pm_resume(i915); - err = switch_to_context(i915, ctx); + err = switch_to_context(ctx); out: mock_file_free(i915, file); return err; @@ -175,7 +174,7 @@ static int igt_gem_hibernate(void *arg) err = -ENOMEM; ctx = live_context(i915, file); if (!IS_ERR(ctx)) - err = switch_to_context(i915, ctx); + err = switch_to_context(ctx); if (err) goto out; @@ -190,7 +189,7 @@ static int igt_gem_hibernate(void *arg) pm_resume(i915); - err = switch_to_context(i915, ctx); + err = switch_to_context(ctx); out: mock_file_free(i915, file); return err; From 905da43c6a02b57232c1f087b94bc606f6376632 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 22 Oct 2019 14:10:16 +0100 Subject: [PATCH 025/154] drm/i915/selftests: Move uncore fw selftests to operate on intel_gt Forcewake is the speciality of the GT, so it is natural to run the intel_uncore_forcewake tests over the GT. So pass intel_gt as the parameter to our selftests. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Daniele Ceraolo Spurio Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20191022131016.9065-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/intel_uncore.c | 54 ++++++++++--------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 0ffb141eb988..0e4e6be0101d 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -140,19 +140,19 @@ static int live_forcewake_ops(void *arg) } }; const struct reg *r; - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_uncore_forcewake_domain *domain; - struct intel_uncore *uncore = &i915->uncore; + struct intel_uncore *uncore = gt->uncore; struct intel_engine_cs *engine; enum intel_engine_id id; intel_wakeref_t wakeref; unsigned int tmp; int err = 0; - GEM_BUG_ON(i915->gt.awake); + GEM_BUG_ON(gt->awake); /* vlv/chv with their pcu behave differently wrt reads */ - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915)) { pr_debug("PCU fakes forcewake badly; skipping\n"); return 0; } @@ -170,15 +170,15 @@ static int live_forcewake_ops(void *arg) /* We have to pick carefully to get the exact behaviour we need */ for (r = registers; r->name; r++) - if (r->platforms & INTEL_INFO(i915)->gen_mask) + if (r->platforms & INTEL_INFO(gt->i915)->gen_mask) break; if (!r->name) { pr_debug("Forcewaked register not known for %s; skipping\n", - intel_platform_name(INTEL_INFO(i915)->platform)); + intel_platform_name(INTEL_INFO(gt->i915)->platform)); return 0; } - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + wakeref = intel_runtime_pm_get(uncore->rpm); for_each_fw_domain(domain, uncore, tmp) { smp_store_mb(domain->active, false); @@ -188,7 +188,7 @@ static int live_forcewake_ops(void *arg) intel_uncore_fw_release_timer(&domain->timer); } - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { i915_reg_t mmio = _MMIO(engine->mmio_base + r->offset); u32 __iomem *reg = uncore->regs + engine->mmio_base + r->offset; enum forcewake_domains fw_domains; @@ -249,22 +249,22 @@ static int live_forcewake_ops(void *arg) } out_rpm: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); + intel_runtime_pm_put(uncore->rpm, wakeref); return err; } static int live_forcewake_domains(void *arg) { #define FW_RANGE 0x40000 - struct drm_i915_private *dev_priv = arg; - struct intel_uncore *uncore = &dev_priv->uncore; + struct intel_gt *gt = arg; + struct intel_uncore *uncore = gt->uncore; unsigned long *valid; u32 offset; int err; - if (!HAS_FPGA_DBG_UNCLAIMED(dev_priv) && - !IS_VALLEYVIEW(dev_priv) && - !IS_CHERRYVIEW(dev_priv)) + if (!HAS_FPGA_DBG_UNCLAIMED(gt->i915) && + !IS_VALLEYVIEW(gt->i915) && + !IS_CHERRYVIEW(gt->i915)) return 0; /* @@ -283,7 +283,7 @@ static int live_forcewake_domains(void *arg) for (offset = 0; offset < FW_RANGE; offset += 4) { i915_reg_t reg = { offset }; - (void)I915_READ_FW(reg); + intel_uncore_posting_read_fw(uncore, reg); if (!check_for_unclaimed_mmio(uncore)) set_bit(offset, valid); } @@ -300,7 +300,7 @@ static int live_forcewake_domains(void *arg) check_for_unclaimed_mmio(uncore); - (void)I915_READ(reg); + intel_uncore_posting_read_fw(uncore, reg); if (check_for_unclaimed_mmio(uncore)) { pr_err("Unclaimed mmio read to register 0x%04x\n", offset); @@ -312,21 +312,23 @@ static int live_forcewake_domains(void *arg) return err; } +static int live_fw_table(void *arg) +{ + struct intel_gt *gt = arg; + + /* Confirm the table we load is still valid */ + return intel_fw_table_check(gt->uncore->fw_domains_table, + gt->uncore->fw_domains_table_entries, + INTEL_GEN(gt->i915) >= 9); +} + int intel_uncore_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { + SUBTEST(live_fw_table), SUBTEST(live_forcewake_ops), SUBTEST(live_forcewake_domains), }; - int err; - - /* Confirm the table we load is still valid */ - err = intel_fw_table_check(i915->uncore.fw_domains_table, - i915->uncore.fw_domains_table_entries, - INTEL_GEN(i915) >= 9); - if (err) - return err; - - return i915_subtests(tests, i915); + return intel_gt_live_subtests(tests, &i915->gt); } From f79520bb333792fb23a32352f83d8d59a525cec9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 22 Oct 2019 12:21:11 +0100 Subject: [PATCH 026/154] drm/i915/selftests: Synchronize checking active status with retirement If retirement is running on another thread, we may inspect the status of the i915_active before its retirement callback is complete. As we expect it to be running synchronously, we can wait for any callback to complete by acquiring the i915_active.mutex. Signed-off-by: Chris Wilson Acked-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20191022112111.9317-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index 1f5ab59ad6e7..5af27c37b65b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -49,12 +49,20 @@ static struct pulse *pulse_create(void) return p; } +static void pulse_unlock_wait(struct pulse *p) +{ + mutex_lock(&p->active.mutex); + mutex_unlock(&p->active.mutex); +} + static int __live_idle_pulse(struct intel_engine_cs *engine, int (*fn)(struct intel_engine_cs *cs)) { struct pulse *p; int err; + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); + p = pulse_create(); if (!p) return -ENOMEM; @@ -73,16 +81,21 @@ static int __live_idle_pulse(struct intel_engine_cs *engine, i915_active_release(&p->active); GEM_BUG_ON(i915_active_is_idle(&p->active)); + GEM_BUG_ON(llist_empty(&engine->barrier_tasks)); err = fn(engine); if (err) goto out; + GEM_BUG_ON(!llist_empty(&engine->barrier_tasks)); + if (intel_gt_retire_requests_timeout(engine->gt, HZ / 5)) { err = -ETIME; goto out; } + pulse_unlock_wait(p); /* synchronize with the retirement callback */ + if (!i915_active_is_idle(&p->active)) { pr_err("%s: heartbeat pulse did not flush idle tasks\n", engine->name); From e16302cb673c13193c582ebcd17965e189bbe72b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 22 Oct 2019 23:33:16 +0100 Subject: [PATCH 027/154] drm/i915/selftests: Release ctx->engine_mutex after iteration A lock once taken must be released again. Fixes: c31c9e82ee8a ("drm/i915/selftests: Teach switch_to_context() to use the context") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Cc: Matthew Auld Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191022223316.12662-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index 84e7ca778b7b..6d22567ad620 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -19,18 +19,22 @@ static int switch_to_context(struct i915_gem_context *ctx) { struct i915_gem_engines_iter it; struct intel_context *ce; + int err = 0; for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { struct i915_request *rq; rq = intel_context_create_request(ce); - if (IS_ERR(rq)) - return PTR_ERR(rq); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } i915_request_add(rq); } + i915_gem_context_unlock_engines(ctx); - return 0; + return err; } static void trash_stolen(struct drm_i915_private *i915) From 010663a61c40377adebb716d3fed341042b5651f Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 22 Oct 2019 16:34:13 +0300 Subject: [PATCH 028/154] drm/i915/dsc: rename crtc state dsc_params member to dsc Reduce verbosity in code by renaming dsc_params member of crtc state to simply dsc. There is enough context for this to be clear. No functional changes. Cc: Manasi Navare Reviewed-by: Manasi Navare Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20191022133414.8293-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 2 +- .../drm/i915/display/intel_display_types.h | 2 +- drivers/gpu/drm/i915/display/intel_dp.c | 32 ++++----- drivers/gpu/drm/i915/display/intel_psr.c | 4 +- drivers/gpu/drm/i915/display/intel_vdsc.c | 68 +++++++++---------- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- 6 files changed, 55 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 9ba794cb9b4f..1a49266f4f57 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -2234,7 +2234,7 @@ static void intel_ddi_get_power_domains(struct intel_encoder *encoder, /* * VDSC power is needed when DSC is enabled */ - if (crtc_state->dsc_params.compression_enable) + if (crtc_state->dsc.compression_enable) intel_display_power_get(dev_priv, intel_dsc_power_domain(crtc_state)); } diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 8358152e403e..db66f9d623f8 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -986,7 +986,7 @@ struct intel_crtc_state { bool dsc_split; u16 compressed_bpp; u8 slice_count; - } dsc_params; + } dsc; struct drm_dsc_config dp_dsc_cfg; /* Forward Error correction State */ diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 5eeafa45831a..521ce23f38ac 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2080,10 +2080,10 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, pipe_config->lane_count = limits->max_lane_count; if (intel_dp_is_edp(intel_dp)) { - pipe_config->dsc_params.compressed_bpp = + pipe_config->dsc.compressed_bpp = min_t(u16, drm_edp_dsc_sink_output_bpp(intel_dp->dsc_dpcd) >> 4, pipe_config->pipe_bpp); - pipe_config->dsc_params.slice_count = + pipe_config->dsc.slice_count = drm_dp_dsc_sink_max_slice_count(intel_dp->dsc_dpcd, true); } else { @@ -2104,10 +2104,10 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, DRM_DEBUG_KMS("Compressed BPP/Slice Count not supported\n"); return -EINVAL; } - pipe_config->dsc_params.compressed_bpp = min_t(u16, + pipe_config->dsc.compressed_bpp = min_t(u16, dsc_max_output_bpp >> 4, pipe_config->pipe_bpp); - pipe_config->dsc_params.slice_count = dsc_dp_slice_count; + pipe_config->dsc.slice_count = dsc_dp_slice_count; } /* * VDSC engine operates at 1 Pixel per clock, so if peak pixel rate @@ -2115,8 +2115,8 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, * then we need to use 2 VDSC instances. */ if (adjusted_mode->crtc_clock > dev_priv->max_cdclk_freq) { - if (pipe_config->dsc_params.slice_count > 1) { - pipe_config->dsc_params.dsc_split = true; + if (pipe_config->dsc.slice_count > 1) { + pipe_config->dsc.dsc_split = true; } else { DRM_DEBUG_KMS("Cannot split stream to use 2 VDSC instances\n"); return -EINVAL; @@ -2128,16 +2128,16 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, DRM_DEBUG_KMS("Cannot compute valid DSC parameters for Input Bpp = %d " "Compressed BPP = %d\n", pipe_config->pipe_bpp, - pipe_config->dsc_params.compressed_bpp); + pipe_config->dsc.compressed_bpp); return ret; } - pipe_config->dsc_params.compression_enable = true; + pipe_config->dsc.compression_enable = true; DRM_DEBUG_KMS("DP DSC computed with Input Bpp = %d " "Compressed Bpp = %d Slice Count = %d\n", pipe_config->pipe_bpp, - pipe_config->dsc_params.compressed_bpp, - pipe_config->dsc_params.slice_count); + pipe_config->dsc.compressed_bpp, + pipe_config->dsc.slice_count); return 0; } @@ -2211,15 +2211,15 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, return ret; } - if (pipe_config->dsc_params.compression_enable) { + if (pipe_config->dsc.compression_enable) { DRM_DEBUG_KMS("DP lane count %d clock %d Input bpp %d Compressed bpp %d\n", pipe_config->lane_count, pipe_config->port_clock, pipe_config->pipe_bpp, - pipe_config->dsc_params.compressed_bpp); + pipe_config->dsc.compressed_bpp); DRM_DEBUG_KMS("DP link rate required %i available %i\n", intel_dp_link_required(adjusted_mode->crtc_clock, - pipe_config->dsc_params.compressed_bpp), + pipe_config->dsc.compressed_bpp), intel_dp_max_data_rate(pipe_config->port_clock, pipe_config->lane_count)); } else { @@ -2377,8 +2377,8 @@ intel_dp_compute_config(struct intel_encoder *encoder, pipe_config->limited_color_range = intel_dp_limited_color_range(pipe_config, conn_state); - if (pipe_config->dsc_params.compression_enable) - output_bpp = pipe_config->dsc_params.compressed_bpp; + if (pipe_config->dsc.compression_enable) + output_bpp = pipe_config->dsc.compressed_bpp; else output_bpp = intel_dp_output_bpp(pipe_config, pipe_config->pipe_bpp); @@ -3102,7 +3102,7 @@ void intel_dp_sink_set_decompression_state(struct intel_dp *intel_dp, { int ret; - if (!crtc_state->dsc_params.compression_enable) + if (!crtc_state->dsc.compression_enable) return; ret = drm_dp_dpcd_writeb(&intel_dp->aux, DP_DSC_ENABLE, diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 50f22abcd30e..dfbedff98ea8 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -76,7 +76,7 @@ static bool intel_psr2_enabled(struct drm_i915_private *dev_priv, const struct intel_crtc_state *crtc_state) { /* Cannot enable DSC and PSR2 simultaneously */ - WARN_ON(crtc_state->dsc_params.compression_enable && + WARN_ON(crtc_state->dsc.compression_enable && crtc_state->has_psr2); switch (dev_priv->psr.debug & I915_PSR_DEBUG_MODE_MASK) { @@ -623,7 +623,7 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, * resolution requires DSC to be enabled, priority is given to DSC * over PSR2. */ - if (crtc_state->dsc_params.compression_enable) { + if (crtc_state->dsc.compression_enable) { DRM_DEBUG_KMS("PSR2 cannot be enabled since DSC is enabled\n"); return false; } diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index d4fb7f16f9f6..f41a9336476b 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -323,7 +323,7 @@ int intel_dp_compute_dsc_params(struct intel_dp *intel_dp, struct intel_crtc_state *pipe_config) { struct drm_dsc_config *vdsc_cfg = &pipe_config->dp_dsc_cfg; - u16 compressed_bpp = pipe_config->dsc_params.compressed_bpp; + u16 compressed_bpp = pipe_config->dsc.compressed_bpp; u8 i = 0; int row_index = 0; int column_index = 0; @@ -332,7 +332,7 @@ int intel_dp_compute_dsc_params(struct intel_dp *intel_dp, vdsc_cfg->pic_width = pipe_config->base.adjusted_mode.crtc_hdisplay; vdsc_cfg->pic_height = pipe_config->base.adjusted_mode.crtc_vdisplay; vdsc_cfg->slice_width = DIV_ROUND_UP(vdsc_cfg->pic_width, - pipe_config->dsc_params.slice_count); + pipe_config->dsc.slice_count); /* * Slice Height of 8 works for all currently available panels. So start * with that if pic_height is an integral multiple of 8. @@ -491,7 +491,7 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, u32 pps_val = 0; u32 rc_buf_thresh_dword[4]; u32 rc_range_params_dword[8]; - u8 num_vdsc_instances = (crtc_state->dsc_params.dsc_split) ? 2 : 1; + u8 num_vdsc_instances = (crtc_state->dsc.dsc_split) ? 2 : 1; int i = 0; /* Populate PICTURE_PARAMETER_SET_0 registers */ @@ -514,11 +514,11 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_0, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_0(pipe), pps_val); - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_0(pipe), pps_val); } @@ -533,11 +533,11 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_1, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_1(pipe), pps_val); - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_1(pipe), pps_val); } @@ -553,11 +553,11 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_2, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_2(pipe), pps_val); - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_2(pipe), pps_val); } @@ -573,11 +573,11 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_3, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_3(pipe), pps_val); - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_3(pipe), pps_val); } @@ -593,11 +593,11 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_4, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_4(pipe), pps_val); - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_4(pipe), pps_val); } @@ -613,11 +613,11 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_5, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_5(pipe), pps_val); - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_5(pipe), pps_val); } @@ -635,11 +635,11 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_6, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_6(pipe), pps_val); - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_6(pipe), pps_val); } @@ -655,11 +655,11 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_7, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_7(pipe), pps_val); - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_7(pipe), pps_val); } @@ -675,11 +675,11 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_8, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_8(pipe), pps_val); - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_8(pipe), pps_val); } @@ -695,11 +695,11 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_9, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_9(pipe), pps_val); - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_9(pipe), pps_val); } @@ -717,11 +717,11 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_10, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_10(pipe), pps_val); - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_10(pipe), pps_val); } @@ -740,11 +740,11 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_16, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_16(pipe), pps_val); - if (crtc_state->dsc_params.dsc_split) + if (crtc_state->dsc.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_16(pipe), pps_val); } @@ -763,7 +763,7 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, I915_WRITE(DSCA_RC_BUF_THRESH_0_UDW, rc_buf_thresh_dword[1]); I915_WRITE(DSCA_RC_BUF_THRESH_1, rc_buf_thresh_dword[2]); I915_WRITE(DSCA_RC_BUF_THRESH_1_UDW, rc_buf_thresh_dword[3]); - if (crtc_state->dsc_params.dsc_split) { + if (crtc_state->dsc.dsc_split) { I915_WRITE(DSCC_RC_BUF_THRESH_0, rc_buf_thresh_dword[0]); I915_WRITE(DSCC_RC_BUF_THRESH_0_UDW, @@ -782,7 +782,7 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, rc_buf_thresh_dword[2]); I915_WRITE(ICL_DSC0_RC_BUF_THRESH_1_UDW(pipe), rc_buf_thresh_dword[3]); - if (crtc_state->dsc_params.dsc_split) { + if (crtc_state->dsc.dsc_split) { I915_WRITE(ICL_DSC1_RC_BUF_THRESH_0(pipe), rc_buf_thresh_dword[0]); I915_WRITE(ICL_DSC1_RC_BUF_THRESH_0_UDW(pipe), @@ -824,7 +824,7 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, rc_range_params_dword[6]); I915_WRITE(DSCA_RC_RANGE_PARAMETERS_3_UDW, rc_range_params_dword[7]); - if (crtc_state->dsc_params.dsc_split) { + if (crtc_state->dsc.dsc_split) { I915_WRITE(DSCC_RC_RANGE_PARAMETERS_0, rc_range_params_dword[0]); I915_WRITE(DSCC_RC_RANGE_PARAMETERS_0_UDW, @@ -859,7 +859,7 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, rc_range_params_dword[6]); I915_WRITE(ICL_DSC0_RC_RANGE_PARAMETERS_3_UDW(pipe), rc_range_params_dword[7]); - if (crtc_state->dsc_params.dsc_split) { + if (crtc_state->dsc.dsc_split) { I915_WRITE(ICL_DSC1_RC_RANGE_PARAMETERS_0(pipe), rc_range_params_dword[0]); I915_WRITE(ICL_DSC1_RC_RANGE_PARAMETERS_0_UDW(pipe), @@ -909,7 +909,7 @@ void intel_dsc_enable(struct intel_encoder *encoder, u32 dss_ctl1_val = 0; u32 dss_ctl2_val = 0; - if (!crtc_state->dsc_params.compression_enable) + if (!crtc_state->dsc.compression_enable) return; /* Enable Power wells for VDSC/joining */ @@ -928,7 +928,7 @@ void intel_dsc_enable(struct intel_encoder *encoder, dss_ctl2_reg = ICL_PIPE_DSS_CTL2(pipe); } dss_ctl2_val |= LEFT_BRANCH_VDSC_ENABLE; - if (crtc_state->dsc_params.dsc_split) { + if (crtc_state->dsc.dsc_split) { dss_ctl2_val |= RIGHT_BRANCH_VDSC_ENABLE; dss_ctl1_val |= JOINER_ENABLE; } @@ -944,7 +944,7 @@ void intel_dsc_disable(const struct intel_crtc_state *old_crtc_state) i915_reg_t dss_ctl1_reg, dss_ctl2_reg; u32 dss_ctl1_val = 0, dss_ctl2_val = 0; - if (!old_crtc_state->dsc_params.compression_enable) + if (!old_crtc_state->dsc.compression_enable) return; if (old_crtc_state->cpu_transcoder == TRANSCODER_EDP) { diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 16211430eb78..bc0bdf0419e0 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4576,7 +4576,7 @@ static int i915_dsc_fec_support_show(struct seq_file *m, void *data) intel_dp = enc_to_intel_dp(&intel_attached_encoder(connector)->base); crtc_state = to_intel_crtc_state(crtc->state); seq_printf(m, "DSC_Enabled: %s\n", - yesno(crtc_state->dsc_params.compression_enable)); + yesno(crtc_state->dsc.compression_enable)); seq_printf(m, "DSC_Sink_Support: %s\n", yesno(drm_dp_sink_supports_dsc(intel_dp->dsc_dpcd))); seq_printf(m, "Force_DSC_Enable: %s\n", From aaed4dd6968668cc7ce9efafd865122bc08e0bc9 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 22 Oct 2019 16:34:14 +0300 Subject: [PATCH 029/154] drm/i915/dsc: move crtc state dp_dsc_cfg member under dsc as config DSC isn't DP specific, so remove the dp_ prefix from the crtc state member name. Also moving the member under the dsc sub-struct gives us enough context to allow shortening the name to just config. No functional changes. Cc: Manasi Navare Reviewed-by: Manasi Navare Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20191022133414.8293-2-jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_display_types.h | 2 +- drivers/gpu/drm/i915/display/intel_vdsc.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index db66f9d623f8..bac40482a2aa 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -986,8 +986,8 @@ struct intel_crtc_state { bool dsc_split; u16 compressed_bpp; u8 slice_count; + struct drm_dsc_config config; } dsc; - struct drm_dsc_config dp_dsc_cfg; /* Forward Error correction State */ bool fec_enable; diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index f41a9336476b..896b0c334f5e 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -322,7 +322,7 @@ static int get_column_index_for_rc_params(u8 bits_per_component) int intel_dp_compute_dsc_params(struct intel_dp *intel_dp, struct intel_crtc_state *pipe_config) { - struct drm_dsc_config *vdsc_cfg = &pipe_config->dp_dsc_cfg; + struct drm_dsc_config *vdsc_cfg = &pipe_config->dsc.config; u16 compressed_bpp = pipe_config->dsc.compressed_bpp; u8 i = 0; int row_index = 0; @@ -485,7 +485,7 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - const struct drm_dsc_config *vdsc_cfg = &crtc_state->dp_dsc_cfg; + const struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config; enum pipe pipe = crtc->pipe; enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; u32 pps_val = 0; @@ -885,7 +885,7 @@ static void intel_dp_write_dsc_pps_sdp(struct intel_encoder *encoder, { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - const struct drm_dsc_config *vdsc_cfg = &crtc_state->dp_dsc_cfg; + const struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config; struct drm_dsc_pps_infoframe dp_dsc_pps_sdp; /* Prepare DP SDP PPS header as per DP 1.4 spec, Table 2-123 */ From 37c92dc303dd0977134d1c8501f057de407473ec Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 22 Oct 2019 12:51:55 +0300 Subject: [PATCH 030/154] drm/i915: Add new CNL PCH ID seen on a CML platform MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Atm we don't detect a PCH with PCI ID 0xA3C1 which showed up now on a CML platform. We don't have the official assignment of the PCH PCI IDs, but this looks like a CNP which was already used on CML platforms. Let's add the new ID->PCH type mapping accordingly. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=112051 Reported-and-tested-by: Cyrus Cc: Cyrus Signed-off-by: Imre Deak Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20191022095155.30991-1-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_pch.c | 1 + drivers/gpu/drm/i915/intel_pch.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c index bb1cb6f12a50..000ba43e2c02 100644 --- a/drivers/gpu/drm/i915/intel_pch.c +++ b/drivers/gpu/drm/i915/intel_pch.c @@ -62,6 +62,7 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) /* KBP is SPT compatible */ return PCH_SPT; case INTEL_PCH_CNP_DEVICE_ID_TYPE: + case INTEL_PCH_CNP2_DEVICE_ID_TYPE: DRM_DEBUG_KMS("Found Cannon Lake PCH (CNP)\n"); WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv)); return PCH_CNP; diff --git a/drivers/gpu/drm/i915/intel_pch.h b/drivers/gpu/drm/i915/intel_pch.h index f4dc18c34291..1115c6a0522c 100644 --- a/drivers/gpu/drm/i915/intel_pch.h +++ b/drivers/gpu/drm/i915/intel_pch.h @@ -40,6 +40,7 @@ enum intel_pch { #define INTEL_PCH_SPT_LP_DEVICE_ID_TYPE 0x9D00 #define INTEL_PCH_KBP_DEVICE_ID_TYPE 0xA280 #define INTEL_PCH_CNP_DEVICE_ID_TYPE 0xA300 +#define INTEL_PCH_CNP2_DEVICE_ID_TYPE 0xA380 #define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE 0x9D80 #define INTEL_PCH_CMP_DEVICE_ID_TYPE 0x0280 #define INTEL_PCH_CMP2_DEVICE_ID_TYPE 0x0680 From 853ddb699396c9831d793b28e49d40248da472d5 Mon Sep 17 00:00:00 2001 From: "Robert M. Fosha" Date: Tue, 22 Oct 2019 09:37:52 -0700 Subject: [PATCH 031/154] drm/i915/guc: Enable guc logging on guc log relay write Creating and opening the GuC log relay file enables and starts the relay potentially before the caller is ready to consume logs. Change the behavior so that relay starts only on an explicit call to the write function (with a value of '1'). Other values flush the log relay as before. v2: Style changes and fix typos. Add guc_log_relay_stop() function. (Daniele) Cc: Matthew Brost Cc: Daniele Ceraolo Spurio Cc: Michal Wajdeczko Signed-off-by: Robert M. Fosha Reviewed-by: Matthew Brost Signed-off-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20191022163754.23870-1-robert.m.fosha@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 57 ++++++++++++++++------ drivers/gpu/drm/i915/gt/uc/intel_guc_log.h | 4 +- drivers/gpu/drm/i915/i915_debugfs.c | 22 +++++++-- 3 files changed, 64 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 2cf2d3314f62..caed0d57e704 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -226,7 +226,7 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log) mutex_lock(&log->relay.lock); - if (WARN_ON(!intel_guc_log_relay_enabled(log))) + if (WARN_ON(!intel_guc_log_relay_created(log))) goto out_unlock; /* Get the pointer to shared GuC log buffer */ @@ -361,6 +361,7 @@ void intel_guc_log_init_early(struct intel_guc_log *log) { mutex_init(&log->relay.lock); INIT_WORK(&log->relay.flush_work, capture_logs_work); + log->relay.started = false; } static int guc_log_relay_create(struct intel_guc_log *log) @@ -546,7 +547,7 @@ out_unlock: return ret; } -bool intel_guc_log_relay_enabled(const struct intel_guc_log *log) +bool intel_guc_log_relay_created(const struct intel_guc_log *log) { return log->relay.buf_addr; } @@ -560,7 +561,7 @@ int intel_guc_log_relay_open(struct intel_guc_log *log) mutex_lock(&log->relay.lock); - if (intel_guc_log_relay_enabled(log)) { + if (intel_guc_log_relay_created(log)) { ret = -EEXIST; goto out_unlock; } @@ -585,15 +586,6 @@ int intel_guc_log_relay_open(struct intel_guc_log *log) mutex_unlock(&log->relay.lock); - guc_log_enable_flush_events(log); - - /* - * When GuC is logging without us relaying to userspace, we're ignoring - * the flush notification. This means that we need to unconditionally - * flush on relay enabling, since GuC only notifies us once. - */ - queue_work(system_highpri_wq, &log->relay.flush_work); - return 0; out_relay: @@ -604,11 +596,33 @@ out_unlock: return ret; } +int intel_guc_log_relay_start(struct intel_guc_log *log) +{ + if (log->relay.started) + return -EEXIST; + + guc_log_enable_flush_events(log); + + /* + * When GuC is logging without us relaying to userspace, we're ignoring + * the flush notification. This means that we need to unconditionally + * flush on relay enabling, since GuC only notifies us once. + */ + queue_work(system_highpri_wq, &log->relay.flush_work); + + log->relay.started = true; + + return 0; +} + void intel_guc_log_relay_flush(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); intel_wakeref_t wakeref; + if (!log->relay.started) + return; + /* * Before initiating the forceful flush, wait for any pending/ongoing * flush to complete otherwise forceful flush may not actually happen. @@ -622,18 +636,33 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log) guc_log_capture_logs(log); } -void intel_guc_log_relay_close(struct intel_guc_log *log) +/* + * Stops the relay log. Called from intel_guc_log_relay_close(), so no + * possibility of race with start/flush since relay_write cannot race + * relay_close. + */ +static void guc_log_relay_stop(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + if (!log->relay.started) + return; + guc_log_disable_flush_events(log); intel_synchronize_irq(i915); flush_work(&log->relay.flush_work); + log->relay.started = false; +} + +void intel_guc_log_relay_close(struct intel_guc_log *log) +{ + guc_log_relay_stop(log); + mutex_lock(&log->relay.lock); - GEM_BUG_ON(!intel_guc_log_relay_enabled(log)); + GEM_BUG_ON(!intel_guc_log_relay_created(log)); guc_log_unmap(log); guc_log_relay_destroy(log); mutex_unlock(&log->relay.lock); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index 6f764879acb1..c252c022c5fc 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -47,6 +47,7 @@ struct intel_guc_log { struct i915_vma *vma; struct { void *buf_addr; + bool started; struct work_struct flush_work; struct rchan *channel; struct mutex lock; @@ -65,8 +66,9 @@ int intel_guc_log_create(struct intel_guc_log *log); void intel_guc_log_destroy(struct intel_guc_log *log); int intel_guc_log_set_level(struct intel_guc_log *log, u32 level); -bool intel_guc_log_relay_enabled(const struct intel_guc_log *log); +bool intel_guc_log_relay_created(const struct intel_guc_log *log); int intel_guc_log_relay_open(struct intel_guc_log *log); +int intel_guc_log_relay_start(struct intel_guc_log *log); void intel_guc_log_relay_flush(struct intel_guc_log *log); void intel_guc_log_relay_close(struct intel_guc_log *log); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index bc0bdf0419e0..ede85b2f8a4b 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1866,8 +1866,8 @@ static void i915_guc_log_info(struct seq_file *m, struct intel_guc_log *log = &dev_priv->gt.uc.guc.log; enum guc_log_buffer_type type; - if (!intel_guc_log_relay_enabled(log)) { - seq_puts(m, "GuC log relay disabled\n"); + if (!intel_guc_log_relay_created(log)) { + seq_puts(m, "GuC log relay not created\n"); return; } @@ -2054,9 +2054,23 @@ i915_guc_log_relay_write(struct file *filp, loff_t *ppos) { struct intel_guc_log *log = filp->private_data; + int val; + int ret; - intel_guc_log_relay_flush(log); - return cnt; + ret = kstrtoint_from_user(ubuf, cnt, 0, &val); + if (ret < 0) + return ret; + + /* + * Enable and start the guc log relay on value of 1. + * Flush log relay for any other value. + */ + if (val == 1) + ret = intel_guc_log_relay_start(log); + else + intel_guc_log_relay_flush(log); + + return ret ?: cnt; } static int i915_guc_log_relay_release(struct inode *inode, struct file *file) From a1ceb93a824aaaa8323c90ada987099ce44768c0 Mon Sep 17 00:00:00 2001 From: "Robert M. Fosha" Date: Tue, 22 Oct 2019 09:37:53 -0700 Subject: [PATCH 032/154] drm/i915/guc: Update H2G enable logging action definition GuC enable logging H2G action definition changed some time ago from 0xE000 to 0x40. All current GuC FW blobs use this definition, so fix the action definition in driver to match. Cc: Daniele Ceraolo Spurio Cc: Michal Wajdeczko Signed-off-by: Robert M. Fosha Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20191022163754.23870-2-robert.m.fosha@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 1d3cdd67ca2f..a26a85d50209 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -548,6 +548,7 @@ enum intel_guc_action { INTEL_GUC_ACTION_ALLOCATE_DOORBELL = 0x10, INTEL_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20, INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE = 0x30, + INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x40, INTEL_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302, INTEL_GUC_ACTION_ENTER_S_STATE = 0x501, INTEL_GUC_ACTION_EXIT_S_STATE = 0x502, @@ -556,7 +557,6 @@ enum intel_guc_action { INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, - INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x0E000, INTEL_GUC_ACTION_LIMIT }; From a8c51ed22b0ef20205180baa8ed30e05ea6f8505 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 23 Oct 2019 14:31:04 +0100 Subject: [PATCH 033/154] drm/i915/gt: Try to more gracefully quiesce the system before resets If we are doing a normal GPU reset triggered after detecting a long period of stalled work, we can take our time and allow the engines to quiesce. Since we've stopped submission to the engine, and if we wait long enough an innocent context should complete, leaving the engine idle. So by waiting a short amount of time, we should prevent clobbering other users when resetting a stuck context. Suggested-by: Joonas Lahtinen Suggested-by: Jon Bloomfield Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20191023133108.21401-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Kconfig.profile | 11 +++++++++++ drivers/gpu/drm/i915/gt/intel_engine_cs.c | 20 +++++++++++++++++++- drivers/gpu/drm/i915/gt/intel_engine_types.h | 4 ++++ 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile index 48df8889a88a..3a3881d5e44b 100644 --- a/drivers/gpu/drm/i915/Kconfig.profile +++ b/drivers/gpu/drm/i915/Kconfig.profile @@ -25,3 +25,14 @@ config DRM_I915_SPIN_REQUEST May be 0 to disable the initial spin. In practice, we estimate the cost of enabling the interrupt (if currently disabled) to be a few microseconds. + +config DRM_I915_STOP_TIMEOUT + int "How long to wait for an engine to quiesce gracefully before reset (ms)" + default 100 # milliseconds + help + By stopping submission and sleeping for a short time before resetting + the GPU, we allow the innocent contexts also on the system to quiesce. + It is then less likely for a hanging context to cause collateral + damage as the system is reset in order to recover. The corollary is + that the reset itself may take longer and so be more disruptive to + interactive or low latency workloads. diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 0e20713603ec..e4203eb44139 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -308,6 +308,9 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->instance = info->instance; __sprint_engine_name(engine); + engine->props.stop_timeout_ms = + CONFIG_DRM_I915_STOP_TIMEOUT; + /* * To be overridden by the backend on setup. However to facilitate * cleanup on error during setup, we always provide the destroy vfunc. @@ -875,6 +878,21 @@ u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine) return bbaddr; } +static unsigned long stop_timeout(const struct intel_engine_cs *engine) +{ + if (in_atomic() || irqs_disabled()) /* inside atomic preempt-reset? */ + return 0; + + /* + * If we are doing a normal GPU reset, we can take our time and allow + * the engine to quiesce. We've stopped submission to the engine, and + * if we wait long enough an innocent context should complete and + * leave the engine idle. So they should not be caught unaware by + * the forthcoming GPU reset (which usually follows the stop_cs)! + */ + return READ_ONCE(engine->props.stop_timeout_ms); +} + int intel_engine_stop_cs(struct intel_engine_cs *engine) { struct intel_uncore *uncore = engine->uncore; @@ -892,7 +910,7 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine) err = 0; if (__intel_wait_for_register_fw(uncore, mode, MODE_IDLE, MODE_IDLE, - 1000, 0, + 1000, stop_timeout(engine), NULL)) { GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name); err = -ETIMEDOUT; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 3451be034caf..87d5c4ef3ae7 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -542,6 +542,10 @@ struct intel_engine_cs { */ ktime_t total; } stats; + + struct { + unsigned long stop_timeout_ms; + } props; }; static inline bool From 3a7a92aba8fb77162e1e9963360fd81fc15c39a5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 23 Oct 2019 14:31:05 +0100 Subject: [PATCH 034/154] drm/i915/execlists: Force preemption If the preempted context takes too long to relinquish control, e.g. it is stuck inside a shader with arbitration disabled, evict that context with an engine reset. This ensures that preemptions are reasonably responsive, providing a tighter QoS for the more important context at the cost of flagging unresponsive contexts more frequently (i.e. instead of using an ~10s hangcheck, we now evict at ~100ms). The challenge of lies in picking a timeout that can be reasonably serviced by HW for typical workloads, balancing the existing clients against the needs for responsiveness. Note that coupled with timeslicing, this will lead to rapid GPU "hang" detection with multiple active contexts vying for GPU time. The forced preemption mechanism can be compiled out with ./scripts/config --set-val DRM_I915_PREEMPT_TIMEOUT 0 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20191023133108.21401-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Kconfig.profile | 12 +++ drivers/gpu/drm/i915/gt/intel_engine.h | 9 ++ drivers/gpu/drm/i915/gt/intel_engine_cs.c | 5 +- drivers/gpu/drm/i915/gt/intel_engine_types.h | 6 ++ drivers/gpu/drm/i915/gt/intel_lrc.c | 93 +++++++++++++++-- drivers/gpu/drm/i915/gt/selftest_lrc.c | 100 +++++++++++++++++++ drivers/gpu/drm/i915/i915_gem.h | 14 --- drivers/gpu/drm/i915/i915_params.h | 2 +- drivers/gpu/drm/i915/i915_utils.c | 29 ++++++ drivers/gpu/drm/i915/i915_utils.h | 9 ++ 10 files changed, 255 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile index 3a3881d5e44b..b071b6024152 100644 --- a/drivers/gpu/drm/i915/Kconfig.profile +++ b/drivers/gpu/drm/i915/Kconfig.profile @@ -12,6 +12,18 @@ config DRM_I915_USERFAULT_AUTOSUSPEND May be 0 to disable the extra delay and solely use the device level runtime pm autosuspend delay tunable. +config DRM_I915_PREEMPT_TIMEOUT + int "Preempt timeout (ms, jiffy granularity)" + default 100 # milliseconds + help + How long to wait (in milliseconds) for a preemption event to occur + when submitting a new context via execlists. If the current context + does not hit an arbitration point and yield to HW before the timer + expires, the HW will be reset to allow the more important context + to execute. + + May be 0 to disable the timeout. + config DRM_I915_SPIN_REQUEST int "Busywait for request completion (us)" default 5 # microseconds diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index c2d9d67c63d9..9409b7856299 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -527,4 +527,13 @@ void intel_engine_init_active(struct intel_engine_cs *engine, #define ENGINE_MOCK 1 #define ENGINE_VIRTUAL 2 +static inline bool +intel_engine_has_preempt_reset(const struct intel_engine_cs *engine) +{ + if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) + return 0; + + return intel_engine_has_preemption(engine); +} + #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index e4203eb44139..b91ea07f4819 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -308,6 +308,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->instance = info->instance; __sprint_engine_name(engine); + engine->props.preempt_timeout_ms = + CONFIG_DRM_I915_PREEMPT_TIMEOUT; engine->props.stop_timeout_ms = CONFIG_DRM_I915_STOP_TIMEOUT; @@ -1338,10 +1340,11 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, unsigned int idx; u8 read, write; - drm_printf(m, "\tExeclist tasklet queued? %s (%s), timeslice? %s\n", + drm_printf(m, "\tExeclist tasklet queued? %s (%s), preempt? %s, timeslice? %s\n", yesno(test_bit(TASKLET_STATE_SCHED, &engine->execlists.tasklet.state)), enableddisabled(!atomic_read(&engine->execlists.tasklet.count)), + repr_timer(&engine->execlists.preempt), repr_timer(&engine->execlists.timer)); read = execlists->csb_head; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 87d5c4ef3ae7..1251dac91f31 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -174,6 +174,11 @@ struct intel_engine_execlists { */ struct timer_list timer; + /** + * @preempt: reset the current context if it fails to give way + */ + struct timer_list preempt; + /** * @default_priolist: priority list for I915_PRIORITY_NORMAL */ @@ -544,6 +549,7 @@ struct intel_engine_cs { } stats; struct { + unsigned long preempt_timeout_ms; unsigned long stop_timeout_ms; } props; }; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index f9f3e985bb79..ff0dd297e782 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1372,6 +1372,26 @@ static void record_preemption(struct intel_engine_execlists *execlists) (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); } +static unsigned long active_preempt_timeout(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + rq = last_active(&engine->execlists); + if (!rq) + return 0; + + return READ_ONCE(engine->props.preempt_timeout_ms); +} + +static void set_preempt_timeout(struct intel_engine_cs *engine) +{ + if (!intel_engine_has_preempt_reset(engine)) + return; + + set_timer_ms(&engine->execlists.preempt, + active_preempt_timeout(engine)); +} + static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -1747,6 +1767,8 @@ done: memset(port + 1, 0, (last_port - port) * sizeof(*port)); execlists_submit_ports(engine); + + set_preempt_timeout(engine); } else { skip_submit: ring_set_paused(engine, 0); @@ -1987,6 +2009,43 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) } } +static noinline void preempt_reset(struct intel_engine_cs *engine) +{ + const unsigned int bit = I915_RESET_ENGINE + engine->id; + unsigned long *lock = &engine->gt->reset.flags; + + if (i915_modparams.reset < 3) + return; + + if (test_and_set_bit(bit, lock)) + return; + + /* Mark this tasklet as disabled to avoid waiting for it to complete */ + tasklet_disable_nosync(&engine->execlists.tasklet); + + GEM_TRACE("%s: preempt timeout %lu+%ums\n", + engine->name, + READ_ONCE(engine->props.preempt_timeout_ms), + jiffies_to_msecs(jiffies - engine->execlists.preempt.expires)); + intel_engine_reset(engine, "preemption time out"); + + tasklet_enable(&engine->execlists.tasklet); + clear_and_wake_up_bit(bit, lock); +} + +static bool preempt_timeout(const struct intel_engine_cs *const engine) +{ + const struct timer_list *t = &engine->execlists.preempt; + + if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) + return false; + + if (!timer_expired(t)) + return false; + + return READ_ONCE(engine->execlists.pending[0]); +} + /* * Check the unread Context Status Buffers and manage the submission of new * contexts to the ELSP accordingly. @@ -1994,23 +2053,39 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) static void execlists_submission_tasklet(unsigned long data) { struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; - unsigned long flags; + bool timeout = preempt_timeout(engine); process_csb(engine); - if (!READ_ONCE(engine->execlists.pending[0])) { + if (!READ_ONCE(engine->execlists.pending[0]) || timeout) { + unsigned long flags; + spin_lock_irqsave(&engine->active.lock, flags); __execlists_submission_tasklet(engine); spin_unlock_irqrestore(&engine->active.lock, flags); + + /* Recheck after serialising with direct-submission */ + if (timeout && preempt_timeout(engine)) + preempt_reset(engine); } } -static void execlists_submission_timer(struct timer_list *timer) +static void __execlists_kick(struct intel_engine_execlists *execlists) { - struct intel_engine_cs *engine = - from_timer(engine, timer, execlists.timer); - /* Kick the tasklet for some interrupt coalescing and reset handling */ - tasklet_hi_schedule(&engine->execlists.tasklet); + tasklet_hi_schedule(&execlists->tasklet); +} + +#define execlists_kick(t, member) \ + __execlists_kick(container_of(t, struct intel_engine_execlists, member)) + +static void execlists_timeslice(struct timer_list *timer) +{ + execlists_kick(timer, timer); +} + +static void execlists_preempt(struct timer_list *timer) +{ + execlists_kick(timer, preempt); } static void queue_request(struct intel_engine_cs *engine, @@ -3455,6 +3530,7 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) static void execlists_park(struct intel_engine_cs *engine) { cancel_timer(&engine->execlists.timer); + cancel_timer(&engine->execlists.preempt); } void intel_execlists_set_default_submission(struct intel_engine_cs *engine) @@ -3572,7 +3648,8 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) { tasklet_init(&engine->execlists.tasklet, execlists_submission_tasklet, (unsigned long)engine); - timer_setup(&engine->execlists.timer, execlists_submission_timer, 0); + timer_setup(&engine->execlists.timer, execlists_timeslice, 0); + timer_setup(&engine->execlists.preempt, execlists_preempt, 0); logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 7516d1c90925..b6352671c5a0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1697,6 +1697,105 @@ err_spin_hi: return err; } +static int live_preempt_timeout(void *arg) +{ + struct intel_gt *gt = arg; + struct i915_gem_context *ctx_hi, *ctx_lo; + struct igt_spinner spin_lo; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = -ENOMEM; + + /* + * Check that we force preemption to occur by cancelling the previous + * context if it refuses to yield the GPU. + */ + if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) + return 0; + + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) + return 0; + + if (!intel_has_reset_engine(gt)) + return 0; + + if (igt_spinner_init(&spin_lo, gt)) + return -ENOMEM; + + ctx_hi = kernel_context(gt->i915); + if (!ctx_hi) + goto err_spin_lo; + ctx_hi->sched.priority = + I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); + + ctx_lo = kernel_context(gt->i915); + if (!ctx_lo) + goto err_ctx_hi; + ctx_lo->sched.priority = + I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); + + for_each_engine(engine, gt, id) { + unsigned long saved_timeout; + struct i915_request *rq; + + if (!intel_engine_has_preemption(engine)) + continue; + + rq = spinner_create_request(&spin_lo, ctx_lo, engine, + MI_NOOP); /* preemption disabled */ + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + i915_request_add(rq); + if (!igt_wait_for_spinner(&spin_lo, rq)) { + intel_gt_set_wedged(gt); + err = -EIO; + goto err_ctx_lo; + } + + rq = igt_request_alloc(ctx_hi, engine); + if (IS_ERR(rq)) { + igt_spinner_end(&spin_lo); + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + /* Flush the previous CS ack before changing timeouts */ + while (READ_ONCE(engine->execlists.pending[0])) + cpu_relax(); + + saved_timeout = engine->props.preempt_timeout_ms; + engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */ + + i915_request_get(rq); + i915_request_add(rq); + + intel_engine_flush_submission(engine); + engine->props.preempt_timeout_ms = saved_timeout; + + if (i915_request_wait(rq, 0, HZ / 10) < 0) { + intel_gt_set_wedged(gt); + i915_request_put(rq); + err = -ETIME; + goto err_ctx_lo; + } + + igt_spinner_end(&spin_lo); + i915_request_put(rq); + } + + err = 0; +err_ctx_lo: + kernel_context_close(ctx_lo); +err_ctx_hi: + kernel_context_close(ctx_hi); +err_spin_lo: + igt_spinner_fini(&spin_lo); + return err; +} + static int random_range(struct rnd_state *rnd, int min, int max) { return i915_prandom_u32_max_state(max - min, rnd) + min; @@ -2598,6 +2697,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_suppress_wait_preempt), SUBTEST(live_chain_preempt), SUBTEST(live_preempt_hang), + SUBTEST(live_preempt_timeout), SUBTEST(live_preempt_smoke), SUBTEST(live_virtual_engine), SUBTEST(live_virtual_mask), diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 2011f8e9a9f1..f6f9675848b8 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -112,18 +112,4 @@ static inline bool __tasklet_is_scheduled(struct tasklet_struct *t) return test_bit(TASKLET_STATE_SCHED, &t->state); } -static inline void cancel_timer(struct timer_list *t) -{ - if (!READ_ONCE(t->expires)) - return; - - del_timer(t); - WRITE_ONCE(t->expires, 0); -} - -static inline bool timer_expired(const struct timer_list *t) -{ - return READ_ONCE(t->expires) && !timer_pending(t); -} - #endif /* __I915_GEM_H__ */ diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index d29ade3b7de6..56058978bb27 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -61,7 +61,7 @@ struct drm_printer; param(char *, dmc_firmware_path, NULL) \ param(int, mmio_debug, -IS_ENABLED(CONFIG_DRM_I915_DEBUG_MMIO)) \ param(int, edp_vswing, 0) \ - param(int, reset, 2) \ + param(int, reset, 3) \ param(unsigned int, inject_load_failure, 0) \ param(int, fastboot, -1) \ param(int, enable_dpcd_backlight, 0) \ diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c index 16acdf7bdbe6..02e969b64505 100644 --- a/drivers/gpu/drm/i915/i915_utils.c +++ b/drivers/gpu/drm/i915/i915_utils.c @@ -76,3 +76,32 @@ bool i915_error_injected(void) } #endif + +void cancel_timer(struct timer_list *t) +{ + if (!READ_ONCE(t->expires)) + return; + + del_timer(t); + WRITE_ONCE(t->expires, 0); +} + +void set_timer_ms(struct timer_list *t, unsigned long timeout) +{ + if (!timeout) { + cancel_timer(t); + return; + } + + timeout = msecs_to_jiffies_timeout(timeout); + + /* + * Paranoia to make sure the compiler computes the timeout before + * loading 'jiffies' as jiffies is volatile and may be updated in + * the background by a timer tick. All to reduce the complexity + * of the addition and reduce the risk of losing a jiffie. + */ + barrier(); + + mod_timer(t, jiffies + timeout); +} diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 562f756da421..94f136d8a5fd 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -32,6 +32,7 @@ #include struct drm_i915_private; +struct timer_list; #undef WARN_ON /* Many gcc seem to no see through this and fall over :( */ @@ -421,4 +422,12 @@ static inline void add_taint_for_CI(unsigned int taint) add_taint(taint, LOCKDEP_STILL_OK); } +void cancel_timer(struct timer_list *t); +void set_timer_ms(struct timer_list *t, unsigned long timeout); + +static inline bool timer_expired(const struct timer_list *t) +{ + return READ_ONCE(t->expires) && !timer_pending(t); +} + #endif /* !__I915_UTILS_H */ From d12acee84ffb07c2eb11bbb676908b3827b42598 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 23 Oct 2019 14:31:06 +0100 Subject: [PATCH 035/154] drm/i915/execlists: Cancel banned contexts on schedule-out On schedule-out (CS completion) of a banned context, scrub the context image so that we do not replay the active payload. The intent is that we skip banned payloads on request submission so that the timeline advancement continues on in the background. However, if we are returning to a preempted request, i915_request_skip() is ineffective and instead we need to patch up the context image so that it continues from the start of the next request. v2: Fixup cancellation so that we only scrub the payload of the active request and do not short-circuit the breadcrumbs (which might cause other contexts to execute out of order). v3: Grammar pass Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191023133108.21401-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 131 ++++++---- drivers/gpu/drm/i915/gt/selftest_lrc.c | 321 +++++++++++++++++++++++++ 2 files changed, 411 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index ff0dd297e782..651d5dd39464 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -234,6 +234,9 @@ static void execlists_init_reg_state(u32 *reg_state, const struct intel_engine_cs *engine, const struct intel_ring *ring, bool close); +static void +__execlists_update_reg_state(const struct intel_context *ce, + const struct intel_engine_cs *engine); static void mark_eio(struct i915_request *rq) { @@ -246,6 +249,31 @@ static void mark_eio(struct i915_request *rq) i915_request_mark_complete(rq); } +static struct i915_request *active_request(struct i915_request *rq) +{ + const struct intel_context * const ce = rq->hw_context; + struct i915_request *active = NULL; + struct list_head *list; + + if (!i915_request_is_active(rq)) /* unwound, but incomplete! */ + return rq; + + rcu_read_lock(); + list = &rcu_dereference(rq->timeline)->requests; + list_for_each_entry_from_reverse(rq, list, link) { + if (i915_request_completed(rq)) + break; + + if (rq->hw_context != ce) + break; + + active = rq; + } + rcu_read_unlock(); + + return active; +} + static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine) { return (i915_ggtt_offset(engine->status_page.vma) + @@ -972,6 +1000,58 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce) tasklet_schedule(&ve->base.execlists.tasklet); } +static void restore_default_state(struct intel_context *ce, + struct intel_engine_cs *engine) +{ + u32 *regs = ce->lrc_reg_state; + + if (engine->pinned_default_state) + memcpy(regs, /* skip restoring the vanilla PPHWSP */ + engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, + engine->context_size - PAGE_SIZE); + + execlists_init_reg_state(regs, ce, engine, ce->ring, false); +} + +static void reset_active(struct i915_request *rq, + struct intel_engine_cs *engine) +{ + struct intel_context * const ce = rq->hw_context; + + /* + * The executing context has been cancelled. We want to prevent + * further execution along this context and propagate the error on + * to anything depending on its results. + * + * In __i915_request_submit(), we apply the -EIO and remove the + * requests' payloads for any banned requests. But first, we must + * rewind the context back to the start of the incomplete request so + * that we do not jump back into the middle of the batch. + * + * We preserve the breadcrumbs and semaphores of the incomplete + * requests so that inter-timeline dependencies (i.e other timelines) + * remain correctly ordered. And we defer to __i915_request_submit() + * so that all asynchronous waits are correctly handled. + */ + GEM_TRACE("%s(%s): { rq=%llx:%lld }\n", + __func__, engine->name, rq->fence.context, rq->fence.seqno); + + /* On resubmission of the active request, payload will be scrubbed */ + rq = active_request(rq); + if (rq) + ce->ring->head = intel_ring_wrap(ce->ring, rq->head); + else + ce->ring->head = ce->ring->tail; + intel_ring_update_space(ce->ring); + + /* Scrub the context image to prevent replaying the previous batch */ + restore_default_state(ce, engine); + __execlists_update_reg_state(ce, engine); + + /* We've switched away, so this should be a no-op, but intent matters */ + ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; +} + static inline void __execlists_schedule_out(struct i915_request *rq, struct intel_engine_cs * const engine) @@ -982,6 +1062,9 @@ __execlists_schedule_out(struct i915_request *rq, execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); intel_gt_pm_put(engine->gt); + if (unlikely(i915_gem_context_is_banned(ce->gem_context))) + reset_active(rq, engine); + /* * If this is part of a virtual engine, its next request may * have been blocked waiting for access to the active context. @@ -1380,6 +1463,10 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine) if (!rq) return 0; + /* Force a fast reset for terminated contexts (ignoring sysfs!) */ + if (unlikely(i915_gem_context_is_banned(rq->gem_context))) + return 1; + return READ_ONCE(engine->props.preempt_timeout_ms); } @@ -2792,29 +2879,6 @@ static void reset_csb_pointers(struct intel_engine_cs *engine) &execlists->csb_status[reset_value]); } -static struct i915_request *active_request(struct i915_request *rq) -{ - const struct intel_context * const ce = rq->hw_context; - struct i915_request *active = NULL; - struct list_head *list; - - if (!i915_request_is_active(rq)) /* unwound, but incomplete! */ - return rq; - - list = &i915_request_active_timeline(rq)->requests; - list_for_each_entry_from_reverse(rq, list, link) { - if (i915_request_completed(rq)) - break; - - if (rq->hw_context != ce) - break; - - active = rq; - } - - return active; -} - static void __execlists_reset_reg_state(const struct intel_context *ce, const struct intel_engine_cs *engine) { @@ -2831,7 +2895,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) struct intel_engine_execlists * const execlists = &engine->execlists; struct intel_context *ce; struct i915_request *rq; - u32 *regs; mb(); /* paranoia: read the CSB pointers from after the reset */ clflush(execlists->csb_write); @@ -2907,13 +2970,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * to recreate its own state. */ GEM_BUG_ON(!intel_context_is_pinned(ce)); - regs = ce->lrc_reg_state; - if (engine->pinned_default_state) { - memcpy(regs, /* skip restoring the vanilla PPHWSP */ - engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, - engine->context_size - PAGE_SIZE); - } - execlists_init_reg_state(regs, ce, engine, ce->ring, false); + restore_default_state(ce, engine); out_replay: GEM_TRACE("%s replay {head:%04x, tail:%04x\n", @@ -4574,16 +4631,8 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, * future request will be after userspace has had the opportunity * to recreate its own state. */ - if (scrub) { - u32 *regs = ce->lrc_reg_state; - - if (engine->pinned_default_state) { - memcpy(regs, /* skip restoring the vanilla PPHWSP */ - engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, - engine->context_size - PAGE_SIZE); - } - execlists_init_reg_state(regs, ce, engine, ce->ring, false); - } + if (scrub) + restore_default_state(ce, engine); /* Rerun the request; its payload has been neutered (if guilty). */ ce->ring->head = head; diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index b6352671c5a0..d5d268be554e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -7,6 +7,7 @@ #include #include "gem/i915_gem_pm.h" +#include "gt/intel_engine_heartbeat.h" #include "gt/intel_reset.h" #include "i915_selftest.h" @@ -1160,6 +1161,325 @@ err_wedged: goto err_client_b; } +struct live_preempt_cancel { + struct intel_engine_cs *engine; + struct preempt_client a, b; +}; + +static int __cancel_active0(struct live_preempt_cancel *arg) +{ + struct i915_request *rq; + struct igt_live_test t; + int err; + + /* Preempt cancel of ELSP0 */ + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); + if (igt_live_test_begin(&t, arg->engine->i915, + __func__, arg->engine->name)) + return -EIO; + + clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags); + rq = spinner_create_request(&arg->a.spin, + arg->a.ctx, arg->engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_get(rq); + i915_request_add(rq); + if (!igt_wait_for_spinner(&arg->a.spin, rq)) { + err = -EIO; + goto out; + } + + i915_gem_context_set_banned(arg->a.ctx); + err = intel_engine_pulse(arg->engine); + if (err) + goto out; + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = -EIO; + goto out; + } + + if (rq->fence.error != -EIO) { + pr_err("Cancelled inflight0 request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + +out: + i915_request_put(rq); + if (igt_live_test_end(&t)) + err = -EIO; + return err; +} + +static int __cancel_active1(struct live_preempt_cancel *arg) +{ + struct i915_request *rq[2] = {}; + struct igt_live_test t; + int err; + + /* Preempt cancel of ELSP1 */ + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); + if (igt_live_test_begin(&t, arg->engine->i915, + __func__, arg->engine->name)) + return -EIO; + + clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags); + rq[0] = spinner_create_request(&arg->a.spin, + arg->a.ctx, arg->engine, + MI_NOOP); /* no preemption */ + if (IS_ERR(rq[0])) + return PTR_ERR(rq[0]); + + i915_request_get(rq[0]); + i915_request_add(rq[0]); + if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { + err = -EIO; + goto out; + } + + clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags); + rq[1] = spinner_create_request(&arg->b.spin, + arg->b.ctx, arg->engine, + MI_ARB_CHECK); + if (IS_ERR(rq[1])) { + err = PTR_ERR(rq[1]); + goto out; + } + + i915_request_get(rq[1]); + err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); + i915_request_add(rq[1]); + if (err) + goto out; + + i915_gem_context_set_banned(arg->b.ctx); + err = intel_engine_pulse(arg->engine); + if (err) + goto out; + + igt_spinner_end(&arg->a.spin); + if (i915_request_wait(rq[1], 0, HZ / 5) < 0) { + err = -EIO; + goto out; + } + + if (rq[0]->fence.error != 0) { + pr_err("Normal inflight0 request did not complete\n"); + err = -EINVAL; + goto out; + } + + if (rq[1]->fence.error != -EIO) { + pr_err("Cancelled inflight1 request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + +out: + i915_request_put(rq[1]); + i915_request_put(rq[0]); + if (igt_live_test_end(&t)) + err = -EIO; + return err; +} + +static int __cancel_queued(struct live_preempt_cancel *arg) +{ + struct i915_request *rq[3] = {}; + struct igt_live_test t; + int err; + + /* Full ELSP and one in the wings */ + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); + if (igt_live_test_begin(&t, arg->engine->i915, + __func__, arg->engine->name)) + return -EIO; + + clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags); + rq[0] = spinner_create_request(&arg->a.spin, + arg->a.ctx, arg->engine, + MI_ARB_CHECK); + if (IS_ERR(rq[0])) + return PTR_ERR(rq[0]); + + i915_request_get(rq[0]); + i915_request_add(rq[0]); + if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { + err = -EIO; + goto out; + } + + clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags); + rq[1] = igt_request_alloc(arg->b.ctx, arg->engine); + if (IS_ERR(rq[1])) { + err = PTR_ERR(rq[1]); + goto out; + } + + i915_request_get(rq[1]); + err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); + i915_request_add(rq[1]); + if (err) + goto out; + + rq[2] = spinner_create_request(&arg->b.spin, + arg->a.ctx, arg->engine, + MI_ARB_CHECK); + if (IS_ERR(rq[2])) { + err = PTR_ERR(rq[2]); + goto out; + } + + i915_request_get(rq[2]); + err = i915_request_await_dma_fence(rq[2], &rq[1]->fence); + i915_request_add(rq[2]); + if (err) + goto out; + + i915_gem_context_set_banned(arg->a.ctx); + err = intel_engine_pulse(arg->engine); + if (err) + goto out; + + if (i915_request_wait(rq[2], 0, HZ / 5) < 0) { + err = -EIO; + goto out; + } + + if (rq[0]->fence.error != -EIO) { + pr_err("Cancelled inflight0 request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + + if (rq[1]->fence.error != 0) { + pr_err("Normal inflight1 request did not complete\n"); + err = -EINVAL; + goto out; + } + + if (rq[2]->fence.error != -EIO) { + pr_err("Cancelled queued request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + +out: + i915_request_put(rq[2]); + i915_request_put(rq[1]); + i915_request_put(rq[0]); + if (igt_live_test_end(&t)) + err = -EIO; + return err; +} + +static int __cancel_hostile(struct live_preempt_cancel *arg) +{ + struct i915_request *rq; + int err; + + /* Preempt cancel non-preemptible spinner in ELSP0 */ + if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) + return 0; + + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); + clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags); + rq = spinner_create_request(&arg->a.spin, + arg->a.ctx, arg->engine, + MI_NOOP); /* preemption disabled */ + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_get(rq); + i915_request_add(rq); + if (!igt_wait_for_spinner(&arg->a.spin, rq)) { + err = -EIO; + goto out; + } + + i915_gem_context_set_banned(arg->a.ctx); + err = intel_engine_pulse(arg->engine); /* force reset */ + if (err) + goto out; + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = -EIO; + goto out; + } + + if (rq->fence.error != -EIO) { + pr_err("Cancelled inflight0 request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + +out: + i915_request_put(rq); + if (igt_flush_test(arg->engine->i915)) + err = -EIO; + return err; +} + +static int live_preempt_cancel(void *arg) +{ + struct intel_gt *gt = arg; + struct live_preempt_cancel data; + enum intel_engine_id id; + int err = -ENOMEM; + + /* + * To cancel an inflight context, we need to first remove it from the + * GPU. That sounds like preemption! Plus a little bit of bookkeeping. + */ + + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) + return 0; + + if (preempt_client_init(gt, &data.a)) + return -ENOMEM; + if (preempt_client_init(gt, &data.b)) + goto err_client_a; + + for_each_engine(data.engine, gt, id) { + if (!intel_engine_has_preemption(data.engine)) + continue; + + err = __cancel_active0(&data); + if (err) + goto err_wedged; + + err = __cancel_active1(&data); + if (err) + goto err_wedged; + + err = __cancel_queued(&data); + if (err) + goto err_wedged; + + err = __cancel_hostile(&data); + if (err) + goto err_wedged; + } + + err = 0; +err_client_b: + preempt_client_fini(&data.b); +err_client_a: + preempt_client_fini(&data.a); + return err; + +err_wedged: + GEM_TRACE_DUMP(); + igt_spinner_end(&data.b.spin); + igt_spinner_end(&data.a.spin); + intel_gt_set_wedged(gt); + goto err_client_b; +} + static int live_suppress_self_preempt(void *arg) { struct intel_gt *gt = arg; @@ -2693,6 +3013,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_preempt), SUBTEST(live_late_preempt), SUBTEST(live_nopreempt), + SUBTEST(live_preempt_cancel), SUBTEST(live_suppress_self_preempt), SUBTEST(live_suppress_wait_preempt), SUBTEST(live_chain_preempt), From 2e0986a58cc4f2e7f9e7ede19ec32b9c116d0068 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 23 Oct 2019 14:31:07 +0100 Subject: [PATCH 036/154] drm/i915/gem: Cancel contexts when hangchecking is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Normally, we rely on our hangcheck to prevent persistent batches from hogging the GPU. However, if the user disables hangcheck, this mechanism breaks down. Despite our insistence that this is unsafe, the users are equally insistent that they want to use endless batches and will disable the hangcheck mechanism. We are looking at replacing hangcheck, in the next patch, with a softer mechanism, that sends a pulse down the engine to check if it is well. We can use the same preemptive pulse to flush an active context off the GPU upon context close, preventing resources being lost and unkillable requests remaining on the GPU after process termination. Testcase: igt/gem_ctx_exec/basic-nohangcheck Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Michał Winiarski Cc: Jon Bloomfield Reviewed-by: Jon Bloomfield Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191023133108.21401-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 141 ++++++++++++++++++++ 1 file changed, 141 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 7b01f4605f21..b2f042d87be0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -70,6 +70,7 @@ #include #include "gt/intel_lrc_reg.h" +#include "gt/intel_engine_heartbeat.h" #include "gt/intel_engine_user.h" #include "i915_gem_context.h" @@ -276,6 +277,135 @@ void i915_gem_context_release(struct kref *ref) schedule_work(&gc->free_work); } +static inline struct i915_gem_engines * +__context_engines_static(const struct i915_gem_context *ctx) +{ + return rcu_dereference_protected(ctx->engines, true); +} + +static bool __reset_engine(struct intel_engine_cs *engine) +{ + struct intel_gt *gt = engine->gt; + bool success = false; + + if (!intel_has_reset_engine(gt)) + return false; + + if (!test_and_set_bit(I915_RESET_ENGINE + engine->id, + >->reset.flags)) { + success = intel_engine_reset(engine, NULL) == 0; + clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, + >->reset.flags); + } + + return success; +} + +static void __reset_context(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + intel_gt_handle_error(engine->gt, engine->mask, 0, + "context closure in %s", ctx->name); +} + +static bool __cancel_engine(struct intel_engine_cs *engine) +{ + /* + * Send a "high priority pulse" down the engine to cause the + * current request to be momentarily preempted. (If it fails to + * be preempted, it will be reset). As we have marked our context + * as banned, any incomplete request, including any running, will + * be skipped following the preemption. + * + * If there is no hangchecking (one of the reasons why we try to + * cancel the context) and no forced preemption, there may be no + * means by which we reset the GPU and evict the persistent hog. + * Ergo if we are unable to inject a preemptive pulse that can + * kill the banned context, we fallback to doing a local reset + * instead. + */ + if (CONFIG_DRM_I915_PREEMPT_TIMEOUT && !intel_engine_pulse(engine)) + return true; + + /* If we are unable to send a pulse, try resetting this engine. */ + return __reset_engine(engine); +} + +static struct intel_engine_cs * +active_engine(struct dma_fence *fence, struct intel_context *ce) +{ + struct i915_request *rq = to_request(fence); + struct intel_engine_cs *engine, *locked; + + /* + * Serialise with __i915_request_submit() so that it sees + * is-banned?, or we know the request is already inflight. + */ + locked = READ_ONCE(rq->engine); + spin_lock_irq(&locked->active.lock); + while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { + spin_unlock(&locked->active.lock); + spin_lock(&engine->active.lock); + locked = engine; + } + + engine = NULL; + if (i915_request_is_active(rq) && !rq->fence.error) + engine = rq->engine; + + spin_unlock_irq(&locked->active.lock); + + return engine; +} + +static void kill_context(struct i915_gem_context *ctx) +{ + struct i915_gem_engines_iter it; + struct intel_context *ce; + + /* + * If we are already banned, it was due to a guilty request causing + * a reset and the entire context being evicted from the GPU. + */ + if (i915_gem_context_is_banned(ctx)) + return; + + i915_gem_context_set_banned(ctx); + + /* + * Map the user's engine back to the actual engines; one virtual + * engine will be mapped to multiple engines, and using ctx->engine[] + * the same engine may be have multiple instances in the user's map. + * However, we only care about pending requests, so only include + * engines on which there are incomplete requests. + */ + for_each_gem_engine(ce, __context_engines_static(ctx), it) { + struct intel_engine_cs *engine; + struct dma_fence *fence; + + if (!ce->timeline) + continue; + + fence = i915_active_fence_get(&ce->timeline->last_request); + if (!fence) + continue; + + /* Check with the backend if the request is still inflight */ + engine = active_engine(fence, ce); + + /* First attempt to gracefully cancel the context */ + if (engine && !__cancel_engine(engine)) + /* + * If we are unable to send a preemptive pulse to bump + * the context from the GPU, we have to resort to a full + * reset. We hope the collateral damage is worth it. + */ + __reset_context(ctx, engine); + + dma_fence_put(fence); + } +} + static void context_close(struct i915_gem_context *ctx) { struct i915_address_space *vm; @@ -298,6 +428,17 @@ static void context_close(struct i915_gem_context *ctx) lut_close(ctx); mutex_unlock(&ctx->mutex); + + /* + * If the user has disabled hangchecking, we can not be sure that + * the batches will ever complete after the context is closed, + * keeping the context and all resources pinned forever. So in this + * case we opt to forcibly kill off all remaining requests on + * context close. + */ + if (!i915_modparams.enable_hangcheck) + kill_context(ctx); + i915_gem_context_put(ctx); } From 058179e72e0956a2dfe4927db6cbe5fbfb2406aa Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 23 Oct 2019 14:31:08 +0100 Subject: [PATCH 037/154] drm/i915/gt: Replace hangcheck by heartbeats Replace sampling the engine state every so often with a periodic heartbeat request to measure the health of an engine. This is coupled with the forced-preemption to allow long running requests to survive so long as they do not block other users. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Jon Bloomfield Reviewed-by: Jon Bloomfield Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191023133108.21401-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Kconfig.profile | 11 + drivers/gpu/drm/i915/Makefile | 1 - drivers/gpu/drm/i915/display/intel_display.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_object.h | 1 - drivers/gpu/drm/i915/gem/i915_gem_pm.c | 2 - drivers/gpu/drm/i915/gt/intel_engine.h | 32 -- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 12 +- .../gpu/drm/i915/gt/intel_engine_heartbeat.c | 157 ++++++++ .../gpu/drm/i915/gt/intel_engine_heartbeat.h | 8 + drivers/gpu/drm/i915/gt/intel_engine_pm.c | 5 +- drivers/gpu/drm/i915/gt/intel_engine_types.h | 17 +- drivers/gpu/drm/i915/gt/intel_gt.c | 1 - drivers/gpu/drm/i915/gt/intel_gt.h | 4 - drivers/gpu/drm/i915/gt/intel_gt_pm.c | 1 - drivers/gpu/drm/i915/gt/intel_gt_types.h | 9 - drivers/gpu/drm/i915/gt/intel_hangcheck.c | 361 ------------------ drivers/gpu/drm/i915/gt/intel_reset.c | 3 +- .../drm/i915/gt/selftest_engine_heartbeat.c | 171 +++++++++ drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 4 - drivers/gpu/drm/i915/i915_debugfs.c | 87 ----- drivers/gpu/drm/i915/i915_drv.c | 3 - drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_gpu_error.c | 33 +- drivers/gpu/drm/i915/i915_gpu_error.h | 2 - drivers/gpu/drm/i915/i915_priolist_types.h | 6 + 25 files changed, 379 insertions(+), 555 deletions(-) delete mode 100644 drivers/gpu/drm/i915/gt/intel_hangcheck.c diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile index b071b6024152..8ab7af5eb311 100644 --- a/drivers/gpu/drm/i915/Kconfig.profile +++ b/drivers/gpu/drm/i915/Kconfig.profile @@ -12,6 +12,17 @@ config DRM_I915_USERFAULT_AUTOSUSPEND May be 0 to disable the extra delay and solely use the device level runtime pm autosuspend delay tunable. +config DRM_I915_HEARTBEAT_INTERVAL + int "Interval between heartbeat pulses (ms)" + default 2500 # milliseconds + help + The driver sends a periodic heartbeat down all active engines to + check the health of the GPU and undertake regular house-keeping of + internal driver state. + + May be 0 to disable heartbeats and therefore disable automatic GPU + hang detection. + config DRM_I915_PREEMPT_TIMEOUT int "Preempt timeout (ms, jiffy granularity)" default 100 # milliseconds diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 2fd4bed188e5..21601bb27b22 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -87,7 +87,6 @@ gt-y += \ gt/intel_gt_pm.o \ gt/intel_gt_pm_irq.o \ gt/intel_gt_requests.o \ - gt/intel_hangcheck.o \ gt/intel_llc.o \ gt/intel_lrc.o \ gt/intel_rc6.o \ diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 236fdf122e47..579655675b08 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -14863,7 +14863,7 @@ static void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state) static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj) { struct i915_sched_attr attr = { - .priority = I915_PRIORITY_DISPLAY, + .priority = I915_USER_PRIORITY(I915_PRIORITY_DISPLAY), }; i915_gem_object_wait_priority(obj, 0, &attr); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index aead7e6725f9..458cd51331f1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -462,6 +462,5 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj, int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, const struct i915_sched_attr *attr); -#define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX) #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 9bc0cf3139e3..c99bb94fe41e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -76,8 +76,6 @@ void i915_gem_suspend(struct drm_i915_private *i915) intel_gt_suspend(&i915->gt); intel_uc_suspend(&i915->gt.uc); - cancel_delayed_work_sync(&i915->gt.hangcheck.work); - i915_gem_drain_freed_objects(i915); } diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 9409b7856299..ee47444a6ad4 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -91,38 +91,6 @@ struct intel_gt; /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to * do the writes, and that must have qw aligned offsets, simply pretend it's 8b. */ -enum intel_engine_hangcheck_action { - ENGINE_IDLE = 0, - ENGINE_WAIT, - ENGINE_ACTIVE_SEQNO, - ENGINE_ACTIVE_HEAD, - ENGINE_ACTIVE_SUBUNITS, - ENGINE_WAIT_KICK, - ENGINE_DEAD, -}; - -static inline const char * -hangcheck_action_to_str(const enum intel_engine_hangcheck_action a) -{ - switch (a) { - case ENGINE_IDLE: - return "idle"; - case ENGINE_WAIT: - return "wait"; - case ENGINE_ACTIVE_SEQNO: - return "active seqno"; - case ENGINE_ACTIVE_HEAD: - return "active head"; - case ENGINE_ACTIVE_SUBUNITS: - return "active subunits"; - case ENGINE_WAIT_KICK: - return "wait kick"; - case ENGINE_DEAD: - return "dead"; - } - - return "unknown"; -} static inline unsigned int execlists_num_ports(const struct intel_engine_execlists * const execlists) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index b91ea07f4819..8b5129e0b49d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -308,6 +308,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->instance = info->instance; __sprint_engine_name(engine); + engine->props.heartbeat_interval_ms = + CONFIG_DRM_I915_HEARTBEAT_INTERVAL; engine->props.preempt_timeout_ms = CONFIG_DRM_I915_PREEMPT_TIMEOUT; engine->props.stop_timeout_ms = @@ -609,7 +611,6 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine) intel_engine_init_active(engine, ENGINE_PHYSICAL); intel_engine_init_breadcrumbs(engine); intel_engine_init_execlists(engine); - intel_engine_init_hangcheck(engine); intel_engine_init_cmd_parser(engine); intel_engine_init__pm(engine); @@ -1470,8 +1471,13 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "*** WEDGED ***\n"); drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count)); - drm_printf(m, "\tHangcheck: %d ms ago\n", - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); + + rcu_read_lock(); + rq = READ_ONCE(engine->heartbeat.systole); + if (rq) + drm_printf(m, "\tHeartbeat: %d ms ago\n", + jiffies_to_msecs(jiffies - rq->emitted_jiffies)); + rcu_read_unlock(); drm_printf(m, "\tReset count: %d (global %d)\n", i915_reset_engine_count(error, engine), i915_reset_count(error)); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 4b9ab7813d54..9977f59f6b53 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -11,6 +11,30 @@ #include "intel_engine_pm.h" #include "intel_engine.h" #include "intel_gt.h" +#include "intel_reset.h" + +/* + * While the engine is active, we send a periodic pulse along the engine + * to check on its health and to flush any idle-barriers. If that request + * is stuck, and we fail to preempt it, we declare the engine hung and + * issue a reset -- in the hope that restores progress. + */ + +static bool next_heartbeat(struct intel_engine_cs *engine) +{ + long delay; + + delay = READ_ONCE(engine->props.heartbeat_interval_ms); + if (!delay) + return false; + + delay = msecs_to_jiffies_timeout(delay); + if (delay >= HZ) + delay = round_jiffies_up_relative(delay); + schedule_delayed_work(&engine->heartbeat.work, delay); + + return true; +} static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq) { @@ -18,6 +42,139 @@ static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq) i915_request_add_active_barriers(rq); } +static void show_heartbeat(const struct i915_request *rq, + struct intel_engine_cs *engine) +{ + struct drm_printer p = drm_debug_printer("heartbeat"); + + intel_engine_dump(engine, &p, + "%s heartbeat {prio:%d} not ticking\n", + engine->name, + rq->sched.attr.priority); +} + +static void heartbeat(struct work_struct *wrk) +{ + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(I915_PRIORITY_MIN), + }; + struct intel_engine_cs *engine = + container_of(wrk, typeof(*engine), heartbeat.work.work); + struct intel_context *ce = engine->kernel_context; + struct i915_request *rq; + + if (!intel_engine_pm_get_if_awake(engine)) + return; + + rq = engine->heartbeat.systole; + if (rq && i915_request_completed(rq)) { + i915_request_put(rq); + engine->heartbeat.systole = NULL; + } + + if (intel_gt_is_wedged(engine->gt)) + goto out; + + if (engine->heartbeat.systole) { + if (engine->schedule && + rq->sched.attr.priority < I915_PRIORITY_BARRIER) { + /* + * Gradually raise the priority of the heartbeat to + * give high priority work [which presumably desires + * low latency and no jitter] the chance to naturally + * complete before being preempted. + */ + attr.priority = I915_PRIORITY_MASK; + if (rq->sched.attr.priority >= attr.priority) + attr.priority |= I915_USER_PRIORITY(I915_PRIORITY_HEARTBEAT); + if (rq->sched.attr.priority >= attr.priority) + attr.priority = I915_PRIORITY_BARRIER; + + local_bh_disable(); + engine->schedule(rq, &attr); + local_bh_enable(); + } else { + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + show_heartbeat(rq, engine); + + intel_gt_handle_error(engine->gt, engine->mask, + I915_ERROR_CAPTURE, + "stopped heartbeat on %s", + engine->name); + } + goto out; + } + + if (engine->wakeref_serial == engine->serial) + goto out; + + mutex_lock(&ce->timeline->mutex); + + intel_context_enter(ce); + rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN); + intel_context_exit(ce); + if (IS_ERR(rq)) + goto unlock; + + idle_pulse(engine, rq); + if (i915_modparams.enable_hangcheck) + engine->heartbeat.systole = i915_request_get(rq); + + __i915_request_commit(rq); + __i915_request_queue(rq, &attr); + +unlock: + mutex_unlock(&ce->timeline->mutex); +out: + if (!next_heartbeat(engine)) + i915_request_put(fetch_and_zero(&engine->heartbeat.systole)); + intel_engine_pm_put(engine); +} + +void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine) +{ + if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) + return; + + next_heartbeat(engine); +} + +void intel_engine_park_heartbeat(struct intel_engine_cs *engine) +{ + cancel_delayed_work(&engine->heartbeat.work); + i915_request_put(fetch_and_zero(&engine->heartbeat.systole)); +} + +void intel_engine_init_heartbeat(struct intel_engine_cs *engine) +{ + INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat); +} + +int intel_engine_set_heartbeat(struct intel_engine_cs *engine, + unsigned long delay) +{ + int err; + + /* Send one last pulse before to cleanup persistent hogs */ + if (!delay && CONFIG_DRM_I915_PREEMPT_TIMEOUT) { + err = intel_engine_pulse(engine); + if (err) + return err; + } + + WRITE_ONCE(engine->props.heartbeat_interval_ms, delay); + + if (intel_engine_pm_get_if_awake(engine)) { + if (delay) + intel_engine_unpark_heartbeat(engine); + else + intel_engine_park_heartbeat(engine); + intel_engine_pm_put(engine); + } + + return 0; +} + int intel_engine_pulse(struct intel_engine_cs *engine) { struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER }; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h index b334e5aaf78d..a7b8c0f9e005 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h @@ -9,6 +9,14 @@ struct intel_engine_cs; +void intel_engine_init_heartbeat(struct intel_engine_cs *engine); + +int intel_engine_set_heartbeat(struct intel_engine_cs *engine, + unsigned long delay); + +void intel_engine_park_heartbeat(struct intel_engine_cs *engine); +void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine); + int intel_engine_pulse(struct intel_engine_cs *engine); int intel_engine_flush_barriers(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 7d76611d9df1..6fbfa2162e54 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -7,6 +7,7 @@ #include "i915_drv.h" #include "intel_engine.h" +#include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" #include "intel_engine_pool.h" #include "intel_gt.h" @@ -34,7 +35,7 @@ static int __engine_unpark(struct intel_wakeref *wf) if (engine->unpark) engine->unpark(engine); - intel_engine_init_hangcheck(engine); + intel_engine_unpark_heartbeat(engine); return 0; } @@ -158,6 +159,7 @@ static int __engine_park(struct intel_wakeref *wf) call_idle_barriers(engine); /* cleanup after wedging */ + intel_engine_park_heartbeat(engine); intel_engine_disarm_breadcrumbs(engine); intel_engine_pool_park(&engine->pool); @@ -188,6 +190,7 @@ void intel_engine_init__pm(struct intel_engine_cs *engine) struct intel_runtime_pm *rpm = engine->uncore->rpm; intel_wakeref_init(&engine->wakeref, rpm, &wf_ops); + intel_engine_init_heartbeat(engine); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 1251dac91f31..26998901feff 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -15,6 +15,7 @@ #include #include #include +#include #include "i915_gem.h" #include "i915_pmu.h" @@ -76,14 +77,6 @@ struct intel_instdone { u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; }; -struct intel_engine_hangcheck { - u64 acthd; - u32 last_ring; - u32 last_head; - unsigned long action_timestamp; - struct intel_instdone instdone; -}; - struct intel_ring { struct kref ref; struct i915_vma *vma; @@ -331,6 +324,11 @@ struct intel_engine_cs { intel_engine_mask_t saturated; /* submitting semaphores too late? */ + struct { + struct delayed_work work; + struct i915_request *systole; + } heartbeat; + unsigned long serial; unsigned long wakeref_serial; @@ -481,8 +479,6 @@ struct intel_engine_cs { /* status_notifier: list of callbacks for context-switch changes */ struct atomic_notifier_head context_status_notifier; - struct intel_engine_hangcheck hangcheck; - #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) #define I915_ENGINE_SUPPORTS_STATS BIT(1) #define I915_ENGINE_HAS_PREEMPTION BIT(2) @@ -549,6 +545,7 @@ struct intel_engine_cs { } stats; struct { + unsigned long heartbeat_interval_ms; unsigned long preempt_timeout_ms; unsigned long stop_timeout_ms; } props; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 1c4b6c9642ad..35127699591d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -22,7 +22,6 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) INIT_LIST_HEAD(>->closed_vma); spin_lock_init(>->closed_lock); - intel_gt_init_hangcheck(gt); intel_gt_init_reset(gt); intel_gt_init_requests(gt); intel_gt_pm_init_early(gt); diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index e6ab0bff0efb..5b6effed3713 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -46,8 +46,6 @@ void intel_gt_clear_error_registers(struct intel_gt *gt, void intel_gt_flush_ggtt_writes(struct intel_gt *gt); void intel_gt_chipset_flush(struct intel_gt *gt); -void intel_gt_init_hangcheck(struct intel_gt *gt); - static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt, enum intel_gt_scratch_field field) { @@ -59,6 +57,4 @@ static inline bool intel_gt_is_wedged(struct intel_gt *gt) return __intel_reset_failed(>->reset); } -void intel_gt_queue_hangcheck(struct intel_gt *gt); - #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 06e73d56cfcf..aff4eda47819 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -47,7 +47,6 @@ static int __gt_unpark(struct intel_wakeref *wf) i915_pmu_gt_unparked(i915); - intel_gt_queue_hangcheck(gt); intel_gt_unpark_requests(gt); return 0; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 980973e66e7f..fcb3a6c9421c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -27,14 +27,6 @@ struct i915_ggtt; struct intel_engine_cs; struct intel_uncore; -struct intel_hangcheck { - /* For hangcheck timer */ -#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ -#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD) - - struct delayed_work work; -}; - struct intel_gt { struct drm_i915_private *i915; struct intel_uncore *uncore; @@ -68,7 +60,6 @@ struct intel_gt { struct list_head closed_vma; spinlock_t closed_lock; /* guards the list of closed_vma */ - struct intel_hangcheck hangcheck; struct intel_reset reset; /** diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c deleted file mode 100644 index 0fdef00af9e4..000000000000 --- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c +++ /dev/null @@ -1,361 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include "i915_drv.h" -#include "intel_engine.h" -#include "intel_gt.h" -#include "intel_reset.h" - -struct hangcheck { - u64 acthd; - u32 ring; - u32 head; - enum intel_engine_hangcheck_action action; - unsigned long action_timestamp; - int deadlock; - struct intel_instdone instdone; - bool wedged:1; - bool stalled:1; -}; - -static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone) -{ - u32 tmp = current_instdone | *old_instdone; - bool unchanged; - - unchanged = tmp == *old_instdone; - *old_instdone |= tmp; - - return unchanged; -} - -static bool subunits_stuck(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; - struct intel_instdone instdone; - struct intel_instdone *accu_instdone = &engine->hangcheck.instdone; - bool stuck; - int slice; - int subslice; - - intel_engine_get_instdone(engine, &instdone); - - /* There might be unstable subunit states even when - * actual head is not moving. Filter out the unstable ones by - * accumulating the undone -> done transitions and only - * consider those as progress. - */ - stuck = instdone_unchanged(instdone.instdone, - &accu_instdone->instdone); - stuck &= instdone_unchanged(instdone.slice_common, - &accu_instdone->slice_common); - - for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice) { - stuck &= instdone_unchanged(instdone.sampler[slice][subslice], - &accu_instdone->sampler[slice][subslice]); - stuck &= instdone_unchanged(instdone.row[slice][subslice], - &accu_instdone->row[slice][subslice]); - } - - return stuck; -} - -static enum intel_engine_hangcheck_action -head_stuck(struct intel_engine_cs *engine, u64 acthd) -{ - if (acthd != engine->hangcheck.acthd) { - - /* Clear subunit states on head movement */ - memset(&engine->hangcheck.instdone, 0, - sizeof(engine->hangcheck.instdone)); - - return ENGINE_ACTIVE_HEAD; - } - - if (!subunits_stuck(engine)) - return ENGINE_ACTIVE_SUBUNITS; - - return ENGINE_DEAD; -} - -static enum intel_engine_hangcheck_action -engine_stuck(struct intel_engine_cs *engine, u64 acthd) -{ - enum intel_engine_hangcheck_action ha; - u32 tmp; - - ha = head_stuck(engine, acthd); - if (ha != ENGINE_DEAD) - return ha; - - if (IS_GEN(engine->i915, 2)) - return ENGINE_DEAD; - - /* Is the chip hanging on a WAIT_FOR_EVENT? - * If so we can simply poke the RB_WAIT bit - * and break the hang. This should work on - * all but the second generation chipsets. - */ - tmp = ENGINE_READ(engine, RING_CTL); - if (tmp & RING_WAIT) { - intel_gt_handle_error(engine->gt, engine->mask, 0, - "stuck wait on %s", engine->name); - ENGINE_WRITE(engine, RING_CTL, tmp); - return ENGINE_WAIT_KICK; - } - - return ENGINE_DEAD; -} - -static void hangcheck_load_sample(struct intel_engine_cs *engine, - struct hangcheck *hc) -{ - hc->acthd = intel_engine_get_active_head(engine); - hc->ring = ENGINE_READ(engine, RING_START); - hc->head = ENGINE_READ(engine, RING_HEAD); -} - -static void hangcheck_store_sample(struct intel_engine_cs *engine, - const struct hangcheck *hc) -{ - engine->hangcheck.acthd = hc->acthd; - engine->hangcheck.last_ring = hc->ring; - engine->hangcheck.last_head = hc->head; -} - -static enum intel_engine_hangcheck_action -hangcheck_get_action(struct intel_engine_cs *engine, - const struct hangcheck *hc) -{ - if (intel_engine_is_idle(engine)) - return ENGINE_IDLE; - - if (engine->hangcheck.last_ring != hc->ring) - return ENGINE_ACTIVE_SEQNO; - - if (engine->hangcheck.last_head != hc->head) - return ENGINE_ACTIVE_SEQNO; - - return engine_stuck(engine, hc->acthd); -} - -static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, - struct hangcheck *hc) -{ - unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT; - - hc->action = hangcheck_get_action(engine, hc); - - /* We always increment the progress - * if the engine is busy and still processing - * the same request, so that no single request - * can run indefinitely (such as a chain of - * batches). The only time we do not increment - * the hangcheck score on this ring, if this - * engine is in a legitimate wait for another - * engine. In that case the waiting engine is a - * victim and we want to be sure we catch the - * right culprit. Then every time we do kick - * the ring, make it as a progress as the seqno - * advancement might ensure and if not, it - * will catch the hanging engine. - */ - - switch (hc->action) { - case ENGINE_IDLE: - case ENGINE_ACTIVE_SEQNO: - /* Clear head and subunit states on seqno movement */ - hc->acthd = 0; - - memset(&engine->hangcheck.instdone, 0, - sizeof(engine->hangcheck.instdone)); - - /* Intentional fall through */ - case ENGINE_WAIT_KICK: - case ENGINE_WAIT: - engine->hangcheck.action_timestamp = jiffies; - break; - - case ENGINE_ACTIVE_HEAD: - case ENGINE_ACTIVE_SUBUNITS: - /* - * Seqno stuck with still active engine gets leeway, - * in hopes that it is just a long shader. - */ - timeout = I915_SEQNO_DEAD_TIMEOUT; - break; - - case ENGINE_DEAD: - break; - - default: - MISSING_CASE(hc->action); - } - - hc->stalled = time_after(jiffies, - engine->hangcheck.action_timestamp + timeout); - hc->wedged = time_after(jiffies, - engine->hangcheck.action_timestamp + - I915_ENGINE_WEDGED_TIMEOUT); -} - -static void hangcheck_declare_hang(struct intel_gt *gt, - intel_engine_mask_t hung, - intel_engine_mask_t stuck) -{ - struct intel_engine_cs *engine; - intel_engine_mask_t tmp; - char msg[80]; - int len; - - /* If some rings hung but others were still busy, only - * blame the hanging rings in the synopsis. - */ - if (stuck != hung) - hung &= ~stuck; - len = scnprintf(msg, sizeof(msg), - "%s on ", stuck == hung ? "no progress" : "hang"); - for_each_engine_masked(engine, gt, hung, tmp) - len += scnprintf(msg + len, sizeof(msg) - len, - "%s, ", engine->name); - msg[len-2] = '\0'; - - return intel_gt_handle_error(gt, hung, I915_ERROR_CAPTURE, "%s", msg); -} - -/* - * This is called when the chip hasn't reported back with completed - * batchbuffers in a long time. We keep track per ring seqno progress and - * if there are no progress, hangcheck score for that ring is increased. - * Further, acthd is inspected to see if the ring is stuck. On stuck case - * we kick the ring. If we see no progress on three subsequent calls - * we assume chip is wedged and try to fix it by resetting the chip. - */ -static void hangcheck_elapsed(struct work_struct *work) -{ - struct intel_gt *gt = - container_of(work, typeof(*gt), hangcheck.work.work); - intel_engine_mask_t hung = 0, stuck = 0, wedged = 0; - struct intel_engine_cs *engine; - enum intel_engine_id id; - intel_wakeref_t wakeref; - - if (!i915_modparams.enable_hangcheck) - return; - - if (!READ_ONCE(gt->awake)) - return; - - if (intel_gt_is_wedged(gt)) - return; - - wakeref = intel_runtime_pm_get_if_in_use(gt->uncore->rpm); - if (!wakeref) - return; - - /* As enabling the GPU requires fairly extensive mmio access, - * periodically arm the mmio checker to see if we are triggering - * any invalid access. - */ - intel_uncore_arm_unclaimed_mmio_detection(gt->uncore); - - for_each_engine(engine, gt, id) { - struct hangcheck hc; - - intel_engine_breadcrumbs_irq(engine); - - hangcheck_load_sample(engine, &hc); - hangcheck_accumulate_sample(engine, &hc); - hangcheck_store_sample(engine, &hc); - - if (hc.stalled) { - hung |= engine->mask; - if (hc.action != ENGINE_DEAD) - stuck |= engine->mask; - } - - if (hc.wedged) - wedged |= engine->mask; - } - - if (GEM_SHOW_DEBUG() && (hung | stuck)) { - struct drm_printer p = drm_debug_printer("hangcheck"); - - for_each_engine(engine, gt, id) { - if (intel_engine_is_idle(engine)) - continue; - - intel_engine_dump(engine, &p, "%s\n", engine->name); - } - } - - if (wedged) { - dev_err(gt->i915->drm.dev, - "GPU recovery timed out," - " cancelling all in-flight rendering.\n"); - GEM_TRACE_DUMP(); - intel_gt_set_wedged(gt); - } - - if (hung) - hangcheck_declare_hang(gt, hung, stuck); - - intel_runtime_pm_put(gt->uncore->rpm, wakeref); - - /* Reset timer in case GPU hangs without another request being added */ - intel_gt_queue_hangcheck(gt); -} - -void intel_gt_queue_hangcheck(struct intel_gt *gt) -{ - unsigned long delay; - - if (unlikely(!i915_modparams.enable_hangcheck)) - return; - - /* - * Don't continually defer the hangcheck so that it is always run at - * least once after work has been scheduled on any ring. Otherwise, - * we will ignore a hung ring if a second ring is kept busy. - */ - - delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES); - queue_delayed_work(system_long_wq, >->hangcheck.work, delay); -} - -void intel_engine_init_hangcheck(struct intel_engine_cs *engine) -{ - memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); - engine->hangcheck.action_timestamp = jiffies; -} - -void intel_gt_init_hangcheck(struct intel_gt *gt) -{ - INIT_DELAYED_WORK(>->hangcheck.work, hangcheck_elapsed); -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftest_hangcheck.c" -#endif diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index bf8d1ed4b1d8..f03e000051c1 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1024,8 +1024,6 @@ void intel_gt_reset(struct intel_gt *gt, if (ret) goto taint; - intel_gt_queue_hangcheck(gt); - finish: reset_finish(gt, awake); unlock: @@ -1353,4 +1351,5 @@ void __intel_fini_wedge(struct intel_wedge_me *w) #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_reset.c" +#include "selftest_hangcheck.c" #endif diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index 5af27c37b65b..9fb5d30c43bb 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -4,6 +4,8 @@ * Copyright © 2018 Intel Corporation */ +#include + #include "i915_drv.h" #include "intel_gt_requests.h" @@ -150,11 +152,180 @@ static int live_idle_pulse(void *arg) return err; } +static int cmp_u32(const void *_a, const void *_b) +{ + const u32 *a = _a, *b = _b; + + return *a - *b; +} + +static int __live_heartbeat_fast(struct intel_engine_cs *engine) +{ + struct intel_context *ce; + struct i915_request *rq; + ktime_t t0, t1; + u32 times[5]; + int err; + int i; + + ce = intel_context_create(engine->kernel_context->gem_context, + engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + intel_engine_pm_get(engine); + + err = intel_engine_set_heartbeat(engine, 1); + if (err) + goto err_pm; + + for (i = 0; i < ARRAY_SIZE(times); i++) { + /* Manufacture a tick */ + do { + while (READ_ONCE(engine->heartbeat.systole)) + flush_delayed_work(&engine->heartbeat.work); + + engine->serial++; /* quick, pretend we are not idle! */ + flush_delayed_work(&engine->heartbeat.work); + if (!delayed_work_pending(&engine->heartbeat.work)) { + pr_err("%s: heartbeat did not start\n", + engine->name); + err = -EINVAL; + goto err_pm; + } + + rcu_read_lock(); + rq = READ_ONCE(engine->heartbeat.systole); + if (rq) + rq = i915_request_get_rcu(rq); + rcu_read_unlock(); + } while (!rq); + + t0 = ktime_get(); + while (rq == READ_ONCE(engine->heartbeat.systole)) + yield(); /* work is on the local cpu! */ + t1 = ktime_get(); + + i915_request_put(rq); + times[i] = ktime_us_delta(t1, t0); + } + + sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL); + + pr_info("%s: Heartbeat delay: %uus [%u, %u]\n", + engine->name, + times[ARRAY_SIZE(times) / 2], + times[0], + times[ARRAY_SIZE(times) - 1]); + + /* Min work delay is 2 * 2 (worst), +1 for scheduling, +1 for slack */ + if (times[ARRAY_SIZE(times) / 2] > jiffies_to_usecs(6)) { + pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n", + engine->name, + times[ARRAY_SIZE(times) / 2], + jiffies_to_usecs(6)); + err = -EINVAL; + } + + intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL); +err_pm: + intel_engine_pm_put(engine); + intel_context_put(ce); + return err; +} + +static int live_heartbeat_fast(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err; + + /* Check that the heartbeat ticks at the desired rate. */ + if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) + return 0; + + for_each_engine(engine, gt, id) { + err = __live_heartbeat_fast(engine); + if (err) + break; + } + + return err; +} + +static int __live_heartbeat_off(struct intel_engine_cs *engine) +{ + int err; + + intel_engine_pm_get(engine); + + engine->serial++; + flush_delayed_work(&engine->heartbeat.work); + if (!delayed_work_pending(&engine->heartbeat.work)) { + pr_err("%s: heartbeat not running\n", + engine->name); + err = -EINVAL; + goto err_pm; + } + + err = intel_engine_set_heartbeat(engine, 0); + if (err) + goto err_pm; + + engine->serial++; + flush_delayed_work(&engine->heartbeat.work); + if (delayed_work_pending(&engine->heartbeat.work)) { + pr_err("%s: heartbeat still running\n", + engine->name); + err = -EINVAL; + goto err_beat; + } + + if (READ_ONCE(engine->heartbeat.systole)) { + pr_err("%s: heartbeat still allocated\n", + engine->name); + err = -EINVAL; + goto err_beat; + } + +err_beat: + intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL); +err_pm: + intel_engine_pm_put(engine); + return err; +} + +static int live_heartbeat_off(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err; + + /* Check that we can turn off heartbeat and not interrupt VIP */ + if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) + return 0; + + for_each_engine(engine, gt, id) { + if (!intel_engine_has_preemption(engine)) + continue; + + err = __live_heartbeat_off(engine); + if (err) + break; + } + + return err; +} + int intel_heartbeat_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_idle_flush), SUBTEST(live_idle_pulse), + SUBTEST(live_heartbeat_fast), + SUBTEST(live_heartbeat_off), }; int saved_hangcheck; int err; diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 8e0016464325..b892b47348ab 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -1686,7 +1686,6 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) }; struct intel_gt *gt = &i915->gt; intel_wakeref_t wakeref; - bool saved_hangcheck; int err; if (!intel_has_gpu_reset(gt)) @@ -1696,12 +1695,9 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) return -EIO; /* we're long past hope of a successful reset */ wakeref = intel_runtime_pm_get(gt->uncore->rpm); - saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck); - drain_delayed_work(>->hangcheck.work); /* flush param */ err = intel_gt_live_subtests(tests, gt); - i915_modparams.enable_hangcheck = saved_hangcheck; intel_runtime_pm_put(gt->uncore->rpm, wakeref); return err; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index ede85b2f8a4b..b3f78e6495fd 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1011,92 +1011,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused) return ret; } -static void i915_instdone_info(struct drm_i915_private *dev_priv, - struct seq_file *m, - struct intel_instdone *instdone) -{ - const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; - int slice; - int subslice; - - seq_printf(m, "\t\tINSTDONE: 0x%08x\n", - instdone->instdone); - - if (INTEL_GEN(dev_priv) <= 3) - return; - - seq_printf(m, "\t\tSC_INSTDONE: 0x%08x\n", - instdone->slice_common); - - if (INTEL_GEN(dev_priv) <= 6) - return; - - for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice) - seq_printf(m, "\t\tSAMPLER_INSTDONE[%d][%d]: 0x%08x\n", - slice, subslice, instdone->sampler[slice][subslice]); - - for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice) - seq_printf(m, "\t\tROW_INSTDONE[%d][%d]: 0x%08x\n", - slice, subslice, instdone->row[slice][subslice]); -} - -static int i915_hangcheck_info(struct seq_file *m, void *unused) -{ - struct drm_i915_private *i915 = node_to_i915(m->private); - struct intel_gt *gt = &i915->gt; - struct intel_engine_cs *engine; - intel_wakeref_t wakeref; - enum intel_engine_id id; - - seq_printf(m, "Reset flags: %lx\n", gt->reset.flags); - if (test_bit(I915_WEDGED, >->reset.flags)) - seq_puts(m, "\tWedged\n"); - if (test_bit(I915_RESET_BACKOFF, >->reset.flags)) - seq_puts(m, "\tDevice (global) reset in progress\n"); - - if (!i915_modparams.enable_hangcheck) { - seq_puts(m, "Hangcheck disabled\n"); - return 0; - } - - if (timer_pending(>->hangcheck.work.timer)) - seq_printf(m, "Hangcheck active, timer fires in %dms\n", - jiffies_to_msecs(gt->hangcheck.work.timer.expires - - jiffies)); - else if (delayed_work_pending(>->hangcheck.work)) - seq_puts(m, "Hangcheck active, work pending\n"); - else - seq_puts(m, "Hangcheck inactive\n"); - - seq_printf(m, "GT active? %s\n", yesno(gt->awake)); - - with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - for_each_engine(engine, i915, id) { - struct intel_instdone instdone; - - seq_printf(m, "%s: %d ms ago\n", - engine->name, - jiffies_to_msecs(jiffies - - engine->hangcheck.action_timestamp)); - - seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", - (long long)engine->hangcheck.acthd, - intel_engine_get_active_head(engine)); - - intel_engine_get_instdone(engine, &instdone); - - seq_puts(m, "\tinstdone read =\n"); - i915_instdone_info(i915, m, &instdone); - - seq_puts(m, "\tinstdone accu =\n"); - i915_instdone_info(i915, m, - &engine->hangcheck.instdone); - } - } - - return 0; -} - static int ironlake_drpc_info(struct seq_file *m) { struct drm_i915_private *i915 = node_to_i915(m->private); @@ -4363,7 +4277,6 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_guc_stage_pool", i915_guc_stage_pool, 0}, {"i915_huc_load_status", i915_huc_load_status_info, 0}, {"i915_frequency_info", i915_frequency_info, 0}, - {"i915_hangcheck_info", i915_hangcheck_info, 0}, {"i915_drpc_info", i915_drpc_info, 0}, {"i915_ring_freq_table", i915_ring_freq_table, 0}, {"i915_frontbuffer_tracking", i915_frontbuffer_tracking, 0}, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 5138d1eed306..24a3988281d2 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1548,10 +1548,7 @@ void i915_driver_remove(struct drm_i915_private *i915) i915_driver_modeset_remove(i915); - /* Free error state after interrupts are fully disabled. */ - cancel_delayed_work_sync(&i915->gt.hangcheck.work); i915_reset_error_state(i915); - i915_gem_driver_remove(i915); intel_power_domains_driver_remove(i915); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8882c0908c3b..674e9e921839 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1846,7 +1846,6 @@ void i915_driver_remove(struct drm_i915_private *i915); int i915_resume_switcheroo(struct drm_i915_private *i915); int i915_suspend_switcheroo(struct drm_i915_private *i915, pm_message_t state); -void intel_engine_init_hangcheck(struct intel_engine_cs *engine); int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on); static inline bool intel_gvt_active(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 5cf4eed5add8..47239df653f2 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -534,10 +534,6 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, } err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head); err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail); - err_printf(m, " hangcheck timestamp: %dms (%lu%s)\n", - jiffies_to_msecs(ee->hangcheck_timestamp - epoch), - ee->hangcheck_timestamp, - ee->hangcheck_timestamp == epoch ? "; epoch" : ""); err_printf(m, " engine reset count: %u\n", ee->reset_count); for (n = 0; n < ee->num_ports; n++) { @@ -679,11 +675,8 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, ts = ktime_to_timespec64(error->uptime); err_printf(m, "Uptime: %lld s %ld us\n", (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC); - err_printf(m, "Epoch: %lu jiffies (%u HZ)\n", error->epoch, HZ); - err_printf(m, "Capture: %lu jiffies; %d ms ago, %d ms after epoch\n", - error->capture, - jiffies_to_msecs(jiffies - error->capture), - jiffies_to_msecs(error->capture - error->epoch)); + err_printf(m, "Capture: %lu jiffies; %d ms ago\n", + error->capture, jiffies_to_msecs(jiffies - error->capture)); for (ee = error->engine; ee; ee = ee->next) err_printf(m, "Active process (on ring %s): %s [%d]\n", @@ -742,7 +735,7 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, err_printf(m, "GTT_CACHE_EN: 0x%08x\n", error->gtt_cache); for (ee = error->engine; ee; ee = ee->next) - error_print_engine(m, ee, error->epoch); + error_print_engine(m, ee, error->capture); for (ee = error->engine; ee; ee = ee->next) { const struct drm_i915_error_object *obj; @@ -770,7 +763,7 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, for (j = 0; j < ee->num_requests; j++) error_print_request(m, " ", &ee->requests[j], - error->epoch); + error->capture); } print_error_obj(m, ee->engine, "ringbuffer", ee->ringbuffer); @@ -1144,8 +1137,6 @@ static void error_record_engine_registers(struct i915_gpu_state *error, } ee->idle = intel_engine_is_idle(engine); - if (!ee->idle) - ee->hangcheck_timestamp = engine->hangcheck.action_timestamp; ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error, engine); @@ -1657,20 +1648,6 @@ static void capture_params(struct i915_gpu_state *error) i915_params_copy(&error->params, &i915_modparams); } -static unsigned long capture_find_epoch(const struct i915_gpu_state *error) -{ - const struct drm_i915_error_engine *ee; - unsigned long epoch = error->capture; - - for (ee = error->engine; ee; ee = ee->next) { - if (ee->hangcheck_timestamp && - time_before(ee->hangcheck_timestamp, epoch)) - epoch = ee->hangcheck_timestamp; - } - - return epoch; -} - static void capture_finish(struct i915_gpu_state *error) { struct i915_ggtt *ggtt = &error->i915->ggtt; @@ -1722,8 +1699,6 @@ i915_capture_gpu_state(struct drm_i915_private *i915) error->overlay = intel_overlay_capture_error_state(i915); error->display = intel_display_capture_error_state(i915); - error->epoch = capture_find_epoch(error); - capture_finish(error); compress_fini(&compress); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 7f1cd0b1fef7..4dc36d6ee3a2 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -34,7 +34,6 @@ struct i915_gpu_state { ktime_t boottime; ktime_t uptime; unsigned long capture; - unsigned long epoch; struct drm_i915_private *i915; @@ -86,7 +85,6 @@ struct i915_gpu_state { /* Software tracked state */ bool idle; - unsigned long hangcheck_timestamp; int num_requests; u32 reset_count; diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h index ae8bb3cb627e..732aad148881 100644 --- a/drivers/gpu/drm/i915/i915_priolist_types.h +++ b/drivers/gpu/drm/i915/i915_priolist_types.h @@ -16,6 +16,12 @@ enum { I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1, I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, + + /* A preemptive pulse used to monitor the health of each engine */ + I915_PRIORITY_HEARTBEAT, + + /* Interactive workload, scheduled for immediate pageflipping */ + I915_PRIORITY_DISPLAY, }; #define I915_USER_PRIORITY_SHIFT 2 From 8eb8e322ec07392e8c8008437216c38c310ff6c7 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Wed, 23 Oct 2019 17:43:27 +0800 Subject: [PATCH 038/154] drm/i915/gvt: fix dead locking in early workload shadow As early workload scan and shadow happens in execlist mmio handler, which has already taken vgpu_lock. So remove extra lock taking here. Fixes: 952f89f098c7 ("drm/i915/gvt: Wean off struct_mutex") Cc: Chris Wilson Signed-off-by: Zhenyu Wang Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gvt/scheduler.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 36bb7639e82f..a5b942ee3ceb 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -1584,9 +1584,7 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, */ if (list_empty(workload_q_head(vgpu, ring_id))) { intel_runtime_pm_get(&dev_priv->runtime_pm); - mutex_lock(&vgpu->vgpu_lock); ret = intel_gvt_scan_and_shadow_workload(workload); - mutex_unlock(&vgpu->vgpu_lock); intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm); } From 93100fdeb4de5b13a7f9113ede93cd062ba779f1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 24 Oct 2019 00:24:43 +0100 Subject: [PATCH 039/154] drm/i915/selftests: Flush interrupts before disabling tasklets When setting up the system to perform the atomic reset, we need to serialise with any ongoing interrupt tasklet or else: <0> [472.951428] i915_sel-4442 0d..1 466527056us : __i915_request_submit: rcs0 fence 11659:2, current 0 <0> [472.951554] i915_sel-4442 0d..1 466527059us : __execlists_submission_tasklet: rcs0: queue_priority_hint:-2147483648, submit:yes <0> [472.951681] i915_sel-4442 0d..1 466527061us : trace_ports: rcs0: submit { 11659:2, 0:0 } <0> [472.951805] i915_sel-4442 0.... 466527114us : __igt_atomic_reset_engine: i915_reset_engine(rcs0:active) under hardirq <0> [472.951932] i915_sel-4442 0d... 466527115us : intel_engine_reset: rcs0 flags=11d <0> [472.952056] i915_sel-4442 0d... 466527117us : execlists_reset_prepare: rcs0: depth<-1 <0> [472.952179] i915_sel-4442 0d... 466527119us : intel_engine_stop_cs: rcs0 <0> [472.952305] -0 1..s1 466527119us : process_csb: rcs0 cs-irq head=3, tail=4 <0> [472.952431] i915_sel-4442 0d... 466527122us : __intel_gt_reset: engine_mask=1 <0> [472.952557] -0 1..s1 466527124us : process_csb: rcs0 csb[4]: status=0x00000001:0x00000000 <0> [472.952683] -0 1..s1 466527130us : trace_ports: rcs0: promote { 11659:2*, 0:0 } <0> [472.952808] i915_sel-4442 0d... 466527131us : execlists_reset: rcs0 <0> [472.952933] i915_sel-4442 0d..1 466527133us : process_csb: rcs0 cs-irq head=3, tail=4 <0> [472.953059] i915_sel-4442 0d..1 466527134us : process_csb: rcs0 csb[4]: status=0x00000001:0x00000000 <0> [472.953185] i915_sel-4442 0d..1 466527136us : trace_ports: rcs0: preempted { 11659:2*, 0:0 } <0> [472.953310] i915_sel-4442 0d..1 466527150us : assert_pending_valid: Nothing pending for promotion! <0> [472.953436] i915_sel-4442 0d..1 466527158us : process_csb: process_csb:1930 GEM_BUG_ON(!assert_pending_valid(execlists, "promote")) We have the same CSB events being seen by process_csb() on two different processors. One being issued by the reset in the test, the other by the interrupt; this scenario is supposed to be prevented by flushing the interrupt tasklet with tasklet_disable() before we enter the atomic reset. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=112069 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20191023232443.17450-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 2 +- drivers/gpu/drm/i915/gt/selftest_reset.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index b892b47348ab..ba761fcf397b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -1563,7 +1563,7 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, GEM_TRACE("i915_reset_engine(%s:%s) under %s\n", engine->name, mode, p->name); - tasklet_disable_nosync(t); + tasklet_disable(t); p->critical_section_begin(); err = intel_engine_reset(engine, NULL); diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 6efb9221b7fa..6ad6aca315f6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -126,7 +126,7 @@ static int igt_atomic_engine_reset(void *arg) goto out_unlock; for_each_engine(engine, gt, id) { - tasklet_disable_nosync(&engine->execlists.tasklet); + tasklet_disable(&engine->execlists.tasklet); intel_engine_pm_get(engine); for (p = igt_atomic_phases; p->name; p++) { From 7f47211e73e93812def71004f0f81d6c3e7a0da6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 24 Oct 2019 00:53:59 +0100 Subject: [PATCH 040/154] drm/i915/selftests: Flush any i915_active callback work as well Make trebly sure that all possible callbacks and their delayed brethren are complete before asserting that the i915_active should be idle after flushing all barriers. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20191023235359.27132-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index 9fb5d30c43bb..768f032e6578 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -55,6 +55,7 @@ static void pulse_unlock_wait(struct pulse *p) { mutex_lock(&p->active.mutex); mutex_unlock(&p->active.mutex); + flush_work(&p->active.work); } static int __live_idle_pulse(struct intel_engine_cs *engine, From 2c9a49150d90147a098ba0b39ced89d155c32820 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 24 Oct 2019 10:34:40 +0100 Subject: [PATCH 041/154] drm/i915: Convert PAT setup to uncore mmio One more thing which relied on implicit dev_priv can be covnerted to use the new mmio accessors. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191024093440.32280-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 92 ++++++++++++++++++----------- 1 file changed, 56 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 3148d5946b63..3d3a8db18a07 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2922,35 +2922,51 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) return 0; } -static void tgl_setup_private_ppat(struct drm_i915_private *dev_priv) +static void tgl_setup_private_ppat(struct intel_uncore *uncore) { /* TGL doesn't support LLC or AGE settings */ - I915_WRITE(GEN12_PAT_INDEX(0), GEN8_PPAT_WB); - I915_WRITE(GEN12_PAT_INDEX(1), GEN8_PPAT_WC); - I915_WRITE(GEN12_PAT_INDEX(2), GEN8_PPAT_WT); - I915_WRITE(GEN12_PAT_INDEX(3), GEN8_PPAT_UC); - I915_WRITE(GEN12_PAT_INDEX(4), GEN8_PPAT_WB); - I915_WRITE(GEN12_PAT_INDEX(5), GEN8_PPAT_WB); - I915_WRITE(GEN12_PAT_INDEX(6), GEN8_PPAT_WB); - I915_WRITE(GEN12_PAT_INDEX(7), GEN8_PPAT_WB); + intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB); + intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC); + intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT); + intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC); + intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB); + intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB); + intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB); + intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB); } -static void cnl_setup_private_ppat(struct drm_i915_private *dev_priv) +static void cnl_setup_private_ppat(struct intel_uncore *uncore) { - I915_WRITE(GEN10_PAT_INDEX(0), GEN8_PPAT_WB | GEN8_PPAT_LLC); - I915_WRITE(GEN10_PAT_INDEX(1), GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); - I915_WRITE(GEN10_PAT_INDEX(2), GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); - I915_WRITE(GEN10_PAT_INDEX(3), GEN8_PPAT_UC); - I915_WRITE(GEN10_PAT_INDEX(4), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)); - I915_WRITE(GEN10_PAT_INDEX(5), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)); - I915_WRITE(GEN10_PAT_INDEX(6), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)); - I915_WRITE(GEN10_PAT_INDEX(7), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(0), + GEN8_PPAT_WB | GEN8_PPAT_LLC); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(1), + GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(2), + GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(3), + GEN8_PPAT_UC); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(4), + GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(5), + GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(6), + GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)); + intel_uncore_write(uncore, + GEN10_PAT_INDEX(7), + GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); } /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability * bits. When using advanced contexts each context stores its own PAT, but * writing this data shouldn't be harmful even in those cases. */ -static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) +static void bdw_setup_private_ppat(struct intel_uncore *uncore) { u64 pat; @@ -2963,11 +2979,11 @@ static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); - I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat)); - I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat)); + intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat)); + intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat)); } -static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) +static void chv_setup_private_ppat(struct intel_uncore *uncore) { u64 pat; @@ -2999,8 +3015,8 @@ static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) GEN8_PPAT(6, CHV_PPAT_SNOOP) | GEN8_PPAT(7, CHV_PPAT_SNOOP); - I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat)); - I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat)); + intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat)); + intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat)); } static void gen6_gmch_remove(struct i915_address_space *vm) @@ -3011,18 +3027,20 @@ static void gen6_gmch_remove(struct i915_address_space *vm) cleanup_scratch_page(vm); } -static void setup_private_pat(struct drm_i915_private *dev_priv) +static void setup_private_pat(struct intel_uncore *uncore) { - GEM_BUG_ON(INTEL_GEN(dev_priv) < 8); + struct drm_i915_private *i915 = uncore->i915; - if (INTEL_GEN(dev_priv) >= 12) - tgl_setup_private_ppat(dev_priv); - else if (INTEL_GEN(dev_priv) >= 10) - cnl_setup_private_ppat(dev_priv); - else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv)) - chv_setup_private_ppat(dev_priv); + GEM_BUG_ON(INTEL_GEN(i915) < 8); + + if (INTEL_GEN(i915) >= 12) + tgl_setup_private_ppat(uncore); + else if (INTEL_GEN(i915) >= 10) + cnl_setup_private_ppat(uncore); + else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915)) + chv_setup_private_ppat(uncore); else - bdw_setup_private_ppat(dev_priv); + bdw_setup_private_ppat(uncore); } static int gen8_gmch_probe(struct i915_ggtt *ggtt) @@ -3078,7 +3096,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.pte_encode = gen8_pte_encode; - setup_private_pat(dev_priv); + setup_private_pat(ggtt->vm.gt->uncore); return ggtt_probe_common(ggtt, size); } @@ -3382,10 +3400,12 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt) void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915) { - ggtt_restore_mappings(&i915->ggtt); + struct i915_ggtt *ggtt = &i915->ggtt; + + ggtt_restore_mappings(ggtt); if (INTEL_GEN(i915) >= 8) - setup_private_pat(i915); + setup_private_pat(ggtt->vm.gt->uncore); } static struct scatterlist * From 2871ea85c119e6fb1127b30f0061436b285d3a2c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 24 Oct 2019 11:03:44 +0100 Subject: [PATCH 042/154] drm/i915/gt: Split intel_ring_submission Split the legacy submission backend from the common CS ring buffer handling. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20191024100344.5041-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 5 +- drivers/gpu/drm/i915/display/intel_overlay.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_context.c | 3 +- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 1 + .../gpu/drm/i915/gem/i915_gem_object_blt.c | 1 + .../i915/gem/selftests/i915_gem_coherency.c | 1 + drivers/gpu/drm/i915/gt/intel_context.c | 1 + drivers/gpu/drm/i915/gt/intel_context.h | 1 + drivers/gpu/drm/i915/gt/intel_engine.h | 114 ------- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 1 + drivers/gpu/drm/i915/gt/intel_engine_pm.c | 1 + drivers/gpu/drm/i915/gt/intel_engine_types.h | 27 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 1 + drivers/gpu/drm/i915/gt/intel_mocs.c | 1 + drivers/gpu/drm/i915/gt/intel_renderstate.c | 1 + drivers/gpu/drm/i915/gt/intel_ring.c | 321 ++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_ring.h | 131 +++++++ ...l_ringbuffer.c => intel_ring_submission.c} | 309 +---------------- drivers/gpu/drm/i915/gt/intel_ring_types.h | 51 +++ drivers/gpu/drm/i915/gt/intel_timeline.c | 6 +- drivers/gpu/drm/i915/gt/intel_workarounds.c | 1 + drivers/gpu/drm/i915/gt/mock_engine.c | 1 + drivers/gpu/drm/i915/gt/selftest_timeline.c | 1 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 3 +- drivers/gpu/drm/i915/gvt/cmd_parser.c | 2 + drivers/gpu/drm/i915/gvt/mmio_context.c | 1 + drivers/gpu/drm/i915/gvt/scheduler.c | 1 + drivers/gpu/drm/i915/i915_active.c | 1 + drivers/gpu/drm/i915/i915_perf.c | 1 + drivers/gpu/drm/i915/i915_request.c | 1 + 30 files changed, 536 insertions(+), 455 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_ring.c create mode 100644 drivers/gpu/drm/i915/gt/intel_ring.h rename drivers/gpu/drm/i915/gt/{intel_ringbuffer.c => intel_ring_submission.c} (88%) create mode 100644 drivers/gpu/drm/i915/gt/intel_ring_types.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 21601bb27b22..9bf1c0b20370 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -89,11 +89,12 @@ gt-y += \ gt/intel_gt_requests.o \ gt/intel_llc.o \ gt/intel_lrc.o \ + gt/intel_mocs.o \ gt/intel_rc6.o \ gt/intel_renderstate.o \ gt/intel_reset.o \ - gt/intel_ringbuffer.o \ - gt/intel_mocs.o \ + gt/intel_ring.o \ + gt/intel_ring_submission.o \ gt/intel_sseu.o \ gt/intel_timeline.o \ gt/intel_workarounds.o diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 2360f19f9694..848ce07a8ec2 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -30,6 +30,7 @@ #include #include "gem/i915_gem_pm.h" +#include "gt/intel_ring.h" #include "i915_drv.h" #include "i915_reg.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index b2f042d87be0..55f1f93c0925 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -69,9 +69,10 @@ #include -#include "gt/intel_lrc_reg.h" #include "gt/intel_engine_heartbeat.h" #include "gt/intel_engine_user.h" +#include "gt/intel_lrc_reg.h" +#include "gt/intel_ring.h" #include "i915_gem_context.h" #include "i915_globals.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index e96901888323..e4f5c269150a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -19,6 +19,7 @@ #include "gt/intel_engine_pool.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_ring.h" #include "i915_drv.h" #include "i915_gem_clflush.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index 5bd8de124d74..516e61e99212 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -8,6 +8,7 @@ #include "gt/intel_engine_pm.h" #include "gt/intel_engine_pool.h" #include "gt/intel_gt.h" +#include "gt/intel_ring.h" #include "i915_gem_clflush.h" #include "i915_gem_object_blt.h" diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 549810f70aeb..0877ef4dff63 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -8,6 +8,7 @@ #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_ring.h" #include "i915_selftest.h" #include "selftests/i915_random.h" diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 59c3083c1ec1..ee9d2bcd2c13 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -13,6 +13,7 @@ #include "intel_context.h" #include "intel_engine.h" #include "intel_engine_pm.h" +#include "intel_ring.h" static struct i915_global_context { struct i915_global base; diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index dd742ac2fbdb..68b3d317d959 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -12,6 +12,7 @@ #include "i915_active.h" #include "intel_context_types.h" #include "intel_engine_types.h" +#include "intel_ring_types.h" #include "intel_timeline_types.h" void intel_context_init(struct intel_context *ce, diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index ee47444a6ad4..97bbdd9773c9 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -19,7 +19,6 @@ #include "intel_workarounds.h" struct drm_printer; - struct intel_gt; /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, @@ -176,122 +175,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define I915_HWS_CSB_WRITE_INDEX 0x1f #define CNL_HWS_CSB_WRITE_INDEX 0x2f -struct intel_ring * -intel_engine_create_ring(struct intel_engine_cs *engine, int size); -int intel_ring_pin(struct intel_ring *ring); -void intel_ring_reset(struct intel_ring *ring, u32 tail); -unsigned int intel_ring_update_space(struct intel_ring *ring); -void intel_ring_unpin(struct intel_ring *ring); -void intel_ring_free(struct kref *ref); - -static inline struct intel_ring *intel_ring_get(struct intel_ring *ring) -{ - kref_get(&ring->ref); - return ring; -} - -static inline void intel_ring_put(struct intel_ring *ring) -{ - kref_put(&ring->ref, intel_ring_free); -} - void intel_engine_stop(struct intel_engine_cs *engine); void intel_engine_cleanup(struct intel_engine_cs *engine); -int __must_check intel_ring_cacheline_align(struct i915_request *rq); - -u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n); - -static inline void intel_ring_advance(struct i915_request *rq, u32 *cs) -{ - /* Dummy function. - * - * This serves as a placeholder in the code so that the reader - * can compare against the preceding intel_ring_begin() and - * check that the number of dwords emitted matches the space - * reserved for the command packet (i.e. the value passed to - * intel_ring_begin()). - */ - GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs); -} - -static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos) -{ - return pos & (ring->size - 1); -} - -static inline bool -intel_ring_offset_valid(const struct intel_ring *ring, - unsigned int pos) -{ - if (pos & -ring->size) /* must be strictly within the ring */ - return false; - - if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */ - return false; - - return true; -} - -static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr) -{ - /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ - u32 offset = addr - rq->ring->vaddr; - GEM_BUG_ON(offset > rq->ring->size); - return intel_ring_wrap(rq->ring, offset); -} - -static inline void -assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) -{ - GEM_BUG_ON(!intel_ring_offset_valid(ring, tail)); - - /* - * "Ring Buffer Use" - * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 - * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5 - * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5 - * "If the Ring Buffer Head Pointer and the Tail Pointer are on the - * same cacheline, the Head Pointer must not be greater than the Tail - * Pointer." - * - * We use ring->head as the last known location of the actual RING_HEAD, - * it may have advanced but in the worst case it is equally the same - * as ring->head and so we should never program RING_TAIL to advance - * into the same cacheline as ring->head. - */ -#define cacheline(a) round_down(a, CACHELINE_BYTES) - GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) && - tail < ring->head); -#undef cacheline -} - -static inline unsigned int -intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) -{ - /* Whilst writes to the tail are strictly order, there is no - * serialisation between readers and the writers. The tail may be - * read by i915_request_retire() just as it is being updated - * by execlists, as although the breadcrumb is complete, the context - * switch hasn't been seen. - */ - assert_ring_tail_valid(ring, tail); - ring->tail = tail; - return tail; -} - -static inline unsigned int -__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) -{ - /* - * "If the Ring Buffer Head Pointer and the Tail Pointer are on the - * same cacheline, the Head Pointer must not be greater than the Tail - * Pointer." - */ - GEM_BUG_ON(!is_power_of_2(size)); - return (head - tail - CACHELINE_BYTES) & (size - 1); -} - int intel_engines_init_mmio(struct intel_gt *gt); int intel_engines_setup(struct intel_gt *gt); int intel_engines_init(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 8b5129e0b49d..9cc1ea6519ec 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -37,6 +37,7 @@ #include "intel_context.h" #include "intel_lrc.h" #include "intel_reset.h" +#include "intel_ring.h" /* Haswell does have the CXT_SIZE register however it does not appear to be * valid. Now, docs explain in dwords what is in the context object. The full diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 6fbfa2162e54..3c0f490ff2c7 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -13,6 +13,7 @@ #include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_rc6.h" +#include "intel_ring.h" static int __engine_unpark(struct intel_wakeref *wf) { diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 26998901feff..e8ea12b96755 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -59,6 +59,7 @@ struct i915_gem_context; struct i915_request; struct i915_sched_attr; struct intel_gt; +struct intel_ring; struct intel_uncore; typedef u8 intel_engine_mask_t; @@ -77,32 +78,6 @@ struct intel_instdone { u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; }; -struct intel_ring { - struct kref ref; - struct i915_vma *vma; - void *vaddr; - - /* - * As we have two types of rings, one global to the engine used - * by ringbuffer submission and those that are exclusive to a - * context used by execlists, we have to play safe and allow - * atomic updates to the pin_count. However, the actual pinning - * of the context is either done during initialisation for - * ringbuffer submission or serialised as part of the context - * pinning for execlists, and so we do not need a mutex ourselves - * to serialise intel_ring_pin/intel_ring_unpin. - */ - atomic_t pin_count; - - u32 head; - u32 tail; - u32 emit; - - u32 space; - u32 size; - u32 effective_size; -}; - /* * we use a single page to load ctx workarounds so all of these * values are referred in terms of dwords diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 651d5dd39464..73eae85a2cc9 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -145,6 +145,7 @@ #include "intel_lrc_reg.h" #include "intel_mocs.h" #include "intel_reset.h" +#include "intel_ring.h" #include "intel_workarounds.h" #define RING_EXECLIST_QFULL (1 << 0x2) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 5bac3966906b..932833e5b712 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -26,6 +26,7 @@ #include "intel_gt.h" #include "intel_mocs.h" #include "intel_lrc.h" +#include "intel_ring.h" /* structures required */ struct drm_i915_mocs_entry { diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c index 6d05f9c64178..c4edc35e7d89 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.c +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c @@ -27,6 +27,7 @@ #include "i915_drv.h" #include "intel_renderstate.h" +#include "intel_ring.h" struct intel_renderstate { const struct intel_renderstate_rodata *rodata; diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c new file mode 100644 index 000000000000..fa01c1407760 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -0,0 +1,321 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "gem/i915_gem_object.h" +#include "i915_drv.h" +#include "i915_vma.h" +#include "intel_engine.h" +#include "intel_ring.h" +#include "intel_timeline.h" + +unsigned int intel_ring_update_space(struct intel_ring *ring) +{ + unsigned int space; + + space = __intel_ring_space(ring->head, ring->emit, ring->size); + + ring->space = space; + return space; +} + +int intel_ring_pin(struct intel_ring *ring) +{ + struct i915_vma *vma = ring->vma; + unsigned int flags; + void *addr; + int ret; + + if (atomic_fetch_inc(&ring->pin_count)) + return 0; + + flags = PIN_GLOBAL; + + /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ + flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); + + if (vma->obj->stolen) + flags |= PIN_MAPPABLE; + else + flags |= PIN_HIGH; + + ret = i915_vma_pin(vma, 0, 0, flags); + if (unlikely(ret)) + goto err_unpin; + + if (i915_vma_is_map_and_fenceable(vma)) + addr = (void __force *)i915_vma_pin_iomap(vma); + else + addr = i915_gem_object_pin_map(vma->obj, + i915_coherent_map_type(vma->vm->i915)); + if (IS_ERR(addr)) { + ret = PTR_ERR(addr); + goto err_ring; + } + + i915_vma_make_unshrinkable(vma); + + GEM_BUG_ON(ring->vaddr); + ring->vaddr = addr; + + return 0; + +err_ring: + i915_vma_unpin(vma); +err_unpin: + atomic_dec(&ring->pin_count); + return ret; +} + +void intel_ring_reset(struct intel_ring *ring, u32 tail) +{ + tail = intel_ring_wrap(ring, tail); + ring->tail = tail; + ring->head = tail; + ring->emit = tail; + intel_ring_update_space(ring); +} + +void intel_ring_unpin(struct intel_ring *ring) +{ + struct i915_vma *vma = ring->vma; + + if (!atomic_dec_and_test(&ring->pin_count)) + return; + + /* Discard any unused bytes beyond that submitted to hw. */ + intel_ring_reset(ring, ring->emit); + + i915_vma_unset_ggtt_write(vma); + if (i915_vma_is_map_and_fenceable(vma)) + i915_vma_unpin_iomap(vma); + else + i915_gem_object_unpin_map(vma->obj); + + GEM_BUG_ON(!ring->vaddr); + ring->vaddr = NULL; + + i915_vma_unpin(vma); + i915_vma_make_purgeable(vma); +} + +static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) +{ + struct i915_address_space *vm = &ggtt->vm; + struct drm_i915_private *i915 = vm->i915; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + obj = i915_gem_object_create_stolen(i915, size); + if (IS_ERR(obj)) + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + /* + * Mark ring buffers as read-only from GPU side (so no stray overwrites) + * if supported by the platform's GGTT. + */ + if (vm->has_read_only) + i915_gem_object_set_readonly(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return vma; +} + +struct intel_ring * +intel_engine_create_ring(struct intel_engine_cs *engine, int size) +{ + struct drm_i915_private *i915 = engine->i915; + struct intel_ring *ring; + struct i915_vma *vma; + + GEM_BUG_ON(!is_power_of_2(size)); + GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); + + ring = kzalloc(sizeof(*ring), GFP_KERNEL); + if (!ring) + return ERR_PTR(-ENOMEM); + + kref_init(&ring->ref); + ring->size = size; + + /* + * Workaround an erratum on the i830 which causes a hang if + * the TAIL pointer points to within the last 2 cachelines + * of the buffer. + */ + ring->effective_size = size; + if (IS_I830(i915) || IS_I845G(i915)) + ring->effective_size -= 2 * CACHELINE_BYTES; + + intel_ring_update_space(ring); + + vma = create_ring_vma(engine->gt->ggtt, size); + if (IS_ERR(vma)) { + kfree(ring); + return ERR_CAST(vma); + } + ring->vma = vma; + + return ring; +} + +void intel_ring_free(struct kref *ref) +{ + struct intel_ring *ring = container_of(ref, typeof(*ring), ref); + + i915_vma_put(ring->vma); + kfree(ring); +} + +static noinline int +wait_for_space(struct intel_ring *ring, + struct intel_timeline *tl, + unsigned int bytes) +{ + struct i915_request *target; + long timeout; + + if (intel_ring_update_space(ring) >= bytes) + return 0; + + GEM_BUG_ON(list_empty(&tl->requests)); + list_for_each_entry(target, &tl->requests, link) { + if (target->ring != ring) + continue; + + /* Would completion of this request free enough space? */ + if (bytes <= __intel_ring_space(target->postfix, + ring->emit, ring->size)) + break; + } + + if (GEM_WARN_ON(&target->link == &tl->requests)) + return -ENOSPC; + + timeout = i915_request_wait(target, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) + return timeout; + + i915_request_retire_upto(target); + + intel_ring_update_space(ring); + GEM_BUG_ON(ring->space < bytes); + return 0; +} + +u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) +{ + struct intel_ring *ring = rq->ring; + const unsigned int remain_usable = ring->effective_size - ring->emit; + const unsigned int bytes = num_dwords * sizeof(u32); + unsigned int need_wrap = 0; + unsigned int total_bytes; + u32 *cs; + + /* Packets must be qword aligned. */ + GEM_BUG_ON(num_dwords & 1); + + total_bytes = bytes + rq->reserved_space; + GEM_BUG_ON(total_bytes > ring->effective_size); + + if (unlikely(total_bytes > remain_usable)) { + const int remain_actual = ring->size - ring->emit; + + if (bytes > remain_usable) { + /* + * Not enough space for the basic request. So need to + * flush out the remainder and then wait for + * base + reserved. + */ + total_bytes += remain_actual; + need_wrap = remain_actual | 1; + } else { + /* + * The base request will fit but the reserved space + * falls off the end. So we don't need an immediate + * wrap and only need to effectively wait for the + * reserved size from the start of ringbuffer. + */ + total_bytes = rq->reserved_space + remain_actual; + } + } + + if (unlikely(total_bytes > ring->space)) { + int ret; + + /* + * Space is reserved in the ringbuffer for finalising the + * request, as that cannot be allowed to fail. During request + * finalisation, reserved_space is set to 0 to stop the + * overallocation and the assumption is that then we never need + * to wait (which has the risk of failing with EINTR). + * + * See also i915_request_alloc() and i915_request_add(). + */ + GEM_BUG_ON(!rq->reserved_space); + + ret = wait_for_space(ring, + i915_request_timeline(rq), + total_bytes); + if (unlikely(ret)) + return ERR_PTR(ret); + } + + if (unlikely(need_wrap)) { + need_wrap &= ~1; + GEM_BUG_ON(need_wrap > ring->space); + GEM_BUG_ON(ring->emit + need_wrap > ring->size); + GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64))); + + /* Fill the tail with MI_NOOP */ + memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64)); + ring->space -= need_wrap; + ring->emit = 0; + } + + GEM_BUG_ON(ring->emit > ring->size - bytes); + GEM_BUG_ON(ring->space < bytes); + cs = ring->vaddr + ring->emit; + GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs))); + ring->emit += bytes; + ring->space -= bytes; + + return cs; +} + +/* Align the ring tail to a cacheline boundary */ +int intel_ring_cacheline_align(struct i915_request *rq) +{ + int num_dwords; + void *cs; + + num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); + if (num_dwords == 0) + return 0; + + num_dwords = CACHELINE_DWORDS - num_dwords; + GEM_BUG_ON(num_dwords & 1); + + cs = intel_ring_begin(rq, num_dwords); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2); + intel_ring_advance(rq, cs + num_dwords); + + GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); + return 0; +} diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h new file mode 100644 index 000000000000..ea2839d9e044 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_ring.h @@ -0,0 +1,131 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RING_H +#define INTEL_RING_H + +#include "i915_gem.h" /* GEM_BUG_ON */ +#include "i915_request.h" +#include "intel_ring_types.h" + +struct intel_engine_cs; + +struct intel_ring * +intel_engine_create_ring(struct intel_engine_cs *engine, int size); + +u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords); +int intel_ring_cacheline_align(struct i915_request *rq); + +unsigned int intel_ring_update_space(struct intel_ring *ring); + +int intel_ring_pin(struct intel_ring *ring); +void intel_ring_unpin(struct intel_ring *ring); +void intel_ring_reset(struct intel_ring *ring, u32 tail); + +void intel_ring_free(struct kref *ref); + +static inline struct intel_ring *intel_ring_get(struct intel_ring *ring) +{ + kref_get(&ring->ref); + return ring; +} + +static inline void intel_ring_put(struct intel_ring *ring) +{ + kref_put(&ring->ref, intel_ring_free); +} + +static inline void intel_ring_advance(struct i915_request *rq, u32 *cs) +{ + /* Dummy function. + * + * This serves as a placeholder in the code so that the reader + * can compare against the preceding intel_ring_begin() and + * check that the number of dwords emitted matches the space + * reserved for the command packet (i.e. the value passed to + * intel_ring_begin()). + */ + GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs); +} + +static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos) +{ + return pos & (ring->size - 1); +} + +static inline bool +intel_ring_offset_valid(const struct intel_ring *ring, + unsigned int pos) +{ + if (pos & -ring->size) /* must be strictly within the ring */ + return false; + + if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */ + return false; + + return true; +} + +static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr) +{ + /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ + u32 offset = addr - rq->ring->vaddr; + GEM_BUG_ON(offset > rq->ring->size); + return intel_ring_wrap(rq->ring, offset); +} + +static inline void +assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) +{ + GEM_BUG_ON(!intel_ring_offset_valid(ring, tail)); + + /* + * "Ring Buffer Use" + * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 + * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5 + * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5 + * "If the Ring Buffer Head Pointer and the Tail Pointer are on the + * same cacheline, the Head Pointer must not be greater than the Tail + * Pointer." + * + * We use ring->head as the last known location of the actual RING_HEAD, + * it may have advanced but in the worst case it is equally the same + * as ring->head and so we should never program RING_TAIL to advance + * into the same cacheline as ring->head. + */ +#define cacheline(a) round_down(a, CACHELINE_BYTES) + GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) && + tail < ring->head); +#undef cacheline +} + +static inline unsigned int +intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) +{ + /* Whilst writes to the tail are strictly order, there is no + * serialisation between readers and the writers. The tail may be + * read by i915_request_retire() just as it is being updated + * by execlists, as although the breadcrumb is complete, the context + * switch hasn't been seen. + */ + assert_ring_tail_valid(ring, tail); + ring->tail = tail; + return tail; +} + +static inline unsigned int +__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) +{ + /* + * "If the Ring Buffer Head Pointer and the Tail Pointer are on the + * same cacheline, the Head Pointer must not be greater than the Tail + * Pointer." + */ + GEM_BUG_ON(!is_power_of_2(size)); + return (head - tail - CACHELINE_BYTES) & (size - 1); +} + +#endif /* INTEL_RING_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c similarity index 88% rename from drivers/gpu/drm/i915/gt/intel_ringbuffer.c rename to drivers/gpu/drm/i915/gt/intel_ring_submission.c index bf631f15aa78..a47d5a7c32c9 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -40,6 +40,7 @@ #include "intel_gt_irq.h" #include "intel_gt_pm_irq.h" #include "intel_reset.h" +#include "intel_ring.h" #include "intel_workarounds.h" /* Rough estimate of the typical request size, performing a flush, @@ -47,16 +48,6 @@ */ #define LEGACY_REQUEST_SIZE 200 -unsigned int intel_ring_update_space(struct intel_ring *ring) -{ - unsigned int space; - - space = __intel_ring_space(ring->head, ring->emit, ring->size); - - ring->space = space; - return space; -} - static int gen2_render_ring_flush(struct i915_request *rq, u32 mode) { @@ -1186,162 +1177,6 @@ i915_emit_bb_start(struct i915_request *rq, return 0; } -int intel_ring_pin(struct intel_ring *ring) -{ - struct i915_vma *vma = ring->vma; - unsigned int flags; - void *addr; - int ret; - - if (atomic_fetch_inc(&ring->pin_count)) - return 0; - - flags = PIN_GLOBAL; - - /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ - flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); - - if (vma->obj->stolen) - flags |= PIN_MAPPABLE; - else - flags |= PIN_HIGH; - - ret = i915_vma_pin(vma, 0, 0, flags); - if (unlikely(ret)) - goto err_unpin; - - if (i915_vma_is_map_and_fenceable(vma)) - addr = (void __force *)i915_vma_pin_iomap(vma); - else - addr = i915_gem_object_pin_map(vma->obj, - i915_coherent_map_type(vma->vm->i915)); - if (IS_ERR(addr)) { - ret = PTR_ERR(addr); - goto err_ring; - } - - i915_vma_make_unshrinkable(vma); - - GEM_BUG_ON(ring->vaddr); - ring->vaddr = addr; - - return 0; - -err_ring: - i915_vma_unpin(vma); -err_unpin: - atomic_dec(&ring->pin_count); - return ret; -} - -void intel_ring_reset(struct intel_ring *ring, u32 tail) -{ - tail = intel_ring_wrap(ring, tail); - ring->tail = tail; - ring->head = tail; - ring->emit = tail; - intel_ring_update_space(ring); -} - -void intel_ring_unpin(struct intel_ring *ring) -{ - struct i915_vma *vma = ring->vma; - - if (!atomic_dec_and_test(&ring->pin_count)) - return; - - /* Discard any unused bytes beyond that submitted to hw. */ - intel_ring_reset(ring, ring->emit); - - i915_vma_unset_ggtt_write(vma); - if (i915_vma_is_map_and_fenceable(vma)) - i915_vma_unpin_iomap(vma); - else - i915_gem_object_unpin_map(vma->obj); - - GEM_BUG_ON(!ring->vaddr); - ring->vaddr = NULL; - - i915_vma_unpin(vma); - i915_vma_make_purgeable(vma); -} - -static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) -{ - struct i915_address_space *vm = &ggtt->vm; - struct drm_i915_private *i915 = vm->i915; - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - - obj = i915_gem_object_create_stolen(i915, size); - if (IS_ERR(obj)) - obj = i915_gem_object_create_internal(i915, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - /* - * Mark ring buffers as read-only from GPU side (so no stray overwrites) - * if supported by the platform's GGTT. - */ - if (vm->has_read_only) - i915_gem_object_set_readonly(obj); - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) - goto err; - - return vma; - -err: - i915_gem_object_put(obj); - return vma; -} - -struct intel_ring * -intel_engine_create_ring(struct intel_engine_cs *engine, int size) -{ - struct drm_i915_private *i915 = engine->i915; - struct intel_ring *ring; - struct i915_vma *vma; - - GEM_BUG_ON(!is_power_of_2(size)); - GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); - - ring = kzalloc(sizeof(*ring), GFP_KERNEL); - if (!ring) - return ERR_PTR(-ENOMEM); - - kref_init(&ring->ref); - - ring->size = size; - /* Workaround an erratum on the i830 which causes a hang if - * the TAIL pointer points to within the last 2 cachelines - * of the buffer. - */ - ring->effective_size = size; - if (IS_I830(i915) || IS_I845G(i915)) - ring->effective_size -= 2 * CACHELINE_BYTES; - - intel_ring_update_space(ring); - - vma = create_ring_vma(engine->gt->ggtt, size); - if (IS_ERR(vma)) { - kfree(ring); - return ERR_CAST(vma); - } - ring->vma = vma; - - return ring; -} - -void intel_ring_free(struct kref *ref) -{ - struct intel_ring *ring = container_of(ref, typeof(*ring), ref); - - i915_vma_put(ring->vma); - kfree(ring); -} - static void __ring_context_fini(struct intel_context *ce) { i915_vma_put(ce->state); @@ -1836,148 +1671,6 @@ static int ring_request_alloc(struct i915_request *request) return 0; } -static noinline int -wait_for_space(struct intel_ring *ring, - struct intel_timeline *tl, - unsigned int bytes) -{ - struct i915_request *target; - long timeout; - - if (intel_ring_update_space(ring) >= bytes) - return 0; - - GEM_BUG_ON(list_empty(&tl->requests)); - list_for_each_entry(target, &tl->requests, link) { - if (target->ring != ring) - continue; - - /* Would completion of this request free enough space? */ - if (bytes <= __intel_ring_space(target->postfix, - ring->emit, ring->size)) - break; - } - - if (GEM_WARN_ON(&target->link == &tl->requests)) - return -ENOSPC; - - timeout = i915_request_wait(target, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (timeout < 0) - return timeout; - - i915_request_retire_upto(target); - - intel_ring_update_space(ring); - GEM_BUG_ON(ring->space < bytes); - return 0; -} - -u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) -{ - struct intel_ring *ring = rq->ring; - const unsigned int remain_usable = ring->effective_size - ring->emit; - const unsigned int bytes = num_dwords * sizeof(u32); - unsigned int need_wrap = 0; - unsigned int total_bytes; - u32 *cs; - - /* Packets must be qword aligned. */ - GEM_BUG_ON(num_dwords & 1); - - total_bytes = bytes + rq->reserved_space; - GEM_BUG_ON(total_bytes > ring->effective_size); - - if (unlikely(total_bytes > remain_usable)) { - const int remain_actual = ring->size - ring->emit; - - if (bytes > remain_usable) { - /* - * Not enough space for the basic request. So need to - * flush out the remainder and then wait for - * base + reserved. - */ - total_bytes += remain_actual; - need_wrap = remain_actual | 1; - } else { - /* - * The base request will fit but the reserved space - * falls off the end. So we don't need an immediate - * wrap and only need to effectively wait for the - * reserved size from the start of ringbuffer. - */ - total_bytes = rq->reserved_space + remain_actual; - } - } - - if (unlikely(total_bytes > ring->space)) { - int ret; - - /* - * Space is reserved in the ringbuffer for finalising the - * request, as that cannot be allowed to fail. During request - * finalisation, reserved_space is set to 0 to stop the - * overallocation and the assumption is that then we never need - * to wait (which has the risk of failing with EINTR). - * - * See also i915_request_alloc() and i915_request_add(). - */ - GEM_BUG_ON(!rq->reserved_space); - - ret = wait_for_space(ring, - i915_request_timeline(rq), - total_bytes); - if (unlikely(ret)) - return ERR_PTR(ret); - } - - if (unlikely(need_wrap)) { - need_wrap &= ~1; - GEM_BUG_ON(need_wrap > ring->space); - GEM_BUG_ON(ring->emit + need_wrap > ring->size); - GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64))); - - /* Fill the tail with MI_NOOP */ - memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64)); - ring->space -= need_wrap; - ring->emit = 0; - } - - GEM_BUG_ON(ring->emit > ring->size - bytes); - GEM_BUG_ON(ring->space < bytes); - cs = ring->vaddr + ring->emit; - GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs))); - ring->emit += bytes; - ring->space -= bytes; - - return cs; -} - -/* Align the ring tail to a cacheline boundary */ -int intel_ring_cacheline_align(struct i915_request *rq) -{ - int num_dwords; - void *cs; - - num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); - if (num_dwords == 0) - return 0; - - num_dwords = CACHELINE_DWORDS - num_dwords; - GEM_BUG_ON(num_dwords & 1); - - cs = intel_ring_begin(rq, num_dwords); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2); - intel_ring_advance(rq, cs); - - GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); - return 0; -} - static void gen6_bsd_submit_request(struct i915_request *request) { struct intel_uncore *uncore = request->engine->uncore; diff --git a/drivers/gpu/drm/i915/gt/intel_ring_types.h b/drivers/gpu/drm/i915/gt/intel_ring_types.h new file mode 100644 index 000000000000..d9f17f38e0cc --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_ring_types.h @@ -0,0 +1,51 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RING_TYPES_H +#define INTEL_RING_TYPES_H + +#include +#include +#include + +/* + * Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, + * but keeps the logic simple. Indeed, the whole purpose of this macro is just + * to give some inclination as to some of the magic values used in the various + * workarounds! + */ +#define CACHELINE_BYTES 64 +#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32)) + +struct i915_vma; + +struct intel_ring { + struct kref ref; + struct i915_vma *vma; + void *vaddr; + + /* + * As we have two types of rings, one global to the engine used + * by ringbuffer submission and those that are exclusive to a + * context used by execlists, we have to play safe and allow + * atomic updates to the pin_count. However, the actual pinning + * of the context is either done during initialisation for + * ringbuffer submission or serialised as part of the context + * pinning for execlists, and so we do not need a mutex ourselves + * to serialise intel_ring_pin/intel_ring_unpin. + */ + atomic_t pin_count; + + u32 head; + u32 tail; + u32 emit; + + u32 space; + u32 size; + u32 effective_size; +}; + +#endif /* INTEL_RING_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 0f959694303c..14ad10acd548 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -4,13 +4,13 @@ * Copyright © 2016-2018 Intel Corporation */ -#include "gt/intel_gt_types.h" - #include "i915_drv.h" #include "i915_active.h" #include "i915_syncmap.h" -#include "gt/intel_timeline.h" +#include "intel_gt.h" +#include "intel_ring.h" +#include "intel_timeline.h" #define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit))) #define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit)) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index af8a8183154a..7cb6dab4399d 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -7,6 +7,7 @@ #include "i915_drv.h" #include "intel_context.h" #include "intel_gt.h" +#include "intel_ring.h" #include "intel_workarounds.h" /** diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 123db2c3f956..83f549d203a0 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -23,6 +23,7 @@ */ #include "gem/i915_gem_context.h" +#include "gt/intel_ring.h" #include "i915_drv.h" #include "intel_context.h" diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index dac86f699a4c..f04a59fe5d2c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -9,6 +9,7 @@ #include "intel_engine_pm.h" #include "intel_gt.h" #include "intel_gt_requests.h" +#include "intel_ring.h" #include "../selftests/i915_random.h" #include "../i915_selftest.h" diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 009e54a3764f..1b1691aaed28 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -6,12 +6,13 @@ #include #include "gem/i915_gem_context.h" - #include "gt/intel_context.h" #include "gt/intel_engine_pm.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" #include "gt/intel_lrc_reg.h" +#include "gt/intel_ring.h" + #include "intel_guc_submission.h" #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index e753b1e706e2..6a3ac8cde95d 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -35,7 +35,9 @@ */ #include + #include "i915_drv.h" +#include "gt/intel_ring.h" #include "gvt.h" #include "i915_pvinfo.h" #include "trace.h" diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 4208e40445b1..aaf15916d29a 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -35,6 +35,7 @@ #include "i915_drv.h" #include "gt/intel_context.h" +#include "gt/intel_ring.h" #include "gvt.h" #include "trace.h" diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index a5b942ee3ceb..377811f8853f 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -38,6 +38,7 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_pm.h" #include "gt/intel_context.h" +#include "gt/intel_ring.h" #include "i915_drv.h" #include "gvt.h" diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 07d39f22a2c3..207383dda84d 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -7,6 +7,7 @@ #include #include "gt/intel_engine_pm.h" +#include "gt/intel_ring.h" #include "i915_drv.h" #include "i915_active.h" diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 3130b0c7ed83..38d3de2dfaa6 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -200,6 +200,7 @@ #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_lrc_reg.h" +#include "gt/intel_ring.h" #include "i915_drv.h" #include "i915_perf.h" diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 4575f368455d..932c5cf190b5 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -31,6 +31,7 @@ #include "gem/i915_gem_context.h" #include "gt/intel_context.h" +#include "gt/intel_ring.h" #include "i915_active.h" #include "i915_drv.h" From 71b1c99081aec38b947d678b78a1ebe75c7260db Mon Sep 17 00:00:00 2001 From: Anna Karas Date: Tue, 22 Oct 2019 13:13:38 +0300 Subject: [PATCH 043/154] drm/i915/perf: Describe structure members in documentation Add missing descriptions of i915_perf_stream structure members to documentation. Cc: Umesh Nerlige Ramappa Cc: Lionel Landwerlin Cc: Robert Bragg Signed-off-by: Anna Karas Reviewed-by: Lionel Landwerlin Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191022101338.17048-1-anna.karas@intel.com --- drivers/gpu/drm/i915/i915_perf_types.h | 31 +++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index a1f733fc905a..74ddc20a0d37 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -199,14 +199,43 @@ struct i915_perf_stream { * @pinned_ctx: The OA context specific information. */ struct intel_context *pinned_ctx; + + /** + * @specific_ctx_id: The id of the specific context. + */ u32 specific_ctx_id; + + /** + * @specific_ctx_id_mask: The mask used to masking specific_ctx_id bits. + */ u32 specific_ctx_id_mask; + /** + * @poll_check_timer: High resolution timer that will periodically + * check for data in the circular OA buffer for notifying userspace + * (e.g. during a read() or poll()). + */ struct hrtimer poll_check_timer; + + /** + * @poll_wq: The wait queue that hrtimer callback wakes when it + * sees data ready to read in the circular OA buffer. + */ wait_queue_head_t poll_wq; + + /** + * @pollin: Whether there is data available to read. + */ bool pollin; + /** + * @periodic: Whether periodic sampling is currently enabled. + */ bool periodic; + + /** + * @period_exponent: The OA unit sampling frequency is derived from this. + */ int period_exponent; /** @@ -276,7 +305,7 @@ struct i915_perf_stream { } oa_buffer; /** - * A batch buffer doing a wait on the GPU for the NOA logic to be + * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be * reprogrammed. */ struct i915_vma *noa_wait; From 6c066f4c99e1c7a481d8cefd0723e8feadbc1fa0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 15 Oct 2019 22:30:23 +0300 Subject: [PATCH 044/154] drm/i915: Add debugs to distingiush a cd2x update from a full cdclk pll update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To make the logs a bit less confusing let's toss in some debug prints to indicate whether the cdclk reprogramming is going to happen with a single pipe active or whether we need to turn all pipes off for the duration. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191015193035.25982-2-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy --- drivers/gpu/drm/i915/display/intel_cdclk.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 3d867963a6d1..6eaebc38ee01 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2343,6 +2343,9 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state) return ret; state->cdclk.pipe = pipe; + + DRM_DEBUG_KMS("Can change cdclk with pipe %c active\n", + pipe_name(pipe)); } else if (intel_cdclk_needs_modeset(&dev_priv->cdclk.actual, &state->cdclk.actual)) { ret = intel_modeset_all_pipes(state); @@ -2350,6 +2353,8 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state) return ret; state->cdclk.pipe = INVALID_PIPE; + + DRM_DEBUG_KMS("Modeset required for cdclk change\n"); } DRM_DEBUG_KMS("New cdclk calculated to be logical %u kHz, actual %u kHz\n", From 1d5a95b5c943161bcefd487206ca848b81cac4df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 15 Oct 2019 22:30:24 +0300 Subject: [PATCH 045/154] drm/i915: Rework global state locking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So far we've sort of protected the global state under dev_priv with the connection_mutex. I wan to change that so that we can change the cdclk even for pure plane updates. To that end let's formalize the protection of the global state to follow what I started with the cdclk code already (though not entirely properly) such that any crtc mutex will suffice as a read lock, and all crtcs mutexes act as the write lock. We'll also pimp intel_atomic_state_clear() to clear the entire global state, so that we don't accidentally leak stale information between the locking retries. As a slight optimization we'll only lock the crtc mutexes to protect the global state, however if and when we actually have to poke the hw (eg. if the actual cdclk changes) we must serialize commits across all crtcs so that a parallel nonblocking commit can't get ahead of the cdclk reprogamming. We do that by adding all crtcs to the state. TODO: the old global state examined during commit may still be a problem since it always looks at the _latest_ swapped state in dev_priv. Need to add proper old/new state for that too I think. v2: Remeber to serialize the commits if necessary Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191015193035.25982-3-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy --- drivers/gpu/drm/i915/display/intel_atomic.c | 44 ++++++++ drivers/gpu/drm/i915/display/intel_atomic.h | 5 + drivers/gpu/drm/i915/display/intel_audio.c | 10 +- drivers/gpu/drm/i915/display/intel_cdclk.c | 102 ++++++++++-------- drivers/gpu/drm/i915/display/intel_display.c | 29 ++++- .../drm/i915/display/intel_display_types.h | 8 ++ drivers/gpu/drm/i915/i915_drv.h | 11 +- 7 files changed, 153 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index c5a552a69752..9cd6d2348a1e 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -429,6 +429,13 @@ void intel_atomic_state_clear(struct drm_atomic_state *s) struct intel_atomic_state *state = to_intel_atomic_state(s); drm_atomic_state_default_clear(&state->base); state->dpll_set = state->modeset = false; + state->global_state_changed = false; + state->active_pipes = 0; + memset(&state->min_cdclk, 0, sizeof(state->min_cdclk)); + memset(&state->min_voltage_level, 0, sizeof(state->min_voltage_level)); + memset(&state->cdclk.logical, 0, sizeof(state->cdclk.logical)); + memset(&state->cdclk.actual, 0, sizeof(state->cdclk.actual)); + state->cdclk.pipe = INVALID_PIPE; } struct intel_crtc_state * @@ -442,3 +449,40 @@ intel_atomic_get_crtc_state(struct drm_atomic_state *state, return to_intel_crtc_state(crtc_state); } + +int intel_atomic_lock_global_state(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_crtc *crtc; + + state->global_state_changed = true; + + for_each_intel_crtc(&dev_priv->drm, crtc) { + int ret; + + ret = drm_modeset_lock(&crtc->base.mutex, + state->base.acquire_ctx); + if (ret) + return ret; + } + + return 0; +} + +int intel_atomic_serialize_global_state(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_crtc *crtc; + + state->global_state_changed = true; + + for_each_intel_crtc(&dev_priv->drm, crtc) { + struct intel_crtc_state *crtc_state; + + crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); + } + + return 0; +} diff --git a/drivers/gpu/drm/i915/display/intel_atomic.h b/drivers/gpu/drm/i915/display/intel_atomic.h index 58065d3161a3..49d5cb1b9e0a 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.h +++ b/drivers/gpu/drm/i915/display/intel_atomic.h @@ -16,6 +16,7 @@ struct drm_crtc_state; struct drm_device; struct drm_i915_private; struct drm_property; +struct intel_atomic_state; struct intel_crtc; struct intel_crtc_state; @@ -46,4 +47,8 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state); +int intel_atomic_lock_global_state(struct intel_atomic_state *state); + +int intel_atomic_serialize_global_state(struct intel_atomic_state *state); + #endif /* __INTEL_ATOMIC_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index ed18511befa3..85e6b2bbb34f 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -28,6 +28,7 @@ #include #include "i915_drv.h" +#include "intel_atomic.h" #include "intel_audio.h" #include "intel_display_types.h" #include "intel_lpe_audio.h" @@ -818,13 +819,8 @@ retry: to_intel_atomic_state(state)->cdclk.force_min_cdclk = enable ? 2 * 96000 : 0; - /* - * Protects dev_priv->cdclk.force_min_cdclk - * Need to lock this here in case we have no active pipes - * and thus wouldn't lock it during the commit otherwise. - */ - ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, - &ctx); + /* Protects dev_priv->cdclk.force_min_cdclk */ + ret = intel_atomic_lock_global_state(to_intel_atomic_state(state)); if (!ret) ret = drm_atomic_commit(state); diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 6eaebc38ee01..6c17a3bbf866 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2007,11 +2007,20 @@ static int intel_compute_min_cdclk(struct intel_atomic_state *state) sizeof(state->min_cdclk)); for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { + int ret; + min_cdclk = intel_crtc_compute_min_cdclk(crtc_state); if (min_cdclk < 0) return min_cdclk; + if (state->min_cdclk[i] == min_cdclk) + continue; + state->min_cdclk[i] = min_cdclk; + + ret = intel_atomic_lock_global_state(state); + if (ret) + return ret; } min_cdclk = state->cdclk.force_min_cdclk; @@ -2034,7 +2043,7 @@ static int intel_compute_min_cdclk(struct intel_atomic_state *state) * future platforms this code will need to be * adjusted. */ -static u8 bxt_compute_min_voltage_level(struct intel_atomic_state *state) +static int bxt_compute_min_voltage_level(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct intel_crtc *crtc; @@ -2047,11 +2056,21 @@ static u8 bxt_compute_min_voltage_level(struct intel_atomic_state *state) sizeof(state->min_voltage_level)); for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { + int ret; + if (crtc_state->base.enable) - state->min_voltage_level[i] = - crtc_state->min_voltage_level; + min_voltage_level = crtc_state->min_voltage_level; else - state->min_voltage_level[i] = 0; + min_voltage_level = 0; + + if (state->min_voltage_level[i] == min_voltage_level) + continue; + + state->min_voltage_level[i] = min_voltage_level; + + ret = intel_atomic_lock_global_state(state); + if (ret) + return ret; } min_voltage_level = 0; @@ -2195,20 +2214,24 @@ static int skl_modeset_calc_cdclk(struct intel_atomic_state *state) static int bxt_modeset_calc_cdclk(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); - int min_cdclk, cdclk, vco; + int min_cdclk, min_voltage_level, cdclk, vco; min_cdclk = intel_compute_min_cdclk(state); if (min_cdclk < 0) return min_cdclk; + min_voltage_level = bxt_compute_min_voltage_level(state); + if (min_voltage_level < 0) + return min_voltage_level; + cdclk = bxt_calc_cdclk(dev_priv, min_cdclk); vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk); state->cdclk.logical.vco = vco; state->cdclk.logical.cdclk = cdclk; state->cdclk.logical.voltage_level = - max(dev_priv->display.calc_voltage_level(cdclk), - bxt_compute_min_voltage_level(state)); + max_t(int, min_voltage_level, + dev_priv->display.calc_voltage_level(cdclk)); if (!state->active_pipes) { cdclk = bxt_calc_cdclk(dev_priv, state->cdclk.force_min_cdclk); @@ -2225,23 +2248,6 @@ static int bxt_modeset_calc_cdclk(struct intel_atomic_state *state) return 0; } -static int intel_lock_all_pipes(struct intel_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - struct intel_crtc *crtc; - - /* Add all pipes to the state */ - for_each_intel_crtc(&dev_priv->drm, crtc) { - struct intel_crtc_state *crtc_state; - - crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); - if (IS_ERR(crtc_state)) - return PTR_ERR(crtc_state); - } - - return 0; -} - static int intel_modeset_all_pipes(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); @@ -2308,46 +2314,56 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state) return ret; /* - * Writes to dev_priv->cdclk.logical must protected by - * holding all the crtc locks, even if we don't end up + * Writes to dev_priv->cdclk.{actual,logical} must protected + * by holding all the crtc mutexes even if we don't end up * touching the hardware */ - if (intel_cdclk_changed(&dev_priv->cdclk.logical, - &state->cdclk.logical)) { - ret = intel_lock_all_pipes(state); - if (ret < 0) + if (intel_cdclk_changed(&dev_priv->cdclk.actual, + &state->cdclk.actual)) { + /* + * Also serialize commits across all crtcs + * if the actual hw needs to be poked. + */ + ret = intel_atomic_serialize_global_state(state); + if (ret) return ret; + } else if (intel_cdclk_changed(&dev_priv->cdclk.logical, + &state->cdclk.logical)) { + ret = intel_atomic_lock_global_state(state); + if (ret) + return ret; + } else { + return 0; } - if (is_power_of_2(state->active_pipes)) { + if (is_power_of_2(state->active_pipes) && + intel_cdclk_needs_cd2x_update(dev_priv, + &dev_priv->cdclk.actual, + &state->cdclk.actual)) { struct intel_crtc *crtc; struct intel_crtc_state *crtc_state; pipe = ilog2(state->active_pipes); crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - crtc_state = intel_atomic_get_new_crtc_state(state, crtc); - if (crtc_state && - drm_atomic_crtc_needs_modeset(&crtc_state->base)) + + crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); + + if (drm_atomic_crtc_needs_modeset(&crtc_state->base)) pipe = INVALID_PIPE; } else { pipe = INVALID_PIPE; } - /* All pipes must be switched off while we change the cdclk. */ - if (pipe != INVALID_PIPE && - intel_cdclk_needs_cd2x_update(dev_priv, - &dev_priv->cdclk.actual, - &state->cdclk.actual)) { - ret = intel_lock_all_pipes(state); - if (ret) - return ret; - + if (pipe != INVALID_PIPE) { state->cdclk.pipe = pipe; DRM_DEBUG_KMS("Can change cdclk with pipe %c active\n", pipe_name(pipe)); } else if (intel_cdclk_needs_modeset(&dev_priv->cdclk.actual, &state->cdclk.actual)) { + /* All pipes must be switched off while we change the cdclk. */ ret = intel_modeset_all_pipes(state); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 579655675b08..ed3f501ed202 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -12425,6 +12425,12 @@ static bool check_digital_port_conflicts(struct intel_atomic_state *state) unsigned int used_mst_ports = 0; bool ret = true; + /* + * We're going to peek into connector->state, + * hence connection_mutex must be held. + */ + drm_modeset_lock_assert_held(&dev->mode_config.connection_mutex); + /* * Walk the connector list instead of the encoder * list to detect the problem on ddi platforms @@ -13725,7 +13731,6 @@ static int intel_modeset_checks(struct intel_atomic_state *state) state->active_pipes = dev_priv->active_pipes; state->cdclk.logical = dev_priv->cdclk.logical; state->cdclk.actual = dev_priv->cdclk.actual; - state->cdclk.pipe = INVALID_PIPE; for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { @@ -13738,6 +13743,12 @@ static int intel_modeset_checks(struct intel_atomic_state *state) state->active_pipe_changes |= BIT(crtc->pipe); } + if (state->active_pipe_changes) { + ret = intel_atomic_lock_global_state(state); + if (ret) + return ret; + } + ret = intel_modeset_calc_cdclk(state); if (ret) return ret; @@ -13839,7 +13850,7 @@ static int intel_atomic_check(struct drm_device *dev, struct intel_crtc_state *old_crtc_state, *new_crtc_state; struct intel_crtc *crtc; int ret, i; - bool any_ms = state->cdclk.force_min_cdclk_changed; + bool any_ms = false; /* Catch I915_MODE_FLAG_INHERITED */ for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, @@ -13877,6 +13888,8 @@ static int intel_atomic_check(struct drm_device *dev, if (ret) goto fail; + any_ms |= state->cdclk.force_min_cdclk_changed; + if (any_ms) { ret = intel_modeset_checks(state); if (ret) @@ -14670,6 +14683,14 @@ static void intel_atomic_track_fbs(struct intel_atomic_state *state) plane->frontbuffer_bit); } +static void assert_global_state_locked(struct drm_i915_private *dev_priv) +{ + struct intel_crtc *crtc; + + for_each_intel_crtc(&dev_priv->drm, crtc) + drm_modeset_lock_assert_held(&crtc->base.mutex); +} + static int intel_atomic_commit(struct drm_device *dev, struct drm_atomic_state *_state, bool nonblock) @@ -14735,7 +14756,9 @@ static int intel_atomic_commit(struct drm_device *dev, intel_shared_dpll_swap_state(state); intel_atomic_track_fbs(state); - if (state->modeset) { + if (state->global_state_changed) { + assert_global_state_locked(dev_priv); + memcpy(dev_priv->min_cdclk, state->min_cdclk, sizeof(state->min_cdclk)); memcpy(dev_priv->min_voltage_level, state->min_voltage_level, diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index bac40482a2aa..d18208e590b0 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -506,6 +506,14 @@ struct intel_atomic_state { bool rps_interactive; + /* + * active_pipes + * min_cdclk[] + * min_voltage_level[] + * cdclk.* + */ + bool global_state_changed; + /* Gen9+ only */ struct skl_ddb_values wm_results; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 674e9e921839..16e58a74fa6f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1097,13 +1097,14 @@ struct drm_i915_private { unsigned int fdi_pll_freq; unsigned int czclk_freq; + /* + * For reading holding any crtc lock is sufficient, + * for writing must hold all of them. + */ struct { /* * The current logical cdclk state. * See intel_atomic_state.cdclk.logical - * - * For reading holding any crtc lock is sufficient, - * for writing must hold all of them. */ struct intel_cdclk_state logical; /* @@ -1173,6 +1174,10 @@ struct drm_i915_private { */ struct mutex dpll_lock; + /* + * For reading active_pipes, min_cdclk, min_voltage_level holding + * any crtc lock is sufficient, for writing must hold all of them. + */ u8 active_pipes; /* minimum acceptable cdclk for each pipe */ int min_cdclk[I915_MAX_PIPES]; From bf5da83e4bd800e7ccd44cf4937a365a859bbf23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 15 Oct 2019 22:30:25 +0300 Subject: [PATCH 046/154] drm/i915: Move check_digital_port_conflicts() earier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit check_digital_port_conflicts() is done needlessly late. Move it earlier. This will be needed as later on we want to set any_ms=true a bit later for non-modesets too and we can't call this guy without the connection_mutex held. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191015193035.25982-4-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy --- drivers/gpu/drm/i915/display/intel_display.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index ed3f501ed202..dea811995584 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -13718,11 +13718,6 @@ static int intel_modeset_checks(struct intel_atomic_state *state) struct intel_crtc *crtc; int ret, i; - if (!check_digital_port_conflicts(state)) { - DRM_DEBUG_KMS("rejecting conflicting digital port configuration\n"); - return -EINVAL; - } - /* keep the current setting */ if (!state->cdclk.force_min_cdclk_changed) state->cdclk.force_min_cdclk = dev_priv->cdclk.force_min_cdclk; @@ -13884,6 +13879,12 @@ static int intel_atomic_check(struct drm_device *dev, any_ms = true; } + if (any_ms && !check_digital_port_conflicts(state)) { + DRM_DEBUG_KMS("rejecting conflicting digital port configuration\n"); + ret = EINVAL; + goto fail; + } + ret = drm_dp_mst_atomic_check(&state->base); if (ret) goto fail; From bb6ae9e653dc1019312466cde7be3db69681d3b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 15 Oct 2019 22:30:26 +0300 Subject: [PATCH 047/154] drm/i915: Allow planes to declare their minimum acceptable cdclk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Various pixel formats and plane scaling impose additional constraints on the cdclk frequency. Provide a new plane->min_cdclk() hook that will be used to compute the minimum acceptable cdclk frequency for each plane. Annoyingly on some platforms the numer of active planes affects this calculation so we must also toss in more planes into the state when the number of active planes changes. The sequence of state computation must also be changed: 1. check_plane() (updates plane's visibility etc.) 2. figure out if more planes now require update min_cdclk computaion 3. calculate the new min cdclk for each plane in the state 4. if the minimum of any plane now exceeds the current logical cdclk we recompute the cdclk 4. during cdclk computation take the planes' min_cdclk into accoutn 5. follow the normal cdclk programming to change the cdclk frequency. This may now require a modeset (except on bxt/glk in some cases), which either succeeds or fails depending on whether userspace has given us permission to perform a modeset or not. v2: Fix plane id check in intel_crtc_add_planes_to_state() Only print the debug message when cdclk needs bumping Use dev_priv->cdclk... as the old state explicitly Reviewed-by: Juha-Pekka Heikkila Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191015193035.25982-5-ville.syrjala@linux.intel.com --- .../gpu/drm/i915/display/intel_atomic_plane.c | 39 ++ .../gpu/drm/i915/display/intel_atomic_plane.h | 2 + drivers/gpu/drm/i915/display/intel_cdclk.c | 16 + drivers/gpu/drm/i915/display/intel_display.c | 182 +++++++++- .../drm/i915/display/intel_display_types.h | 4 + drivers/gpu/drm/i915/display/intel_sprite.c | 341 ++++++++++++++++++ drivers/gpu/drm/i915/display/intel_sprite.h | 7 + 7 files changed, 571 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c index a6cff5a160fb..98f557a9f8ee 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c @@ -138,6 +138,44 @@ unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state, return cpp * crtc_state->pixel_rate; } +bool intel_plane_calc_min_cdclk(struct intel_atomic_state *state, + struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + const struct intel_plane_state *plane_state = + intel_atomic_get_new_plane_state(state, plane); + struct intel_crtc *crtc = to_intel_crtc(plane_state->base.crtc); + struct intel_crtc_state *crtc_state; + + if (!plane_state->base.visible || !plane->min_cdclk) + return false; + + crtc_state = intel_atomic_get_new_crtc_state(state, crtc); + + crtc_state->min_cdclk[plane->id] = + plane->min_cdclk(crtc_state, plane_state); + + /* + * Does the cdclk need to be bumbed up? + * + * Note: we obviously need to be called before the new + * cdclk frequency is calculated so state->cdclk.logical + * hasn't been populated yet. Hence we look at the old + * cdclk state under dev_priv->cdclk.logical. This is + * safe as long we hold at least one crtc mutex (which + * must be true since we have crtc_state). + */ + if (crtc_state->min_cdclk[plane->id] > dev_priv->cdclk.logical.cdclk) { + DRM_DEBUG_KMS("[PLANE:%d:%s] min_cdclk (%d kHz) > logical cdclk (%d kHz)\n", + plane->base.base.id, plane->base.name, + crtc_state->min_cdclk[plane->id], + dev_priv->cdclk.logical.cdclk); + return true; + } + + return false; +} + int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state, struct intel_crtc_state *new_crtc_state, const struct intel_plane_state *old_plane_state, @@ -151,6 +189,7 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_ new_crtc_state->nv12_planes &= ~BIT(plane->id); new_crtc_state->c8_planes &= ~BIT(plane->id); new_crtc_state->data_rate[plane->id] = 0; + new_crtc_state->min_cdclk[plane->id] = 0; new_plane_state->base.visible = false; if (!new_plane_state->base.crtc && !old_plane_state->base.crtc) diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.h b/drivers/gpu/drm/i915/display/intel_atomic_plane.h index dc85af02e9b7..e61e9a82aadf 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.h +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.h @@ -47,5 +47,7 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat struct intel_crtc_state *crtc_state, const struct intel_plane_state *old_plane_state, struct intel_plane_state *plane_state); +bool intel_plane_calc_min_cdclk(struct intel_atomic_state *state, + struct intel_plane *plane); #endif /* __INTEL_ATOMIC_PLANE_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 6c17a3bbf866..0caef2592a7e 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1918,6 +1918,19 @@ static int intel_pixel_rate_to_cdclk(const struct intel_crtc_state *crtc_state) return DIV_ROUND_UP(pixel_rate * 100, 90); } +static int intel_planes_min_cdclk(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_plane *plane; + int min_cdclk = 0; + + for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) + min_cdclk = max(crtc_state->min_cdclk[plane->id], min_cdclk); + + return min_cdclk; +} + int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = @@ -1986,6 +1999,9 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) IS_GEMINILAKE(dev_priv)) min_cdclk = max(158400, min_cdclk); + /* Account for additional needs from the planes */ + min_cdclk = max(intel_planes_min_cdclk(crtc_state), min_cdclk); + if (min_cdclk > dev_priv->max_cdclk_freq) { DRM_DEBUG_KMS("required cdclk (%d kHz) exceeds max (%d kHz)\n", min_cdclk, dev_priv->max_cdclk_freq); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index dea811995584..1fac98961183 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -3154,6 +3154,7 @@ static void intel_plane_disable_noatomic(struct intel_crtc *crtc, intel_set_plane_visible(crtc_state, plane_state, false); fixup_active_planes(crtc_state); crtc_state->data_rate[plane->id] = 0; + crtc_state->min_cdclk[plane->id] = 0; if (plane->id == PLANE_PRIMARY) intel_pre_disable_primary_noatomic(&crtc->base); @@ -3577,6 +3578,53 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state) return 0; } +static void i9xx_plane_ratio(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, + unsigned int *num, unsigned int *den) +{ + const struct drm_framebuffer *fb = plane_state->base.fb; + unsigned int cpp = fb->format->cpp[0]; + + /* + * g4x bspec says 64bpp pixel rate can't exceed 80% + * of cdclk when the sprite plane is enabled on the + * same pipe. ilk/snb bspec says 64bpp pixel rate is + * never allowed to exceed 80% of cdclk. Let's just go + * with the ilk/snb limit always. + */ + if (cpp == 8) { + *num = 10; + *den = 8; + } else { + *num = 1; + *den = 1; + } +} + +static int i9xx_plane_min_cdclk(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + unsigned int pixel_rate; + unsigned int num, den; + + /* + * Note that crtc_state->pixel_rate accounts for both + * horizontal and vertical panel fitter downscaling factors. + * Pre-HSW bspec tells us to only consider the horizontal + * downscaling factor here. We ignore that and just consider + * both for simplicity. + */ + pixel_rate = crtc_state->pixel_rate; + + i9xx_plane_ratio(crtc_state, plane_state, &num, &den); + + /* two pixels per clock with double wide pipe */ + if (crtc_state->double_wide) + den *= 2; + + return DIV_ROUND_UP(pixel_rate * num, den); +} + unsigned int i9xx_plane_max_stride(struct intel_plane *plane, u32 pixel_format, u64 modifier, @@ -11706,6 +11754,7 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat plane_state->base.visible = visible = false; crtc_state->active_planes &= ~BIT(plane->id); crtc_state->data_rate[plane->id] = 0; + crtc_state->min_cdclk[plane->id] = 0; } if (!was_visible && !visible) @@ -12072,9 +12121,6 @@ static int intel_crtc_atomic_check(struct intel_atomic_state *state, if (INTEL_GEN(dev_priv) >= 9) { if (mode_changed || crtc_state->update_pipe) ret = skl_update_scaler_crtc(crtc_state); - - if (!ret) - ret = icl_check_nv12_planes(crtc_state); if (!ret) ret = skl_check_pipe_max_pixel_rate(crtc, crtc_state); if (!ret) @@ -13796,12 +13842,49 @@ static void intel_crtc_check_fastset(const struct intel_crtc_state *old_crtc_sta new_crtc_state->has_drrs = old_crtc_state->has_drrs; } -static int intel_atomic_check_planes(struct intel_atomic_state *state) +static int intel_crtc_add_planes_to_state(struct intel_atomic_state *state, + struct intel_crtc *crtc, + u8 plane_ids_mask) { + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_plane *plane; + + for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { + struct intel_plane_state *plane_state; + + if ((plane_ids_mask & BIT(plane->id)) == 0) + continue; + + plane_state = intel_atomic_get_plane_state(state, plane); + if (IS_ERR(plane_state)) + return PTR_ERR(plane_state); + } + + return 0; +} + +static bool active_planes_affects_min_cdclk(struct drm_i915_private *dev_priv) +{ + /* See {hsw,vlv,ivb}_plane_ratio() */ + return IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv) || + IS_CHERRYVIEW(dev_priv) || IS_VALLEYVIEW(dev_priv) || + IS_IVYBRIDGE(dev_priv); +} + +static int intel_atomic_check_planes(struct intel_atomic_state *state, + bool *need_modeset) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_crtc_state *old_crtc_state, *new_crtc_state; struct intel_plane_state *plane_state; struct intel_plane *plane; + struct intel_crtc *crtc; int i, ret; + ret = icl_add_linked_planes(state); + if (ret) + return ret; + for_each_new_intel_plane_in_state(state, plane, plane_state, i) { ret = intel_plane_atomic_check(state, plane); if (ret) { @@ -13811,6 +13894,41 @@ static int intel_atomic_check_planes(struct intel_atomic_state *state) } } + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, + new_crtc_state, i) { + u8 old_active_planes, new_active_planes; + + ret = icl_check_nv12_planes(new_crtc_state); + if (ret) + return ret; + + /* + * On some platforms the number of active planes affects + * the planes' minimum cdclk calculation. Add such planes + * to the state before we compute the minimum cdclk. + */ + if (!active_planes_affects_min_cdclk(dev_priv)) + continue; + + old_active_planes = old_crtc_state->active_planes & ~BIT(PLANE_CURSOR); + new_active_planes = new_crtc_state->active_planes & ~BIT(PLANE_CURSOR); + + if (hweight8(old_active_planes) == hweight8(new_active_planes)) + continue; + + ret = intel_crtc_add_planes_to_state(state, crtc, new_active_planes); + if (ret) + return ret; + } + + /* + * active_planes bitmask has been updated, and potentially + * affected planes are part of the state. We can now + * compute the minimum cdclk for each plane. + */ + for_each_new_intel_plane_in_state(state, plane, plane_state, i) + *need_modeset |= intel_plane_calc_min_cdclk(state, plane); + return 0; } @@ -13891,6 +14009,10 @@ static int intel_atomic_check(struct drm_device *dev, any_ms |= state->cdclk.force_min_cdclk_changed; + ret = intel_atomic_check_planes(state, &any_ms); + if (ret) + goto fail; + if (any_ms) { ret = intel_modeset_checks(state); if (ret) @@ -13899,14 +14021,6 @@ static int intel_atomic_check(struct drm_device *dev, state->cdclk.logical = dev_priv->cdclk.logical; } - ret = icl_add_linked_planes(state); - if (ret) - goto fail; - - ret = intel_atomic_check_planes(state); - if (ret) - goto fail; - ret = intel_atomic_check_crtcs(state); if (ret) goto fail; @@ -15355,6 +15469,15 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) plane->get_hw_state = i9xx_plane_get_hw_state; plane->check_plane = i9xx_plane_check; + if (IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) + plane->min_cdclk = hsw_plane_min_cdclk; + else if (IS_IVYBRIDGE(dev_priv)) + plane->min_cdclk = ivb_plane_min_cdclk; + else if (IS_CHERRYVIEW(dev_priv) || IS_VALLEYVIEW(dev_priv)) + plane->min_cdclk = vlv_plane_min_cdclk; + else + plane->min_cdclk = i9xx_plane_min_cdclk; + plane_funcs = &i965_plane_funcs; } else { formats = i8xx_primary_formats; @@ -15366,6 +15489,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) plane->disable_plane = i9xx_disable_plane; plane->get_hw_state = i9xx_plane_get_hw_state; plane->check_plane = i9xx_plane_check; + plane->min_cdclk = i9xx_plane_min_cdclk; plane_funcs = &i8xx_plane_funcs; } @@ -17284,17 +17408,9 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) intel_crtc_compute_pixel_rate(crtc_state); - min_cdclk = intel_crtc_compute_min_cdclk(crtc_state); - if (WARN_ON(min_cdclk < 0)) - min_cdclk = 0; - intel_crtc_update_active_timings(crtc_state); } - dev_priv->min_cdclk[crtc->pipe] = min_cdclk; - dev_priv->min_voltage_level[crtc->pipe] = - crtc_state->min_voltage_level; - for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { const struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); @@ -17306,8 +17422,34 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) if (plane_state->base.visible) crtc_state->data_rate[plane->id] = 4 * crtc_state->pixel_rate; + /* + * FIXME don't have the fb yet, so can't + * use plane->min_cdclk() :( + */ + if (plane_state->base.visible && plane->min_cdclk) { + if (crtc_state->double_wide || + INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + crtc_state->min_cdclk[plane->id] = + DIV_ROUND_UP(crtc_state->pixel_rate, 2); + else + crtc_state->min_cdclk[plane->id] = + crtc_state->pixel_rate; + } + DRM_DEBUG_KMS("[PLANE:%d:%s] min_cdclk %d kHz\n", + plane->base.base.id, plane->base.name, + crtc_state->min_cdclk[plane->id]); } + if (crtc_state->base.active) { + min_cdclk = intel_crtc_compute_min_cdclk(crtc_state); + if (WARN_ON(min_cdclk < 0)) + min_cdclk = 0; + } + + dev_priv->min_cdclk[crtc->pipe] = min_cdclk; + dev_priv->min_voltage_level[crtc->pipe] = + crtc_state->min_voltage_level; + intel_bw_crtc_update(bw_state, crtc_state); intel_pipe_config_sanity_check(dev_priv, crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index d18208e590b0..40184e823c84 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -940,6 +940,8 @@ struct intel_crtc_state { struct intel_crtc_wm_state wm; + int min_cdclk[I915_MAX_PLANES]; + u32 data_rate[I915_MAX_PLANES]; /* Gamma mode programmed on the pipe */ @@ -1085,6 +1087,8 @@ struct intel_plane { bool (*get_hw_state)(struct intel_plane *plane, enum pipe *pipe); int (*check_plane)(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state); + int (*min_cdclk)(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state); }; struct intel_watermark_params { diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index 5ae12ab3c5b7..469f79b01114 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -322,6 +322,55 @@ bool icl_is_hdr_plane(struct drm_i915_private *dev_priv, enum plane_id plane_id) icl_hdr_plane_mask() & BIT(plane_id); } +static void +skl_plane_ratio(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, + unsigned int *num, unsigned int *den) +{ + struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev); + const struct drm_framebuffer *fb = plane_state->base.fb; + + if (fb->format->cpp[0] == 8) { + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { + *num = 10; + *den = 8; + } else { + *num = 9; + *den = 8; + } + } else { + *num = 1; + *den = 1; + } +} + +static int skl_plane_min_cdclk(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev); + unsigned int pixel_rate = crtc_state->pixel_rate; + unsigned int src_w, src_h, dst_w, dst_h; + unsigned int num, den; + + skl_plane_ratio(crtc_state, plane_state, &num, &den); + + /* two pixels per clock on glk+ */ + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + den *= 2; + + src_w = drm_rect_width(&plane_state->base.src) >> 16; + src_h = drm_rect_height(&plane_state->base.src) >> 16; + dst_w = drm_rect_width(&plane_state->base.dst); + dst_h = drm_rect_height(&plane_state->base.dst); + + /* Downscaling limits the maximum pixel rate */ + dst_w = min(src_w, dst_w); + dst_h = min(src_h, dst_h); + + return DIV64_U64_ROUND_UP(mul_u32_u32(pixel_rate * num, src_w * src_h), + mul_u32_u32(den, dst_w * dst_h)); +} + static unsigned int skl_plane_max_stride(struct intel_plane *plane, u32 pixel_format, u64 modifier, @@ -811,6 +860,85 @@ vlv_update_clrc(const struct intel_plane_state *plane_state) SP_SH_SIN(sh_sin) | SP_SH_COS(sh_cos)); } +static void +vlv_plane_ratio(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, + unsigned int *num, unsigned int *den) +{ + u8 active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR); + const struct drm_framebuffer *fb = plane_state->base.fb; + unsigned int cpp = fb->format->cpp[0]; + + /* + * VLV bspec only considers cases where all three planes are + * enabled, and cases where the primary and one sprite is enabled. + * Let's assume the case with just two sprites enabled also + * maps to the latter case. + */ + if (hweight8(active_planes) == 3) { + switch (cpp) { + case 8: + *num = 11; + *den = 8; + break; + case 4: + *num = 18; + *den = 16; + break; + default: + *num = 1; + *den = 1; + break; + } + } else if (hweight8(active_planes) == 2) { + switch (cpp) { + case 8: + *num = 10; + *den = 8; + break; + case 4: + *num = 17; + *den = 16; + break; + default: + *num = 1; + *den = 1; + break; + } + } else { + switch (cpp) { + case 8: + *num = 10; + *den = 8; + break; + default: + *num = 1; + *den = 1; + break; + } + } +} + +int vlv_plane_min_cdclk(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + unsigned int pixel_rate; + unsigned int num, den; + + /* + * Note that crtc_state->pixel_rate accounts for both + * horizontal and vertical panel fitter downscaling factors. + * Pre-HSW bspec tells us to only consider the horizontal + * downscaling factor here. We ignore that and just consider + * both for simplicity. + */ + pixel_rate = crtc_state->pixel_rate; + + vlv_plane_ratio(crtc_state, plane_state, &num, &den); + + return DIV_ROUND_UP(pixel_rate * num, den); +} + static u32 vlv_sprite_ctl_crtc(const struct intel_crtc_state *crtc_state) { u32 sprctl = 0; @@ -1017,6 +1145,164 @@ vlv_plane_get_hw_state(struct intel_plane *plane, return ret; } +static void ivb_plane_ratio(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, + unsigned int *num, unsigned int *den) +{ + u8 active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR); + const struct drm_framebuffer *fb = plane_state->base.fb; + unsigned int cpp = fb->format->cpp[0]; + + if (hweight8(active_planes) == 2) { + switch (cpp) { + case 8: + *num = 10; + *den = 8; + break; + case 4: + *num = 17; + *den = 16; + break; + default: + *num = 1; + *den = 1; + break; + } + } else { + switch (cpp) { + case 8: + *num = 9; + *den = 8; + break; + default: + *num = 1; + *den = 1; + break; + } + } +} + +static void ivb_plane_ratio_scaling(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, + unsigned int *num, unsigned int *den) +{ + const struct drm_framebuffer *fb = plane_state->base.fb; + unsigned int cpp = fb->format->cpp[0]; + + switch (cpp) { + case 8: + *num = 12; + *den = 8; + break; + case 4: + *num = 19; + *den = 16; + break; + case 2: + *num = 33; + *den = 32; + break; + default: + *num = 1; + *den = 1; + break; + } +} + +int ivb_plane_min_cdclk(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + unsigned int pixel_rate; + unsigned int num, den; + + /* + * Note that crtc_state->pixel_rate accounts for both + * horizontal and vertical panel fitter downscaling factors. + * Pre-HSW bspec tells us to only consider the horizontal + * downscaling factor here. We ignore that and just consider + * both for simplicity. + */ + pixel_rate = crtc_state->pixel_rate; + + ivb_plane_ratio(crtc_state, plane_state, &num, &den); + + return DIV_ROUND_UP(pixel_rate * num, den); +} + +static int ivb_sprite_min_cdclk(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + unsigned int src_w, dst_w, pixel_rate; + unsigned int num, den; + + /* + * Note that crtc_state->pixel_rate accounts for both + * horizontal and vertical panel fitter downscaling factors. + * Pre-HSW bspec tells us to only consider the horizontal + * downscaling factor here. We ignore that and just consider + * both for simplicity. + */ + pixel_rate = crtc_state->pixel_rate; + + src_w = drm_rect_width(&plane_state->base.src) >> 16; + dst_w = drm_rect_width(&plane_state->base.dst); + + if (src_w != dst_w) + ivb_plane_ratio_scaling(crtc_state, plane_state, &num, &den); + else + ivb_plane_ratio(crtc_state, plane_state, &num, &den); + + /* Horizontal downscaling limits the maximum pixel rate */ + dst_w = min(src_w, dst_w); + + return DIV_ROUND_UP_ULL(mul_u32_u32(pixel_rate, num * src_w), + den * dst_w); +} + +static void hsw_plane_ratio(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, + unsigned int *num, unsigned int *den) +{ + u8 active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR); + const struct drm_framebuffer *fb = plane_state->base.fb; + unsigned int cpp = fb->format->cpp[0]; + + if (hweight8(active_planes) == 2) { + switch (cpp) { + case 8: + *num = 10; + *den = 8; + break; + default: + *num = 1; + *den = 1; + break; + } + } else { + switch (cpp) { + case 8: + *num = 9; + *den = 8; + break; + default: + *num = 1; + *den = 1; + break; + } + } +} + +int hsw_plane_min_cdclk(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + unsigned int pixel_rate = crtc_state->pixel_rate; + unsigned int num, den; + + hsw_plane_ratio(crtc_state, plane_state, &num, &den); + + return DIV_ROUND_UP(pixel_rate * num, den); +} + static u32 ivb_sprite_ctl_crtc(const struct intel_crtc_state *crtc_state) { u32 sprctl = 0; @@ -1243,6 +1529,53 @@ ivb_plane_get_hw_state(struct intel_plane *plane, return ret; } +static int g4x_sprite_min_cdclk(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + const struct drm_framebuffer *fb = plane_state->base.fb; + unsigned int hscale, pixel_rate; + unsigned int limit, decimate; + + /* + * Note that crtc_state->pixel_rate accounts for both + * horizontal and vertical panel fitter downscaling factors. + * Pre-HSW bspec tells us to only consider the horizontal + * downscaling factor here. We ignore that and just consider + * both for simplicity. + */ + pixel_rate = crtc_state->pixel_rate; + + /* Horizontal downscaling limits the maximum pixel rate */ + hscale = drm_rect_calc_hscale(&plane_state->base.src, + &plane_state->base.dst, + 0, INT_MAX); + if (hscale < 0x10000) + return pixel_rate; + + /* Decimation steps at 2x,4x,8x,16x */ + decimate = ilog2(hscale >> 16); + hscale >>= decimate; + + /* Starting limit is 90% of cdclk */ + limit = 9; + + /* -10% per decimation step */ + limit -= decimate; + + /* -10% for RGB */ + if (fb->format->cpp[0] >= 4) + limit--; /* -10% for RGB */ + + /* + * We should also do -10% if sprite scaling is enabled + * on the other pipe, but we can't really check for that, + * so we ignore it. + */ + + return DIV_ROUND_UP_ULL(mul_u32_u32(pixel_rate, 10 * hscale), + limit << 16); +} + static unsigned int g4x_sprite_max_stride(struct intel_plane *plane, u32 pixel_format, u64 modifier, @@ -2511,6 +2844,7 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv, plane->disable_plane = skl_disable_plane; plane->get_hw_state = skl_plane_get_hw_state; plane->check_plane = skl_plane_check; + plane->min_cdclk = skl_plane_min_cdclk; if (icl_is_nv12_y_plane(plane_id)) plane->update_slave = icl_update_slave; @@ -2618,6 +2952,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, plane->disable_plane = vlv_disable_plane; plane->get_hw_state = vlv_plane_get_hw_state; plane->check_plane = vlv_sprite_check; + plane->min_cdclk = vlv_plane_min_cdclk; formats = vlv_plane_formats; num_formats = ARRAY_SIZE(vlv_plane_formats); @@ -2631,6 +2966,11 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, plane->get_hw_state = ivb_plane_get_hw_state; plane->check_plane = g4x_sprite_check; + if (IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) + plane->min_cdclk = hsw_plane_min_cdclk; + else + plane->min_cdclk = ivb_sprite_min_cdclk; + formats = snb_plane_formats; num_formats = ARRAY_SIZE(snb_plane_formats); modifiers = i9xx_plane_format_modifiers; @@ -2642,6 +2982,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, plane->disable_plane = g4x_disable_plane; plane->get_hw_state = g4x_plane_get_hw_state; plane->check_plane = g4x_sprite_check; + plane->min_cdclk = g4x_sprite_min_cdclk; modifiers = i9xx_plane_format_modifiers; if (IS_GEN(dev_priv, 6)) { diff --git a/drivers/gpu/drm/i915/display/intel_sprite.h b/drivers/gpu/drm/i915/display/intel_sprite.h index 229336214f68..5eeaa92420d1 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.h +++ b/drivers/gpu/drm/i915/display/intel_sprite.h @@ -49,4 +49,11 @@ static inline u8 icl_hdr_plane_mask(void) bool icl_is_hdr_plane(struct drm_i915_private *dev_priv, enum plane_id plane_id); +int ivb_plane_min_cdclk(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state); +int hsw_plane_min_cdclk(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state); +int vlv_plane_min_cdclk(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state); + #endif /* __INTEL_SPRITE_H__ */ From 99efd1c92b7ae8f46b74552513bfa975f080485b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 15 Oct 2019 22:30:27 +0300 Subject: [PATCH 048/154] drm/i915: Eliminate skl_check_pipe_max_pixel_rate() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The normal cdclk handling now takes care of making sure the plane's pixel rate doesn't exceed the spec appointed percentage of the cdclk frequency. Thus we can nuke skl_check_pipe_max_pixel_rate(). Reviewed-by: Juha-Pekka Heikkila Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191015193035.25982-6-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 2 - drivers/gpu/drm/i915/intel_pm.c | 87 -------------------- drivers/gpu/drm/i915/intel_pm.h | 2 - 3 files changed, 91 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 1fac98961183..f696fa23c96e 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -12121,8 +12121,6 @@ static int intel_crtc_atomic_check(struct intel_atomic_state *state, if (INTEL_GEN(dev_priv) >= 9) { if (mode_changed || crtc_state->update_pipe) ret = skl_update_scaler_crtc(crtc_state); - if (!ret) - ret = skl_check_pipe_max_pixel_rate(crtc, crtc_state); if (!ret) ret = intel_atomic_setup_scalers(dev_priv, crtc, crtc_state); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 362234449087..6b37d22fc68d 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4097,93 +4097,6 @@ skl_plane_downscale_amount(const struct intel_crtc_state *crtc_state, return mul_fixed16(downscale_w, downscale_h); } -static uint_fixed_16_16_t -skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state) -{ - uint_fixed_16_16_t pipe_downscale = u32_to_fixed16(1); - - if (!crtc_state->base.enable) - return pipe_downscale; - - if (crtc_state->pch_pfit.enabled) { - u32 src_w, src_h, dst_w, dst_h; - u32 pfit_size = crtc_state->pch_pfit.size; - uint_fixed_16_16_t fp_w_ratio, fp_h_ratio; - uint_fixed_16_16_t downscale_h, downscale_w; - - src_w = crtc_state->pipe_src_w; - src_h = crtc_state->pipe_src_h; - dst_w = pfit_size >> 16; - dst_h = pfit_size & 0xffff; - - if (!dst_w || !dst_h) - return pipe_downscale; - - fp_w_ratio = div_fixed16(src_w, dst_w); - fp_h_ratio = div_fixed16(src_h, dst_h); - downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1)); - downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1)); - - pipe_downscale = mul_fixed16(downscale_w, downscale_h); - } - - return pipe_downscale; -} - -int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, - struct intel_crtc_state *crtc_state) -{ - struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); - struct drm_atomic_state *state = crtc_state->base.state; - const struct intel_plane_state *plane_state; - struct intel_plane *plane; - int crtc_clock, dotclk; - u32 pipe_max_pixel_rate; - uint_fixed_16_16_t pipe_downscale; - uint_fixed_16_16_t max_downscale = u32_to_fixed16(1); - - if (!crtc_state->base.enable) - return 0; - - intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) { - uint_fixed_16_16_t plane_downscale; - uint_fixed_16_16_t fp_9_div_8 = div_fixed16(9, 8); - int bpp; - - if (!intel_wm_plane_visible(crtc_state, plane_state)) - continue; - - if (WARN_ON(!plane_state->base.fb)) - return -EINVAL; - - plane_downscale = skl_plane_downscale_amount(crtc_state, plane_state); - bpp = plane_state->base.fb->format->cpp[0] * 8; - if (bpp == 64) - plane_downscale = mul_fixed16(plane_downscale, - fp_9_div_8); - - max_downscale = max_fixed16(plane_downscale, max_downscale); - } - pipe_downscale = skl_pipe_downscale_amount(crtc_state); - - pipe_downscale = mul_fixed16(pipe_downscale, max_downscale); - - crtc_clock = crtc_state->base.adjusted_mode.crtc_clock; - dotclk = to_intel_atomic_state(state)->cdclk.logical.cdclk; - - if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10) - dotclk *= 2; - - pipe_max_pixel_rate = div_round_up_u32_fixed16(dotclk, pipe_downscale); - - if (pipe_max_pixel_rate < crtc_clock) { - DRM_DEBUG_KMS("Max supported pixel clock with scaling exceeded\n"); - return -EINVAL; - } - - return 0; -} - static u64 skl_plane_relative_data_rate(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state, diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h index 93d192d0610a..00a5801dfc06 100644 --- a/drivers/gpu/drm/i915/intel_pm.h +++ b/drivers/gpu/drm/i915/intel_pm.h @@ -64,8 +64,6 @@ void skl_write_plane_wm(struct intel_plane *plane, void skl_write_cursor_wm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state); bool ilk_disable_lp_wm(struct drm_device *dev); -int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, - struct intel_crtc_state *cstate); void intel_init_ipc(struct drm_i915_private *dev_priv); void intel_enable_ipc(struct drm_i915_private *dev_priv); From dbe20703e178b005f0793bdddfe7389549a0e6cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 15 Oct 2019 22:30:28 +0300 Subject: [PATCH 049/154] drm/i915: Simplify skl_max_scale() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the planes declare their minimum cdclk requirements properly we don't need to check the cdclk in skl_max_scale() anymore. Just check against the maximum downscale ratio, and move the code next to it's only caller. v2: Add a comment explaining the HQ vs. not thing Reviewed-by: Juha-Pekka Heikkila Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191015193035.25982-7-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 38 -------------------- drivers/gpu/drm/i915/display/intel_display.h | 2 -- drivers/gpu/drm/i915/display/intel_sprite.c | 18 +++++++++- 3 files changed, 17 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index f696fa23c96e..af9e210653ec 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -15145,44 +15145,6 @@ intel_cleanup_plane_fb(struct drm_plane *plane, intel_plane_unpin_fb(old_plane_state); } -int -skl_max_scale(const struct intel_crtc_state *crtc_state, - const struct drm_format_info *format) -{ - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - int max_scale; - int crtc_clock, max_dotclk, tmpclk1, tmpclk2; - - if (!crtc_state->base.enable) - return DRM_PLANE_HELPER_NO_SCALING; - - crtc_clock = crtc_state->base.adjusted_mode.crtc_clock; - max_dotclk = to_intel_atomic_state(crtc_state->base.state)->cdclk.logical.cdclk; - - if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10) - max_dotclk *= 2; - - if (WARN_ON_ONCE(!crtc_clock || max_dotclk < crtc_clock)) - return DRM_PLANE_HELPER_NO_SCALING; - - /* - * skl max scale is lower of: - * close to 3 but not 3, -1 is for that purpose - * or - * cdclk/crtc_clock - */ - if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv) || - !drm_format_info_is_yuv_semiplanar(format)) - tmpclk1 = 0x30000 - 1; - else - tmpclk1 = 0x20000 - 1; - tmpclk2 = (1 << 8) * ((max_dotclk << 8) / crtc_clock); - max_scale = min(tmpclk1, tmpclk2); - - return max_scale; -} - /** * intel_plane_destroy - destroy a plane * @plane: plane to destroy diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 7dcb176d91b0..ca7ca2804d8b 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -563,8 +563,6 @@ void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc, u16 skl_scaler_calc_phase(int sub, int scale, bool chroma_center); int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state); -int skl_max_scale(const struct intel_crtc_state *crtc_state, - const struct drm_format_info *format); u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); u32 glk_plane_color_ctl_crtc(const struct intel_crtc_state *crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index 469f79b01114..6b2eaf26700c 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -2120,6 +2120,22 @@ static int skl_plane_check_nv12_rotation(const struct intel_plane_state *plane_s return 0; } +static int skl_plane_max_scale(struct drm_i915_private *dev_priv, + const struct drm_framebuffer *fb) +{ + /* + * We don't yet know the final source width nor + * whether we can use the HQ scaler mode. Assume + * the best case. + * FIXME need to properly check this later. + */ + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv) || + !drm_format_info_is_yuv_semiplanar(fb->format)) + return 0x30000 - 1; + else + return 0x20000 - 1; +} + static int skl_plane_check(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) { @@ -2137,7 +2153,7 @@ static int skl_plane_check(struct intel_crtc_state *crtc_state, /* use scaler when colorkey is not required */ if (!plane_state->ckey.flags && intel_fb_scalable(fb)) { min_scale = 1; - max_scale = skl_max_scale(crtc_state, fb->format); + max_scale = skl_plane_max_scale(dev_priv, fb); } ret = drm_atomic_helper_check_plane_state(&plane_state->base, From 6e6c155da65be0ea4a682f41021a9f635e16ecfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 15 Oct 2019 22:30:29 +0300 Subject: [PATCH 050/154] drm/i915: Add support for half float framebuffers for skl+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit skl+ supports fp16 pixel formats on all universal planes. Add the necessary bits to expose that capability. The main different to icl is that we can't scale fp16, so need to add the relevant checks. v2: Rebase on top of icl fp16 Split skl+ bits into a separate patch Reviewed-by: Maarten Lankhorst Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191015193035.25982-8-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 11 +++++++---- drivers/gpu/drm/i915/display/intel_sprite.c | 11 +++++++++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index af9e210653ec..cd79d35de05e 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -5640,10 +5640,6 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, case DRM_FORMAT_ARGB8888: case DRM_FORMAT_XRGB2101010: case DRM_FORMAT_XBGR2101010: - case DRM_FORMAT_XBGR16161616F: - case DRM_FORMAT_ABGR16161616F: - case DRM_FORMAT_XRGB16161616F: - case DRM_FORMAT_ARGB16161616F: case DRM_FORMAT_YUYV: case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: @@ -5659,6 +5655,13 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, case DRM_FORMAT_XVYU12_16161616: case DRM_FORMAT_XVYU16161616: break; + case DRM_FORMAT_XBGR16161616F: + case DRM_FORMAT_ABGR16161616F: + case DRM_FORMAT_XRGB16161616F: + case DRM_FORMAT_ARGB16161616F: + if (INTEL_GEN(dev_priv) >= 11) + break; + /* fall through */ default: DRM_DEBUG_KMS("[PLANE:%d:%s] FB:%d unsupported scaling format 0x%x\n", intel_plane->base.base.id, intel_plane->base.name, diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index 6b2eaf26700c..e8442299989b 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -1832,6 +1832,11 @@ static bool intel_fb_scalable(const struct drm_framebuffer *fb) switch (fb->format->format) { case DRM_FORMAT_C8: return false; + case DRM_FORMAT_XRGB16161616F: + case DRM_FORMAT_ARGB16161616F: + case DRM_FORMAT_XBGR16161616F: + case DRM_FORMAT_ABGR16161616F: + return INTEL_GEN(to_i915(fb->dev)) >= 11; default: return true; } @@ -2359,6 +2364,8 @@ static const u32 skl_plane_formats[] = { DRM_FORMAT_ABGR8888, DRM_FORMAT_XRGB2101010, DRM_FORMAT_XBGR2101010, + DRM_FORMAT_XRGB16161616F, + DRM_FORMAT_XBGR16161616F, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, DRM_FORMAT_UYVY, @@ -2374,6 +2381,8 @@ static const u32 skl_planar_formats[] = { DRM_FORMAT_ABGR8888, DRM_FORMAT_XRGB2101010, DRM_FORMAT_XBGR2101010, + DRM_FORMAT_XRGB16161616F, + DRM_FORMAT_XBGR16161616F, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, DRM_FORMAT_UYVY, @@ -2390,6 +2399,8 @@ static const u32 glk_planar_formats[] = { DRM_FORMAT_ABGR8888, DRM_FORMAT_XRGB2101010, DRM_FORMAT_XBGR2101010, + DRM_FORMAT_XRGB16161616F, + DRM_FORMAT_XBGR16161616F, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, DRM_FORMAT_UYVY, From 03b0ce9532ec5d2dba52e17970fae5a484bb5531 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 15 Oct 2019 22:30:30 +0300 Subject: [PATCH 051/154] drm/i915: Add support for half float framebuffers for gen4+ primary planes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gen4+ supports fp16 pixel formats on the primary planes. Add the relevant code. On ivb fp16 scanout is slightly busted. The output from the plane will have 1/4 the expected value. For the primary plane we would have to use the pipe gamma or pipe csc to correct that which would affect all the other planes as well, hence we simply choose not to expose fp16 on the ivb primary plane. On hsw the primary plane got fixed. On gmch platforms I observed that the plane width must be below 2k pixels with fp16 or else we get a corrupted image. This limitation does not seem to be documented in bspec. I verified the exact limit using the chv pipe B primary plane since it has windowing capability. The stride limits are unaffected by fp16. v2: Rebase on top of icl fp16 Split thea gen4+ primary plane bits into a separate patch Deal with HAS_GMCH() Reviewed-by: Maarten Lankhorst Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191015193035.25982-9-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 49 ++++++++++++++++++-- 1 file changed, 45 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index cd79d35de05e..6dc3c931edb1 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -88,7 +88,17 @@ static const u32 i8xx_primary_formats[] = { DRM_FORMAT_XRGB8888, }; -/* Primary plane formats for gen >= 4 */ +/* Primary plane formats for ivb (no fp16 due to hw issue) */ +static const u32 ivb_primary_formats[] = { + DRM_FORMAT_C8, + DRM_FORMAT_RGB565, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_XRGB2101010, + DRM_FORMAT_XBGR2101010, +}; + +/* Primary plane formats for gen >= 4, except ivb */ static const u32 i965_primary_formats[] = { DRM_FORMAT_C8, DRM_FORMAT_RGB565, @@ -96,6 +106,7 @@ static const u32 i965_primary_formats[] = { DRM_FORMAT_XBGR8888, DRM_FORMAT_XRGB2101010, DRM_FORMAT_XBGR2101010, + DRM_FORMAT_XBGR16161616F, }; static const u64 i9xx_format_modifiers[] = { @@ -2971,6 +2982,8 @@ static int i9xx_format_to_fourcc(int format) return DRM_FORMAT_XRGB2101010; case DISPPLANE_RGBX101010: return DRM_FORMAT_XBGR2101010; + case DISPPLANE_RGBX161616: + return DRM_FORMAT_XBGR16161616F; } } @@ -3707,6 +3720,9 @@ static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state, case DRM_FORMAT_XBGR2101010: dspcntr |= DISPPLANE_RGBX101010; break; + case DRM_FORMAT_XBGR16161616F: + dspcntr |= DISPPLANE_RGBX161616; + break; default: MISSING_CASE(fb->format->format); return 0; @@ -3729,7 +3745,8 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev); - int src_x, src_y; + const struct drm_framebuffer *fb = plane_state->base.fb; + int src_x, src_y, src_w; u32 offset; int ret; @@ -3740,9 +3757,14 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state) if (!plane_state->base.visible) return 0; + src_w = drm_rect_width(&plane_state->base.src) >> 16; src_x = plane_state->base.src.x1 >> 16; src_y = plane_state->base.src.y1 >> 16; + /* Undocumented hardware limit on i965/g4x/vlv/chv */ + if (HAS_GMCH(dev_priv) && fb->format->cpp[0] == 8 && src_w > 2048) + return -EINVAL; + intel_add_fb_offsets(&src_x, &src_y, plane_state, 0); if (INTEL_GEN(dev_priv) >= 4) @@ -15202,6 +15224,7 @@ static bool i965_plane_format_mod_supported(struct drm_plane *_plane, case DRM_FORMAT_XBGR8888: case DRM_FORMAT_XRGB2101010: case DRM_FORMAT_XBGR2101010: + case DRM_FORMAT_XBGR16161616F: return modifier == DRM_FORMAT_MOD_LINEAR || modifier == I915_FORMAT_MOD_X_TILED; default: @@ -15422,8 +15445,26 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) } if (INTEL_GEN(dev_priv) >= 4) { - formats = i965_primary_formats; - num_formats = ARRAY_SIZE(i965_primary_formats); + /* + * WaFP16GammaEnabling:ivb + * "Workaround : When using the 64-bit format, the plane + * output on each color channel has one quarter amplitude. + * It can be brought up to full amplitude by using pipe + * gamma correction or pipe color space conversion to + * multiply the plane output by four." + * + * There is no dedicated plane gamma for the primary plane, + * and using the pipe gamma/csc could conflict with other + * planes, so we choose not to expose fp16 on IVB primary + * planes. HSW primary planes no longer have this problem. + */ + if (IS_IVYBRIDGE(dev_priv)) { + formats = ivb_primary_formats; + num_formats = ARRAY_SIZE(ivb_primary_formats); + } else { + formats = i965_primary_formats; + num_formats = ARRAY_SIZE(i965_primary_formats); + } modifiers = i9xx_format_modifiers; plane->max_stride = i9xx_plane_max_stride; From 762dff2e6f434b8f485a29b3731e490c8dfea31b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 15 Oct 2019 22:30:31 +0300 Subject: [PATCH 052/154] drm/i915: Add support for half float framebuffers for ivb+ sprites MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ivb+ supports fp16 pixel formats on the sprite planes planes. Expose that capability. On ivb/hsw fp16 scanout is slightly busted. The output from the plane will have 1/4 the expected value. For the sprite plane we can fix that up with the plane gamma unit. This was fixed on bdw. v2: Rebase on top of icl fp16 Split the ivb+ sprite birs into a separate patch v3: Move ivb_need_sprite_gamma() check one level up so that we don't waste time programming garbage into he gamma registers Reviewed-by: Maarten Lankhorst Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191015193035.25982-10-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_sprite.c | 48 ++++++++++++++++++--- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index e8442299989b..db1e2dce6636 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -1316,6 +1316,16 @@ static u32 ivb_sprite_ctl_crtc(const struct intel_crtc_state *crtc_state) return sprctl; } +static bool ivb_need_sprite_gamma(const struct intel_plane_state *plane_state) +{ + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); + const struct drm_framebuffer *fb = plane_state->base.fb; + + return fb->format->cpp[0] == 8 && + (IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)); +} + static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -1338,6 +1348,12 @@ static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, case DRM_FORMAT_XRGB8888: sprctl |= SPRITE_FORMAT_RGBX888; break; + case DRM_FORMAT_XBGR16161616F: + sprctl |= SPRITE_FORMAT_RGBX161616 | SPRITE_RGB_ORDER_RGBX; + break; + case DRM_FORMAT_XRGB16161616F: + sprctl |= SPRITE_FORMAT_RGBX161616; + break; case DRM_FORMAT_YUYV: sprctl |= SPRITE_FORMAT_YUV422 | SPRITE_YUV_ORDER_YUYV; break; @@ -1355,7 +1371,8 @@ static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, return 0; } - sprctl |= SPRITE_INT_GAMMA_DISABLE; + if (!ivb_need_sprite_gamma(plane_state)) + sprctl |= SPRITE_INT_GAMMA_DISABLE; if (plane_state->base.color_encoding == DRM_COLOR_YCBCR_BT709) sprctl |= SPRITE_YUV_TO_RGB_CSC_FORMAT_BT709; @@ -1377,12 +1394,26 @@ static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, return sprctl; } -static void ivb_sprite_linear_gamma(u16 gamma[18]) +static void ivb_sprite_linear_gamma(const struct intel_plane_state *plane_state, + u16 gamma[18]) { - int i; + int scale, i; - for (i = 0; i < 17; i++) - gamma[i] = (i << 10) / 16; + /* + * WaFP16GammaEnabling:ivb,hsw + * "Workaround : When using the 64-bit format, the sprite output + * on each color channel has one quarter amplitude. It can be + * brought up to full amplitude by using sprite internal gamma + * correction, pipe gamma correction, or pipe color space + * conversion to multiply the sprite output by four." + */ + scale = 4; + + for (i = 0; i < 16; i++) + gamma[i] = min((scale * i << 10) / 16, (1 << 10) - 1); + + gamma[i] = min((scale * i << 10) / 16, 1 << 10); + i++; gamma[i] = 3 << 10; i++; @@ -1396,7 +1427,10 @@ static void ivb_update_gamma(const struct intel_plane_state *plane_state) u16 gamma[18]; int i; - ivb_sprite_linear_gamma(gamma); + if (!ivb_need_sprite_gamma(plane_state)) + return; + + ivb_sprite_linear_gamma(plane_state, gamma); /* FIXME these register are single buffered :( */ for (i = 0; i < 16; i++) @@ -2551,6 +2585,8 @@ static bool snb_sprite_format_mod_supported(struct drm_plane *_plane, switch (format) { case DRM_FORMAT_XRGB8888: case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_XRGB16161616F: + case DRM_FORMAT_XBGR16161616F: case DRM_FORMAT_YUYV: case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: From 90a764cda1c809b0fe3f2ffe133839421c60faf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 15 Oct 2019 22:30:32 +0300 Subject: [PATCH 053/154] drm/i915: Add support for half float framebuffers on snb sprites MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit snb supports fp16 pixel formats on the sprite planes. Expose that capability. Nothing special needs to be done, it just works. v2: Rebase on top of icl fp16 Split snb+ sprite bits into a separate patch Reviewed-by: Maarten Lankhorst Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191015193035.25982-11-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_sprite.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index db1e2dce6636..edc41fc40726 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -1653,6 +1653,12 @@ static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state, case DRM_FORMAT_XRGB8888: dvscntr |= DVS_FORMAT_RGBX888; break; + case DRM_FORMAT_XBGR16161616F: + dvscntr |= DVS_FORMAT_RGBX161616 | DVS_RGB_ORDER_XBGR; + break; + case DRM_FORMAT_XRGB16161616F: + dvscntr |= DVS_FORMAT_RGBX161616; + break; case DRM_FORMAT_YUYV: dvscntr |= DVS_FORMAT_YUV422 | DVS_YUV_ORDER_YUYV; break; @@ -2367,8 +2373,10 @@ static const u64 i9xx_plane_format_modifiers[] = { }; static const u32 snb_plane_formats[] = { - DRM_FORMAT_XBGR8888, DRM_FORMAT_XRGB8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_XRGB16161616F, + DRM_FORMAT_XBGR16161616F, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, DRM_FORMAT_UYVY, From 5063f48bbbd87d3f309ed7c447609307e5b66731 Mon Sep 17 00:00:00 2001 From: Ap Kamal Date: Wed, 23 Oct 2019 13:55:28 +0530 Subject: [PATCH 054/154] drm/i915: Making loglevel of PSR2/SU logs same. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'Link CRC error' will now have same error level as other PSR2 errors like 'RFB storage error' and 'VSC SDP uncorrectable error'. Signed-off-by: Ap Kamal Reviewed-by: José Roberto de Souza Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/1571819128-3264-1-git-send-email-kamal.ap@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index dfbedff98ea8..5d3cdd7d4a8d 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1437,7 +1437,7 @@ void intel_psr_short_pulse(struct intel_dp *intel_dp) if (val & DP_PSR_VSC_SDP_UNCORRECTABLE_ERROR) DRM_DEBUG_KMS("PSR VSC SDP uncorrectable error, disabling PSR\n"); if (val & DP_PSR_LINK_CRC_ERROR) - DRM_ERROR("PSR Link CRC error, disabling PSR\n"); + DRM_DEBUG_KMS("PSR Link CRC error, disabling PSR\n"); if (val & ~errors) DRM_ERROR("PSR_ERROR_STATUS unhandled errors %x\n", From 6b441c628e178629bbd38ddd8b97dfeff93c2bb6 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Thu, 24 Oct 2019 14:03:31 +0300 Subject: [PATCH 055/154] drm/i915: Remove nonpriv flags when srm/lrm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On testing the whitelists, using any of the nonpriv flags when trying to access the register offset will lead to failure. Define address mask to get the mmio offset in order to guard against any current and future flag usage. v2: apply also on scrub_whitelisted_registers (Lionel) Cc: Tapani Pälli Cc: Chris Wilson Cc: Lionel Landwerlin Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Reviewed-by: Lionel Landwerlin Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191024110331.8935-1-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/gt/selftest_workarounds.c | 10 ++++++++-- drivers/gpu/drm/i915/i915_reg.h | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index ef02920cec29..abce6e4ec9c0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -513,6 +513,9 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, ro_reg = ro_register(reg); + /* Clear non priv flags */ + reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK; + srm = MI_STORE_REGISTER_MEM; lrm = MI_LOAD_REGISTER_MEM; if (INTEL_GEN(ctx->i915) >= 8) @@ -810,8 +813,8 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx, u64 offset = results->node.start + sizeof(u32) * i; u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg); - /* Clear access permission field */ - reg &= ~RING_FORCE_TO_NONPRIV_ACCESS_MASK; + /* Clear non priv flags */ + reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK; *cs++ = srm; *cs++ = reg; @@ -849,6 +852,9 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx, if (ro_register(reg)) continue; + /* Clear non priv flags */ + reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK; + *cs++ = reg; *cs++ = 0xffffffff; } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 855db888516c..3ba503b5e0d9 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2490,6 +2490,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN8_RING_CS_GPR_UDW(base, n) _MMIO((base) + 0x600 + (n) * 8 + 4) #define RING_FORCE_TO_NONPRIV(base, i) _MMIO(((base) + 0x4D0) + (i) * 4) +#define RING_FORCE_TO_NONPRIV_ADDRESS_MASK REG_GENMASK(25, 2) #define RING_FORCE_TO_NONPRIV_ACCESS_RW (0 << 28) /* CFL+ & Gen11+ */ #define RING_FORCE_TO_NONPRIV_ACCESS_RD (1 << 28) #define RING_FORCE_TO_NONPRIV_ACCESS_WR (2 << 28) From 772d1dea1f6cbff55963e04f52341b3cc5e710c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tapani=20P=C3=A4lli?= Date: Thu, 24 Oct 2019 13:38:58 +0300 Subject: [PATCH 056/154] drm/i915/tgl: whitelist PS_(DEPTH|INVOCATION)_COUNT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As with commit 3fe0107e45ab, this change fixes multiple tests that are using the invocation counts. Documentation doesn't list the workaround for TGL but applying it fixes the tests. Signed-off-by: Tapani Pälli Acked-by: Chris Wilson Reviewed-by: Lionel Landwerlin Reviewed-by: Mika Kuoppala Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191024103858.28113-2-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 7cb6dab4399d..e4bccc14602f 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1216,6 +1216,26 @@ static void icl_whitelist_build(struct intel_engine_cs *engine) static void tgl_whitelist_build(struct intel_engine_cs *engine) { + struct i915_wa_list *w = &engine->whitelist; + + switch (engine->class) { + case RENDER_CLASS: + /* + * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl + * + * This covers 4 registers which are next to one another : + * - PS_INVOCATION_COUNT + * - PS_INVOCATION_COUNT_UDW + * - PS_DEPTH_COUNT + * - PS_DEPTH_COUNT_UDW + */ + whitelist_reg_ext(w, PS_INVOCATION_COUNT, + RING_FORCE_TO_NONPRIV_ACCESS_RD | + RING_FORCE_TO_NONPRIV_RANGE_4); + break; + default: + break; + } } void intel_engine_init_whitelist(struct intel_engine_cs *engine) From d506a65d56fd5287cf9219de7f3af5004b38fc6f Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 8 Oct 2019 14:17:16 -0700 Subject: [PATCH 057/154] drm/i915: Catch GTT fault errors for gen11+ planes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gen11+ has more hardware planes than gen9 so we need to test additional pipe interrupt register bits to recognize any GTT faults that happen on these extra planes. Bspec: 50335 Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20191008211716.8391-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 4 +++- drivers/gpu/drm/i915/i915_reg.h | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 572a5c37cc61..a048c79a6a55 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2597,7 +2597,9 @@ static u32 gen8_de_port_aux_mask(struct drm_i915_private *dev_priv) static u32 gen8_de_pipe_fault_mask(struct drm_i915_private *dev_priv) { - if (INTEL_GEN(dev_priv) >= 9) + if (INTEL_GEN(dev_priv) >= 11) + return GEN11_DE_PIPE_IRQ_FAULT_ERRORS; + else if (INTEL_GEN(dev_priv) >= 9) return GEN9_DE_PIPE_IRQ_FAULT_ERRORS; else return GEN8_DE_PIPE_IRQ_FAULT_ERRORS; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3ba503b5e0d9..fd3d2de59101 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7391,6 +7391,9 @@ enum { #define GEN8_PIPE_VSYNC (1 << 1) #define GEN8_PIPE_VBLANK (1 << 0) #define GEN9_PIPE_CURSOR_FAULT (1 << 11) +#define GEN11_PIPE_PLANE7_FAULT (1 << 22) +#define GEN11_PIPE_PLANE6_FAULT (1 << 21) +#define GEN11_PIPE_PLANE5_FAULT (1 << 20) #define GEN9_PIPE_PLANE4_FAULT (1 << 10) #define GEN9_PIPE_PLANE3_FAULT (1 << 9) #define GEN9_PIPE_PLANE2_FAULT (1 << 8) @@ -7410,6 +7413,11 @@ enum { GEN9_PIPE_PLANE3_FAULT | \ GEN9_PIPE_PLANE2_FAULT | \ GEN9_PIPE_PLANE1_FAULT) +#define GEN11_DE_PIPE_IRQ_FAULT_ERRORS \ + (GEN9_DE_PIPE_IRQ_FAULT_ERRORS | \ + GEN11_PIPE_PLANE7_FAULT | \ + GEN11_PIPE_PLANE6_FAULT | \ + GEN11_PIPE_PLANE5_FAULT) #define GEN8_DE_PORT_ISR _MMIO(0x44440) #define GEN8_DE_PORT_IMR _MMIO(0x44444) From b7412c6b2203da6187569de600a3d2e8630f9caf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Wed, 23 Oct 2019 14:49:32 -0700 Subject: [PATCH 058/154] drm/i915/display/psr: Print in debugfs if PSR is not enabled because of sink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Right now if sink reported any PSR error or if it fails to acknowledge the PSR wakeup it sets a flag and do not attempt to enable PSR anymore. That is the safest approach to avoid repetitive glitches and allowed us to have PSR enabled by default. But from time to time even good PSR panels have a PSR error, causing tests to fail. And for now we are not yet to the point were we could try to recover from PSR errors, so lets add this information to the debugfs so IGT can check if PSR is disabled because of sink errors or not and eliminate this noise from CI runs. Cc: Dhinakaran Pandiyan Cc: Rodrigo Vivi Cc: Matt Roper Cc: Ap Kamal Signed-off-by: José Roberto de Souza Reviewed-by: Ramalingam C Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20191023214932.94679-1-jose.souza@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index b3f78e6495fd..4b6ce07c35d2 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2122,8 +2122,12 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) status = "disabled"; seq_printf(m, "PSR mode: %s\n", status); - if (!psr->enabled) + if (!psr->enabled) { + seq_printf(m, "PSR sink not reliable: %s\n", + yesno(psr->sink_not_reliable)); + goto unlock; + } if (psr->psr2_enabled) { val = I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder)); From c35eb477c0cfc1ffd704a743f7a7daa52ea16415 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 25 Oct 2019 10:27:49 +0100 Subject: [PATCH 059/154] drm/i915/selftests: Tweak the default subtest runtime BAT is growing a little fat and CI is under pressure and needs to trim off some redundant runtime. An easy option is to reduce the selftest runtimes, so try halving our default subtest timeout. While this reduces the number of iterations used, for the majority of tests that are passing, repeat runs (with different CI_DRM) will make up the difference -- a negative consequence though is that we may reduce the frequency of sporadic failures. Hopefully, we have no tests that were crucially dependent on the previous 1s timeout... Suggested-by: Tomi Sarvela Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191025092749.13468-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_selftest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index 92c9193cdc85..a6cca4ad96f6 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -30,7 +30,7 @@ #include "igt_flush_test.h" struct i915_selftest i915_selftest __read_mostly = { - .timeout_ms = 1000, + .timeout_ms = 500, }; int i915_mock_sanitycheck(void) From 8c6388028d460b4a2997039f995bf826c275d888 Mon Sep 17 00:00:00 2001 From: Anna Karas Date: Tue, 22 Oct 2019 13:09:06 +0300 Subject: [PATCH 060/154] doc: Update header files names Update header files containing i915_perf_stream, i915_perf_stream_ops and i915_oa_ops definitions since they have been moved from i915_drv.h to i915_perf_types.h. Cc: Robert Bragg Cc: Lionel Landwerlin Signed-off-by: Anna Karas Reviewed-by: Lionel Landwerlin Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191022100906.16597-1-anna.karas@intel.com --- Documentation/gpu/i915.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 60bd6e6403da..d0947c5c4ab8 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -550,9 +550,9 @@ i915 Perf Stream This section covers the stream-semantics-agnostic structures and functions for representing an i915 perf stream FD and associated file operations. -.. kernel-doc:: drivers/gpu/drm/i915/i915_drv.h +.. kernel-doc:: drivers/gpu/drm/i915/i915_perf_types.h :functions: i915_perf_stream -.. kernel-doc:: drivers/gpu/drm/i915/i915_drv.h +.. kernel-doc:: drivers/gpu/drm/i915/i915_perf_types.h :functions: i915_perf_stream_ops .. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c @@ -577,7 +577,7 @@ for representing an i915 perf stream FD and associated file operations. i915 Perf Observation Architecture Stream ----------------------------------------- -.. kernel-doc:: drivers/gpu/drm/i915/i915_drv.h +.. kernel-doc:: drivers/gpu/drm/i915/i915_perf_types.h :functions: i915_oa_ops .. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c From 900554dc6bfc996ad07b9e187bbfd3864cd5bed0 Mon Sep 17 00:00:00 2001 From: Anna Karas Date: Tue, 8 Oct 2019 12:28:49 +0300 Subject: [PATCH 061/154] drm/i915: Describe structure member in documentation Add description of wakeref member of intel_shared_dpll structure to documentation. Cc: Lucas De Marchi Cc: Vivek Kasireddy Signed-off-by: Anna Karas Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191008092849.6511-1-anna.karas@intel.com --- drivers/gpu/drm/i915/display/intel_dpll_mgr.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h index e7588799fce5..8ee97c17af67 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h @@ -337,6 +337,11 @@ struct intel_shared_dpll { * @info: platform specific info */ const struct dpll_info *info; + + /** + * @wakeref: In some platforms a device-level runtime pm reference may + * need to be grabbed to disable DC states while this DPLL is enabled + */ intel_wakeref_t wakeref; }; From d328bd4f905834c7d87a49962ebc96e397aab7b9 Mon Sep 17 00:00:00 2001 From: Anna Karas Date: Thu, 26 Sep 2019 15:35:59 +0300 Subject: [PATCH 062/154] drm/i915/tgl: Fix doc not corresponding to code Replace PLLs names used in documentation to that used in the code. Cc: Vandita Kulkarni Fixes: 68ff39c3f8c0 ("drm/i915/tgl: Add new pll ids") Signed-off-by: Anna Karas Reviewed-by: Vandita Kulkarni Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190926123559.15717-1-anna.karas@intel.com --- drivers/gpu/drm/i915/display/intel_dpll_mgr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h index 8ee97c17af67..2a104c64291d 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h @@ -147,11 +147,11 @@ enum intel_dpll_id { */ DPLL_ID_ICL_MGPLL4 = 6, /** - * @DPLL_ID_TGL_TCPLL5: TGL TC PLL port 5 (TC5) + * @DPLL_ID_TGL_MGPLL5: TGL TC PLL port 5 (TC5) */ DPLL_ID_TGL_MGPLL5 = 7, /** - * @DPLL_ID_TGL_TCPLL6: TGL TC PLL port 6 (TC6) + * @DPLL_ID_TGL_MGPLL6: TGL TC PLL port 6 (TC6) */ DPLL_ID_TGL_MGPLL6 = 8, }; From 5932925ac1f3f820368bcf91eee9cf52794aa827 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 25 Oct 2019 10:09:52 +0100 Subject: [PATCH 063/154] drm/i915: Move intel_engine_context_in/out into intel_lrc.c Intel_lrc.c is the only caller and so to avoid some header file ordering issues in future patches move these two over there. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191025090952.10135-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_engine.h | 55 -------------------------- drivers/gpu/drm/i915/gt/intel_lrc.c | 55 ++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 97bbdd9773c9..c6895938b626 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -290,61 +290,6 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...); -static inline void intel_engine_context_in(struct intel_engine_cs *engine) -{ - unsigned long flags; - - if (READ_ONCE(engine->stats.enabled) == 0) - return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - - if (engine->stats.enabled > 0) { - if (engine->stats.active++ == 0) - engine->stats.start = ktime_get(); - GEM_BUG_ON(engine->stats.active == 0); - } - - write_sequnlock_irqrestore(&engine->stats.lock, flags); -} - -static inline void intel_engine_context_out(struct intel_engine_cs *engine) -{ - unsigned long flags; - - if (READ_ONCE(engine->stats.enabled) == 0) - return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - - if (engine->stats.enabled > 0) { - ktime_t last; - - if (engine->stats.active && --engine->stats.active == 0) { - /* - * Decrement the active context count and in case GPU - * is now idle add up to the running total. - */ - last = ktime_sub(ktime_get(), engine->stats.start); - - engine->stats.total = ktime_add(engine->stats.total, - last); - } else if (engine->stats.active == 0) { - /* - * After turning on engine stats, context out might be - * the first event in which case we account from the - * time stats gathering was turned on. - */ - last = ktime_sub(ktime_get(), engine->stats.enabled_at); - - engine->stats.total = ktime_add(engine->stats.total, - last); - } - } - - write_sequnlock_irqrestore(&engine->stats.lock, flags); -} - int intel_enable_engine_stats(struct intel_engine_cs *engine); void intel_disable_engine_stats(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 73eae85a2cc9..523de1fd4452 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -944,6 +944,61 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status) status, rq); } +static void intel_engine_context_in(struct intel_engine_cs *engine) +{ + unsigned long flags; + + if (READ_ONCE(engine->stats.enabled) == 0) + return; + + write_seqlock_irqsave(&engine->stats.lock, flags); + + if (engine->stats.enabled > 0) { + if (engine->stats.active++ == 0) + engine->stats.start = ktime_get(); + GEM_BUG_ON(engine->stats.active == 0); + } + + write_sequnlock_irqrestore(&engine->stats.lock, flags); +} + +static void intel_engine_context_out(struct intel_engine_cs *engine) +{ + unsigned long flags; + + if (READ_ONCE(engine->stats.enabled) == 0) + return; + + write_seqlock_irqsave(&engine->stats.lock, flags); + + if (engine->stats.enabled > 0) { + ktime_t last; + + if (engine->stats.active && --engine->stats.active == 0) { + /* + * Decrement the active context count and in case GPU + * is now idle add up to the running total. + */ + last = ktime_sub(ktime_get(), engine->stats.start); + + engine->stats.total = ktime_add(engine->stats.total, + last); + } else if (engine->stats.active == 0) { + /* + * After turning on engine stats, context out might be + * the first event in which case we account from the + * time stats gathering was turned on. + */ + last = ktime_sub(ktime_get(), engine->stats.enabled_at); + + engine->stats.total = ktime_add(engine->stats.total, + last); + } + } + + write_sequnlock_irqrestore(&engine->stats.lock, flags); +} + static inline struct intel_engine_cs * __execlists_schedule_in(struct i915_request *rq) { From 2728200f48d315748b4fb2b2525e1ac211e97ecf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 16 Oct 2019 23:57:30 +0100 Subject: [PATCH 064/154] drm/i915/selftests: Force ordering of context switches The parallel switch test has an underlying assumption that its requests are executed in order of submission, which is only true if the backend manages to keep up. Ensure the order of execution matches the submission order by explicit dependencies and so when we wait on the last request, we know we wait on completion of the entire queue. Signed-off-by: Chris Wilson Acked-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20191016225730.29447-1-chris@chris-wilson.co.uk --- .../drm/i915/gem/selftests/i915_gem_context.c | 35 +++++++++++++++---- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 8f72f173db03..2c65c9e95cc5 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -169,18 +169,24 @@ static int __live_parallel_switch1(void *data) struct i915_request *rq = NULL; int err, n; - for (n = 0; n < ARRAY_SIZE(arg->ce); n++) { - i915_request_put(rq); + err = 0; + for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) { + struct i915_request *prev = rq; rq = i915_request_create(arg->ce[n]); - if (IS_ERR(rq)) + if (IS_ERR(rq)) { + i915_request_put(prev); return PTR_ERR(rq); + } i915_request_get(rq); + if (prev) { + err = i915_request_await_dma_fence(rq, &prev->fence); + i915_request_put(prev); + } + i915_request_add(rq); } - - err = 0; if (i915_request_wait(rq, 0, HZ / 5) < 0) err = -ETIME; i915_request_put(rq); @@ -197,6 +203,7 @@ static int __live_parallel_switch1(void *data) static int __live_parallel_switchN(void *data) { struct parallel_switch *arg = data; + struct i915_request *rq = NULL; IGT_TIMEOUT(end_time); unsigned long count; int n; @@ -204,17 +211,31 @@ static int __live_parallel_switchN(void *data) count = 0; do { for (n = 0; n < ARRAY_SIZE(arg->ce); n++) { - struct i915_request *rq; + struct i915_request *prev = rq; + int err = 0; rq = i915_request_create(arg->ce[n]); - if (IS_ERR(rq)) + if (IS_ERR(rq)) { + i915_request_put(prev); return PTR_ERR(rq); + } + + i915_request_get(rq); + if (prev) { + err = i915_request_await_dma_fence(rq, &prev->fence); + i915_request_put(prev); + } i915_request_add(rq); + if (err) { + i915_request_put(rq); + return err; + } } count++; } while (!__igt_timeout(end_time, NULL)); + i915_request_put(rq); pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count); return 0; From dd5279c71405533d4ddbb9453effc60f0f5bf211 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 22 Oct 2019 21:56:43 +0300 Subject: [PATCH 065/154] drm/i915: Fix PCH reference clock for FDI on HSW/BDW MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The change to skip the PCH reference initialization during fastboot did end up breaking FDI. To fix that let's try to do the PCH reference init whenever we're disabling a DPLL that was using said reference previously. Cc: stable@vger.kernel.org Tested-by: Andrija Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=112084 Fixes: b16c7ed95caf ("drm/i915: Do not touch the PCH SSC reference if a PLL is using it") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191022185643.1483-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/display/intel_display.c | 11 ++++++----- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 15 +++++++++++++++ drivers/gpu/drm/i915/i915_drv.h | 2 ++ 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 6dc3c931edb1..cbf9cf30050c 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -9432,7 +9432,6 @@ static bool wrpll_uses_pch_ssc(struct drm_i915_private *dev_priv, static void lpt_init_pch_refclk(struct drm_i915_private *dev_priv) { struct intel_encoder *encoder; - bool pch_ssc_in_use = false; bool has_fdi = false; for_each_intel_encoder(&dev_priv->drm, encoder) { @@ -9460,22 +9459,24 @@ static void lpt_init_pch_refclk(struct drm_i915_private *dev_priv) * clock hierarchy. That would also allow us to do * clock bending finally. */ + dev_priv->pch_ssc_use = 0; + if (spll_uses_pch_ssc(dev_priv)) { DRM_DEBUG_KMS("SPLL using PCH SSC\n"); - pch_ssc_in_use = true; + dev_priv->pch_ssc_use |= BIT(DPLL_ID_SPLL); } if (wrpll_uses_pch_ssc(dev_priv, DPLL_ID_WRPLL1)) { DRM_DEBUG_KMS("WRPLL1 using PCH SSC\n"); - pch_ssc_in_use = true; + dev_priv->pch_ssc_use |= BIT(DPLL_ID_WRPLL1); } if (wrpll_uses_pch_ssc(dev_priv, DPLL_ID_WRPLL2)) { DRM_DEBUG_KMS("WRPLL2 using PCH SSC\n"); - pch_ssc_in_use = true; + dev_priv->pch_ssc_use |= BIT(DPLL_ID_WRPLL2); } - if (pch_ssc_in_use) + if (dev_priv->pch_ssc_use) return; if (has_fdi) { diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index ec10fa7d3c69..3ce0a023eee0 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -526,16 +526,31 @@ static void hsw_ddi_wrpll_disable(struct drm_i915_private *dev_priv, val = I915_READ(WRPLL_CTL(id)); I915_WRITE(WRPLL_CTL(id), val & ~WRPLL_PLL_ENABLE); POSTING_READ(WRPLL_CTL(id)); + + /* + * Try to set up the PCH reference clock once all DPLLs + * that depend on it have been shut down. + */ + if (dev_priv->pch_ssc_use & BIT(id)) + intel_init_pch_refclk(dev_priv); } static void hsw_ddi_spll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { + enum intel_dpll_id id = pll->info->id; u32 val; val = I915_READ(SPLL_CTL); I915_WRITE(SPLL_CTL, val & ~SPLL_PLL_ENABLE); POSTING_READ(SPLL_CTL); + + /* + * Try to set up the PCH reference clock once all DPLLs + * that depend on it have been shut down. + */ + if (dev_priv->pch_ssc_use & BIT(id)) + intel_init_pch_refclk(dev_priv); } static bool hsw_ddi_wrpll_get_hw_state(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 16e58a74fa6f..8622b4567aa5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1353,6 +1353,8 @@ struct drm_i915_private { } contexts; } gem; + u8 pch_ssc_use; + /* For i915gm/i945gm vblank irq workaround */ u8 vblank_enabled; From ba1d18e386d991356f59bbb417b0c642abf671fa Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 25 Oct 2019 15:17:18 +0300 Subject: [PATCH 066/154] drm/i915: capture aux page table error register TGL introduced a feature in which we map the main surface to the auxiliary surface. If we screw up the page tables, the HW has a register to tell us which engine encounters a fault in the page table walk. Signed-off-by: Lionel Landwerlin Acked-by: Chris Wilson [ickle: Be brave and apply to gen12] Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191025121718.18806-1-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/i915_gpu_error.c | 6 ++++++ drivers/gpu/drm/i915/i915_gpu_error.h | 1 + drivers/gpu/drm/i915/i915_reg.h | 2 ++ 3 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 47239df653f2..9bcdcebd2948 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -734,6 +734,9 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, if (IS_GEN_RANGE(m->i915, 8, 11)) err_printf(m, "GTT_CACHE_EN: 0x%08x\n", error->gtt_cache); + if (IS_GEN(m->i915, 12)) + err_printf(m, "AUX_ERR_DBG: 0x%08x\n", error->aux_err); + for (ee = error->engine; ee; ee = ee->next) error_print_engine(m, ee, error->capture); @@ -1554,6 +1557,9 @@ static void capture_reg_state(struct i915_gpu_state *error) if (IS_GEN_RANGE(i915, 8, 11)) error->gtt_cache = intel_uncore_read(uncore, HSW_GTT_CACHE_EN); + if (IS_GEN(i915, 12)) + error->aux_err = intel_uncore_read(uncore, GEN12_AUX_ERR_DBG); + /* 4: Everything else */ if (INTEL_GEN(i915) >= 11) { error->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 4dc36d6ee3a2..c7f36be2a38e 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -74,6 +74,7 @@ struct i915_gpu_state { u32 gab_ctl; u32 gfx_mode; u32 gtt_cache; + u32 aux_err; /* gen12 */ u32 nfence; u64 fence[I915_MAX_NUM_FENCES]; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index fd3d2de59101..746326784a4d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2603,6 +2603,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define FAULT_VA_HIGH_BITS (0xf << 0) #define FAULT_GTT_SEL (1 << 4) +#define GEN12_AUX_ERR_DBG _MMIO(0x43f4) + #define FPGA_DBG _MMIO(0x42300) #define FPGA_DBG_RM_NOCLAIM (1 << 31) From dc90fe3fd219c7693617ba09a9467e4aadc2e039 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Thu, 24 Oct 2019 12:51:19 -0700 Subject: [PATCH 067/154] drm/i915: Add is_dgfx to device info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will be helpful to diferentiate a set of GPUs with the same GEN version. Signed-off-by: José Roberto de Souza Signed-off-by: Lucas De Marchi Reviewed-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20191024195122.22877-1-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_device_info.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8622b4567aa5..84dc3703c116 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1551,6 +1551,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, } #define IS_MOBILE(dev_priv) (INTEL_INFO(dev_priv)->is_mobile) +#define IS_DGFX(dev_priv) (INTEL_INFO(dev_priv)->is_dgfx) #define IS_I830(dev_priv) IS_PLATFORM(dev_priv, INTEL_I830) #define IS_I845G(dev_priv) IS_PLATFORM(dev_priv, INTEL_I845G) diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index e9940f932d26..78a383f63957 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -107,6 +107,7 @@ enum intel_ppgtt_type { func(is_mobile); \ func(is_lp); \ func(require_force_probe); \ + func(is_dgfx); \ /* Keep has_* in alphabetical order */ \ func(has_64bit_reloc); \ func(gpu_reset_clobbers_display); \ From d8203d398c0dca1b17922d096938b96c711f0367 Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Thu, 24 Oct 2019 12:51:20 -0700 Subject: [PATCH 068/154] drm/i915: add new gen12 dgfx platform macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new macro for GEN12 platforms to be grouped under dgfx feature set. Signed-off-by: Stuart Summers Signed-off-by: Lucas De Marchi Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20191024195122.22877-2-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index f9a3bfe68689..04307e111f57 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -822,6 +822,10 @@ static const struct intel_device_info intel_tigerlake_12_info = { .has_rps = false, /* XXX disabled for debugging */ }; +#define GEN12_DGFX_FEATURES \ + GEN12_FEATURES, \ + .is_dgfx = 1 + #undef GEN #undef PLATFORM From e6e2ac07118b15f25683fcbd59ea1be73ec9465d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 24 Oct 2019 12:51:21 -0700 Subject: [PATCH 069/154] drm/i915: do not set MOCS control values on dgfx On dgfx there's no LLC and eDRAM control table. Since now this also means the device has global MOCS, just return early on the initialization function. L3 settings still apply and still need to be tweaked. Bspec: 45101 Cc: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20191024195122.22877-3-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/intel_mocs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 932833e5b712..6e881c735b20 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -462,6 +462,12 @@ static void intel_mocs_init_global(struct intel_gt *gt) struct drm_i915_mocs_table table; unsigned int index; + /* + * LLC and eDRAM control values are not applicable to dgfx + */ + if (IS_DGFX(gt->i915)) + return; + GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915)); if (!get_mocs_settings(gt->i915, &table)) From 7be8782a502fdb60e1f99c82a8d6b3015a2a6bc4 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 24 Oct 2019 12:51:22 -0700 Subject: [PATCH 070/154] drm/i915: split gen11_irq_handler to make it shareable Split gen11_irq_handler() to receive as parameter the function pointers. This allows to share the interrupt handler even if the enable/disable functions are different. Make sure it's always inlined to avoid the extra indirect call on the hot path. Checking with gcc 9 this produce the exact same code as of now: $ size drivers/gpu/drm/i915/i915_irq*.o text data bss dec hex filename 47511 560 0 48071 bbc7 drivers/gpu/drm/i915/i915_irq.o 47511 560 0 48071 bbc7 drivers/gpu/drm/i915/i915_irq_new.o $ gdb -batch -ex 'file drivers/gpu/drm/i915/i915_irq.o' -ex 'disassemble gen11_irq_handler' > /tmp/old.s $ gdb -batch -ex 'file drivers/gpu/drm/i915/i915_irq_new.o' -ex 'disassemble gen11_irq_handler' > /tmp/new.s $ git diff --no-index /tmp/{old,new}.s $ So, no change in behavior, just a simple refactor. Cc: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191024195122.22877-4-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index a048c79a6a55..33020c8ca5f6 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2861,9 +2861,11 @@ static inline void gen11_master_intr_enable(void __iomem * const regs) raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, GEN11_MASTER_IRQ); } -static irqreturn_t gen11_irq_handler(int irq, void *arg) +static __always_inline irqreturn_t +__gen11_irq_handler(struct drm_i915_private * const i915, + u32 (*intr_disable)(void __iomem * const regs), + void (*intr_enable)(void __iomem * const regs)) { - struct drm_i915_private * const i915 = arg; void __iomem * const regs = i915->uncore.regs; struct intel_gt *gt = &i915->gt; u32 master_ctl; @@ -2872,9 +2874,9 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg) if (!intel_irqs_enabled(i915)) return IRQ_NONE; - master_ctl = gen11_master_intr_disable(regs); + master_ctl = intr_disable(regs); if (!master_ctl) { - gen11_master_intr_enable(regs); + intr_enable(regs); return IRQ_NONE; } @@ -2896,13 +2898,20 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg) gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl); - gen11_master_intr_enable(regs); + intr_enable(regs); gen11_gu_misc_irq_handler(gt, gu_misc_iir); return IRQ_HANDLED; } +static irqreturn_t gen11_irq_handler(int irq, void *arg) +{ + return __gen11_irq_handler(arg, + gen11_master_intr_disable, + gen11_master_intr_enable); +} + /* Called from drm generic code, passed 'crtc' which * we use as a pipe index */ From b908be543e4441476916f2ae36cebc95cb187436 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 25 Oct 2019 16:37:22 +0100 Subject: [PATCH 071/154] drm/i915: support creating LMEM objects We currently define LMEM, or local memory, as just another memory region, like system memory or stolen, which we can expose to userspace and can be mapped to the CPU via some BAR. Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Abdiel Janulgue Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191025153728.23689-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 2 + drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 57 +++++++++++++++++++ drivers/gpu/drm/i915/gem/i915_gem_lmem.h | 29 ++++++++++ drivers/gpu/drm/i915/i915_drv.h | 3 + drivers/gpu/drm/i915/intel_region_lmem.c | 16 ++++++ drivers/gpu/drm/i915/intel_region_lmem.h | 11 ++++ .../drm/i915/selftests/i915_live_selftests.h | 1 + .../drm/i915/selftests/intel_memory_region.c | 40 +++++++++++++ 8 files changed, 159 insertions(+) create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_lmem.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_lmem.h create mode 100644 drivers/gpu/drm/i915/intel_region_lmem.c create mode 100644 drivers/gpu/drm/i915/intel_region_lmem.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 9bf1c0b20370..2f64db45f8e0 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -120,6 +120,7 @@ gem-y += \ gem/i915_gem_internal.o \ gem/i915_gem_object.o \ gem/i915_gem_object_blt.o \ + gem/i915_gem_lmem.o \ gem/i915_gem_mman.o \ gem/i915_gem_pages.o \ gem/i915_gem_phys.o \ @@ -148,6 +149,7 @@ i915-y += \ i915_scheduler.o \ i915_trace_points.o \ i915_vma.o \ + intel_region_lmem.o \ intel_wopcm.o # general-purpose microcontroller (GuC) support diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c new file mode 100644 index 000000000000..6a38d8028110 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "intel_memory_region.h" +#include "gem/i915_gem_region.h" +#include "gem/i915_gem_lmem.h" +#include "i915_drv.h" + +const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops = { + .get_pages = i915_gem_object_get_pages_buddy, + .put_pages = i915_gem_object_put_pages_buddy, + .release = i915_gem_object_release_memory_region, +}; + +bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) +{ + return obj->ops == &i915_gem_lmem_obj_ops; +} + +struct drm_i915_gem_object * +i915_gem_object_create_lmem(struct drm_i915_private *i915, + resource_size_t size, + unsigned int flags) +{ + return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM], + size, flags); +} + +struct drm_i915_gem_object * +__i915_gem_lmem_object_create(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) +{ + static struct lock_class_key lock_class; + struct drm_i915_private *i915 = mem->i915; + struct drm_i915_gem_object *obj; + + if (size > BIT(mem->mm.max_order) * mem->mm.chunk_size) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &i915_gem_lmem_obj_ops, &lock_class); + + obj->read_domains = I915_GEM_DOMAIN_WC | I915_GEM_DOMAIN_GTT; + + i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); + + i915_gem_object_init_memory_region(obj, mem, flags); + + return obj; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h new file mode 100644 index 000000000000..fc3f15580fe3 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_LMEM_H +#define __I915_GEM_LMEM_H + +#include + +struct drm_i915_private; +struct drm_i915_gem_object; +struct intel_memory_region; + +extern const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops; + +bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); + +struct drm_i915_gem_object * +i915_gem_object_create_lmem(struct drm_i915_private *i915, + resource_size_t size, + unsigned int flags); + +struct drm_i915_gem_object * +__i915_gem_lmem_object_create(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags); + +#endif /* !__I915_GEM_LMEM_H */ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 84dc3703c116..c3b2f29942f9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -101,6 +101,8 @@ #include "i915_vma.h" #include "i915_irq.h" +#include "intel_region_lmem.h" + #include "intel_gvt.h" /* General customization: @@ -1789,6 +1791,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_IPC(dev_priv) (INTEL_INFO(dev_priv)->display.has_ipc) #define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i)) +#define HAS_LMEM(i915) HAS_REGION(i915, REGION_LMEM) #define HAS_GT_UC(dev_priv) (INTEL_INFO(dev_priv)->has_gt_uc) diff --git a/drivers/gpu/drm/i915/intel_region_lmem.c b/drivers/gpu/drm/i915/intel_region_lmem.c new file mode 100644 index 000000000000..199532056e1b --- /dev/null +++ b/drivers/gpu/drm/i915/intel_region_lmem.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_memory_region.h" +#include "gem/i915_gem_lmem.h" +#include "gem/i915_gem_region.h" +#include "intel_region_lmem.h" + +const struct intel_memory_region_ops intel_region_lmem_ops = { + .init = intel_memory_region_init_buddy, + .release = intel_memory_region_release_buddy, + .create_object = __i915_gem_lmem_object_create, +}; diff --git a/drivers/gpu/drm/i915/intel_region_lmem.h b/drivers/gpu/drm/i915/intel_region_lmem.h new file mode 100644 index 000000000000..ed2a3bab6443 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_region_lmem.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_REGION_LMEM_H +#define __INTEL_REGION_LMEM_H + +extern const struct intel_memory_region_ops intel_region_lmem_ops; + +#endif /* !__INTEL_REGION_LMEM_H */ diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 00a063730bc3..4b3cac73e291 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -33,6 +33,7 @@ selftest(gem_contexts, i915_gem_context_live_selftests) selftest(blt, i915_gem_object_blt_live_selftests) selftest(client, i915_gem_client_blt_live_selftests) selftest(reset, intel_reset_live_selftests) +selftest(memory_region, intel_memory_region_live_selftests) selftest(hangcheck, intel_hangcheck_live_selftests) selftest(execlists, intel_execlists_live_selftests) selftest(guc, intel_guc_live_selftest) diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 56091e7e599e..617a35cfac2f 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -11,8 +11,10 @@ #include "mock_gem_device.h" #include "mock_region.h" +#include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" #include "gem/selftests/mock_context.h" +#include "gt/intel_gt.h" #include "selftests/i915_random.h" static void close_objects(struct intel_memory_region *mem, @@ -252,6 +254,27 @@ err_close_objects: return err; } +static int igt_lmem_create(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + int err = 0; + + obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, 0); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + int intel_memory_region_mock_selftests(void) { static const struct i915_subtest tests[] = { @@ -280,3 +303,20 @@ out_unref: drm_dev_put(&i915->drm); return err; } + +int intel_memory_region_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_lmem_create), + }; + + if (!HAS_LMEM(i915)) { + pr_info("device lacks LMEM support, skipping\n"); + return 0; + } + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return i915_live_subtests(tests, i915); +} From cb6d2467ace7fb50203ff3900c31d01c7670ff6f Mon Sep 17 00:00:00 2001 From: Abdiel Janulgue Date: Fri, 25 Oct 2019 16:37:23 +0100 Subject: [PATCH 072/154] drm/i915: setup io-mapping for LMEM Create an io-mapping to describe the CPU aperture for lmem. Signed-off-by: Abdiel Janulgue Cc: Matthew Auld Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191025153728.23689-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_region_lmem.c | 28 ++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_region_lmem.c b/drivers/gpu/drm/i915/intel_region_lmem.c index 199532056e1b..9a351af45ce6 100644 --- a/drivers/gpu/drm/i915/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/intel_region_lmem.c @@ -9,8 +9,32 @@ #include "gem/i915_gem_region.h" #include "intel_region_lmem.h" +static void +region_lmem_release(struct intel_memory_region *mem) +{ + io_mapping_fini(&mem->iomap); + intel_memory_region_release_buddy(mem); +} + +static int +region_lmem_init(struct intel_memory_region *mem) +{ + int ret; + + if (!io_mapping_init_wc(&mem->iomap, + mem->io_start, + resource_size(&mem->region))) + return -EIO; + + ret = intel_memory_region_init_buddy(mem); + if (ret) + io_mapping_fini(&mem->iomap); + + return ret; +} + const struct intel_memory_region_ops intel_region_lmem_ops = { - .init = intel_memory_region_init_buddy, - .release = intel_memory_region_release_buddy, + .init = region_lmem_init, + .release = region_lmem_release, .create_object = __i915_gem_lmem_object_create, }; From 01377a0d7e6648b050588cf1a6f71a5d8e6ad2b4 Mon Sep 17 00:00:00 2001 From: Abdiel Janulgue Date: Fri, 25 Oct 2019 16:37:24 +0100 Subject: [PATCH 073/154] drm/i915/lmem: support kernel mapping We can create LMEM objects, but we also need to support mapping them into kernel space for internal use. Signed-off-by: Abdiel Janulgue Signed-off-by: Matthew Auld Signed-off-by: Steve Hampson Cc: Joonas Lahtinen Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191025153728.23689-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 39 ++++++ drivers/gpu/drm/i915/gem/i915_gem_lmem.h | 8 ++ .../gpu/drm/i915/gem/i915_gem_object_types.h | 9 +- drivers/gpu/drm/i915/gem/i915_gem_pages.c | 41 ++++--- .../drm/i915/selftests/intel_memory_region.c | 113 ++++++++++++++++++ 5 files changed, 192 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index 6a38d8028110..926f6c940e0d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -9,11 +9,50 @@ #include "i915_drv.h" const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops = { + .flags = I915_GEM_OBJECT_HAS_IOMEM, + .get_pages = i915_gem_object_get_pages_buddy, .put_pages = i915_gem_object_put_pages_buddy, .release = i915_gem_object_release_memory_region, }; +/* XXX: Time to vfunc your life up? */ +void __iomem * +i915_gem_object_lmem_io_map_page(struct drm_i915_gem_object *obj, + unsigned long n) +{ + resource_size_t offset; + + offset = i915_gem_object_get_dma_address(obj, n); + + return io_mapping_map_wc(&obj->mm.region->iomap, offset, PAGE_SIZE); +} + +void __iomem * +i915_gem_object_lmem_io_map_page_atomic(struct drm_i915_gem_object *obj, + unsigned long n) +{ + resource_size_t offset; + + offset = i915_gem_object_get_dma_address(obj, n); + + return io_mapping_map_atomic_wc(&obj->mm.region->iomap, offset); +} + +void __iomem * +i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, + unsigned long n, + unsigned long size) +{ + resource_size_t offset; + + GEM_BUG_ON(!i915_gem_object_is_contiguous(obj)); + + offset = i915_gem_object_get_dma_address(obj, n); + + return io_mapping_map_wc(&obj->mm.region->iomap, offset, size); +} + bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) { return obj->ops == &i915_gem_lmem_obj_ops; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h index fc3f15580fe3..7c176b8b7d2f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h @@ -14,6 +14,14 @@ struct intel_memory_region; extern const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops; +void __iomem *i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, + unsigned long n, unsigned long size); +void __iomem *i915_gem_object_lmem_io_map_page(struct drm_i915_gem_object *obj, + unsigned long n); +void __iomem * +i915_gem_object_lmem_io_map_page_atomic(struct drm_i915_gem_object *obj, + unsigned long n); + bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); struct drm_i915_gem_object * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index a387e3ee728b..96008374a412 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -31,10 +31,11 @@ struct i915_lut_handle { struct drm_i915_gem_object_ops { unsigned int flags; #define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0) -#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) -#define I915_GEM_OBJECT_IS_PROXY BIT(2) -#define I915_GEM_OBJECT_NO_GGTT BIT(3) -#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(4) +#define I915_GEM_OBJECT_HAS_IOMEM BIT(1) +#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(2) +#define I915_GEM_OBJECT_IS_PROXY BIT(3) +#define I915_GEM_OBJECT_NO_GGTT BIT(4) +#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(5) /* Interface between the GEM object and its backing storage. * get_pages() is called once prior to the use of the associated set diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index b0ec0959c13f..29f4c2850745 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -7,6 +7,7 @@ #include "i915_drv.h" #include "i915_gem_object.h" #include "i915_scatterlist.h" +#include "i915_gem_lmem.h" void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, struct sg_table *pages, @@ -154,6 +155,16 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) rcu_read_unlock(); } +static void unmap_object(struct drm_i915_gem_object *obj, void *ptr) +{ + if (i915_gem_object_is_lmem(obj)) + io_mapping_unmap((void __force __iomem *)ptr); + else if (is_vmalloc_addr(ptr)) + vunmap(ptr); + else + kunmap(kmap_to_page(ptr)); +} + struct sg_table * __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) { @@ -169,14 +180,7 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) i915_gem_object_make_unshrinkable(obj); if (obj->mm.mapping) { - void *ptr; - - ptr = page_mask_bits(obj->mm.mapping); - if (is_vmalloc_addr(ptr)) - vunmap(ptr); - else - kunmap(kmap_to_page(ptr)); - + unmap_object(obj, page_mask_bits(obj->mm.mapping)); obj->mm.mapping = NULL; } @@ -231,7 +235,7 @@ unlock: } /* The 'mapping' part of i915_gem_object_pin_map() below */ -static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, +static void *i915_gem_object_map(struct drm_i915_gem_object *obj, enum i915_map_type type) { unsigned long n_pages = obj->base.size >> PAGE_SHIFT; @@ -244,6 +248,16 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, pgprot_t pgprot; void *addr; + if (i915_gem_object_is_lmem(obj)) { + void __iomem *io; + + if (type != I915_MAP_WC) + return NULL; + + io = i915_gem_object_lmem_io_map(obj, 0, obj->base.size); + return (void __force *)io; + } + /* A single page can always be kmapped */ if (n_pages == 1 && type == I915_MAP_WB) return kmap(sg_page(sgt->sgl)); @@ -285,11 +299,13 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, enum i915_map_type type) { enum i915_map_type has_type; + unsigned int flags; bool pinned; void *ptr; int err; - if (unlikely(!i915_gem_object_has_struct_page(obj))) + flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_HAS_IOMEM; + if (!i915_gem_object_type_has(obj, flags)) return ERR_PTR(-ENXIO); err = mutex_lock_interruptible(&obj->mm.lock); @@ -321,10 +337,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, goto err_unpin; } - if (is_vmalloc_addr(ptr)) - vunmap(ptr); - else - kunmap(kmap_to_page(ptr)); + unmap_object(obj, ptr); ptr = obj->mm.mapping = NULL; } diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 617a35cfac2f..2e0f91fac85d 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -13,8 +13,10 @@ #include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" +#include "gem/i915_gem_object_blt.h" #include "gem/selftests/mock_context.h" #include "gt/intel_gt.h" +#include "selftests/igt_flush_test.h" #include "selftests/i915_random.h" static void close_objects(struct intel_memory_region *mem, @@ -275,6 +277,116 @@ out_put: return err; } +static int igt_lmem_write_cpu(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + I915_RND_STATE(prng); + IGT_TIMEOUT(end_time); + u32 bytes[] = { + 0, /* rng placeholder */ + sizeof(u32), + sizeof(u64), + 64, /* cl */ + PAGE_SIZE, + PAGE_SIZE - sizeof(u32), + PAGE_SIZE - sizeof(u64), + PAGE_SIZE - 64, + }; + u32 *vaddr; + u32 sz; + u32 i; + int *order; + int count; + int err; + + if (!HAS_ENGINE(i915, BCS0)) + return 0; + + sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE); + sz = max_t(u32, 2 * PAGE_SIZE, sz); + + obj = i915_gem_object_create_lmem(i915, sz, I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto out_put; + } + + /* Put the pages into a known state -- from the gpu for added fun */ + err = i915_gem_object_fill_blt(obj, i915->engine[BCS0]->kernel_context, + 0xdeadbeaf); + if (err) + goto out_unpin; + + i915_gem_object_lock(obj); + err = i915_gem_object_set_to_wc_domain(obj, true); + i915_gem_object_unlock(obj); + if (err) + goto out_unpin; + + count = ARRAY_SIZE(bytes); + order = i915_random_order(count * count, &prng); + if (!order) { + err = -ENOMEM; + goto out_unpin; + } + + /* We want to throw in a random width/align */ + bytes[0] = igt_random_offset(&prng, 0, PAGE_SIZE, sizeof(u32), + sizeof(u32)); + + i = 0; + do { + u32 offset; + u32 align; + u32 dword; + u32 size; + u32 val; + + size = bytes[order[i] % count]; + i = (i + 1) % (count * count); + + align = bytes[order[i] % count]; + i = (i + 1) % (count * count); + + align = max_t(u32, sizeof(u32), rounddown_pow_of_two(align)); + + offset = igt_random_offset(&prng, 0, obj->base.size, + size, align); + + val = prandom_u32_state(&prng); + memset32(vaddr + offset / sizeof(u32), val ^ 0xdeadbeaf, + size / sizeof(u32)); + + /* + * Sample random dw -- don't waste precious time reading every + * single dw. + */ + dword = igt_random_offset(&prng, offset, + offset + size, + sizeof(u32), sizeof(u32)); + dword /= sizeof(u32); + if (vaddr[dword] != (val ^ 0xdeadbeaf)) { + pr_err("%s vaddr[%u]=%u, val=%u, size=%u, align=%u, offset=%u\n", + __func__, dword, vaddr[dword], val ^ 0xdeadbeaf, + size, align, offset); + err = -EINVAL; + break; + } + } while (!__igt_timeout(end_time, NULL)); + +out_unpin: + i915_gem_object_unpin_map(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + int intel_memory_region_mock_selftests(void) { static const struct i915_subtest tests[] = { @@ -308,6 +420,7 @@ int intel_memory_region_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_lmem_create), + SUBTEST(igt_lmem_write_cpu), }; if (!HAS_LMEM(i915)) { From 340be48f2c5a3c099ee4a20586f706422d453417 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 25 Oct 2019 16:37:25 +0100 Subject: [PATCH 074/154] drm/i915/selftests: add write-dword test for LMEM Simple test writing to dwords across an object, using various engines in a randomized order, checking that our writes land from the cpu. Signed-off-by: Matthew Auld Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191025153728.23689-4-chris@chris-wilson.co.uk --- .../drm/i915/selftests/intel_memory_region.c | 166 ++++++++++++++++++ 1 file changed, 166 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 2e0f91fac85d..8bc6fadd14fb 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -11,9 +11,11 @@ #include "mock_gem_device.h" #include "mock_region.h" +#include "gem/i915_gem_context.h" #include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_object_blt.h" +#include "gem/selftests/igt_gem_utils.h" #include "gem/selftests/mock_context.h" #include "gt/intel_gt.h" #include "selftests/igt_flush_test.h" @@ -256,6 +258,125 @@ err_close_objects: return err; } +static int igt_gpu_write_dw(struct intel_context *ce, + struct i915_vma *vma, + u32 dword, + u32 value) +{ + return igt_gpu_fill_dw(ce, vma, dword * sizeof(u32), + vma->size >> PAGE_SHIFT, value); +} + +static int igt_cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) +{ + unsigned long n; + int err; + + i915_gem_object_lock(obj); + err = i915_gem_object_set_to_wc_domain(obj, false); + i915_gem_object_unlock(obj); + if (err) + return err; + + err = i915_gem_object_pin_pages(obj); + if (err) + return err; + + for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) { + u32 __iomem *base; + u32 read_val; + + base = i915_gem_object_lmem_io_map_page_atomic(obj, n); + + read_val = ioread32(base + dword); + io_mapping_unmap_atomic(base); + if (read_val != val) { + pr_err("n=%lu base[%u]=%u, val=%u\n", + n, dword, read_val, val); + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_pages(obj); + return err; +} + +static int igt_gpu_write(struct i915_gem_context *ctx, + struct drm_i915_gem_object *obj) +{ + struct i915_gem_engines *engines; + struct i915_gem_engines_iter it; + struct i915_address_space *vm; + struct intel_context *ce; + I915_RND_STATE(prng); + IGT_TIMEOUT(end_time); + unsigned int count; + struct i915_vma *vma; + int *order; + int i, n; + int err = 0; + + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + + n = 0; + count = 0; + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + count++; + if (!intel_engine_can_store_dword(ce->engine)) + continue; + + vm = ce->vm; + n++; + } + i915_gem_context_unlock_engines(ctx); + if (!n) + return 0; + + order = i915_random_order(count * count, &prng); + if (!order) + return -ENOMEM; + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_free; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_free; + + i = 0; + engines = i915_gem_context_lock_engines(ctx); + do { + u32 rng = prandom_u32_state(&prng); + u32 dword = offset_in_page(rng) / 4; + + ce = engines->engines[order[i] % engines->num_engines]; + i = (i + 1) % (count * count); + if (!ce || !intel_engine_can_store_dword(ce->engine)) + continue; + + err = igt_gpu_write_dw(ce, vma, dword, rng); + if (err) + break; + + err = igt_cpu_check(obj, dword, rng); + if (err) + break; + } while (!__igt_timeout(end_time, NULL)); + i915_gem_context_unlock_engines(ctx); + +out_free: + kfree(order); + + if (err == -ENOMEM) + err = 0; + + return err; +} + static int igt_lmem_create(void *arg) { struct drm_i915_private *i915 = arg; @@ -277,6 +398,50 @@ out_put: return err; } +static int igt_lmem_write_gpu(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct i915_gem_context *ctx; + struct drm_file *file; + I915_RND_STATE(prng); + u32 sz; + int err; + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_file; + } + + sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE); + + obj = i915_gem_object_create_lmem(i915, sz, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_file; + } + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + err = igt_gpu_write(ctx, obj); + if (err) + pr_err("igt_gpu_write failed(%d)\n", err); + + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); +out_file: + mock_file_free(i915, file); + return err; +} + static int igt_lmem_write_cpu(void *arg) { struct drm_i915_private *i915 = arg; @@ -421,6 +586,7 @@ int intel_memory_region_live_selftests(struct drm_i915_private *i915) static const struct i915_subtest tests[] = { SUBTEST(igt_lmem_create), SUBTEST(igt_lmem_write_cpu), + SUBTEST(igt_lmem_write_gpu), }; if (!HAS_LMEM(i915)) { From 23741bc81de98e4223a77cd970e858f249bc0fae Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 25 Oct 2019 16:37:26 +0100 Subject: [PATCH 075/154] drm/i915/selftests: extend coverage to include LMEM huge-pages Add LMEM objects to list of backends we test for huge-GTT-pages. Signed-off-by: Matthew Auld Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191025153728.23689-5-chris@chris-wilson.co.uk --- .../gpu/drm/i915/gem/selftests/huge_pages.c | 123 +++++++++++++++++- 1 file changed, 122 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index dac8344507c1..fa134a82083d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -9,6 +9,7 @@ #include "i915_selftest.h" #include "gem/i915_gem_region.h" +#include "gem/i915_gem_lmem.h" #include "gem/i915_gem_pm.h" #include "gt/intel_gt.h" @@ -983,7 +984,8 @@ static int gpu_write(struct intel_context *ce, vma->size >> PAGE_SHIFT, val); } -static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) +static int +__cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val) { unsigned int needs_flush; unsigned long n; @@ -1015,6 +1017,51 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) return err; } +static int __cpu_check_lmem(struct drm_i915_gem_object *obj, u32 dword, u32 val) +{ + unsigned long n; + int err; + + i915_gem_object_lock(obj); + err = i915_gem_object_set_to_wc_domain(obj, false); + i915_gem_object_unlock(obj); + if (err) + return err; + + err = i915_gem_object_pin_pages(obj); + if (err) + return err; + + for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) { + u32 __iomem *base; + u32 read_val; + + base = i915_gem_object_lmem_io_map_page_atomic(obj, n); + + read_val = ioread32(base + dword); + io_mapping_unmap_atomic(base); + if (read_val != val) { + pr_err("n=%lu base[%u]=%u, val=%u\n", + n, dword, read_val, val); + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_pages(obj); + return err; +} + +static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) +{ + if (i915_gem_object_has_struct_page(obj)) + return __cpu_check_shmem(obj, dword, val); + else if (i915_gem_object_is_lmem(obj)) + return __cpu_check_lmem(obj, dword, val); + + return -ENODEV; +} + static int __igt_write_huge(struct intel_context *ce, struct drm_i915_gem_object *obj, u64 size, u64 offset, @@ -1399,6 +1446,79 @@ out_put: return err; } +static int igt_ppgtt_lmem_huge(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_gem_object *obj; + static const unsigned int sizes[] = { + SZ_64K, + SZ_512K, + SZ_1M, + SZ_2M, + }; + int i; + int err; + + if (!HAS_LMEM(i915)) { + pr_info("device lacks LMEM support, skipping\n"); + return 0; + } + + /* + * Sanity check that the HW uses huge pages correctly through LMEM + * -- ensure that our writes land in the right place. + */ + + for (i = 0; i < ARRAY_SIZE(sizes); ++i) { + unsigned int size = sizes[i]; + + obj = i915_gem_object_create_lmem(i915, size, + I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + if (err == -E2BIG) { + pr_info("object too big for region!\n"); + return 0; + } + + return err; + } + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) { + pr_info("LMEM unable to allocate huge-page(s) with size=%u\n", + size); + goto out_unpin; + } + + err = igt_write_huge(ctx, obj); + if (err) { + pr_err("LMEM write-huge failed with size=%u\n", size); + goto out_unpin; + } + + i915_gem_object_unpin_pages(obj); + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + i915_gem_object_put(obj); + } + + return 0; + +out_unpin: + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + + if (err == -ENOMEM) + err = 0; + + return err; +} + static int igt_ppgtt_pin_update(void *arg) { struct i915_gem_context *ctx = arg; @@ -1760,6 +1880,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ppgtt_exhaust_huge), SUBTEST(igt_ppgtt_gemfs_huge), SUBTEST(igt_ppgtt_internal_huge), + SUBTEST(igt_ppgtt_lmem_huge), }; struct drm_file *file; struct i915_gem_context *ctx; From 11d723ceb2979953711e8a6c74e4786018e0b108 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 25 Oct 2019 16:37:27 +0100 Subject: [PATCH 076/154] drm/i915/selftests: prefer random sizes for the huge-GTT-page smoke tests Ditch the dubious static list of sizes to enumerate, in favour of choosing a random size within the limits of each backing store. With repeated CI runs this should give us a wider range of object sizes, and in turn more page-size combinations, while using less machine time. Signed-off-by: Matthew Auld Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191025153728.23689-6-chris@chris-wilson.co.uk --- .../gpu/drm/i915/gem/selftests/huge_pages.c | 245 +++++++----------- 1 file changed, 88 insertions(+), 157 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index fa134a82083d..b777999655b0 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1317,204 +1317,137 @@ out_device: return err; } -static int igt_ppgtt_internal_huge(void *arg) -{ - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; - struct drm_i915_gem_object *obj; - static const unsigned int sizes[] = { - SZ_64K, - SZ_128K, - SZ_256K, - SZ_512K, - SZ_1M, - SZ_2M, - }; - int i; - int err; - - /* - * Sanity check that the HW uses huge pages correctly through internal - * -- ensure that our writes land in the right place. - */ - - for (i = 0; i < ARRAY_SIZE(sizes); ++i) { - unsigned int size = sizes[i]; - - obj = i915_gem_object_create_internal(i915, size); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - err = i915_gem_object_pin_pages(obj); - if (err) - goto out_put; - - if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) { - pr_info("internal unable to allocate huge-page(s) with size=%u\n", - size); - goto out_unpin; - } - - err = igt_write_huge(ctx, obj); - if (err) { - pr_err("internal write-huge failed with size=%u\n", - size); - goto out_unpin; - } - - i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - i915_gem_object_put(obj); - } - - return 0; - -out_unpin: - i915_gem_object_unpin_pages(obj); -out_put: - i915_gem_object_put(obj); - - return err; -} +typedef struct drm_i915_gem_object * +(*igt_create_fn)(struct drm_i915_private *i915, u32 size, u32 flags); static inline bool igt_can_allocate_thp(struct drm_i915_private *i915) { return i915->mm.gemfs && has_transparent_hugepage(); } -static int igt_ppgtt_gemfs_huge(void *arg) +static struct drm_i915_gem_object * +igt_create_shmem(struct drm_i915_private *i915, u32 size, u32 flags) { - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; - struct drm_i915_gem_object *obj; - static const unsigned int sizes[] = { - SZ_2M, - SZ_4M, - SZ_8M, - SZ_16M, - SZ_32M, - }; - int i; - int err; - - /* - * Sanity check that the HW uses huge pages correctly through gemfs -- - * ensure that our writes land in the right place. - */ - if (!igt_can_allocate_thp(i915)) { - pr_info("missing THP support, skipping\n"); - return 0; + pr_info("%s missing THP support, skipping\n", __func__); + return ERR_PTR(-ENODEV); } - for (i = 0; i < ARRAY_SIZE(sizes); ++i) { - unsigned int size = sizes[i]; - - obj = i915_gem_object_create_shmem(i915, size); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - err = i915_gem_object_pin_pages(obj); - if (err) - goto out_put; - - if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { - pr_info("finishing test early, gemfs unable to allocate huge-page(s) with size=%u\n", - size); - goto out_unpin; - } - - err = igt_write_huge(ctx, obj); - if (err) { - pr_err("gemfs write-huge failed with size=%u\n", - size); - goto out_unpin; - } - - i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - i915_gem_object_put(obj); - } - - return 0; - -out_unpin: - i915_gem_object_unpin_pages(obj); -out_put: - i915_gem_object_put(obj); - - return err; + return i915_gem_object_create_shmem(i915, size); } -static int igt_ppgtt_lmem_huge(void *arg) +static struct drm_i915_gem_object * +igt_create_internal(struct drm_i915_private *i915, u32 size, u32 flags) +{ + return i915_gem_object_create_internal(i915, size); +} + +static struct drm_i915_gem_object * +igt_create_local(struct drm_i915_private *i915, u32 size, u32 flags) +{ + return i915_gem_object_create_lmem(i915, size, flags); +} + +static u32 igt_random_size(struct rnd_state *prng, + u32 min_page_size, + u32 max_page_size) +{ + u64 mask; + u32 size; + + GEM_BUG_ON(!is_power_of_2(min_page_size)); + GEM_BUG_ON(!is_power_of_2(max_page_size)); + GEM_BUG_ON(min_page_size < PAGE_SIZE); + GEM_BUG_ON(min_page_size > max_page_size); + + mask = ((max_page_size << 1ULL) - 1) & PAGE_MASK; + size = prandom_u32_state(prng) & mask; + if (size < min_page_size) + size |= min_page_size; + + return size; +} + +static int igt_ppgtt_smoke_huge(void *arg) { struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; struct drm_i915_gem_object *obj; - static const unsigned int sizes[] = { - SZ_64K, - SZ_512K, - SZ_1M, - SZ_2M, + I915_RND_STATE(prng); + struct { + igt_create_fn fn; + u32 min; + u32 max; + } backends[] = { + { igt_create_internal, SZ_64K, SZ_2M, }, + { igt_create_shmem, SZ_64K, SZ_32M, }, + { igt_create_local, SZ_64K, SZ_1G, }, }; - int i; int err; - - if (!HAS_LMEM(i915)) { - pr_info("device lacks LMEM support, skipping\n"); - return 0; - } + int i; /* - * Sanity check that the HW uses huge pages correctly through LMEM - * -- ensure that our writes land in the right place. + * Sanity check that the HW uses huge pages correctly through our + * various backends -- ensure that our writes land in the right place. */ - for (i = 0; i < ARRAY_SIZE(sizes); ++i) { - unsigned int size = sizes[i]; + for (i = 0; i < ARRAY_SIZE(backends); ++i) { + u32 min = backends[i].min; + u32 max = backends[i].max; + u32 size = max; +try_again: + size = igt_random_size(&prng, min, rounddown_pow_of_two(size)); - obj = i915_gem_object_create_lmem(i915, size, - I915_BO_ALLOC_CONTIGUOUS); + obj = backends[i].fn(i915, size, 0); if (IS_ERR(obj)) { err = PTR_ERR(obj); if (err == -E2BIG) { - pr_info("object too big for region!\n"); - return 0; + size >>= 1; + goto try_again; + } else if (err == -ENODEV) { + err = 0; + continue; } return err; } err = i915_gem_object_pin_pages(obj); - if (err) + if (err) { + if (err == -ENXIO) { + i915_gem_object_put(obj); + size >>= 1; + goto try_again; + } goto out_put; + } - if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) { - pr_info("LMEM unable to allocate huge-page(s) with size=%u\n", - size); + if (obj->mm.page_sizes.phys < min) { + pr_info("%s unable to allocate huge-page(s) with size=%u, i=%d\n", + __func__, size, i); + err = -ENOMEM; goto out_unpin; } err = igt_write_huge(ctx, obj); if (err) { - pr_err("LMEM write-huge failed with size=%u\n", size); - goto out_unpin; + pr_err("%s write-huge failed with size=%u, i=%d\n", + __func__, size, i); } - +out_unpin: i915_gem_object_unpin_pages(obj); __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - i915_gem_object_put(obj); - } - - return 0; - -out_unpin: - i915_gem_object_unpin_pages(obj); out_put: - i915_gem_object_put(obj); + i915_gem_object_put(obj); - if (err == -ENOMEM) - err = 0; + if (err == -ENOMEM || err == -ENXIO) + err = 0; + + if (err) + break; + + cond_resched(); + } return err; } @@ -1878,9 +1811,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ppgtt_pin_update), SUBTEST(igt_tmpfs_fallback), SUBTEST(igt_ppgtt_exhaust_huge), - SUBTEST(igt_ppgtt_gemfs_huge), - SUBTEST(igt_ppgtt_internal_huge), - SUBTEST(igt_ppgtt_lmem_huge), + SUBTEST(igt_ppgtt_smoke_huge), }; struct drm_file *file; struct i915_gem_context *ctx; From dd158d71a08510e9a39fd1fd5e1f8e9d404bcb7f Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 25 Oct 2019 16:37:28 +0100 Subject: [PATCH 077/154] drm/i915/selftests: add sanity selftest for huge-GTT-pages Now that for all the relevant backends we do randomised testing, we need to make sure we still sanity check the obvious cases that might blow up, such that introducing a temporary regression is less likely. Also rather than do this for every backend, just limit to our two memory types: system and local. Suggested-by: Chris Wilson Signed-off-by: Matthew Auld Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191025153728.23689-7-chris@chris-wilson.co.uk --- .../gpu/drm/i915/gem/selftests/huge_pages.c | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index b777999655b0..688c49a24f32 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1342,6 +1342,12 @@ igt_create_internal(struct drm_i915_private *i915, u32 size, u32 flags) return i915_gem_object_create_internal(i915, size); } +static struct drm_i915_gem_object * +igt_create_system(struct drm_i915_private *i915, u32 size, u32 flags) +{ + return huge_pages_object(i915, size, size); +} + static struct drm_i915_gem_object * igt_create_local(struct drm_i915_private *i915, u32 size, u32 flags) { @@ -1452,6 +1458,98 @@ out_put: return err; } +static int igt_ppgtt_sanity_check(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; + unsigned int supported = INTEL_INFO(i915)->page_sizes; + struct { + igt_create_fn fn; + unsigned int flags; + } backends[] = { + { igt_create_system, 0, }, + { igt_create_local, I915_BO_ALLOC_CONTIGUOUS, }, + }; + struct { + u32 size; + u32 pages; + } combos[] = { + { SZ_64K, SZ_64K }, + { SZ_2M, SZ_2M }, + { SZ_2M, SZ_64K }, + { SZ_2M - SZ_64K, SZ_64K }, + { SZ_2M - SZ_4K, SZ_64K | SZ_4K }, + { SZ_2M + SZ_4K, SZ_64K | SZ_4K }, + { SZ_2M + SZ_4K, SZ_2M | SZ_4K }, + { SZ_2M + SZ_64K, SZ_2M | SZ_64K }, + }; + int i, j; + int err; + + if (supported == I915_GTT_PAGE_SIZE_4K) + return 0; + + /* + * Sanity check that the HW behaves with a limited set of combinations. + * We already have a bunch of randomised testing, which should give us + * a decent amount of variation between runs, however we should keep + * this to limit the chances of introducing a temporary regression, by + * testing the most obvious cases that might make something blow up. + */ + + for (i = 0; i < ARRAY_SIZE(backends); ++i) { + for (j = 0; j < ARRAY_SIZE(combos); ++j) { + struct drm_i915_gem_object *obj; + u32 size = combos[j].size; + u32 pages = combos[j].pages; + + obj = backends[i].fn(i915, size, backends[i].flags); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + if (err == -ENODEV) { + pr_info("Device lacks local memory, skipping\n"); + err = 0; + break; + } + + return err; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + goto out; + } + + GEM_BUG_ON(pages > obj->base.size); + pages = pages & supported; + + if (pages) + obj->mm.page_sizes.sg = pages; + + err = igt_write_huge(ctx, obj); + + i915_gem_object_unpin_pages(obj); + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + i915_gem_object_put(obj); + + if (err) { + pr_err("%s write-huge failed with size=%u pages=%u i=%d, j=%d\n", + __func__, size, pages, i, j); + goto out; + } + } + + cond_resched(); + } + +out: + if (err == -ENOMEM) + err = 0; + + return err; +} + static int igt_ppgtt_pin_update(void *arg) { struct i915_gem_context *ctx = arg; @@ -1812,6 +1910,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_tmpfs_fallback), SUBTEST(igt_ppgtt_exhaust_huge), SUBTEST(igt_ppgtt_smoke_huge), + SUBTEST(igt_ppgtt_sanity_check), }; struct drm_file *file; struct i915_gem_context *ctx; From 0e99f939f08fc36820869e87dee109c5996abff0 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 25 Oct 2019 18:25:11 +0100 Subject: [PATCH 078/154] drm/i915/selftests/blt: add some kthreads into the mix We can be more aggressive in our testing by launching a number of kthreads, where each is submitting its own copy or fill batches on a set of random sized objects. Also since the underlying fill and copy batches can be pre-empted mid-batch(for particularly large objects), throw in a random mixture of ctx priorities per thread to make pre-emption a possibility. Signed-off-by: Matthew Auld Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191025172511.25742-1-matthew.auld@intel.com --- .../i915/gem/selftests/i915_gem_object_blt.c | 180 +++++++++++++++--- 1 file changed, 154 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c index 9ec55b3a3815..9666c0aeb6de 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c @@ -7,36 +7,67 @@ #include "i915_selftest.h" +#include "gem/i915_gem_context.h" #include "selftests/igt_flush_test.h" +#include "selftests/i915_random.h" #include "selftests/mock_drm.h" #include "huge_gem_object.h" #include "mock_context.h" -static int igt_fill_blt(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct intel_context *ce = i915->engine[BCS0]->kernel_context; - struct drm_i915_gem_object *obj; +struct igt_thread_arg { + struct drm_i915_private *i915; struct rnd_state prng; + unsigned int n_cpus; +}; + +static int igt_fill_blt_thread(void *arg) +{ + struct igt_thread_arg *thread = arg; + struct drm_i915_private *i915 = thread->i915; + struct rnd_state *prng = &thread->prng; + struct drm_i915_gem_object *obj; + struct i915_gem_context *ctx; + struct intel_context *ce; + struct drm_file *file; + unsigned int prio; IGT_TIMEOUT(end); - u32 *vaddr; - int err = 0; + int err; - prandom_seed_state(&prng, i915_selftest.random_seed); + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); - /* - * XXX: needs some threads to scale all these tests, also maybe throw - * in submission from higher priority context to see if we are - * preempted for very large objects... - */ + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_file; + } + + prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng); + ctx->sched.priority = I915_USER_PRIORITY(prio); + + ce = i915_gem_context_get_engine(ctx, BCS0); + GEM_BUG_ON(IS_ERR(ce)); do { const u32 max_block_size = S16_MAX * PAGE_SIZE; - u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng)); - u32 phys_sz = sz % (max_block_size + 1); - u32 val = prandom_u32_state(&prng); + u32 val = prandom_u32_state(prng); + u64 total = ce->vm->total; + u32 phys_sz; + u32 sz; + u32 *vaddr; u32 i; + /* + * If we have a tiny shared address space, like for the GGTT + * then we can't be too greedy. + */ + if (i915_is_ggtt(ce->vm)) + total = div64_u64(total, thread->n_cpus); + + sz = min_t(u64, total >> 4, prandom_u32_state(prng)); + phys_sz = sz % (max_block_size + 1); + sz = round_up(sz, PAGE_SIZE); phys_sz = round_up(phys_sz, PAGE_SIZE); @@ -98,28 +129,56 @@ err_flush: if (err == -ENOMEM) err = 0; + intel_context_put(ce); +out_file: + mock_file_free(i915, file); return err; } -static int igt_copy_blt(void *arg) +static int igt_copy_blt_thread(void *arg) { - struct drm_i915_private *i915 = arg; - struct intel_context *ce = i915->engine[BCS0]->kernel_context; + struct igt_thread_arg *thread = arg; + struct drm_i915_private *i915 = thread->i915; + struct rnd_state *prng = &thread->prng; struct drm_i915_gem_object *src, *dst; - struct rnd_state prng; + struct i915_gem_context *ctx; + struct intel_context *ce; + struct drm_file *file; + unsigned int prio; IGT_TIMEOUT(end); - u32 *vaddr; - int err = 0; + int err; - prandom_seed_state(&prng, i915_selftest.random_seed); + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_file; + } + + prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng); + ctx->sched.priority = I915_USER_PRIORITY(prio); + + ce = i915_gem_context_get_engine(ctx, BCS0); + GEM_BUG_ON(IS_ERR(ce)); do { const u32 max_block_size = S16_MAX * PAGE_SIZE; - u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng)); - u32 phys_sz = sz % (max_block_size + 1); - u32 val = prandom_u32_state(&prng); + u32 val = prandom_u32_state(prng); + u64 total = ce->vm->total; + u32 phys_sz; + u32 sz; + u32 *vaddr; u32 i; + if (i915_is_ggtt(ce->vm)) + total = div64_u64(total, thread->n_cpus); + + sz = min_t(u64, total >> 4, prandom_u32_state(prng)); + phys_sz = sz % (max_block_size + 1); + sz = round_up(sz, PAGE_SIZE); phys_sz = round_up(phys_sz, PAGE_SIZE); @@ -201,9 +260,78 @@ err_flush: if (err == -ENOMEM) err = 0; + intel_context_put(ce); +out_file: + mock_file_free(i915, file); return err; } +static int igt_threaded_blt(struct drm_i915_private *i915, + int (*blt_fn)(void *arg)) +{ + struct igt_thread_arg *thread; + struct task_struct **tsk; + I915_RND_STATE(prng); + unsigned int n_cpus; + unsigned int i; + int err = 0; + + n_cpus = num_online_cpus() + 1; + + tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL); + if (!tsk) + return 0; + + thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL); + if (!thread) { + kfree(tsk); + return 0; + } + + for (i = 0; i < n_cpus; ++i) { + thread[i].i915 = i915; + thread[i].n_cpus = n_cpus; + thread[i].prng = + I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng)); + + tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i); + if (IS_ERR(tsk[i])) { + err = PTR_ERR(tsk[i]); + break; + } + + get_task_struct(tsk[i]); + } + + for (i = 0; i < n_cpus; ++i) { + int status; + + if (IS_ERR_OR_NULL(tsk[i])) + continue; + + status = kthread_stop(tsk[i]); + if (status && !err) + err = status; + + put_task_struct(tsk[i]); + } + + kfree(tsk); + kfree(thread); + + return err; +} + +static int igt_fill_blt(void *arg) +{ + return igt_threaded_blt(arg, igt_fill_blt_thread); +} + +static int igt_copy_blt(void *arg) +{ + return igt_threaded_blt(arg, igt_copy_blt_thread); +} + int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { From c442292a661bec3a32cf2a351c53c5f07da20e21 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 25 Oct 2019 17:54:42 +0100 Subject: [PATCH 079/154] drm/i915/pmu: Initialise the spinlock before registering As the GT may be running in parallel with the module initialisation code, we may enter i915_pmu_gt_parked() as we are executing i915_pmu_register(). We have to init the spinlock before we mark pmu.event_init so that it is available for use by i915_pmu_gt_parked() (which may run as soon as event_init is set). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=112127 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191025165442.23356-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_pmu.c | 37 +++++++++++++++++---------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 85912917c062..b5b67c0624ff 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -1101,20 +1101,6 @@ void i915_pmu_register(struct drm_i915_private *i915) return; } - i915_pmu_events_attr_group.attrs = create_event_attributes(pmu); - if (!i915_pmu_events_attr_group.attrs) - goto err; - - pmu->base.attr_groups = i915_pmu_attr_groups; - pmu->base.task_ctx_nr = perf_invalid_context; - pmu->base.event_init = i915_pmu_event_init; - pmu->base.add = i915_pmu_event_add; - pmu->base.del = i915_pmu_event_del; - pmu->base.start = i915_pmu_event_start; - pmu->base.stop = i915_pmu_event_stop; - pmu->base.read = i915_pmu_event_read; - pmu->base.event_idx = i915_pmu_event_event_idx; - spin_lock_init(&pmu->lock); hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); pmu->timer.function = i915_sample; @@ -1128,9 +1114,23 @@ void i915_pmu_register(struct drm_i915_private *i915) if (!pmu->name) goto err; + i915_pmu_events_attr_group.attrs = create_event_attributes(pmu); + if (!i915_pmu_events_attr_group.attrs) + goto err_name; + + pmu->base.attr_groups = i915_pmu_attr_groups; + pmu->base.task_ctx_nr = perf_invalid_context; + pmu->base.event_init = i915_pmu_event_init; + pmu->base.add = i915_pmu_event_add; + pmu->base.del = i915_pmu_event_del; + pmu->base.start = i915_pmu_event_start; + pmu->base.stop = i915_pmu_event_stop; + pmu->base.read = i915_pmu_event_read; + pmu->base.event_idx = i915_pmu_event_event_idx; + ret = perf_pmu_register(&pmu->base, pmu->name, -1); if (ret) - goto err_name; + goto err_attr; ret = i915_pmu_register_cpuhp_state(pmu); if (ret) @@ -1140,13 +1140,14 @@ void i915_pmu_register(struct drm_i915_private *i915) err_unreg: perf_pmu_unregister(&pmu->base); +err_attr: + pmu->base.event_init = NULL; + free_event_attributes(pmu); err_name: if (!is_igp(i915)) kfree(pmu->name); err: - pmu->base.event_init = NULL; - free_event_attributes(pmu); - DRM_NOTE("Failed to register PMU! (err=%d)\n", ret); + dev_notice(i915->drm.dev, "Failed to register PMU!\n"); } void i915_pmu_unregister(struct drm_i915_private *i915) From 2d69c42e373fa1ae2d4e27f7b11e61978d07c6d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Mon, 21 Oct 2019 15:34:08 -0700 Subject: [PATCH 080/154] drm/i915/tc: Clear DKL_TX_PMD_LANE_SUS before program voltage swing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This sequence was recently added to fix internal HW sequences to reset TC ports. HSDES: 1507287614 HSDES: 14010071447 BSpec: 49292 Reviewed-by: Lucas De Marchi Cc: Lucas De Marchi Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20191021223408.87344-1-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 2 ++ drivers/gpu/drm/i915/i915_reg.h | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 1a49266f4f57..c596b1f74fee 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -2838,6 +2838,8 @@ tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock, for (ln = 0; ln < 2; ln++) { I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, ln)); + I915_WRITE(DKL_TX_PMD_LANE_SUS(tc_port), 0); + /* All the registers are RMW */ val = I915_READ(DKL_TX_DPCNTL0(tc_port)); val &= ~dpcnt_mask; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 746326784a4d..7f04b6689ee0 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -10260,6 +10260,12 @@ enum skl_power_gate { _DKL_PHY2_BASE) + \ _DKL_TX_FW_CALIB) +#define _DKL_TX_PMD_LANE_SUS 0xD00 +#define DKL_TX_PMD_LANE_SUS(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_TX_PMD_LANE_SUS) + #define _DKL_TX_DW17 0xDC4 #define DKL_TX_DW17(tc_port) _MMIO(_PORT(tc_port, \ _DKL_PHY1_BASE, \ From babaab2f473817f173a2d08e410c25abf5ed0f6b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 25 Oct 2019 14:59:42 +0100 Subject: [PATCH 081/154] drm/i915: Encapsulate kconfig constant values inside boolean predicates Avoid angering clang and smatch by using a constant value in a '&&' test, by forcing that constant value into a boolean. E.g., drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:159:13: warning: use of logical '&&' with constant operand [-Wconstant-logical-operand] if (!delay && CONFIG_DRM_I915_PREEMPT_TIMEOUT) { ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Reported-by: kbuild test robot Signed-off-by: Chris Wilson Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Jani Nikula Reviewed-by: Nathan Chancellor Link: https://patchwork.freedesktop.org/patch/msgid/20191025135943.12524-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 3 ++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +- drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 4 ++-- drivers/gpu/drm/i915/i915_request.c | 2 +- drivers/gpu/drm/i915/i915_utils.h | 13 +++++++++++++ 5 files changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 55f1f93c0925..414fc55c9dd0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -325,7 +325,8 @@ static bool __cancel_engine(struct intel_engine_cs *engine) * kill the banned context, we fallback to doing a local reset * instead. */ - if (CONFIG_DRM_I915_PREEMPT_TIMEOUT && !intel_engine_pulse(engine)) + if (IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT) && + !intel_engine_pulse(engine)) return true; /* If we are unable to send a pulse, try resetting this engine. */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index fd4122d8c0a9..e3002849844b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -312,7 +312,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) list_add(&obj->userfault_link, &i915->ggtt.userfault_list); mutex_unlock(&i915->ggtt.vm.mutex); - if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND) + if (IS_ACTIVE(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)) intel_wakeref_auto(&i915->ggtt.userfault_wakeref, msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 9977f59f6b53..5051f304705b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -133,7 +133,7 @@ out: void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine) { - if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) + if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL)) return; next_heartbeat(engine); @@ -156,7 +156,7 @@ int intel_engine_set_heartbeat(struct intel_engine_cs *engine, int err; /* Send one last pulse before to cleanup persistent hogs */ - if (!delay && CONFIG_DRM_I915_PREEMPT_TIMEOUT) { + if (!delay && IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) { err = intel_engine_pulse(engine); if (err) return err; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 932c5cf190b5..19a1d447ab8d 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1447,7 +1447,7 @@ long i915_request_wait(struct i915_request *rq, * completion. That requires having a good predictor for the request * duration, which we currently lack. */ - if (CONFIG_DRM_I915_SPIN_REQUEST && + if (IS_ACTIVE(CONFIG_DRM_I915_SPIN_REQUEST) && __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST)) { dma_fence_signal(&rq->fence); goto out; diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 94f136d8a5fd..00b55252ef51 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -430,4 +430,17 @@ static inline bool timer_expired(const struct timer_list *t) return READ_ONCE(t->expires) && !timer_pending(t); } +/* + * This is a lookalike for IS_ENABLED() that takes a kconfig value, + * e.g. CONFIG_DRM_I915_SPIN_REQUEST, and evaluates whether it is non-zero + * i.e. whether the configuration is active. Wrapping up the config inside + * a boolean context prevents clang and smatch from complaining about potential + * issues in confusing logical-&& with bitwise-& for constants. + * + * Sadly IS_ENABLED() itself does not work with kconfig values. + * + * Returns 0 if @config is 0, 1 if set to any value. + */ +#define IS_ACTIVE(config) ((config) != 0) + #endif /* !__I915_UTILS_H */ From 35865aef057ca3e8da69a91679341a11633def92 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 26 Oct 2019 09:22:20 +0100 Subject: [PATCH 082/154] drm/i915/tgl: Adjust the location of RING_MI_MODE in the context image The location of RING_MI_MODE (used to stop the ring across resets) moved for Tigerlake. Fixup the new location and include a selftest to verify the location in the default context image. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Acked-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20191026082220.32632-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 20 ++++++-- drivers/gpu/drm/i915/gt/selftest_lrc.c | 70 ++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 523de1fd4452..16340740139d 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2935,14 +2935,28 @@ static void reset_csb_pointers(struct intel_engine_cs *engine) &execlists->csb_status[reset_value]); } +static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) +{ + if (INTEL_GEN(engine->i915) >= 12) + return 0x60; + else if (INTEL_GEN(engine->i915) >= 9) + return 0x54; + else if (engine->class == RENDER_CLASS) + return 0x58; + else + return -1; +} + static void __execlists_reset_reg_state(const struct intel_context *ce, const struct intel_engine_cs *engine) { u32 *regs = ce->lrc_reg_state; + int x; - if (INTEL_GEN(engine->i915) >= 9) { - regs[GEN9_CTX_RING_MI_MODE + 1] &= ~STOP_RING; - regs[GEN9_CTX_RING_MI_MODE + 1] |= STOP_RING << 16; + x = lrc_ring_mi_mode(engine); + if (x != -1) { + regs[x + 1] &= ~STOP_RING; + regs[x + 1] |= STOP_RING << 16; } } diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index d5d268be554e..ba7fc4397bd9 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -3165,6 +3165,75 @@ static int live_lrc_layout(void *arg) return err; } +static int find_offset(const u32 *lri, u32 offset) +{ + int i; + + for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) + if (lri[i] == offset) + return i; + + return -1; +} + +static int live_lrc_fixed(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* + * Check the assumed register offsets match the actual locations in + * the context image. + */ + + for_each_engine(engine, gt, id) { + const struct { + u32 reg; + u32 offset; + const char *name; + } tbl[] = { + { + i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)), + lrc_ring_mi_mode(engine), + "RING_MI_MODE", + }, + { }, + }, *t; + u32 *hw; + + if (!engine->default_state) + continue; + + hw = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (IS_ERR(hw)) { + err = PTR_ERR(hw); + break; + } + hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); + + for (t = tbl; t->name; t++) { + int dw = find_offset(hw, t->reg); + + if (dw != t->offset) { + pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n", + engine->name, + t->name, + t->reg, + dw, + t->offset); + err = -EINVAL; + } + } + + i915_gem_object_unpin_map(engine->default_state); + } + + return err; +} + static int __live_lrc_state(struct i915_gem_context *fixme, struct intel_engine_cs *engine, struct i915_vma *scratch) @@ -3437,6 +3506,7 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_lrc_layout), + SUBTEST(live_lrc_fixed), SUBTEST(live_lrc_state), SUBTEST(live_gpr_clear), }; From 3e7abf8141935ded77abeb622480bf4a14241ece Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Thu, 24 Oct 2019 22:16:41 +0100 Subject: [PATCH 083/154] drm/i915: Extract GT render power state management i915_irq.c is large. One reason for this is that has a large chunk of the GT render power management stashed away in it. Extract that logic out of i915_irq.c and intel_pm.c and put it under one roof. Based on a patch by Chris Wilson. Signed-off-by: Andi Shyti Cc: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191024211642.7688-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/display/intel_display.c | 8 +- drivers/gpu/drm/i915/gt/intel_gt.c | 13 +- drivers/gpu/drm/i915/gt/intel_gt_irq.c | 5 +- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 28 +- drivers/gpu/drm/i915/gt/intel_gt_pm.h | 1 - drivers/gpu/drm/i915/gt/intel_gt_types.h | 2 + drivers/gpu/drm/i915/gt/intel_llc.c | 2 +- drivers/gpu/drm/i915/gt/intel_rps.c | 1872 +++++++++++++++++ drivers/gpu/drm/i915/gt/intel_rps.h | 37 + drivers/gpu/drm/i915/gt/intel_rps_types.h | 93 + drivers/gpu/drm/i915/gt/selftest_llc.c | 7 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 +- drivers/gpu/drm/i915/i915_debugfs.c | 73 +- drivers/gpu/drm/i915/i915_drv.c | 3 - drivers/gpu/drm/i915/i915_drv.h | 96 - drivers/gpu/drm/i915/i915_gem.c | 3 +- drivers/gpu/drm/i915/i915_irq.c | 359 +--- drivers/gpu/drm/i915/i915_irq.h | 3 - drivers/gpu/drm/i915/i915_pmu.c | 10 +- drivers/gpu/drm/i915/i915_request.c | 7 +- drivers/gpu/drm/i915/i915_sysfs.c | 74 +- drivers/gpu/drm/i915/intel_pm.c | 1713 --------------- drivers/gpu/drm/i915/intel_pm.h | 22 - 24 files changed, 2126 insertions(+), 2310 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_rps.c create mode 100644 drivers/gpu/drm/i915/gt/intel_rps.h create mode 100644 drivers/gpu/drm/i915/gt/intel_rps_types.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 2f64db45f8e0..526f881325c8 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -95,6 +95,7 @@ gt-y += \ gt/intel_reset.o \ gt/intel_ring.o \ gt/intel_ring_submission.o \ + gt/intel_rps.o \ gt/intel_sseu.o \ gt/intel_timeline.o \ gt/intel_workarounds.o diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index cbf9cf30050c..0f0c582a56d5 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -55,6 +55,8 @@ #include "display/intel_tv.h" #include "display/intel_vdsc.h" +#include "gt/intel_rps.h" + #include "i915_drv.h" #include "i915_trace.h" #include "intel_acpi.h" @@ -14944,7 +14946,7 @@ static int do_rps_boost(struct wait_queue_entry *_wait, * vblank without our intervention, so leave RPS alone. */ if (!i915_request_started(rq)) - gen6_rps_boost(rq); + intel_rps_boost(rq); i915_request_put(rq); drm_crtc_vblank_put(wait->crtc); @@ -15138,7 +15140,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, * maximum clocks following a vblank miss (see do_rps_boost()). */ if (!intel_state->rps_interactive) { - intel_rps_mark_interactive(dev_priv, true); + intel_rps_mark_interactive(&dev_priv->gt.rps, true); intel_state->rps_interactive = true; } @@ -15163,7 +15165,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane, struct drm_i915_private *dev_priv = to_i915(plane->dev); if (intel_state->rps_interactive) { - intel_rps_mark_interactive(dev_priv, false); + intel_rps_mark_interactive(&dev_priv->gt.rps, false); intel_state->rps_interactive = false; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 35127699591d..f2440113fdf8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -9,6 +9,7 @@ #include "intel_gt_requests.h" #include "intel_mocs.h" #include "intel_rc6.h" +#include "intel_rps.h" #include "intel_uncore.h" #include "intel_pm.h" @@ -31,9 +32,6 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) void intel_gt_init_hw_early(struct drm_i915_private *i915) { i915->gt.ggtt = &i915->ggtt; - - /* BIOS often leaves RC6 enabled, but disable it for hw init */ - intel_gt_pm_disable(&i915->gt); } static void init_unused_ring(struct intel_gt *gt, u32 base) @@ -320,8 +318,7 @@ void intel_gt_chipset_flush(struct intel_gt *gt) void intel_gt_driver_register(struct intel_gt *gt) { - if (IS_GEN(gt->i915, 5)) - intel_gpu_ips_init(gt->i915); + intel_rps_driver_register(>->rps); } static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) @@ -379,20 +376,16 @@ int intel_gt_init(struct intel_gt *gt) void intel_gt_driver_remove(struct intel_gt *gt) { GEM_BUG_ON(gt->awake); - intel_gt_pm_disable(gt); } void intel_gt_driver_unregister(struct intel_gt *gt) { - intel_gpu_ips_teardown(); + intel_rps_driver_unregister(>->rps); } void intel_gt_driver_release(struct intel_gt *gt) { - /* Paranoia: make sure we have disabled everything before we exit. */ - intel_gt_pm_disable(gt); intel_gt_pm_fini(gt); - intel_gt_fini_scratch(gt); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 34a4fb624bf7..973ee7eded64 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -11,6 +11,7 @@ #include "intel_gt.h" #include "intel_gt_irq.h" #include "intel_uncore.h" +#include "intel_rps.h" static void guc_irq_handler(struct intel_guc *guc, u16 iir) { @@ -77,7 +78,7 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 instance, return guc_irq_handler(>->uc.guc, iir); if (instance == OTHER_GTPM_INSTANCE) - return gen11_rps_irq_handler(gt, iir); + return gen11_rps_irq_handler(>->rps, iir); WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n", instance, iir); @@ -336,7 +337,7 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4]) } if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) { - gen6_rps_irq_handler(gt->i915, gt_iir[2]); + gen6_rps_irq_handler(>->rps, gt_iir[2]); guc_irq_handler(>->uc.guc, gt_iir[2] >> 16); } } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index aff4eda47819..32becf15d4e0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -12,8 +12,10 @@ #include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" +#include "intel_llc.h" #include "intel_pm.h" #include "intel_rc6.h" +#include "intel_rps.h" #include "intel_wakeref.h" static int __gt_unpark(struct intel_wakeref *wf) @@ -39,12 +41,7 @@ static int __gt_unpark(struct intel_wakeref *wf) gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); GEM_BUG_ON(!gt->awake); - intel_enable_gt_powersave(i915); - - i915_update_gfx_val(i915); - if (INTEL_GEN(i915) >= 6) - gen6_rps_busy(i915); - + intel_rps_unpark(>->rps); i915_pmu_gt_unparked(i915); intel_gt_unpark_requests(gt); @@ -64,8 +61,7 @@ static int __gt_park(struct intel_wakeref *wf) i915_vma_parked(gt); i915_pmu_gt_parked(i915); - if (INTEL_GEN(i915) >= 6) - gen6_rps_idle(i915); + intel_rps_park(>->rps); /* Everything switched off, flush any residual interrupt just in case */ intel_synchronize_irq(i915); @@ -97,6 +93,7 @@ void intel_gt_pm_init(struct intel_gt *gt) * user. */ intel_rc6_init(>->rc6); + intel_rps_init(>->rps); } static bool reset_engines(struct intel_gt *gt) @@ -140,12 +137,6 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force) engine->reset.finish(engine); } -void intel_gt_pm_disable(struct intel_gt *gt) -{ - if (!is_mock_gt(gt)) - intel_sanitize_gt_powersave(gt->i915); -} - void intel_gt_pm_fini(struct intel_gt *gt) { intel_rc6_fini(>->rc6); @@ -164,9 +155,13 @@ int intel_gt_resume(struct intel_gt *gt) * allowing us to fixup the user contexts on their first pin. */ intel_gt_pm_get(gt); + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); intel_rc6_sanitize(>->rc6); + intel_rps_enable(>->rps); + intel_llc_enable(>->llc); + for_each_engine(engine, gt, id) { struct intel_context *ce; @@ -217,8 +212,11 @@ void intel_gt_suspend(struct intel_gt *gt) /* We expect to be idle already; but also want to be independent */ wait_for_idle(gt); - with_intel_runtime_pm(gt->uncore->rpm, wakeref) + with_intel_runtime_pm(gt->uncore->rpm, wakeref) { + intel_rps_disable(>->rps); intel_rc6_disable(>->rc6); + intel_llc_disable(>->llc); + } } void intel_gt_runtime_suspend(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index 0ed87da4bb68..d924c984c74d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -39,7 +39,6 @@ static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) void intel_gt_pm_init_early(struct intel_gt *gt); void intel_gt_pm_init(struct intel_gt *gt); -void intel_gt_pm_disable(struct intel_gt *gt); void intel_gt_pm_fini(struct intel_gt *gt); void intel_gt_sanitize(struct intel_gt *gt, bool force); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index fcb3a6c9421c..d4e14dbd172e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -20,6 +20,7 @@ #include "intel_llc_types.h" #include "intel_reset_types.h" #include "intel_rc6_types.h" +#include "intel_rps_types.h" #include "intel_wakeref.h" struct drm_i915_private; @@ -73,6 +74,7 @@ struct intel_gt { struct intel_llc llc; struct intel_rc6 rc6; + struct intel_rps rps; ktime_t last_init_time; diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c index 35093eb5f24e..ceb785b75c25 100644 --- a/drivers/gpu/drm/i915/gt/intel_llc.c +++ b/drivers/gpu/drm/i915/gt/intel_llc.c @@ -48,7 +48,7 @@ static bool get_ia_constants(struct intel_llc *llc, struct ia_constants *consts) { struct drm_i915_private *i915 = llc_to_gt(llc)->i915; - struct intel_rps *rps = &i915->gt_pm.rps; + struct intel_rps *rps = &llc_to_gt(llc)->rps; if (rps->max_freq <= rps->min_freq) return false; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c new file mode 100644 index 000000000000..30f56c786468 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -0,0 +1,1872 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_gt_irq.h" +#include "intel_gt_pm_irq.h" +#include "intel_rps.h" +#include "intel_sideband.h" +#include "../../../platform/x86/intel_ips.h" + +/* + * Lock protecting IPS related data structures + */ +static DEFINE_SPINLOCK(mchdev_lock); + +static struct intel_gt *rps_to_gt(struct intel_rps *rps) +{ + return container_of(rps, struct intel_gt, rps); +} + +static struct drm_i915_private *rps_to_i915(struct intel_rps *rps) +{ + return rps_to_gt(rps)->i915; +} + +static struct intel_uncore *rps_to_uncore(struct intel_rps *rps) +{ + return rps_to_gt(rps)->uncore; +} + +static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask) +{ + return mask & ~rps->pm_intrmsk_mbz; +} + +static u32 rps_pm_mask(struct intel_rps *rps, u8 val) +{ + u32 mask = 0; + + /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */ + if (val > rps->min_freq_softlimit) + mask |= (GEN6_PM_RP_UP_EI_EXPIRED | + GEN6_PM_RP_DOWN_THRESHOLD | + GEN6_PM_RP_DOWN_TIMEOUT); + + if (val < rps->max_freq_softlimit) + mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; + + mask &= rps->pm_events; + + return rps_pm_sanitize_mask(rps, ~mask); +} + +static void rps_reset_ei(struct intel_rps *rps) +{ + memset(&rps->ei, 0, sizeof(rps->ei)); +} + +static void rps_enable_interrupts(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + rps_reset_ei(rps); + + if (IS_VALLEYVIEW(gt->i915)) + /* WaGsvRC0ResidencyMethod:vlv */ + rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED; + else + rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | + GEN6_PM_RP_DOWN_THRESHOLD | + GEN6_PM_RP_DOWN_TIMEOUT); + + spin_lock_irq(>->irq_lock); + gen6_gt_pm_enable_irq(gt, rps->pm_events); + spin_unlock_irq(>->irq_lock); + + intel_uncore_write(gt->uncore, GEN6_PMINTRMSK, + rps_pm_mask(rps, rps->cur_freq)); +} + +static void gen6_rps_reset_interrupts(struct intel_rps *rps) +{ + gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS); +} + +static void gen11_rps_reset_interrupts(struct intel_rps *rps) +{ + while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM)) + ; +} + +static void rps_reset_interrupts(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + spin_lock_irq(>->irq_lock); + if (INTEL_GEN(gt->i915) >= 11) + gen11_rps_reset_interrupts(rps); + else + gen6_rps_reset_interrupts(rps); + + rps->pm_iir = 0; + spin_unlock_irq(>->irq_lock); +} + +static void rps_disable_interrupts(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + rps->pm_events = 0; + + intel_uncore_write(gt->uncore, GEN6_PMINTRMSK, + rps_pm_sanitize_mask(rps, ~0u)); + + spin_lock_irq(>->irq_lock); + gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS); + spin_unlock_irq(>->irq_lock); + + intel_synchronize_irq(gt->i915); + + /* + * Now that we will not be generating any more work, flush any + * outstanding tasks. As we are called on the RPS idle path, + * we will reset the GPU to minimum frequencies, so the current + * state of the worker can be discarded. + */ + cancel_work_sync(&rps->work); + + rps_reset_interrupts(rps); +} + +static const struct cparams { + u16 i; + u16 t; + u16 m; + u16 c; +} cparams[] = { + { 1, 1333, 301, 28664 }, + { 1, 1066, 294, 24460 }, + { 1, 800, 294, 25192 }, + { 0, 1333, 276, 27605 }, + { 0, 1066, 276, 27605 }, + { 0, 800, 231, 23784 }, +}; + +static void gen5_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + u8 fmax, fmin, fstart; + u32 rgvmodectl; + int c_m, i; + + if (i915->fsb_freq <= 3200) + c_m = 0; + else if (i915->fsb_freq <= 4800) + c_m = 1; + else + c_m = 2; + + for (i = 0; i < ARRAY_SIZE(cparams); i++) { + if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) { + rps->ips.m = cparams[i].m; + rps->ips.c = cparams[i].c; + break; + } + } + + rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); + + /* Set up min, max, and cur for interrupt handling */ + fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; + fmin = (rgvmodectl & MEMMODE_FMIN_MASK); + fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> + MEMMODE_FSTART_SHIFT; + DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", + fmax, fmin, fstart); + + rps->min_freq = -fstart; + rps->max_freq = -fmin; + + rps->idle_freq = rps->min_freq; + rps->cur_freq = rps->idle_freq; +} + +static unsigned long +__ips_chipset_val(struct intel_ips *ips) +{ + struct intel_uncore *uncore = + rps_to_uncore(container_of(ips, struct intel_rps, ips)); + unsigned long now = jiffies_to_msecs(jiffies), dt; + unsigned long result; + u64 total, delta; + + lockdep_assert_held(&mchdev_lock); + + /* + * Prevent division-by-zero if we are asking too fast. + * Also, we don't get interesting results if we are polling + * faster than once in 10ms, so just return the saved value + * in such cases. + */ + dt = now - ips->last_time1; + if (dt <= 10) + return ips->chipset_power; + + /* FIXME: handle per-counter overflow */ + total = intel_uncore_read(uncore, DMIEC); + total += intel_uncore_read(uncore, DDREC); + total += intel_uncore_read(uncore, CSIEC); + + delta = total - ips->last_count1; + + result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10); + + ips->last_count1 = total; + ips->last_time1 = now; + + ips->chipset_power = result; + + return result; +} + +static unsigned long ips_mch_val(struct intel_uncore *uncore) +{ + unsigned int m, x, b; + u32 tsfs; + + tsfs = intel_uncore_read(uncore, TSFS); + x = intel_uncore_read8(uncore, TR1); + + b = tsfs & TSFS_INTR_MASK; + m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT; + + return m * x / 127 - b; +} + +static int _pxvid_to_vd(u8 pxvid) +{ + if (pxvid == 0) + return 0; + + if (pxvid >= 8 && pxvid < 31) + pxvid = 31; + + return (pxvid + 2) * 125; +} + +static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid) +{ + const int vd = _pxvid_to_vd(pxvid); + + if (INTEL_INFO(i915)->is_mobile) + return max(vd - 1125, 0); + + return vd; +} + +static void __gen5_ips_update(struct intel_ips *ips) +{ + struct intel_uncore *uncore = + rps_to_uncore(container_of(ips, struct intel_rps, ips)); + u64 now, delta, dt; + u32 count; + + lockdep_assert_held(&mchdev_lock); + + now = ktime_get_raw_ns(); + dt = now - ips->last_time2; + do_div(dt, NSEC_PER_MSEC); + + /* Don't divide by 0 */ + if (dt <= 10) + return; + + count = intel_uncore_read(uncore, GFXEC); + delta = count - ips->last_count2; + + ips->last_count2 = count; + ips->last_time2 = now; + + /* More magic constants... */ + ips->gfx_power = div_u64(delta * 1181, dt * 10); +} + +static void gen5_rps_update(struct intel_rps *rps) +{ + spin_lock_irq(&mchdev_lock); + __gen5_ips_update(&rps->ips); + spin_unlock_irq(&mchdev_lock); +} + +static bool gen5_rps_set(struct intel_rps *rps, u8 val) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u16 rgvswctl; + + lockdep_assert_held(&mchdev_lock); + + rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); + if (rgvswctl & MEMCTL_CMD_STS) { + DRM_DEBUG("gpu busy, RCS change rejected\n"); + return false; /* still busy with another command */ + } + + val = -val; + + rgvswctl = + (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | + (val << MEMCTL_FREQ_SHIFT) | + MEMCTL_SFCAVM; + intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); + intel_uncore_posting_read16(uncore, MEMSWCTL); + + rgvswctl |= MEMCTL_CMD_STS; + intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); + + return true; +} + +static unsigned long intel_pxfreq(u32 vidfreq) +{ + int div = (vidfreq & 0x3f0000) >> 16; + int post = (vidfreq & 0x3000) >> 12; + int pre = (vidfreq & 0x7); + + if (!pre) + return 0; + + return div * 133333 / (pre << post); +} + +static unsigned int init_emon(struct intel_uncore *uncore) +{ + u8 pxw[16]; + int i; + + /* Disable to program */ + intel_uncore_write(uncore, ECR, 0); + intel_uncore_posting_read(uncore, ECR); + + /* Program energy weights for various events */ + intel_uncore_write(uncore, SDEW, 0x15040d00); + intel_uncore_write(uncore, CSIEW0, 0x007f0000); + intel_uncore_write(uncore, CSIEW1, 0x1e220004); + intel_uncore_write(uncore, CSIEW2, 0x04000004); + + for (i = 0; i < 5; i++) + intel_uncore_write(uncore, PEW(i), 0); + for (i = 0; i < 3; i++) + intel_uncore_write(uncore, DEW(i), 0); + + /* Program P-state weights to account for frequency power adjustment */ + for (i = 0; i < 16; i++) { + u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i)); + unsigned int freq = intel_pxfreq(pxvidfreq); + unsigned int vid = + (pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; + unsigned int val; + + val = vid * vid * freq / 1000 * 255; + val /= 127 * 127 * 900; + + pxw[i] = val; + } + /* Render standby states get 0 weight */ + pxw[14] = 0; + pxw[15] = 0; + + for (i = 0; i < 4; i++) { + intel_uncore_write(uncore, PXW(i), + pxw[i * 4 + 0] << 24 | + pxw[i * 4 + 1] << 16 | + pxw[i * 4 + 2] << 8 | + pxw[i * 4 + 3] << 0); + } + + /* Adjust magic regs to magic values (more experimental results) */ + intel_uncore_write(uncore, OGW0, 0); + intel_uncore_write(uncore, OGW1, 0); + intel_uncore_write(uncore, EG0, 0x00007f00); + intel_uncore_write(uncore, EG1, 0x0000000e); + intel_uncore_write(uncore, EG2, 0x000e0000); + intel_uncore_write(uncore, EG3, 0x68000300); + intel_uncore_write(uncore, EG4, 0x42000000); + intel_uncore_write(uncore, EG5, 0x00140031); + intel_uncore_write(uncore, EG6, 0); + intel_uncore_write(uncore, EG7, 0); + + for (i = 0; i < 8; i++) + intel_uncore_write(uncore, PXWL(i), 0); + + /* Enable PMON + select events */ + intel_uncore_write(uncore, ECR, 0x80000019); + + return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK; +} + +static bool gen5_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u8 fstart, vstart; + u32 rgvmodectl; + + spin_lock_irq(&mchdev_lock); + + rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); + + /* Enable temp reporting */ + intel_uncore_write16(uncore, PMMISC, + intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN); + intel_uncore_write16(uncore, TSC1, + intel_uncore_read16(uncore, TSC1) | TSE); + + /* 100ms RC evaluation intervals */ + intel_uncore_write(uncore, RCUPEI, 100000); + intel_uncore_write(uncore, RCDNEI, 100000); + + /* Set max/min thresholds to 90ms and 80ms respectively */ + intel_uncore_write(uncore, RCBMAXAVG, 90000); + intel_uncore_write(uncore, RCBMINAVG, 80000); + + intel_uncore_write(uncore, MEMIHYST, 1); + + /* Set up min, max, and cur for interrupt handling */ + fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> + MEMMODE_FSTART_SHIFT; + + vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) & + PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; + + intel_uncore_write(uncore, + MEMINTREN, + MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); + + intel_uncore_write(uncore, VIDSTART, vstart); + intel_uncore_posting_read(uncore, VIDSTART); + + rgvmodectl |= MEMMODE_SWMODE_EN; + intel_uncore_write(uncore, MEMMODECTL, rgvmodectl); + + if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) & + MEMCTL_CMD_STS) == 0, 10)) + DRM_ERROR("stuck trying to change perf mode\n"); + mdelay(1); + + gen5_rps_set(rps, rps->cur_freq); + + rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC); + rps->ips.last_count1 += intel_uncore_read(uncore, DDREC); + rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC); + rps->ips.last_time1 = jiffies_to_msecs(jiffies); + + rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC); + rps->ips.last_time2 = ktime_get_raw_ns(); + + spin_unlock_irq(&mchdev_lock); + + rps->ips.corr = init_emon(uncore); + + return true; +} + +static void gen5_rps_disable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u16 rgvswctl; + + spin_lock_irq(&mchdev_lock); + + rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); + + /* Ack interrupts, disable EFC interrupt */ + intel_uncore_write(uncore, MEMINTREN, + intel_uncore_read(uncore, MEMINTREN) & + ~MEMINT_EVAL_CHG_EN); + intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); + intel_uncore_write(uncore, DEIER, + intel_uncore_read(uncore, DEIER) & ~DE_PCU_EVENT); + intel_uncore_write(uncore, DEIIR, DE_PCU_EVENT); + intel_uncore_write(uncore, DEIMR, + intel_uncore_read(uncore, DEIMR) | DE_PCU_EVENT); + + /* Go back to the starting frequency */ + gen5_rps_set(rps, rps->idle_freq); + mdelay(1); + rgvswctl |= MEMCTL_CMD_STS; + intel_uncore_write(uncore, MEMSWCTL, rgvswctl); + mdelay(1); + + spin_unlock_irq(&mchdev_lock); +} + +static u32 rps_limits(struct intel_rps *rps, u8 val) +{ + u32 limits; + + /* + * Only set the down limit when we've reached the lowest level to avoid + * getting more interrupts, otherwise leave this clear. This prevents a + * race in the hw when coming out of rc6: There's a tiny window where + * the hw runs at the minimal clock before selecting the desired + * frequency, if the down threshold expires in that window we will not + * receive a down interrupt. + */ + if (INTEL_GEN(rps_to_i915(rps)) >= 9) { + limits = rps->max_freq_softlimit << 23; + if (val <= rps->min_freq_softlimit) + limits |= rps->min_freq_softlimit << 14; + } else { + limits = rps->max_freq_softlimit << 24; + if (val <= rps->min_freq_softlimit) + limits |= rps->min_freq_softlimit << 16; + } + + return limits; +} + +static void rps_set_power(struct intel_rps *rps, int new_power) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 threshold_up = 0, threshold_down = 0; /* in % */ + u32 ei_up = 0, ei_down = 0; + + lockdep_assert_held(&rps->power.mutex); + + if (new_power == rps->power.mode) + return; + + /* Note the units here are not exactly 1us, but 1280ns. */ + switch (new_power) { + case LOW_POWER: + /* Upclock if more than 95% busy over 16ms */ + ei_up = 16000; + threshold_up = 95; + + /* Downclock if less than 85% busy over 32ms */ + ei_down = 32000; + threshold_down = 85; + break; + + case BETWEEN: + /* Upclock if more than 90% busy over 13ms */ + ei_up = 13000; + threshold_up = 90; + + /* Downclock if less than 75% busy over 32ms */ + ei_down = 32000; + threshold_down = 75; + break; + + case HIGH_POWER: + /* Upclock if more than 85% busy over 10ms */ + ei_up = 10000; + threshold_up = 85; + + /* Downclock if less than 60% busy over 32ms */ + ei_down = 32000; + threshold_down = 60; + break; + } + + /* When byt can survive without system hang with dynamic + * sw freq adjustments, this restriction can be lifted. + */ + if (IS_VALLEYVIEW(i915)) + goto skip_hw_write; + + intel_uncore_write(uncore, GEN6_RP_UP_EI, + GT_INTERVAL_FROM_US(i915, ei_up)); + intel_uncore_write(uncore, GEN6_RP_UP_THRESHOLD, + GT_INTERVAL_FROM_US(i915, + ei_up * threshold_up / 100)); + + intel_uncore_write(uncore, GEN6_RP_DOWN_EI, + GT_INTERVAL_FROM_US(i915, ei_down)); + intel_uncore_write(uncore, GEN6_RP_DOWN_THRESHOLD, + GT_INTERVAL_FROM_US(i915, + ei_down * threshold_down / 100)); + + intel_uncore_write(uncore, GEN6_RP_CONTROL, + (INTEL_GEN(i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); + +skip_hw_write: + rps->power.mode = new_power; + rps->power.up_threshold = threshold_up; + rps->power.down_threshold = threshold_down; +} + +static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val) +{ + int new_power; + + new_power = rps->power.mode; + switch (rps->power.mode) { + case LOW_POWER: + if (val > rps->efficient_freq + 1 && + val > rps->cur_freq) + new_power = BETWEEN; + break; + + case BETWEEN: + if (val <= rps->efficient_freq && + val < rps->cur_freq) + new_power = LOW_POWER; + else if (val >= rps->rp0_freq && + val > rps->cur_freq) + new_power = HIGH_POWER; + break; + + case HIGH_POWER: + if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 && + val < rps->cur_freq) + new_power = BETWEEN; + break; + } + /* Max/min bins are special */ + if (val <= rps->min_freq_softlimit) + new_power = LOW_POWER; + if (val >= rps->max_freq_softlimit) + new_power = HIGH_POWER; + + mutex_lock(&rps->power.mutex); + if (rps->power.interactive) + new_power = HIGH_POWER; + rps_set_power(rps, new_power); + mutex_unlock(&rps->power.mutex); +} + +void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive) +{ + if (!rps->enabled) + return; + + mutex_lock(&rps->power.mutex); + if (interactive) { + if (!rps->power.interactive++ && rps->active) + rps_set_power(rps, HIGH_POWER); + } else { + GEM_BUG_ON(!rps->power.interactive); + rps->power.interactive--; + } + mutex_unlock(&rps->power.mutex); +} + +static int gen6_rps_set(struct intel_rps *rps, u8 val) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 swreq; + + if (INTEL_GEN(i915) >= 9) + swreq = GEN9_FREQUENCY(val); + else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) + swreq = HSW_FREQUENCY(val); + else + swreq = (GEN6_FREQUENCY(val) | + GEN6_OFFSET(0) | + GEN6_AGGRESSIVE_TURBO); + intel_uncore_write(uncore, GEN6_RPNSWREQ, swreq); + + return 0; +} + +static int vlv_rps_set(struct intel_rps *rps, u8 val) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + int err; + + vlv_punit_get(i915); + err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val); + vlv_punit_put(i915); + + return err; +} + +static int rps_set(struct intel_rps *rps, u8 val) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + int err; + + if (INTEL_GEN(i915) < 6) + return 0; + + if (val == rps->last_freq) + return 0; + + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + err = vlv_rps_set(rps, val); + else + err = gen6_rps_set(rps, val); + if (err) + return err; + + gen6_rps_set_thresholds(rps, val); + rps->last_freq = val; + + return 0; +} + +void intel_rps_unpark(struct intel_rps *rps) +{ + u8 freq; + + if (!rps->enabled) + return; + + /* + * Use the user's desired frequency as a guide, but for better + * performance, jump directly to RPe as our starting frequency. + */ + mutex_lock(&rps->lock); + rps->active = true; + freq = max(rps->cur_freq, rps->efficient_freq), + freq = clamp(freq, rps->min_freq_softlimit, rps->max_freq_softlimit); + intel_rps_set(rps, freq); + rps->last_adj = 0; + mutex_unlock(&rps->lock); + + if (INTEL_GEN(rps_to_i915(rps)) >= 6) + rps_enable_interrupts(rps); + + if (IS_GEN(rps_to_i915(rps), 5)) + gen5_rps_update(rps); +} + +void intel_rps_park(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + if (!rps->enabled) + return; + + if (INTEL_GEN(i915) >= 6) + rps_disable_interrupts(rps); + + rps->active = false; + if (rps->last_freq <= rps->idle_freq) + return; + + /* + * The punit delays the write of the frequency and voltage until it + * determines the GPU is awake. During normal usage we don't want to + * waste power changing the frequency if the GPU is sleeping (rc6). + * However, the GPU and driver is now idle and we do not want to delay + * switching to minimum voltage (reducing power whilst idle) as we do + * not expect to be woken in the near future and so must flush the + * change by waking the device. + * + * We choose to take the media powerwell (either would do to trick the + * punit into committing the voltage change) as that takes a lot less + * power than the render powerwell. + */ + intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA); + rps_set(rps, rps->idle_freq); + intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA); +} + +void intel_rps_boost(struct i915_request *rq) +{ + struct intel_rps *rps = &rq->engine->gt->rps; + unsigned long flags; + + if (i915_request_signaled(rq) || !rps->active) + return; + + /* Serializes with i915_request_retire() */ + spin_lock_irqsave(&rq->lock, flags); + if (!i915_request_has_waitboost(rq) && + !dma_fence_is_signaled_locked(&rq->fence)) { + rq->flags |= I915_REQUEST_WAITBOOST; + + if (!atomic_fetch_inc(&rps->num_waiters) && + READ_ONCE(rps->cur_freq) < rps->boost_freq) + schedule_work(&rps->work); + + atomic_inc(&rps->boosts); + } + spin_unlock_irqrestore(&rq->lock, flags); +} + +int intel_rps_set(struct intel_rps *rps, u8 val) +{ + int err = 0; + + lockdep_assert_held(&rps->lock); + GEM_BUG_ON(val > rps->max_freq); + GEM_BUG_ON(val < rps->min_freq); + + if (rps->active) { + err = rps_set(rps, val); + + /* + * Make sure we continue to get interrupts + * until we hit the minimum or maximum frequencies. + */ + if (INTEL_GEN(rps_to_i915(rps)) >= 6) { + struct intel_uncore *uncore = rps_to_uncore(rps); + + intel_uncore_write(uncore, GEN6_RP_INTERRUPT_LIMITS, + rps_limits(rps, val)); + + intel_uncore_write(uncore, GEN6_PMINTRMSK, + rps_pm_mask(rps, val)); + } + } + + if (err == 0) + rps->cur_freq = val; + + return err; +} + +static void gen6_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + + /* All of these values are in units of 50MHz */ + + /* static values from HW: RP0 > RP1 > RPn (min_freq) */ + if (IS_GEN9_LP(i915)) { + u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP); + + rps->rp0_freq = (rp_state_cap >> 16) & 0xff; + rps->rp1_freq = (rp_state_cap >> 8) & 0xff; + rps->min_freq = (rp_state_cap >> 0) & 0xff; + } else { + u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP); + + rps->rp0_freq = (rp_state_cap >> 0) & 0xff; + rps->rp1_freq = (rp_state_cap >> 8) & 0xff; + rps->min_freq = (rp_state_cap >> 16) & 0xff; + } + + /* hw_max = RP0 until we check for overclocking */ + rps->max_freq = rps->rp0_freq; + + rps->efficient_freq = rps->rp1_freq; + if (IS_HASWELL(i915) || IS_BROADWELL(i915) || + IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) { + u32 ddcc_status = 0; + + if (sandybridge_pcode_read(i915, + HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, + &ddcc_status, NULL) == 0) + rps->efficient_freq = + clamp_t(u8, + (ddcc_status >> 8) & 0xff, + rps->min_freq, + rps->max_freq); + } + + if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) { + /* Store the frequency values in 16.66 MHZ units, which is + * the natural hardware unit for SKL + */ + rps->rp0_freq *= GEN9_FREQ_SCALER; + rps->rp1_freq *= GEN9_FREQ_SCALER; + rps->min_freq *= GEN9_FREQ_SCALER; + rps->max_freq *= GEN9_FREQ_SCALER; + rps->efficient_freq *= GEN9_FREQ_SCALER; + } +} + +static bool rps_reset(struct intel_rps *rps) +{ + /* force a reset */ + rps->power.mode = -1; + rps->last_freq = -1; + + if (rps_set(rps, rps->min_freq)) { + DRM_ERROR("Failed to reset RPS to initial values\n"); + return false; + } + + rps->cur_freq = rps->min_freq; + return true; +} + +/* See the Gen9_GT_PM_Programming_Guide doc for the below */ +static bool gen9_rps_enable(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + + /* Program defaults and thresholds for RPS */ + if (IS_GEN(i915, 9)) + intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, + GEN9_FREQUENCY(rps->rp1_freq)); + + /* 1 second timeout */ + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, + GT_INTERVAL_FROM_US(i915, 1000000)); + + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa); + + return rps_reset(rps); +} + +static bool gen8_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + + intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, + HSW_FREQUENCY(rps->rp1_freq)); + + /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, + 100000000 / 128); /* 1 second timeout */ + + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + + return rps_reset(rps); +} + +static bool gen6_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + + /* Power down if completely idle for over 50ms */ + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000); + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + + return rps_reset(rps); +} + +static int chv_rps_max_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE); + + switch (RUNTIME_INFO(i915)->sseu.eu_total) { + case 8: + /* (2 * 4) config */ + val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT; + break; + case 12: + /* (2 * 6) config */ + val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT; + break; + case 16: + /* (2 * 8) config */ + default: + /* Setting (2 * 8) Min RP0 for any other combination */ + val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT; + break; + } + + return val & FB_GFX_FREQ_FUSE_MASK; +} + +static int chv_rps_rpe_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG); + val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT; + + return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; +} + +static int chv_rps_guar_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE); + + return val & FB_GFX_FREQ_FUSE_MASK; +} + +static u32 chv_rps_min_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE); + val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT; + + return val & FB_GFX_FREQ_FUSE_MASK; +} + +static bool chv_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + /* 1: Program defaults and thresholds for RPS*/ + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000); + intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400); + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000); + intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000); + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000); + + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + + /* 2: Enable RPS */ + intel_uncore_write_fw(uncore, GEN6_RP_CONTROL, + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); + + /* Setting Fixed Bias */ + vlv_punit_get(i915); + + val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50; + vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val); + + val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); + + vlv_punit_put(i915); + + /* RPS code assumes GPLL is used */ + WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); + + DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); + DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); + + return rps_reset(rps); +} + +static int vlv_rps_guar_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val, rp1; + + val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE); + + rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK; + rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; + + return rp1; +} + +static int vlv_rps_max_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val, rp0; + + val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE); + + rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; + /* Clamp to max */ + rp0 = min_t(u32, rp0, 0xea); + + return rp0; +} + +static int vlv_rps_rpe_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val, rpe; + + val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO); + rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; + val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI); + rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; + + return rpe; +} + +static int vlv_rps_min_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff; + /* + * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value + * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on + * a BYT-M B0 the above register contains 0xbf. Moreover when setting + * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0 + * to make sure it matches what Punit accepts. + */ + return max_t(u32, val, 0xc0); +} + +static bool vlv_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000); + intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400); + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000); + intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000); + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000); + + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + + intel_uncore_write_fw(uncore, GEN6_RP_CONTROL, + GEN6_RP_MEDIA_TURBO | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_CONT); + + vlv_punit_get(i915); + + /* Setting Fixed Bias */ + val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875; + vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val); + + val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); + + vlv_punit_put(i915); + + /* RPS code assumes GPLL is used */ + WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); + + DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); + DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); + + return rps_reset(rps); +} + +static unsigned long __ips_gfx_val(struct intel_ips *ips) +{ + struct intel_rps *rps = container_of(ips, typeof(*rps), ips); + struct intel_uncore *uncore = rps_to_uncore(rps); + unsigned long t, corr, state1, corr2, state2; + u32 pxvid, ext_v; + + lockdep_assert_held(&mchdev_lock); + + pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq)); + pxvid = (pxvid >> 24) & 0x7f; + ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid); + + state1 = ext_v; + + /* Revel in the empirically derived constants */ + + /* Correction factor in 1/100000 units */ + t = ips_mch_val(uncore); + if (t > 80) + corr = t * 2349 + 135940; + else if (t >= 50) + corr = t * 964 + 29317; + else /* < 50 */ + corr = t * 301 + 1004; + + corr = corr * 150142 * state1 / 10000 - 78642; + corr /= 100000; + corr2 = corr * ips->corr; + + state2 = corr2 * state1 / 10000; + state2 /= 100; /* convert to mW */ + + __gen5_ips_update(ips); + + return ips->gfx_power + state2; +} + +void intel_rps_enable(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + if (IS_CHERRYVIEW(i915)) + rps->enabled = chv_rps_enable(rps); + else if (IS_VALLEYVIEW(i915)) + rps->enabled = vlv_rps_enable(rps); + else if (INTEL_GEN(i915) >= 9) + rps->enabled = gen9_rps_enable(rps); + else if (INTEL_GEN(i915) >= 8) + rps->enabled = gen8_rps_enable(rps); + else if (INTEL_GEN(i915) >= 6) + rps->enabled = gen6_rps_enable(rps); + else if (IS_IRONLAKE_M(i915)) + rps->enabled = gen5_rps_enable(rps); + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + if (!rps->enabled) + return; + + WARN_ON(rps->max_freq < rps->min_freq); + WARN_ON(rps->idle_freq > rps->max_freq); + + WARN_ON(rps->efficient_freq < rps->min_freq); + WARN_ON(rps->efficient_freq > rps->max_freq); +} + +static void gen6_rps_disable(struct intel_rps *rps) +{ + intel_uncore_write(rps_to_uncore(rps), GEN6_RP_CONTROL, 0); +} + +void intel_rps_disable(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + rps->enabled = false; + + if (INTEL_GEN(i915) >= 6) + gen6_rps_disable(rps); + else if (IS_IRONLAKE_M(i915)) + gen5_rps_disable(rps); +} + +static int byt_gpu_freq(struct intel_rps *rps, int val) +{ + /* + * N = val - 0xb7 + * Slow = Fast = GPLL ref * N + */ + return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000); +} + +static int byt_freq_opcode(struct intel_rps *rps, int val) +{ + return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7; +} + +static int chv_gpu_freq(struct intel_rps *rps, int val) +{ + /* + * N = val / 2 + * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2 + */ + return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000); +} + +static int chv_freq_opcode(struct intel_rps *rps, int val) +{ + /* CHV needs even values */ + return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2; +} + +int intel_gpu_freq(struct intel_rps *rps, int val) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + if (INTEL_GEN(i915) >= 9) + return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER, + GEN9_FREQ_SCALER); + else if (IS_CHERRYVIEW(i915)) + return chv_gpu_freq(rps, val); + else if (IS_VALLEYVIEW(i915)) + return byt_gpu_freq(rps, val); + else + return val * GT_FREQUENCY_MULTIPLIER; +} + +int intel_freq_opcode(struct intel_rps *rps, int val) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + if (INTEL_GEN(i915) >= 9) + return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER, + GT_FREQUENCY_MULTIPLIER); + else if (IS_CHERRYVIEW(i915)) + return chv_freq_opcode(rps, val); + else if (IS_VALLEYVIEW(i915)) + return byt_freq_opcode(rps, val); + else + return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER); +} + +static void vlv_init_gpll_ref_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + rps->gpll_ref_freq = + vlv_get_cck_clock(i915, "GPLL ref", + CCK_GPLL_CLOCK_CONTROL, + i915->czclk_freq); + + DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", rps->gpll_ref_freq); +} + +static void vlv_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + vlv_iosf_sb_get(i915, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); + + vlv_init_gpll_ref_freq(rps); + + val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); + switch ((val >> 6) & 3) { + case 0: + case 1: + i915->mem_freq = 800; + break; + case 2: + i915->mem_freq = 1066; + break; + case 3: + i915->mem_freq = 1333; + break; + } + DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq); + + rps->max_freq = vlv_rps_max_freq(rps); + rps->rp0_freq = rps->max_freq; + DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->max_freq), + rps->max_freq); + + rps->efficient_freq = vlv_rps_rpe_freq(rps); + DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->efficient_freq), + rps->efficient_freq); + + rps->rp1_freq = vlv_rps_guar_freq(rps); + DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->rp1_freq), + rps->rp1_freq); + + rps->min_freq = vlv_rps_min_freq(rps); + DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->min_freq), + rps->min_freq); + + vlv_iosf_sb_put(i915, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); +} + +static void chv_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + vlv_iosf_sb_get(i915, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); + + vlv_init_gpll_ref_freq(rps); + + val = vlv_cck_read(i915, CCK_FUSE_REG); + + switch ((val >> 2) & 0x7) { + case 3: + i915->mem_freq = 2000; + break; + default: + i915->mem_freq = 1600; + break; + } + DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq); + + rps->max_freq = chv_rps_max_freq(rps); + rps->rp0_freq = rps->max_freq; + DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->max_freq), + rps->max_freq); + + rps->efficient_freq = chv_rps_rpe_freq(rps); + DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->efficient_freq), + rps->efficient_freq); + + rps->rp1_freq = chv_rps_guar_freq(rps); + DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->rp1_freq), + rps->rp1_freq); + + rps->min_freq = chv_rps_min_freq(rps); + DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->min_freq), + rps->min_freq); + + vlv_iosf_sb_put(i915, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); + + WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq | + rps->min_freq) & 1, + "Odd GPU freq values\n"); +} + +static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei) +{ + ei->ktime = ktime_get_raw(); + ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT); + ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT); +} + +static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + const struct intel_rps_ei *prev = &rps->ei; + struct intel_rps_ei now; + u32 events = 0; + + if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0) + return 0; + + vlv_c0_read(uncore, &now); + + if (prev->ktime) { + u64 time, c0; + u32 render, media; + + time = ktime_us_delta(now.ktime, prev->ktime); + + time *= rps_to_i915(rps)->czclk_freq; + + /* Workload can be split between render + media, + * e.g. SwapBuffers being blitted in X after being rendered in + * mesa. To account for this we need to combine both engines + * into our activity counter. + */ + render = now.render_c0 - prev->render_c0; + media = now.media_c0 - prev->media_c0; + c0 = max(render, media); + c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */ + + if (c0 > time * rps->power.up_threshold) + events = GEN6_PM_RP_UP_THRESHOLD; + else if (c0 < time * rps->power.down_threshold) + events = GEN6_PM_RP_DOWN_THRESHOLD; + } + + rps->ei = now; + return events; +} + +static void rps_work(struct work_struct *work) +{ + struct intel_rps *rps = container_of(work, typeof(*rps), work); + struct intel_gt *gt = rps_to_gt(rps); + bool client_boost = false; + int new_freq, adj, min, max; + u32 pm_iir = 0; + + spin_lock_irq(>->irq_lock); + pm_iir = fetch_and_zero(&rps->pm_iir); + client_boost = atomic_read(&rps->num_waiters); + spin_unlock_irq(>->irq_lock); + + /* Make sure we didn't queue anything we're not going to process. */ + if ((pm_iir & rps->pm_events) == 0 && !client_boost) + goto out; + + mutex_lock(&rps->lock); + + pm_iir |= vlv_wa_c0_ei(rps, pm_iir); + + adj = rps->last_adj; + new_freq = rps->cur_freq; + min = rps->min_freq_softlimit; + max = rps->max_freq_softlimit; + if (client_boost) + max = rps->max_freq; + if (client_boost && new_freq < rps->boost_freq) { + new_freq = rps->boost_freq; + adj = 0; + } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { + if (adj > 0) + adj *= 2; + else /* CHV needs even encode values */ + adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1; + + if (new_freq >= rps->max_freq_softlimit) + adj = 0; + } else if (client_boost) { + adj = 0; + } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { + if (rps->cur_freq > rps->efficient_freq) + new_freq = rps->efficient_freq; + else if (rps->cur_freq > rps->min_freq_softlimit) + new_freq = rps->min_freq_softlimit; + adj = 0; + } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { + if (adj < 0) + adj *= 2; + else /* CHV needs even encode values */ + adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1; + + if (new_freq <= rps->min_freq_softlimit) + adj = 0; + } else { /* unknown event */ + adj = 0; + } + + rps->last_adj = adj; + + /* + * Limit deboosting and boosting to keep ourselves at the extremes + * when in the respective power modes (i.e. slowly decrease frequencies + * while in the HIGH_POWER zone and slowly increase frequencies while + * in the LOW_POWER zone). On idle, we will hit the timeout and drop + * to the next level quickly, and conversely if busy we expect to + * hit a waitboost and rapidly switch into max power. + */ + if ((adj < 0 && rps->power.mode == HIGH_POWER) || + (adj > 0 && rps->power.mode == LOW_POWER)) + rps->last_adj = 0; + + /* sysfs frequency interfaces may have snuck in while servicing the + * interrupt + */ + new_freq += adj; + new_freq = clamp_t(int, new_freq, min, max); + + if (intel_rps_set(rps, new_freq)) { + DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); + rps->last_adj = 0; + } + + mutex_unlock(&rps->lock); + +out: + spin_lock_irq(>->irq_lock); + gen6_gt_pm_unmask_irq(gt, rps->pm_events); + spin_unlock_irq(>->irq_lock); +} + +void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) +{ + struct intel_gt *gt = rps_to_gt(rps); + const u32 events = rps->pm_events & pm_iir; + + lockdep_assert_held(>->irq_lock); + + if (unlikely(!events)) + return; + + gen6_gt_pm_mask_irq(gt, events); + + rps->pm_iir |= events; + schedule_work(&rps->work); +} + +void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + if (pm_iir & rps->pm_events) { + struct intel_gt *gt = rps_to_gt(rps); + + spin_lock(>->irq_lock); + gen6_gt_pm_mask_irq(gt, pm_iir & rps->pm_events); + rps->pm_iir |= pm_iir & rps->pm_events; + schedule_work(&rps->work); + spin_unlock(>->irq_lock); + } + + if (INTEL_GEN(i915) >= 8) + return; + + if (pm_iir & PM_VEBOX_USER_INTERRUPT) + intel_engine_breadcrumbs_irq(i915->engine[VECS0]); + + if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) + DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); +} + +void gen5_rps_irq_handler(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u32 busy_up, busy_down, max_avg, min_avg; + u8 new_freq; + + spin_lock(&mchdev_lock); + + intel_uncore_write16(uncore, + MEMINTRSTS, + intel_uncore_read(uncore, MEMINTRSTS)); + + intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); + busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG); + busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG); + max_avg = intel_uncore_read(uncore, RCBMAXAVG); + min_avg = intel_uncore_read(uncore, RCBMINAVG); + + /* Handle RCS change request from hw */ + new_freq = rps->cur_freq; + if (busy_up > max_avg) + new_freq++; + else if (busy_down < min_avg) + new_freq--; + new_freq = clamp(new_freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit); + + if (new_freq != rps->cur_freq && gen5_rps_set(rps, new_freq)) + rps->cur_freq = new_freq; + + spin_unlock(&mchdev_lock); +} + +void intel_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + mutex_init(&rps->lock); + mutex_init(&rps->power.mutex); + + INIT_WORK(&rps->work, rps_work); + + atomic_set(&rps->num_waiters, 0); + + if (IS_CHERRYVIEW(i915)) + chv_rps_init(rps); + else if (IS_VALLEYVIEW(i915)) + vlv_rps_init(rps); + else if (INTEL_GEN(i915) >= 6) + gen6_rps_init(rps); + else if (IS_IRONLAKE_M(i915)) + gen5_rps_init(rps); + + /* Derive initial user preferences/limits from the hardware limits */ + rps->max_freq_softlimit = rps->max_freq; + rps->min_freq_softlimit = rps->min_freq; + + /* After setting max-softlimit, find the overclock max freq */ + if (IS_GEN(i915, 6) || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) { + u32 params = 0; + + sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS, + ¶ms, NULL); + if (params & BIT(31)) { /* OC supported */ + DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n", + (rps->max_freq & 0xff) * 50, + (params & 0xff) * 50); + rps->max_freq = params & 0xff; + } + } + + /* Finally allow us to boost to max by default */ + rps->boost_freq = rps->max_freq; + rps->idle_freq = rps->min_freq; + rps->cur_freq = rps->idle_freq; + + rps->pm_intrmsk_mbz = 0; + + /* + * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer + * if GEN6_PM_UP_EI_EXPIRED is masked. + * + * TODO: verify if this can be reproduced on VLV,CHV. + */ + if (INTEL_GEN(i915) <= 7) + rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; + + if (INTEL_GEN(i915) >= 8) + rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; +} + +u32 intel_get_cagf(struct intel_rps *rps, u32 rpstat) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 cagf; + + if (INTEL_GEN(i915) >= 9) + cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; + else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) + cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; + else + cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; + + return cagf; +} + +/* External interface for intel_ips.ko */ + +static struct drm_i915_private __rcu *ips_mchdev; + +/** + * Tells the intel_ips driver that the i915 driver is now loaded, if + * IPS got loaded first. + * + * This awkward dance is so that neither module has to depend on the + * other in order for IPS to do the appropriate communication of + * GPU turbo limits to i915. + */ +static void +ips_ping_for_i915_load(void) +{ + void (*link)(void); + + link = symbol_get(ips_link_to_i915_driver); + if (link) { + link(); + symbol_put(ips_link_to_i915_driver); + } +} + +void intel_rps_driver_register(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + /* + * We only register the i915 ips part with intel-ips once everything is + * set up, to avoid intel-ips sneaking in and reading bogus values. + */ + if (IS_GEN(gt->i915, 5)) { + rcu_assign_pointer(ips_mchdev, gt->i915); + ips_ping_for_i915_load(); + } +} + +void intel_rps_driver_unregister(struct intel_rps *rps) +{ + rcu_assign_pointer(ips_mchdev, NULL); +} + +static struct drm_i915_private *mchdev_get(void) +{ + struct drm_i915_private *i915; + + rcu_read_lock(); + i915 = rcu_dereference(ips_mchdev); + if (!kref_get_unless_zero(&i915->drm.ref)) + i915 = NULL; + rcu_read_unlock(); + + return i915; +} + +/** + * i915_read_mch_val - return value for IPS use + * + * Calculate and return a value for the IPS driver to use when deciding whether + * we have thermal and power headroom to increase CPU or GPU power budget. + */ +unsigned long i915_read_mch_val(void) +{ + struct drm_i915_private *i915; + unsigned long chipset_val = 0; + unsigned long graphics_val = 0; + intel_wakeref_t wakeref; + + i915 = mchdev_get(); + if (!i915) + return 0; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + struct intel_ips *ips = &i915->gt.rps.ips; + + spin_lock_irq(&mchdev_lock); + chipset_val = __ips_chipset_val(ips); + graphics_val = __ips_gfx_val(ips); + spin_unlock_irq(&mchdev_lock); + } + + drm_dev_put(&i915->drm); + return chipset_val + graphics_val; +} +EXPORT_SYMBOL_GPL(i915_read_mch_val); + +/** + * i915_gpu_raise - raise GPU frequency limit + * + * Raise the limit; IPS indicates we have thermal headroom. + */ +bool i915_gpu_raise(void) +{ + struct drm_i915_private *i915; + struct intel_rps *rps; + + i915 = mchdev_get(); + if (!i915) + return false; + + rps = &i915->gt.rps; + + spin_lock_irq(&mchdev_lock); + if (rps->max_freq_softlimit < rps->max_freq) + rps->max_freq_softlimit++; + spin_unlock_irq(&mchdev_lock); + + drm_dev_put(&i915->drm); + return true; +} +EXPORT_SYMBOL_GPL(i915_gpu_raise); + +/** + * i915_gpu_lower - lower GPU frequency limit + * + * IPS indicates we're close to a thermal limit, so throttle back the GPU + * frequency maximum. + */ +bool i915_gpu_lower(void) +{ + struct drm_i915_private *i915; + struct intel_rps *rps; + + i915 = mchdev_get(); + if (!i915) + return false; + + rps = &i915->gt.rps; + + spin_lock_irq(&mchdev_lock); + if (rps->max_freq_softlimit > rps->min_freq) + rps->max_freq_softlimit--; + spin_unlock_irq(&mchdev_lock); + + drm_dev_put(&i915->drm); + return true; +} +EXPORT_SYMBOL_GPL(i915_gpu_lower); + +/** + * i915_gpu_busy - indicate GPU business to IPS + * + * Tell the IPS driver whether or not the GPU is busy. + */ +bool i915_gpu_busy(void) +{ + struct drm_i915_private *i915; + bool ret; + + i915 = mchdev_get(); + if (!i915) + return false; + + ret = i915->gt.awake; + + drm_dev_put(&i915->drm); + return ret; +} +EXPORT_SYMBOL_GPL(i915_gpu_busy); + +/** + * i915_gpu_turbo_disable - disable graphics turbo + * + * Disable graphics turbo by resetting the max frequency and setting the + * current frequency to the default. + */ +bool i915_gpu_turbo_disable(void) +{ + struct drm_i915_private *i915; + struct intel_rps *rps; + bool ret; + + i915 = mchdev_get(); + if (!i915) + return false; + + rps = &i915->gt.rps; + + spin_lock_irq(&mchdev_lock); + rps->max_freq_softlimit = rps->min_freq; + ret = gen5_rps_set(&i915->gt.rps, rps->min_freq); + spin_unlock_irq(&mchdev_lock); + + drm_dev_put(&i915->drm); + return ret; +} +EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h new file mode 100644 index 000000000000..997a4b4e0207 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -0,0 +1,37 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RPS_H +#define INTEL_RPS_H + +#include "intel_rps_types.h" + +struct i915_request; + +void intel_rps_init(struct intel_rps *rps); + +void intel_rps_driver_register(struct intel_rps *rps); +void intel_rps_driver_unregister(struct intel_rps *rps); + +void intel_rps_enable(struct intel_rps *rps); +void intel_rps_disable(struct intel_rps *rps); + +void intel_rps_park(struct intel_rps *rps); +void intel_rps_unpark(struct intel_rps *rps); +void intel_rps_boost(struct i915_request *rq); + +int intel_rps_set(struct intel_rps *rps, u8 val); +void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive); + +int intel_gpu_freq(struct intel_rps *rps, int val); +int intel_freq_opcode(struct intel_rps *rps, int val); +u32 intel_get_cagf(struct intel_rps *rps, u32 rpstat1); + +void gen5_rps_irq_handler(struct intel_rps *rps); +void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); +void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); + +#endif /* INTEL_RPS_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h new file mode 100644 index 000000000000..c2e279154bd5 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h @@ -0,0 +1,93 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RPS_TYPES_H +#define INTEL_RPS_TYPES_H + +#include +#include +#include +#include +#include + +struct intel_ips { + u64 last_count1; + unsigned long last_time1; + unsigned long chipset_power; + u64 last_count2; + u64 last_time2; + unsigned long gfx_power; + u8 corr; + + int c, m; +}; + +struct intel_rps_ei { + ktime_t ktime; + u32 render_c0; + u32 media_c0; +}; + +struct intel_rps { + struct mutex lock; /* protects enabling and the worker */ + + /* + * work, interrupts_enabled and pm_iir are protected by + * dev_priv->irq_lock + */ + struct work_struct work; + bool enabled; + bool active; + u32 pm_iir; + + /* PM interrupt bits that should never be masked */ + u32 pm_intrmsk_mbz; + u32 pm_events; + + /* Frequencies are stored in potentially platform dependent multiples. + * In other words, *_freq needs to be multiplied by X to be interesting. + * Soft limits are those which are used for the dynamic reclocking done + * by the driver (raise frequencies under heavy loads, and lower for + * lighter loads). Hard limits are those imposed by the hardware. + * + * A distinction is made for overclocking, which is never enabled by + * default, and is considered to be above the hard limit if it's + * possible at all. + */ + u8 cur_freq; /* Current frequency (cached, may not == HW) */ + u8 last_freq; /* Last SWREQ frequency */ + u8 min_freq_softlimit; /* Minimum frequency permitted by the driver */ + u8 max_freq_softlimit; /* Max frequency permitted by the driver */ + u8 max_freq; /* Maximum frequency, RP0 if not overclocking */ + u8 min_freq; /* AKA RPn. Minimum frequency */ + u8 boost_freq; /* Frequency to request when wait boosting */ + u8 idle_freq; /* Frequency to request when we are idle */ + u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */ + u8 rp1_freq; /* "less than" RP0 power/freqency */ + u8 rp0_freq; /* Non-overclocked max frequency. */ + u16 gpll_ref_freq; /* vlv/chv GPLL reference frequency */ + + int last_adj; + + struct { + struct mutex mutex; + + enum { LOW_POWER, BETWEEN, HIGH_POWER } mode; + unsigned int interactive; + + u8 up_threshold; /* Current %busy required to uplock */ + u8 down_threshold; /* Current %busy required to downclock */ + } power; + + atomic_t num_waiters; + atomic_t boosts; + + /* manual wa residency calculations */ + struct intel_rps_ei ei; + struct intel_ips ips; +}; + +#endif /* INTEL_RPS_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.c b/drivers/gpu/drm/i915/gt/selftest_llc.c index a7057785e420..fd3770e48ac7 100644 --- a/drivers/gpu/drm/i915/gt/selftest_llc.c +++ b/drivers/gpu/drm/i915/gt/selftest_llc.c @@ -6,6 +6,7 @@ #include "intel_pm.h" /* intel_gpu_freq() */ #include "selftest_llc.h" +#include "intel_rps.h" static int gen6_verify_ring_freq(struct intel_llc *llc) { @@ -25,6 +26,8 @@ static int gen6_verify_ring_freq(struct intel_llc *llc) for (gpu_freq = consts.min_gpu_freq; gpu_freq <= consts.max_gpu_freq; gpu_freq++) { + struct intel_rps *rps = &llc_to_gt(llc)->rps; + unsigned int ia_freq, ring_freq, found; u32 val; @@ -44,7 +47,7 @@ static int gen6_verify_ring_freq(struct intel_llc *llc) if (found != ia_freq) { pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected CPU freq, found %d, expected %d\n", gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq, - intel_gpu_freq(i915, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)), + intel_gpu_freq(rps, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)), found, ia_freq); err = -EINVAL; break; @@ -54,7 +57,7 @@ static int gen6_verify_ring_freq(struct intel_llc *llc) if (found != ring_freq) { pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected ring freq, found %d, expected %d\n", gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq, - intel_gpu_freq(i915, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)), + intel_gpu_freq(rps, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)), found, ring_freq); err = -EINVAL; break; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 1b1691aaed28..b7f8514e9b1d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1011,7 +1011,7 @@ void intel_guc_submission_fini(struct intel_guc *guc) static void guc_interrupts_capture(struct intel_gt *gt) { - struct intel_rps *rps = >->i915->gt_pm.rps; + struct intel_rps *rps = >->rps; struct intel_uncore *uncore = gt->uncore; struct intel_engine_cs *engine; enum intel_engine_id id; @@ -1057,7 +1057,7 @@ static void guc_interrupts_capture(struct intel_gt *gt) static void guc_interrupts_release(struct intel_gt *gt) { - struct intel_rps *rps = >->i915->gt_pm.rps; + struct intel_rps *rps = >->rps; struct intel_uncore *uncore = gt->uncore; struct intel_engine_cs *engine; enum intel_engine_id id; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 4b6ce07c35d2..8016484ebcd3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -44,6 +44,7 @@ #include "gt/intel_gt_requests.h" #include "gt/intel_reset.h" #include "gt/intel_rc6.h" +#include "gt/intel_rps.h" #include "gt/uc/intel_guc_submission.h" #include "i915_debugfs.h" @@ -791,7 +792,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_uncore *uncore = &dev_priv->uncore; - struct intel_rps *rps = &dev_priv->gt_pm.rps; + struct intel_rps *rps = &dev_priv->gt.rps; intel_wakeref_t wakeref; int ret = 0; @@ -827,23 +828,23 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "DDR freq: %d MHz\n", dev_priv->mem_freq); seq_printf(m, "actual GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, (freq_sts >> 8) & 0xff)); + intel_gpu_freq(rps, (freq_sts >> 8) & 0xff)); seq_printf(m, "current GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->cur_freq)); + intel_gpu_freq(rps, rps->cur_freq)); seq_printf(m, "max GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->max_freq)); + intel_gpu_freq(rps, rps->max_freq)); seq_printf(m, "min GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->min_freq)); + intel_gpu_freq(rps, rps->min_freq)); seq_printf(m, "idle GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->idle_freq)); + intel_gpu_freq(rps, rps->idle_freq)); seq_printf(m, "efficient (RPe) frequency: %d MHz\n", - intel_gpu_freq(dev_priv, rps->efficient_freq)); + intel_gpu_freq(rps, rps->efficient_freq)); } else if (INTEL_GEN(dev_priv) >= 6) { u32 rp_state_limits; u32 gt_perf_status; @@ -877,7 +878,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) else reqf >>= 25; } - reqf = intel_gpu_freq(dev_priv, reqf); + reqf = intel_gpu_freq(rps, reqf); rpmodectl = I915_READ(GEN6_RP_CONTROL); rpinclimit = I915_READ(GEN6_RP_UP_THRESHOLD); @@ -890,8 +891,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) rpdownei = I915_READ(GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; rpcurdown = I915_READ(GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK; rpprevdown = I915_READ(GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK; - cagf = intel_gpu_freq(dev_priv, - intel_get_cagf(dev_priv, rpstat)); + cagf = intel_gpu_freq(rps, intel_get_cagf(rps, rpstat)); intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); @@ -968,37 +968,37 @@ static int i915_frequency_info(struct seq_file *m, void *unused) max_freq *= (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", - intel_gpu_freq(dev_priv, max_freq)); + intel_gpu_freq(rps, max_freq)); max_freq = (rp_state_cap & 0xff00) >> 8; max_freq *= (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", - intel_gpu_freq(dev_priv, max_freq)); + intel_gpu_freq(rps, max_freq)); max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 16 : rp_state_cap >> 0) & 0xff; max_freq *= (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", - intel_gpu_freq(dev_priv, max_freq)); + intel_gpu_freq(rps, max_freq)); seq_printf(m, "Max overclocked frequency: %dMHz\n", - intel_gpu_freq(dev_priv, rps->max_freq)); + intel_gpu_freq(rps, rps->max_freq)); seq_printf(m, "Current freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->cur_freq)); + intel_gpu_freq(rps, rps->cur_freq)); seq_printf(m, "Actual freq: %d MHz\n", cagf); seq_printf(m, "Idle freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->idle_freq)); + intel_gpu_freq(rps, rps->idle_freq)); seq_printf(m, "Min freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->min_freq)); + intel_gpu_freq(rps, rps->min_freq)); seq_printf(m, "Boost freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->boost_freq)); + intel_gpu_freq(rps, rps->boost_freq)); seq_printf(m, "Max freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->max_freq)); + intel_gpu_freq(rps, rps->max_freq)); seq_printf(m, "efficient (RPe) frequency: %d MHz\n", - intel_gpu_freq(dev_priv, rps->efficient_freq)); + intel_gpu_freq(rps, rps->efficient_freq)); } else { seq_puts(m, "no P-state info available\n"); } @@ -1375,7 +1375,7 @@ static int i915_sr_status(struct seq_file *m, void *unused) static int i915_ring_freq_table(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct intel_rps *rps = &dev_priv->gt_pm.rps; + struct intel_rps *rps = &dev_priv->gt.rps; unsigned int max_gpu_freq, min_gpu_freq; intel_wakeref_t wakeref; int gpu_freq, ia_freq; @@ -1400,10 +1400,11 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) GEN6_PCODE_READ_MIN_FREQ_TABLE, &ia_freq, NULL); seq_printf(m, "%d\t\t%d\t\t\t\t%d\n", - intel_gpu_freq(dev_priv, (gpu_freq * - (IS_GEN9_BC(dev_priv) || - INTEL_GEN(dev_priv) >= 10 ? - GEN9_FREQ_SCALER : 1))), + intel_gpu_freq(rps, + (gpu_freq * + (IS_GEN9_BC(dev_priv) || + INTEL_GEN(dev_priv) >= 10 ? + GEN9_FREQ_SCALER : 1))), ((ia_freq >> 0) & 0xff) * 100, ((ia_freq >> 8) & 0xff) * 100); } @@ -1631,7 +1632,7 @@ static const char *rps_power_to_str(unsigned int power) static int i915_rps_boost_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct intel_rps *rps = &dev_priv->gt_pm.rps; + struct intel_rps *rps = &dev_priv->gt.rps; u32 act_freq = rps->cur_freq; intel_wakeref_t wakeref; @@ -1643,7 +1644,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) vlv_punit_put(dev_priv); act_freq = (act_freq >> 8) & 0xff; } else { - act_freq = intel_get_cagf(dev_priv, + act_freq = intel_get_cagf(rps, I915_READ(GEN6_RPSTAT1)); } } @@ -1654,17 +1655,17 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) atomic_read(&rps->num_waiters)); seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive)); seq_printf(m, "Frequency requested %d, actual %d\n", - intel_gpu_freq(dev_priv, rps->cur_freq), - intel_gpu_freq(dev_priv, act_freq)); + intel_gpu_freq(rps, rps->cur_freq), + intel_gpu_freq(rps, act_freq)); seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n", - intel_gpu_freq(dev_priv, rps->min_freq), - intel_gpu_freq(dev_priv, rps->min_freq_softlimit), - intel_gpu_freq(dev_priv, rps->max_freq_softlimit), - intel_gpu_freq(dev_priv, rps->max_freq)); + intel_gpu_freq(rps, rps->min_freq), + intel_gpu_freq(rps, rps->min_freq_softlimit), + intel_gpu_freq(rps, rps->max_freq_softlimit), + intel_gpu_freq(rps, rps->max_freq)); seq_printf(m, " idle:%d, efficient:%d, boost:%d\n", - intel_gpu_freq(dev_priv, rps->idle_freq), - intel_gpu_freq(dev_priv, rps->efficient_freq), - intel_gpu_freq(dev_priv, rps->boost_freq)); + intel_gpu_freq(rps, rps->idle_freq), + intel_gpu_freq(rps, rps->efficient_freq), + intel_gpu_freq(rps, rps->boost_freq)); seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts)); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 24a3988281d2..355526a35d58 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1794,7 +1794,6 @@ static int i915_drm_resume(struct drm_device *dev) int ret; disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); - intel_gt_pm_disable(&dev_priv->gt); i915_gem_sanitize(dev_priv); @@ -1925,8 +1924,6 @@ static int i915_drm_resume_early(struct drm_device *dev) intel_display_power_resume_early(dev_priv); - intel_gt_pm_disable(&dev_priv->gt); - intel_power_domains_resume(dev_priv); intel_gt_sanitize(&dev_priv->gt, true); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c3b2f29942f9..96fe0e8d468a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -545,94 +545,6 @@ struct i915_suspend_saved_registers { struct vlv_s0ix_state; -struct intel_rps_ei { - ktime_t ktime; - u32 render_c0; - u32 media_c0; -}; - -struct intel_rps { - struct mutex lock; /* protects enabling and the worker */ - - /* - * work, interrupts_enabled and pm_iir are protected by - * dev_priv->irq_lock - */ - struct work_struct work; - bool interrupts_enabled; - u32 pm_iir; - - /* PM interrupt bits that should never be masked */ - u32 pm_intrmsk_mbz; - - /* Frequencies are stored in potentially platform dependent multiples. - * In other words, *_freq needs to be multiplied by X to be interesting. - * Soft limits are those which are used for the dynamic reclocking done - * by the driver (raise frequencies under heavy loads, and lower for - * lighter loads). Hard limits are those imposed by the hardware. - * - * A distinction is made for overclocking, which is never enabled by - * default, and is considered to be above the hard limit if it's - * possible at all. - */ - u8 cur_freq; /* Current frequency (cached, may not == HW) */ - u8 min_freq_softlimit; /* Minimum frequency permitted by the driver */ - u8 max_freq_softlimit; /* Max frequency permitted by the driver */ - u8 max_freq; /* Maximum frequency, RP0 if not overclocking */ - u8 min_freq; /* AKA RPn. Minimum frequency */ - u8 boost_freq; /* Frequency to request when wait boosting */ - u8 idle_freq; /* Frequency to request when we are idle */ - u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */ - u8 rp1_freq; /* "less than" RP0 power/freqency */ - u8 rp0_freq; /* Non-overclocked max frequency. */ - u16 gpll_ref_freq; /* vlv/chv GPLL reference frequency */ - - int last_adj; - - struct { - struct mutex mutex; - - enum { LOW_POWER, BETWEEN, HIGH_POWER } mode; - unsigned int interactive; - - u8 up_threshold; /* Current %busy required to uplock */ - u8 down_threshold; /* Current %busy required to downclock */ - } power; - - bool enabled; - atomic_t num_waiters; - atomic_t boosts; - - /* manual wa residency calculations */ - struct intel_rps_ei ei; -}; - -struct intel_gen6_power_mgmt { - struct intel_rps rps; -}; - -/* defined intel_pm.c */ -extern spinlock_t mchdev_lock; - -struct intel_ilk_power_mgmt { - u8 cur_delay; - u8 min_delay; - u8 max_delay; - u8 fmax; - u8 fstart; - - u64 last_count1; - unsigned long last_time1; - unsigned long chipset_power; - u64 last_count2; - u64 last_time2; - unsigned long gfx_power; - u8 corr; - - int c_m; - int r_t; -}; - #define MAX_L3_SLICES 2 struct intel_l3_parity { u32 *remap_info[MAX_L3_SLICES]; @@ -1069,7 +981,6 @@ struct drm_i915_private { u32 irq_mask; u32 de_irq_mask[I915_MAX_PIPES]; }; - u32 pm_rps_events; u32 pipestat_irq_mask[I915_MAX_PIPES]; struct i915_hotplug hotplug; @@ -1209,13 +1120,6 @@ struct drm_i915_private { */ u32 edram_size_mb; - /* gen6+ GT PM state */ - struct intel_gen6_power_mgmt gt_pm; - - /* ilk-only ips/rps state. Everything in here is protected by the global - * mchdev_lock in intel_pm.c */ - struct intel_ilk_power_mgmt ips; - struct i915_power_domains power_domains; struct i915_psr psr; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 319e96d833fa..0f271a53012d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -52,6 +52,7 @@ #include "gt/intel_mocs.h" #include "gt/intel_reset.h" #include "gt/intel_renderstate.h" +#include "gt/intel_rps.h" #include "gt/intel_workarounds.h" #include "i915_drv.h" @@ -1269,8 +1270,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv) goto err_context; } - intel_init_gt_powersave(dev_priv); - intel_uc_init(&dev_priv->gt.uc); ret = intel_gt_init_hw(&dev_priv->gt); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 33020c8ca5f6..8a3aed17c3b7 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -45,6 +45,7 @@ #include "gt/intel_gt.h" #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm_irq.h" +#include "gt/intel_rps.h" #include "i915_drv.h" #include "i915_irq.h" @@ -327,87 +328,6 @@ static i915_reg_t gen6_pm_iir(struct drm_i915_private *dev_priv) return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IIR(2) : GEN6_PMIIR; } -void gen11_reset_rps_interrupts(struct drm_i915_private *dev_priv) -{ - struct intel_gt *gt = &dev_priv->gt; - - spin_lock_irq(>->irq_lock); - - while (gen11_gt_reset_one_iir(gt, 0, GEN11_GTPM)) - ; - - dev_priv->gt_pm.rps.pm_iir = 0; - - spin_unlock_irq(>->irq_lock); -} - -void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv) -{ - struct intel_gt *gt = &dev_priv->gt; - - spin_lock_irq(>->irq_lock); - gen6_gt_pm_reset_iir(gt, GEN6_PM_RPS_EVENTS); - dev_priv->gt_pm.rps.pm_iir = 0; - spin_unlock_irq(>->irq_lock); -} - -void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv) -{ - struct intel_gt *gt = &dev_priv->gt; - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - if (READ_ONCE(rps->interrupts_enabled)) - return; - - spin_lock_irq(>->irq_lock); - WARN_ON_ONCE(rps->pm_iir); - - if (INTEL_GEN(dev_priv) >= 11) - WARN_ON_ONCE(gen11_gt_reset_one_iir(gt, 0, GEN11_GTPM)); - else - WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events); - - rps->interrupts_enabled = true; - gen6_gt_pm_enable_irq(gt, dev_priv->pm_rps_events); - - spin_unlock_irq(>->irq_lock); -} - -u32 gen6_sanitize_rps_pm_mask(const struct drm_i915_private *i915, u32 mask) -{ - return mask & ~i915->gt_pm.rps.pm_intrmsk_mbz; -} - -void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - struct intel_gt *gt = &dev_priv->gt; - - if (!READ_ONCE(rps->interrupts_enabled)) - return; - - spin_lock_irq(>->irq_lock); - rps->interrupts_enabled = false; - - I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u)); - - gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS); - - spin_unlock_irq(>->irq_lock); - intel_synchronize_irq(dev_priv); - - /* Now that we will not be generating any more work, flush any - * outstanding tasks. As we are called on the RPS idle path, - * we will reset the GPU to minimum frequencies, so the current - * state of the worker can be discarded. - */ - cancel_work_sync(&rps->work); - if (INTEL_GEN(dev_priv) >= 11) - gen11_reset_rps_interrupts(dev_priv); - else - gen6_reset_rps_interrupts(dev_priv); -} - void gen9_reset_guc_interrupts(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -1065,199 +985,6 @@ int intel_get_crtc_scanline(struct intel_crtc *crtc) return position; } -static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) -{ - struct intel_uncore *uncore = &dev_priv->uncore; - u32 busy_up, busy_down, max_avg, min_avg; - u8 new_delay; - - spin_lock(&mchdev_lock); - - intel_uncore_write16(uncore, - MEMINTRSTS, - intel_uncore_read(uncore, MEMINTRSTS)); - - new_delay = dev_priv->ips.cur_delay; - - intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); - busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG); - busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG); - max_avg = intel_uncore_read(uncore, RCBMAXAVG); - min_avg = intel_uncore_read(uncore, RCBMINAVG); - - /* Handle RCS change request from hw */ - if (busy_up > max_avg) { - if (dev_priv->ips.cur_delay != dev_priv->ips.max_delay) - new_delay = dev_priv->ips.cur_delay - 1; - if (new_delay < dev_priv->ips.max_delay) - new_delay = dev_priv->ips.max_delay; - } else if (busy_down < min_avg) { - if (dev_priv->ips.cur_delay != dev_priv->ips.min_delay) - new_delay = dev_priv->ips.cur_delay + 1; - if (new_delay > dev_priv->ips.min_delay) - new_delay = dev_priv->ips.min_delay; - } - - if (ironlake_set_drps(dev_priv, new_delay)) - dev_priv->ips.cur_delay = new_delay; - - spin_unlock(&mchdev_lock); - - return; -} - -static void vlv_c0_read(struct drm_i915_private *dev_priv, - struct intel_rps_ei *ei) -{ - ei->ktime = ktime_get_raw(); - ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT); - ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT); -} - -void gen6_rps_reset_ei(struct drm_i915_private *dev_priv) -{ - memset(&dev_priv->gt_pm.rps.ei, 0, sizeof(dev_priv->gt_pm.rps.ei)); -} - -static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - const struct intel_rps_ei *prev = &rps->ei; - struct intel_rps_ei now; - u32 events = 0; - - if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0) - return 0; - - vlv_c0_read(dev_priv, &now); - - if (prev->ktime) { - u64 time, c0; - u32 render, media; - - time = ktime_us_delta(now.ktime, prev->ktime); - - time *= dev_priv->czclk_freq; - - /* Workload can be split between render + media, - * e.g. SwapBuffers being blitted in X after being rendered in - * mesa. To account for this we need to combine both engines - * into our activity counter. - */ - render = now.render_c0 - prev->render_c0; - media = now.media_c0 - prev->media_c0; - c0 = max(render, media); - c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */ - - if (c0 > time * rps->power.up_threshold) - events = GEN6_PM_RP_UP_THRESHOLD; - else if (c0 < time * rps->power.down_threshold) - events = GEN6_PM_RP_DOWN_THRESHOLD; - } - - rps->ei = now; - return events; -} - -static void gen6_pm_rps_work(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, gt_pm.rps.work); - struct intel_gt *gt = &dev_priv->gt; - struct intel_rps *rps = &dev_priv->gt_pm.rps; - bool client_boost = false; - int new_delay, adj, min, max; - u32 pm_iir = 0; - - spin_lock_irq(>->irq_lock); - if (rps->interrupts_enabled) { - pm_iir = fetch_and_zero(&rps->pm_iir); - client_boost = atomic_read(&rps->num_waiters); - } - spin_unlock_irq(>->irq_lock); - - /* Make sure we didn't queue anything we're not going to process. */ - WARN_ON(pm_iir & ~dev_priv->pm_rps_events); - if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost) - goto out; - - mutex_lock(&rps->lock); - - pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir); - - adj = rps->last_adj; - new_delay = rps->cur_freq; - min = rps->min_freq_softlimit; - max = rps->max_freq_softlimit; - if (client_boost) - max = rps->max_freq; - if (client_boost && new_delay < rps->boost_freq) { - new_delay = rps->boost_freq; - adj = 0; - } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { - if (adj > 0) - adj *= 2; - else /* CHV needs even encode values */ - adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1; - - if (new_delay >= rps->max_freq_softlimit) - adj = 0; - } else if (client_boost) { - adj = 0; - } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { - if (rps->cur_freq > rps->efficient_freq) - new_delay = rps->efficient_freq; - else if (rps->cur_freq > rps->min_freq_softlimit) - new_delay = rps->min_freq_softlimit; - adj = 0; - } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { - if (adj < 0) - adj *= 2; - else /* CHV needs even encode values */ - adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1; - - if (new_delay <= rps->min_freq_softlimit) - adj = 0; - } else { /* unknown event */ - adj = 0; - } - - rps->last_adj = adj; - - /* - * Limit deboosting and boosting to keep ourselves at the extremes - * when in the respective power modes (i.e. slowly decrease frequencies - * while in the HIGH_POWER zone and slowly increase frequencies while - * in the LOW_POWER zone). On idle, we will hit the timeout and drop - * to the next level quickly, and conversely if busy we expect to - * hit a waitboost and rapidly switch into max power. - */ - if ((adj < 0 && rps->power.mode == HIGH_POWER) || - (adj > 0 && rps->power.mode == LOW_POWER)) - rps->last_adj = 0; - - /* sysfs frequency interfaces may have snuck in while servicing the - * interrupt - */ - new_delay += adj; - new_delay = clamp_t(int, new_delay, min, max); - - if (intel_set_rps(dev_priv, new_delay)) { - DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); - rps->last_adj = 0; - } - - mutex_unlock(&rps->lock); - -out: - /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */ - spin_lock_irq(>->irq_lock); - if (rps->interrupts_enabled) - gen6_gt_pm_unmask_irq(gt, dev_priv->pm_rps_events); - spin_unlock_irq(>->irq_lock); -} - - /** * ivybridge_parity_work - Workqueue called when a parity error interrupt * occurred. @@ -1631,54 +1358,6 @@ static void i9xx_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, res1, res2); } -/* The RPS events need forcewake, so we add them to a work queue and mask their - * IMR bits until the work is done. Other interrupts can be processed without - * the work queue. */ -void gen11_rps_irq_handler(struct intel_gt *gt, u32 pm_iir) -{ - struct drm_i915_private *i915 = gt->i915; - struct intel_rps *rps = &i915->gt_pm.rps; - const u32 events = i915->pm_rps_events & pm_iir; - - lockdep_assert_held(>->irq_lock); - - if (unlikely(!events)) - return; - - gen6_gt_pm_mask_irq(gt, events); - - if (!rps->interrupts_enabled) - return; - - rps->pm_iir |= events; - schedule_work(&rps->work); -} - -void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - struct intel_gt *gt = &dev_priv->gt; - - if (pm_iir & dev_priv->pm_rps_events) { - spin_lock(>->irq_lock); - gen6_gt_pm_mask_irq(gt, pm_iir & dev_priv->pm_rps_events); - if (rps->interrupts_enabled) { - rps->pm_iir |= pm_iir & dev_priv->pm_rps_events; - schedule_work(&rps->work); - } - spin_unlock(>->irq_lock); - } - - if (INTEL_GEN(dev_priv) >= 8) - return; - - if (pm_iir & PM_VEBOX_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[VECS0]); - - if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) - DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); -} - static void i9xx_pipestat_irq_reset(struct drm_i915_private *dev_priv) { enum pipe pipe; @@ -1989,7 +1668,7 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg) if (gt_iir) gen6_gt_irq_handler(&dev_priv->gt, gt_iir); if (pm_iir) - gen6_rps_irq_handler(dev_priv, pm_iir); + gen6_rps_irq_handler(&dev_priv->gt.rps, pm_iir); if (hotplug_status) i9xx_hpd_irq_handler(dev_priv, hotplug_status); @@ -2393,7 +2072,7 @@ static void ilk_display_irq_handler(struct drm_i915_private *dev_priv, } if (IS_GEN(dev_priv, 5) && de_iir & DE_PCU_EVENT) - ironlake_rps_change_irq_handler(dev_priv); + gen5_rps_irq_handler(&dev_priv->gt.rps); } static void ivb_display_irq_handler(struct drm_i915_private *dev_priv, @@ -2498,7 +2177,7 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) if (pm_iir) { I915_WRITE(GEN6_PMIIR, pm_iir); ret = IRQ_HANDLED; - gen6_rps_irq_handler(dev_priv, pm_iir); + gen6_rps_irq_handler(&dev_priv->gt.rps, pm_iir); } } @@ -4281,13 +3960,10 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) void intel_irq_init(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; - struct intel_rps *rps = &dev_priv->gt_pm.rps; int i; intel_hpd_init_work(dev_priv); - INIT_WORK(&rps->work, gen6_pm_rps_work); - INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work); for (i = 0; i < MAX_L3_SLICES; ++i) dev_priv->l3_parity.remap_info[i] = NULL; @@ -4296,33 +3972,6 @@ void intel_irq_init(struct drm_i915_private *dev_priv) if (HAS_GT_UC(dev_priv) && INTEL_GEN(dev_priv) < 11) dev_priv->gt.pm_guc_events = GUC_INTR_GUC2HOST << 16; - /* Let's track the enabled rps events */ - if (IS_VALLEYVIEW(dev_priv)) - /* WaGsvRC0ResidencyMethod:vlv */ - dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED; - else - dev_priv->pm_rps_events = (GEN6_PM_RP_UP_THRESHOLD | - GEN6_PM_RP_DOWN_THRESHOLD | - GEN6_PM_RP_DOWN_TIMEOUT); - - /* We share the register with other engine */ - if (INTEL_GEN(dev_priv) > 9) - GEM_WARN_ON(dev_priv->pm_rps_events & 0xffff0000); - - rps->pm_intrmsk_mbz = 0; - - /* - * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer - * if GEN6_PM_UP_EI_EXPIRED is masked. - * - * TODO: verify if this can be reproduced on VLV,CHV. - */ - if (INTEL_GEN(dev_priv) <= 7) - rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; - - if (INTEL_GEN(dev_priv) >= 8) - rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; - dev->vblank_disable_immediate = true; /* Most platforms treat the display irq block as an always-on diff --git a/drivers/gpu/drm/i915/i915_irq.h b/drivers/gpu/drm/i915/i915_irq.h index 19a3bc019535..d0d91c6e00d7 100644 --- a/drivers/gpu/drm/i915/i915_irq.h +++ b/drivers/gpu/drm/i915/i915_irq.h @@ -22,9 +22,6 @@ struct intel_gt; struct intel_guc; struct intel_uncore; -void gen11_rps_irq_handler(struct intel_gt *gt, u32 pm_iir); -void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir); - void intel_irq_init(struct drm_i915_private *dev_priv); void intel_irq_fini(struct drm_i915_private *dev_priv); int intel_irq_install(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index b5b67c0624ff..05395015d1f2 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -12,6 +12,7 @@ #include "gt/intel_engine_user.h" #include "gt/intel_gt_pm.h" #include "gt/intel_rc6.h" +#include "gt/intel_rps.h" #include "i915_drv.h" #include "i915_pmu.h" @@ -358,25 +359,26 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) struct drm_i915_private *i915 = gt->i915; struct intel_uncore *uncore = gt->uncore; struct i915_pmu *pmu = &i915->pmu; + struct intel_rps *rps = >->rps; if (pmu->enable & config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) { u32 val; - val = i915->gt_pm.rps.cur_freq; + val = rps->cur_freq; if (intel_gt_pm_get_if_awake(gt)) { val = intel_uncore_read_notrace(uncore, GEN6_RPSTAT1); - val = intel_get_cagf(i915, val); + val = intel_get_cagf(rps, val); intel_gt_pm_put(gt); } add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT], - intel_gpu_freq(i915, val), + intel_gpu_freq(rps, val), period_ns / 1000); } if (pmu->enable & config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) { add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ], - intel_gpu_freq(i915, i915->gt_pm.rps.cur_freq), + intel_gpu_freq(rps, rps->cur_freq), period_ns / 1000); } } diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 19a1d447ab8d..00011f9533b6 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -32,6 +32,7 @@ #include "gem/i915_gem_context.h" #include "gt/intel_context.h" #include "gt/intel_ring.h" +#include "gt/intel_rps.h" #include "i915_active.h" #include "i915_drv.h" @@ -258,8 +259,8 @@ bool i915_request_retire(struct i915_request *rq) if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) i915_request_cancel_breadcrumb(rq); if (i915_request_has_waitboost(rq)) { - GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters)); - atomic_dec(&rq->i915->gt_pm.rps.num_waiters); + GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters)); + atomic_dec(&rq->engine->gt->rps.num_waiters); } if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) { set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); @@ -1467,7 +1468,7 @@ long i915_request_wait(struct i915_request *rq, */ if (flags & I915_WAIT_PRIORITY) { if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6) - gen6_rps_boost(rq); + intel_rps_boost(rq); i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT); } diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index bf039b8ba593..65476909d1bf 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -31,6 +31,7 @@ #include #include "gt/intel_rc6.h" +#include "gt/intel_rps.h" #include "i915_drv.h" #include "i915_sysfs.h" @@ -259,6 +260,7 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt.rps; intel_wakeref_t wakeref; u32 freq; @@ -271,31 +273,31 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, freq = (freq >> 8) & 0xff; } else { - freq = intel_get_cagf(dev_priv, I915_READ(GEN6_RPSTAT1)); + freq = intel_get_cagf(rps, I915_READ(GEN6_RPSTAT1)); } intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, freq)); + return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(rps, freq)); } static ssize_t gt_cur_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt.rps; return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(dev_priv, - dev_priv->gt_pm.rps.cur_freq)); + intel_gpu_freq(rps, rps->cur_freq)); } static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt.rps; return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(dev_priv, - dev_priv->gt_pm.rps.boost_freq)); + intel_gpu_freq(rps, rps->boost_freq)); } static ssize_t gt_boost_freq_mhz_store(struct device *kdev, @@ -303,7 +305,7 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt_pm.rps; + struct intel_rps *rps = &dev_priv->gt.rps; bool boost = false; ssize_t ret; u32 val; @@ -313,7 +315,7 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, return ret; /* Validate against (static) hardware limits */ - val = intel_freq_opcode(dev_priv, val); + val = intel_freq_opcode(rps, val); if (val < rps->min_freq || val > rps->max_freq) return -EINVAL; @@ -333,19 +335,19 @@ static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt.rps; return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(dev_priv, - dev_priv->gt_pm.rps.efficient_freq)); + intel_gpu_freq(rps, rps->efficient_freq)); } static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt.rps; return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(dev_priv, - dev_priv->gt_pm.rps.max_freq_softlimit)); + intel_gpu_freq(rps, rps->max_freq_softlimit)); } static ssize_t gt_max_freq_mhz_store(struct device *kdev, @@ -353,19 +355,17 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt_pm.rps; - intel_wakeref_t wakeref; - u32 val; + struct intel_rps *rps = &dev_priv->gt.rps; ssize_t ret; + u32 val; ret = kstrtou32(buf, 0, &val); if (ret) return ret; - wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); mutex_lock(&rps->lock); - val = intel_freq_opcode(dev_priv, val); + val = intel_freq_opcode(rps, val); if (val < rps->min_freq || val > rps->max_freq || val < rps->min_freq_softlimit) { @@ -375,7 +375,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, if (val > rps->rp0_freq) DRM_DEBUG("User requested overclocking to %d\n", - intel_gpu_freq(dev_priv, val)); + intel_gpu_freq(rps, val)); rps->max_freq_softlimit = val; @@ -383,14 +383,15 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, rps->min_freq_softlimit, rps->max_freq_softlimit); - /* We still need *_set_rps to process the new max_delay and + /* + * We still need *_set_rps to process the new max_delay and * update the interrupt limits and PMINTRMSK even though - * frequency request may be unchanged. */ - ret = intel_set_rps(dev_priv, val); + * frequency request may be unchanged. + */ + intel_rps_set(rps, val); unlock: mutex_unlock(&rps->lock); - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); return ret ?: count; } @@ -398,10 +399,10 @@ unlock: static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt.rps; return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(dev_priv, - dev_priv->gt_pm.rps.min_freq_softlimit)); + intel_gpu_freq(rps, rps->min_freq_softlimit)); } static ssize_t gt_min_freq_mhz_store(struct device *kdev, @@ -409,19 +410,17 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt_pm.rps; - intel_wakeref_t wakeref; - u32 val; + struct intel_rps *rps = &dev_priv->gt.rps; ssize_t ret; + u32 val; ret = kstrtou32(buf, 0, &val); if (ret) return ret; - wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); mutex_lock(&rps->lock); - val = intel_freq_opcode(dev_priv, val); + val = intel_freq_opcode(rps, val); if (val < rps->min_freq || val > rps->max_freq || val > rps->max_freq_softlimit) { @@ -435,14 +434,15 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, rps->min_freq_softlimit, rps->max_freq_softlimit); - /* We still need *_set_rps to process the new min_delay and + /* + * We still need *_set_rps to process the new min_delay and * update the interrupt limits and PMINTRMSK even though - * frequency request may be unchanged. */ - ret = intel_set_rps(dev_priv, val); + * frequency request may be unchanged. + */ + intel_rps_set(rps, val); unlock: mutex_unlock(&rps->lock); - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); return ret ?: count; } @@ -464,15 +464,15 @@ static DEVICE_ATTR(gt_RPn_freq_mhz, S_IRUGO, gt_rp_mhz_show, NULL); static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt_pm.rps; + struct intel_rps *rps = &dev_priv->gt.rps; u32 val; if (attr == &dev_attr_gt_RP0_freq_mhz) - val = intel_gpu_freq(dev_priv, rps->rp0_freq); + val = intel_gpu_freq(rps, rps->rp0_freq); else if (attr == &dev_attr_gt_RP1_freq_mhz) - val = intel_gpu_freq(dev_priv, rps->rp1_freq); + val = intel_gpu_freq(rps, rps->rp1_freq); else if (attr == &dev_attr_gt_RPn_freq_mhz) - val = intel_gpu_freq(dev_priv, rps->min_freq); + val = intel_gpu_freq(rps, rps->min_freq); else BUG(); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6b37d22fc68d..5d2b460d3ee5 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -197,8 +197,6 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv) break; } - dev_priv->ips.r_t = dev_priv->mem_freq; - switch (csipll & 0x3ff) { case 0x00c: dev_priv->fsb_freq = 3200; @@ -227,14 +225,6 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv) dev_priv->fsb_freq = 0; break; } - - if (dev_priv->fsb_freq == 3200) { - dev_priv->ips.c_m = 0; - } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) { - dev_priv->ips.c_m = 1; - } else { - dev_priv->ips.c_m = 2; - } } static const struct cxsr_latency cxsr_latency_table[] = { @@ -6252,1627 +6242,6 @@ void intel_init_ipc(struct drm_i915_private *dev_priv) intel_enable_ipc(dev_priv); } -/* - * Lock protecting IPS related data structures - */ -DEFINE_SPINLOCK(mchdev_lock); - -bool ironlake_set_drps(struct drm_i915_private *i915, u8 val) -{ - struct intel_uncore *uncore = &i915->uncore; - u16 rgvswctl; - - lockdep_assert_held(&mchdev_lock); - - rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); - if (rgvswctl & MEMCTL_CMD_STS) { - DRM_DEBUG("gpu busy, RCS change rejected\n"); - return false; /* still busy with another command */ - } - - rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | - (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM; - intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); - intel_uncore_posting_read16(uncore, MEMSWCTL); - - rgvswctl |= MEMCTL_CMD_STS; - intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); - - return true; -} - -static void ironlake_enable_drps(struct drm_i915_private *dev_priv) -{ - struct intel_uncore *uncore = &dev_priv->uncore; - u32 rgvmodectl; - u8 fmax, fmin, fstart, vstart; - - spin_lock_irq(&mchdev_lock); - - rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); - - /* Enable temp reporting */ - intel_uncore_write16(uncore, PMMISC, I915_READ(PMMISC) | MCPPCE_EN); - intel_uncore_write16(uncore, TSC1, I915_READ(TSC1) | TSE); - - /* 100ms RC evaluation intervals */ - intel_uncore_write(uncore, RCUPEI, 100000); - intel_uncore_write(uncore, RCDNEI, 100000); - - /* Set max/min thresholds to 90ms and 80ms respectively */ - intel_uncore_write(uncore, RCBMAXAVG, 90000); - intel_uncore_write(uncore, RCBMINAVG, 80000); - - intel_uncore_write(uncore, MEMIHYST, 1); - - /* Set up min, max, and cur for interrupt handling */ - fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; - fmin = (rgvmodectl & MEMMODE_FMIN_MASK); - fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> - MEMMODE_FSTART_SHIFT; - - vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) & - PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; - - dev_priv->ips.fmax = fmax; /* IPS callback will increase this */ - dev_priv->ips.fstart = fstart; - - dev_priv->ips.max_delay = fstart; - dev_priv->ips.min_delay = fmin; - dev_priv->ips.cur_delay = fstart; - - DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", - fmax, fmin, fstart); - - intel_uncore_write(uncore, - MEMINTREN, - MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); - - /* - * Interrupts will be enabled in ironlake_irq_postinstall - */ - - intel_uncore_write(uncore, VIDSTART, vstart); - intel_uncore_posting_read(uncore, VIDSTART); - - rgvmodectl |= MEMMODE_SWMODE_EN; - intel_uncore_write(uncore, MEMMODECTL, rgvmodectl); - - if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) & - MEMCTL_CMD_STS) == 0, 10)) - DRM_ERROR("stuck trying to change perf mode\n"); - mdelay(1); - - ironlake_set_drps(dev_priv, fstart); - - dev_priv->ips.last_count1 = - intel_uncore_read(uncore, DMIEC) + - intel_uncore_read(uncore, DDREC) + - intel_uncore_read(uncore, CSIEC); - dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies); - dev_priv->ips.last_count2 = intel_uncore_read(uncore, GFXEC); - dev_priv->ips.last_time2 = ktime_get_raw_ns(); - - spin_unlock_irq(&mchdev_lock); -} - -static void ironlake_disable_drps(struct drm_i915_private *i915) -{ - struct intel_uncore *uncore = &i915->uncore; - u16 rgvswctl; - - spin_lock_irq(&mchdev_lock); - - rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); - - /* Ack interrupts, disable EFC interrupt */ - intel_uncore_write(uncore, - MEMINTREN, - intel_uncore_read(uncore, MEMINTREN) & - ~MEMINT_EVAL_CHG_EN); - intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); - intel_uncore_write(uncore, - DEIER, - intel_uncore_read(uncore, DEIER) & ~DE_PCU_EVENT); - intel_uncore_write(uncore, DEIIR, DE_PCU_EVENT); - intel_uncore_write(uncore, - DEIMR, - intel_uncore_read(uncore, DEIMR) | DE_PCU_EVENT); - - /* Go back to the starting frequency */ - ironlake_set_drps(i915, i915->ips.fstart); - mdelay(1); - rgvswctl |= MEMCTL_CMD_STS; - intel_uncore_write(uncore, MEMSWCTL, rgvswctl); - mdelay(1); - - spin_unlock_irq(&mchdev_lock); -} - -/* There's a funny hw issue where the hw returns all 0 when reading from - * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value - * ourselves, instead of doing a rmw cycle (which might result in us clearing - * all limits and the gpu stuck at whatever frequency it is at atm). - */ -static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 limits; - - /* Only set the down limit when we've reached the lowest level to avoid - * getting more interrupts, otherwise leave this clear. This prevents a - * race in the hw when coming out of rc6: There's a tiny window where - * the hw runs at the minimal clock before selecting the desired - * frequency, if the down threshold expires in that window we will not - * receive a down interrupt. */ - if (INTEL_GEN(dev_priv) >= 9) { - limits = (rps->max_freq_softlimit) << 23; - if (val <= rps->min_freq_softlimit) - limits |= (rps->min_freq_softlimit) << 14; - } else { - limits = rps->max_freq_softlimit << 24; - if (val <= rps->min_freq_softlimit) - limits |= rps->min_freq_softlimit << 16; - } - - return limits; -} - -static void rps_set_power(struct drm_i915_private *dev_priv, int new_power) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 threshold_up = 0, threshold_down = 0; /* in % */ - u32 ei_up = 0, ei_down = 0; - - lockdep_assert_held(&rps->power.mutex); - - if (new_power == rps->power.mode) - return; - - /* Note the units here are not exactly 1us, but 1280ns. */ - switch (new_power) { - case LOW_POWER: - /* Upclock if more than 95% busy over 16ms */ - ei_up = 16000; - threshold_up = 95; - - /* Downclock if less than 85% busy over 32ms */ - ei_down = 32000; - threshold_down = 85; - break; - - case BETWEEN: - /* Upclock if more than 90% busy over 13ms */ - ei_up = 13000; - threshold_up = 90; - - /* Downclock if less than 75% busy over 32ms */ - ei_down = 32000; - threshold_down = 75; - break; - - case HIGH_POWER: - /* Upclock if more than 85% busy over 10ms */ - ei_up = 10000; - threshold_up = 85; - - /* Downclock if less than 60% busy over 32ms */ - ei_down = 32000; - threshold_down = 60; - break; - } - - /* When byt can survive without system hang with dynamic - * sw freq adjustments, this restriction can be lifted. - */ - if (IS_VALLEYVIEW(dev_priv)) - goto skip_hw_write; - - I915_WRITE(GEN6_RP_UP_EI, - GT_INTERVAL_FROM_US(dev_priv, ei_up)); - I915_WRITE(GEN6_RP_UP_THRESHOLD, - GT_INTERVAL_FROM_US(dev_priv, - ei_up * threshold_up / 100)); - - I915_WRITE(GEN6_RP_DOWN_EI, - GT_INTERVAL_FROM_US(dev_priv, ei_down)); - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, - GT_INTERVAL_FROM_US(dev_priv, - ei_down * threshold_down / 100)); - - I915_WRITE(GEN6_RP_CONTROL, - (INTEL_GEN(dev_priv) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_AVG); - -skip_hw_write: - rps->power.mode = new_power; - rps->power.up_threshold = threshold_up; - rps->power.down_threshold = threshold_down; -} - -static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - int new_power; - - new_power = rps->power.mode; - switch (rps->power.mode) { - case LOW_POWER: - if (val > rps->efficient_freq + 1 && - val > rps->cur_freq) - new_power = BETWEEN; - break; - - case BETWEEN: - if (val <= rps->efficient_freq && - val < rps->cur_freq) - new_power = LOW_POWER; - else if (val >= rps->rp0_freq && - val > rps->cur_freq) - new_power = HIGH_POWER; - break; - - case HIGH_POWER: - if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 && - val < rps->cur_freq) - new_power = BETWEEN; - break; - } - /* Max/min bins are special */ - if (val <= rps->min_freq_softlimit) - new_power = LOW_POWER; - if (val >= rps->max_freq_softlimit) - new_power = HIGH_POWER; - - mutex_lock(&rps->power.mutex); - if (rps->power.interactive) - new_power = HIGH_POWER; - rps_set_power(dev_priv, new_power); - mutex_unlock(&rps->power.mutex); -} - -void intel_rps_mark_interactive(struct drm_i915_private *i915, bool interactive) -{ - struct intel_rps *rps = &i915->gt_pm.rps; - - if (INTEL_GEN(i915) < 6) - return; - - mutex_lock(&rps->power.mutex); - if (interactive) { - if (!rps->power.interactive++ && READ_ONCE(i915->gt.awake)) - rps_set_power(i915, HIGH_POWER); - } else { - GEM_BUG_ON(!rps->power.interactive); - rps->power.interactive--; - } - mutex_unlock(&rps->power.mutex); -} - -static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 mask = 0; - - /* We use UP_EI_EXPIRED interupts for both up/down in manual mode */ - if (val > rps->min_freq_softlimit) - mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; - if (val < rps->max_freq_softlimit) - mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; - - mask &= dev_priv->pm_rps_events; - - return gen6_sanitize_rps_pm_mask(dev_priv, ~mask); -} - -/* gen6_set_rps is called to update the frequency request, but should also be - * called when the range (min_delay and max_delay) is modified so that we can - * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ -static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - /* min/max delay may still have been modified so be sure to - * write the limits value. - */ - if (val != rps->cur_freq) { - gen6_set_rps_thresholds(dev_priv, val); - - if (INTEL_GEN(dev_priv) >= 9) - I915_WRITE(GEN6_RPNSWREQ, - GEN9_FREQUENCY(val)); - else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - I915_WRITE(GEN6_RPNSWREQ, - HSW_FREQUENCY(val)); - else - I915_WRITE(GEN6_RPNSWREQ, - GEN6_FREQUENCY(val) | - GEN6_OFFSET(0) | - GEN6_AGGRESSIVE_TURBO); - } - - /* Make sure we continue to get interrupts - * until we hit the minimum or maximum frequencies. - */ - I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val)); - I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - - rps->cur_freq = val; - trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); - - return 0; -} - -static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) -{ - int err; - - if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1), - "Odd GPU freq value\n")) - val &= ~1; - - I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - - if (val != dev_priv->gt_pm.rps.cur_freq) { - vlv_punit_get(dev_priv); - err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); - vlv_punit_put(dev_priv); - if (err) - return err; - - gen6_set_rps_thresholds(dev_priv, val); - } - - dev_priv->gt_pm.rps.cur_freq = val; - trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); - - return 0; -} - -/* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down - * - * * If Gfx is Idle, then - * 1. Forcewake Media well. - * 2. Request idle freq. - * 3. Release Forcewake of Media well. -*/ -static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 val = rps->idle_freq; - int err; - - if (rps->cur_freq <= val) - return; - - /* The punit delays the write of the frequency and voltage until it - * determines the GPU is awake. During normal usage we don't want to - * waste power changing the frequency if the GPU is sleeping (rc6). - * However, the GPU and driver is now idle and we do not want to delay - * switching to minimum voltage (reducing power whilst idle) as we do - * not expect to be woken in the near future and so must flush the - * change by waking the device. - * - * We choose to take the media powerwell (either would do to trick the - * punit into committing the voltage change) as that takes a lot less - * power than the render powerwell. - */ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_MEDIA); - err = valleyview_set_rps(dev_priv, val); - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_MEDIA); - - if (err) - DRM_ERROR("Failed to set RPS for idle\n"); -} - -void gen6_rps_busy(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - mutex_lock(&rps->lock); - if (rps->enabled) { - u8 freq; - - if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED) - gen6_rps_reset_ei(dev_priv); - I915_WRITE(GEN6_PMINTRMSK, - gen6_rps_pm_mask(dev_priv, rps->cur_freq)); - - gen6_enable_rps_interrupts(dev_priv); - - /* Use the user's desired frequency as a guide, but for better - * performance, jump directly to RPe as our starting frequency. - */ - freq = max(rps->cur_freq, - rps->efficient_freq); - - if (intel_set_rps(dev_priv, - clamp(freq, - rps->min_freq_softlimit, - rps->max_freq_softlimit))) - DRM_DEBUG_DRIVER("Failed to set idle frequency\n"); - } - mutex_unlock(&rps->lock); -} - -void gen6_rps_idle(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - /* Flush our bottom-half so that it does not race with us - * setting the idle frequency and so that it is bounded by - * our rpm wakeref. And then disable the interrupts to stop any - * futher RPS reclocking whilst we are asleep. - */ - gen6_disable_rps_interrupts(dev_priv); - - mutex_lock(&rps->lock); - if (rps->enabled) { - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - vlv_set_rps_idle(dev_priv); - else - gen6_set_rps(dev_priv, rps->idle_freq); - rps->last_adj = 0; - I915_WRITE(GEN6_PMINTRMSK, - gen6_sanitize_rps_pm_mask(dev_priv, ~0)); - } - mutex_unlock(&rps->lock); -} - -void gen6_rps_boost(struct i915_request *rq) -{ - struct intel_rps *rps = &rq->i915->gt_pm.rps; - unsigned long flags; - bool boost; - - /* This is intentionally racy! We peek at the state here, then - * validate inside the RPS worker. - */ - if (!rps->enabled) - return; - - if (i915_request_signaled(rq)) - return; - - /* Serializes with i915_request_retire() */ - boost = false; - spin_lock_irqsave(&rq->lock, flags); - if (!i915_request_has_waitboost(rq) && - !dma_fence_is_signaled_locked(&rq->fence)) { - boost = !atomic_fetch_inc(&rps->num_waiters); - rq->flags |= I915_REQUEST_WAITBOOST; - } - spin_unlock_irqrestore(&rq->lock, flags); - if (!boost) - return; - - if (READ_ONCE(rps->cur_freq) < rps->boost_freq) - schedule_work(&rps->work); - - atomic_inc(&rps->boosts); -} - -int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - int err; - - lockdep_assert_held(&rps->lock); - GEM_BUG_ON(val > rps->max_freq); - GEM_BUG_ON(val < rps->min_freq); - - if (!rps->enabled) { - rps->cur_freq = val; - return 0; - } - - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - err = valleyview_set_rps(dev_priv, val); - else - err = gen6_set_rps(dev_priv, val); - - return err; -} - -static void gen9_disable_rps(struct drm_i915_private *dev_priv) -{ - I915_WRITE(GEN6_RP_CONTROL, 0); -} - -static void gen6_disable_rps(struct drm_i915_private *dev_priv) -{ - I915_WRITE(GEN6_RPNSWREQ, 1 << 31); - I915_WRITE(GEN6_RP_CONTROL, 0); -} - -static void cherryview_disable_rps(struct drm_i915_private *dev_priv) -{ - I915_WRITE(GEN6_RP_CONTROL, 0); -} - -static void valleyview_disable_rps(struct drm_i915_private *dev_priv) -{ - I915_WRITE(GEN6_RP_CONTROL, 0); -} - -static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - /* All of these values are in units of 50MHz */ - - /* static values from HW: RP0 > RP1 > RPn (min_freq) */ - if (IS_GEN9_LP(dev_priv)) { - u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP); - rps->rp0_freq = (rp_state_cap >> 16) & 0xff; - rps->rp1_freq = (rp_state_cap >> 8) & 0xff; - rps->min_freq = (rp_state_cap >> 0) & 0xff; - } else { - u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); - rps->rp0_freq = (rp_state_cap >> 0) & 0xff; - rps->rp1_freq = (rp_state_cap >> 8) & 0xff; - rps->min_freq = (rp_state_cap >> 16) & 0xff; - } - /* hw_max = RP0 until we check for overclocking */ - rps->max_freq = rps->rp0_freq; - - rps->efficient_freq = rps->rp1_freq; - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || - IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { - u32 ddcc_status = 0; - - if (sandybridge_pcode_read(dev_priv, - HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, - &ddcc_status, NULL) == 0) - rps->efficient_freq = - clamp_t(u8, - ((ddcc_status >> 8) & 0xff), - rps->min_freq, - rps->max_freq); - } - - if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { - /* Store the frequency values in 16.66 MHZ units, which is - * the natural hardware unit for SKL - */ - rps->rp0_freq *= GEN9_FREQ_SCALER; - rps->rp1_freq *= GEN9_FREQ_SCALER; - rps->min_freq *= GEN9_FREQ_SCALER; - rps->max_freq *= GEN9_FREQ_SCALER; - rps->efficient_freq *= GEN9_FREQ_SCALER; - } -} - -static void reset_rps(struct drm_i915_private *dev_priv, - int (*set)(struct drm_i915_private *, u8)) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u8 freq = rps->cur_freq; - - /* force a reset */ - rps->power.mode = -1; - rps->cur_freq = -1; - - if (set(dev_priv, freq)) - DRM_ERROR("Failed to reset RPS to initial values\n"); -} - -/* See the Gen9_GT_PM_Programming_Guide doc for the below */ -static void gen9_enable_rps(struct drm_i915_private *dev_priv) -{ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* Program defaults and thresholds for RPS */ - if (IS_GEN(dev_priv, 9)) - I915_WRITE(GEN6_RC_VIDEO_FREQ, - GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq)); - - /* 1 second timeout*/ - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, - GT_INTERVAL_FROM_US(dev_priv, 1000000)); - - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa); - - /* Leaning on the below call to gen6_set_rps to program/setup the - * Up/Down EI & threshold registers, as well as the RP_CONTROL, - * RP_INTERRUPT_LIMITS & RPNSWREQ registers */ - reset_rps(dev_priv, gen6_set_rps); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static void gen8_enable_rps(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* 1 Program defaults and thresholds for RPS*/ - I915_WRITE(GEN6_RPNSWREQ, - HSW_FREQUENCY(rps->rp1_freq)); - I915_WRITE(GEN6_RC_VIDEO_FREQ, - HSW_FREQUENCY(rps->rp1_freq)); - /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */ - - /* Docs recommend 900MHz, and 300 MHz respectively */ - I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, - rps->max_freq_softlimit << 24 | - rps->min_freq_softlimit << 16); - - I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */ - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/ - I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */ - I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */ - - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - - /* 2: Enable RPS */ - I915_WRITE(GEN6_RP_CONTROL, - GEN6_RP_MEDIA_TURBO | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_AVG); - - reset_rps(dev_priv, gen6_set_rps); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static void gen6_enable_rps(struct drm_i915_private *dev_priv) -{ - /* Here begins a magic sequence of register writes to enable - * auto-downclocking. - * - * Perhaps there might be some value in exposing these to - * userspace... - */ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* Power down if completely idle for over 50ms */ - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - - reset_rps(dev_priv, gen6_set_rps); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rp0; - - val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE); - - switch (RUNTIME_INFO(dev_priv)->sseu.eu_total) { - case 8: - /* (2 * 4) config */ - rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT); - break; - case 12: - /* (2 * 6) config */ - rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT); - break; - case 16: - /* (2 * 8) config */ - default: - /* Setting (2 * 8) Min RP0 for any other combination */ - rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT); - break; - } - - rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK); - - return rp0; -} - -static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rpe; - - val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG); - rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; - - return rpe; -} - -static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rp1; - - val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE); - rp1 = (val & FB_GFX_FREQ_FUSE_MASK); - - return rp1; -} - -static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rpn; - - val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE); - rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) & - FB_GFX_FREQ_FUSE_MASK); - - return rpn; -} - -static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rp1; - - val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); - - rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; - - return rp1; -} - -static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rp0; - - val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); - - rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; - /* Clamp to max */ - rp0 = min_t(u32, rp0, 0xea); - - return rp0; -} - -static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rpe; - - val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO); - rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; - val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI); - rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; - - return rpe; -} - -static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv) -{ - u32 val; - - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff; - /* - * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value - * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on - * a BYT-M B0 the above register contains 0xbf. Moreover when setting - * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0 - * to make sure it matches what Punit accepts. - */ - return max_t(u32, val, 0xc0); -} - -static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv) -{ - dev_priv->gt_pm.rps.gpll_ref_freq = - vlv_get_cck_clock(dev_priv, "GPLL ref", - CCK_GPLL_CLOCK_CONTROL, - dev_priv->czclk_freq); - - DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", - dev_priv->gt_pm.rps.gpll_ref_freq); -} - -static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 val; - - vlv_iosf_sb_get(dev_priv, - BIT(VLV_IOSF_SB_PUNIT) | - BIT(VLV_IOSF_SB_NC) | - BIT(VLV_IOSF_SB_CCK)); - - vlv_init_gpll_ref_freq(dev_priv); - - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); - switch ((val >> 6) & 3) { - case 0: - case 1: - dev_priv->mem_freq = 800; - break; - case 2: - dev_priv->mem_freq = 1066; - break; - case 3: - dev_priv->mem_freq = 1333; - break; - } - DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); - - rps->max_freq = valleyview_rps_max_freq(dev_priv); - rps->rp0_freq = rps->max_freq; - DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, rps->max_freq), - rps->max_freq); - - rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv); - DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, rps->efficient_freq), - rps->efficient_freq); - - rps->rp1_freq = valleyview_rps_guar_freq(dev_priv); - DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, rps->rp1_freq), - rps->rp1_freq); - - rps->min_freq = valleyview_rps_min_freq(dev_priv); - DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, rps->min_freq), - rps->min_freq); - - vlv_iosf_sb_put(dev_priv, - BIT(VLV_IOSF_SB_PUNIT) | - BIT(VLV_IOSF_SB_NC) | - BIT(VLV_IOSF_SB_CCK)); -} - -static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 val; - - vlv_iosf_sb_get(dev_priv, - BIT(VLV_IOSF_SB_PUNIT) | - BIT(VLV_IOSF_SB_NC) | - BIT(VLV_IOSF_SB_CCK)); - - vlv_init_gpll_ref_freq(dev_priv); - - val = vlv_cck_read(dev_priv, CCK_FUSE_REG); - - switch ((val >> 2) & 0x7) { - case 3: - dev_priv->mem_freq = 2000; - break; - default: - dev_priv->mem_freq = 1600; - break; - } - DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); - - rps->max_freq = cherryview_rps_max_freq(dev_priv); - rps->rp0_freq = rps->max_freq; - DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, rps->max_freq), - rps->max_freq); - - rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv); - DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, rps->efficient_freq), - rps->efficient_freq); - - rps->rp1_freq = cherryview_rps_guar_freq(dev_priv); - DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, rps->rp1_freq), - rps->rp1_freq); - - rps->min_freq = cherryview_rps_min_freq(dev_priv); - DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, rps->min_freq), - rps->min_freq); - - vlv_iosf_sb_put(dev_priv, - BIT(VLV_IOSF_SB_PUNIT) | - BIT(VLV_IOSF_SB_NC) | - BIT(VLV_IOSF_SB_CCK)); - - WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq | - rps->min_freq) & 1, - "Odd GPU freq values\n"); -} - -static void cherryview_enable_rps(struct drm_i915_private *dev_priv) -{ - u32 val; - - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* 1: Program defaults and thresholds for RPS*/ - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); - I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); - I915_WRITE(GEN6_RP_UP_EI, 66000); - I915_WRITE(GEN6_RP_DOWN_EI, 350000); - - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - - /* 2: Enable RPS */ - I915_WRITE(GEN6_RP_CONTROL, - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_AVG); - - /* Setting Fixed Bias */ - vlv_punit_get(dev_priv); - - val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50; - vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); - - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); - - vlv_punit_put(dev_priv); - - /* RPS code assumes GPLL is used */ - WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); - - DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); - DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); - - reset_rps(dev_priv, valleyview_set_rps); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static void valleyview_enable_rps(struct drm_i915_private *dev_priv) -{ - u32 val; - - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); - I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); - I915_WRITE(GEN6_RP_UP_EI, 66000); - I915_WRITE(GEN6_RP_DOWN_EI, 350000); - - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - - I915_WRITE(GEN6_RP_CONTROL, - GEN6_RP_MEDIA_TURBO | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_CONT); - - vlv_punit_get(dev_priv); - - /* Setting Fixed Bias */ - val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875; - vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); - - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); - - vlv_punit_put(dev_priv); - - /* RPS code assumes GPLL is used */ - WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); - - DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); - DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); - - reset_rps(dev_priv, valleyview_set_rps); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static unsigned long intel_pxfreq(u32 vidfreq) -{ - unsigned long freq; - int div = (vidfreq & 0x3f0000) >> 16; - int post = (vidfreq & 0x3000) >> 12; - int pre = (vidfreq & 0x7); - - if (!pre) - return 0; - - freq = ((div * 133333) / ((1<ips.last_time1; - - /* Prevent division-by-zero if we are asking too fast. - * Also, we don't get interesting results if we are polling - * faster than once in 10ms, so just return the saved value - * in such cases. - */ - if (diff1 <= 10) - return dev_priv->ips.chipset_power; - - count1 = I915_READ(DMIEC); - count2 = I915_READ(DDREC); - count3 = I915_READ(CSIEC); - - total_count = count1 + count2 + count3; - - /* FIXME: handle per-counter overflow */ - if (total_count < dev_priv->ips.last_count1) { - diff = ~0UL - dev_priv->ips.last_count1; - diff += total_count; - } else { - diff = total_count - dev_priv->ips.last_count1; - } - - for (i = 0; i < ARRAY_SIZE(cparams); i++) { - if (cparams[i].i == dev_priv->ips.c_m && - cparams[i].t == dev_priv->ips.r_t) { - m = cparams[i].m; - c = cparams[i].c; - break; - } - } - - diff = div_u64(diff, diff1); - ret = ((m * diff) + c); - ret = div_u64(ret, 10); - - dev_priv->ips.last_count1 = total_count; - dev_priv->ips.last_time1 = now; - - dev_priv->ips.chipset_power = ret; - - return ret; -} - -unsigned long i915_chipset_val(struct drm_i915_private *dev_priv) -{ - intel_wakeref_t wakeref; - unsigned long val = 0; - - if (!IS_GEN(dev_priv, 5)) - return 0; - - with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) { - spin_lock_irq(&mchdev_lock); - val = __i915_chipset_val(dev_priv); - spin_unlock_irq(&mchdev_lock); - } - - return val; -} - -unsigned long i915_mch_val(struct drm_i915_private *i915) -{ - unsigned long m, x, b; - u32 tsfs; - - tsfs = intel_uncore_read(&i915->uncore, TSFS); - - m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT); - x = intel_uncore_read8(&i915->uncore, TR1); - - b = tsfs & TSFS_INTR_MASK; - - return ((m * x) / 127) - b; -} - -static int _pxvid_to_vd(u8 pxvid) -{ - if (pxvid == 0) - return 0; - - if (pxvid >= 8 && pxvid < 31) - pxvid = 31; - - return (pxvid + 2) * 125; -} - -static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid) -{ - const int vd = _pxvid_to_vd(pxvid); - const int vm = vd - 1125; - - if (INTEL_INFO(dev_priv)->is_mobile) - return vm > 0 ? vm : 0; - - return vd; -} - -static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) -{ - u64 now, diff, diffms; - u32 count; - - lockdep_assert_held(&mchdev_lock); - - now = ktime_get_raw_ns(); - diffms = now - dev_priv->ips.last_time2; - do_div(diffms, NSEC_PER_MSEC); - - /* Don't divide by 0 */ - if (!diffms) - return; - - count = I915_READ(GFXEC); - - if (count < dev_priv->ips.last_count2) { - diff = ~0UL - dev_priv->ips.last_count2; - diff += count; - } else { - diff = count - dev_priv->ips.last_count2; - } - - dev_priv->ips.last_count2 = count; - dev_priv->ips.last_time2 = now; - - /* More magic constants... */ - diff = diff * 1181; - diff = div_u64(diff, diffms * 10); - dev_priv->ips.gfx_power = diff; -} - -void i915_update_gfx_val(struct drm_i915_private *dev_priv) -{ - intel_wakeref_t wakeref; - - if (!IS_GEN(dev_priv, 5)) - return; - - with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) { - spin_lock_irq(&mchdev_lock); - __i915_update_gfx_val(dev_priv); - spin_unlock_irq(&mchdev_lock); - } -} - -static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) -{ - unsigned long t, corr, state1, corr2, state2; - u32 pxvid, ext_v; - - lockdep_assert_held(&mchdev_lock); - - pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq)); - pxvid = (pxvid >> 24) & 0x7f; - ext_v = pvid_to_extvid(dev_priv, pxvid); - - state1 = ext_v; - - t = i915_mch_val(dev_priv); - - /* Revel in the empirically derived constants */ - - /* Correction factor in 1/100000 units */ - if (t > 80) - corr = ((t * 2349) + 135940); - else if (t >= 50) - corr = ((t * 964) + 29317); - else /* < 50 */ - corr = ((t * 301) + 1004); - - corr = corr * ((150142 * state1) / 10000 - 78642); - corr /= 100000; - corr2 = (corr * dev_priv->ips.corr); - - state2 = (corr2 * state1) / 10000; - state2 /= 100; /* convert to mW */ - - __i915_update_gfx_val(dev_priv); - - return dev_priv->ips.gfx_power + state2; -} - -unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) -{ - intel_wakeref_t wakeref; - unsigned long val = 0; - - if (!IS_GEN(dev_priv, 5)) - return 0; - - with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) { - spin_lock_irq(&mchdev_lock); - val = __i915_gfx_val(dev_priv); - spin_unlock_irq(&mchdev_lock); - } - - return val; -} - -static struct drm_i915_private __rcu *i915_mch_dev; - -static struct drm_i915_private *mchdev_get(void) -{ - struct drm_i915_private *i915; - - rcu_read_lock(); - i915 = rcu_dereference(i915_mch_dev); - if (!kref_get_unless_zero(&i915->drm.ref)) - i915 = NULL; - rcu_read_unlock(); - - return i915; -} - -/** - * i915_read_mch_val - return value for IPS use - * - * Calculate and return a value for the IPS driver to use when deciding whether - * we have thermal and power headroom to increase CPU or GPU power budget. - */ -unsigned long i915_read_mch_val(void) -{ - struct drm_i915_private *i915; - unsigned long chipset_val = 0; - unsigned long graphics_val = 0; - intel_wakeref_t wakeref; - - i915 = mchdev_get(); - if (!i915) - return 0; - - with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - spin_lock_irq(&mchdev_lock); - chipset_val = __i915_chipset_val(i915); - graphics_val = __i915_gfx_val(i915); - spin_unlock_irq(&mchdev_lock); - } - - drm_dev_put(&i915->drm); - return chipset_val + graphics_val; -} -EXPORT_SYMBOL_GPL(i915_read_mch_val); - -/** - * i915_gpu_raise - raise GPU frequency limit - * - * Raise the limit; IPS indicates we have thermal headroom. - */ -bool i915_gpu_raise(void) -{ - struct drm_i915_private *i915; - - i915 = mchdev_get(); - if (!i915) - return false; - - spin_lock_irq(&mchdev_lock); - if (i915->ips.max_delay > i915->ips.fmax) - i915->ips.max_delay--; - spin_unlock_irq(&mchdev_lock); - - drm_dev_put(&i915->drm); - return true; -} -EXPORT_SYMBOL_GPL(i915_gpu_raise); - -/** - * i915_gpu_lower - lower GPU frequency limit - * - * IPS indicates we're close to a thermal limit, so throttle back the GPU - * frequency maximum. - */ -bool i915_gpu_lower(void) -{ - struct drm_i915_private *i915; - - i915 = mchdev_get(); - if (!i915) - return false; - - spin_lock_irq(&mchdev_lock); - if (i915->ips.max_delay < i915->ips.min_delay) - i915->ips.max_delay++; - spin_unlock_irq(&mchdev_lock); - - drm_dev_put(&i915->drm); - return true; -} -EXPORT_SYMBOL_GPL(i915_gpu_lower); - -/** - * i915_gpu_busy - indicate GPU business to IPS - * - * Tell the IPS driver whether or not the GPU is busy. - */ -bool i915_gpu_busy(void) -{ - struct drm_i915_private *i915; - bool ret; - - i915 = mchdev_get(); - if (!i915) - return false; - - ret = i915->gt.awake; - - drm_dev_put(&i915->drm); - return ret; -} -EXPORT_SYMBOL_GPL(i915_gpu_busy); - -/** - * i915_gpu_turbo_disable - disable graphics turbo - * - * Disable graphics turbo by resetting the max frequency and setting the - * current frequency to the default. - */ -bool i915_gpu_turbo_disable(void) -{ - struct drm_i915_private *i915; - bool ret; - - i915 = mchdev_get(); - if (!i915) - return false; - - spin_lock_irq(&mchdev_lock); - i915->ips.max_delay = i915->ips.fstart; - ret = ironlake_set_drps(i915, i915->ips.fstart); - spin_unlock_irq(&mchdev_lock); - - drm_dev_put(&i915->drm); - return ret; -} -EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); - -/** - * Tells the intel_ips driver that the i915 driver is now loaded, if - * IPS got loaded first. - * - * This awkward dance is so that neither module has to depend on the - * other in order for IPS to do the appropriate communication of - * GPU turbo limits to i915. - */ -static void -ips_ping_for_i915_load(void) -{ - void (*link)(void); - - link = symbol_get(ips_link_to_i915_driver); - if (link) { - link(); - symbol_put(ips_link_to_i915_driver); - } -} - -void intel_gpu_ips_init(struct drm_i915_private *dev_priv) -{ - /* We only register the i915 ips part with intel-ips once everything is - * set up, to avoid intel-ips sneaking in and reading bogus values. */ - rcu_assign_pointer(i915_mch_dev, dev_priv); - - ips_ping_for_i915_load(); -} - -void intel_gpu_ips_teardown(void) -{ - rcu_assign_pointer(i915_mch_dev, NULL); -} - -static void intel_init_emon(struct drm_i915_private *dev_priv) -{ - u32 lcfuse; - u8 pxw[16]; - int i; - - /* Disable to program */ - I915_WRITE(ECR, 0); - POSTING_READ(ECR); - - /* Program energy weights for various events */ - I915_WRITE(SDEW, 0x15040d00); - I915_WRITE(CSIEW0, 0x007f0000); - I915_WRITE(CSIEW1, 0x1e220004); - I915_WRITE(CSIEW2, 0x04000004); - - for (i = 0; i < 5; i++) - I915_WRITE(PEW(i), 0); - for (i = 0; i < 3; i++) - I915_WRITE(DEW(i), 0); - - /* Program P-state weights to account for frequency power adjustment */ - for (i = 0; i < 16; i++) { - u32 pxvidfreq = I915_READ(PXVFREQ(i)); - unsigned long freq = intel_pxfreq(pxvidfreq); - unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >> - PXVFREQ_PX_SHIFT; - unsigned long val; - - val = vid * vid; - val *= (freq / 1000); - val *= 255; - val /= (127*127*900); - if (val > 0xff) - DRM_ERROR("bad pxval: %ld\n", val); - pxw[i] = val; - } - /* Render standby states get 0 weight */ - pxw[14] = 0; - pxw[15] = 0; - - for (i = 0; i < 4; i++) { - u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) | - (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]); - I915_WRITE(PXW(i), val); - } - - /* Adjust magic regs to magic values (more experimental results) */ - I915_WRITE(OGW0, 0); - I915_WRITE(OGW1, 0); - I915_WRITE(EG0, 0x00007f00); - I915_WRITE(EG1, 0x0000000e); - I915_WRITE(EG2, 0x000e0000); - I915_WRITE(EG3, 0x68000300); - I915_WRITE(EG4, 0x42000000); - I915_WRITE(EG5, 0x00140031); - I915_WRITE(EG6, 0); - I915_WRITE(EG7, 0); - - for (i = 0; i < 8; i++) - I915_WRITE(PXWL(i), 0); - - /* Enable PMON + select events */ - I915_WRITE(ECR, 0x80000019); - - lcfuse = I915_READ(LCFUSE02); - - dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK); -} - -void intel_init_gt_powersave(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - /* Powersaving is controlled by the host when inside a VM */ - if (intel_vgpu_active(dev_priv)) - mkwrite_device_info(dev_priv)->has_rps = false; - - /* Initialize RPS limits (for userspace) */ - if (IS_CHERRYVIEW(dev_priv)) - cherryview_init_gt_powersave(dev_priv); - else if (IS_VALLEYVIEW(dev_priv)) - valleyview_init_gt_powersave(dev_priv); - else if (INTEL_GEN(dev_priv) >= 6) - gen6_init_rps_frequencies(dev_priv); - - /* Derive initial user preferences/limits from the hardware limits */ - rps->max_freq_softlimit = rps->max_freq; - rps->min_freq_softlimit = rps->min_freq; - - /* After setting max-softlimit, find the overclock max freq */ - if (IS_GEN(dev_priv, 6) || - IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) { - u32 params = 0; - - sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, - ¶ms, NULL); - if (params & BIT(31)) { /* OC supported */ - DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n", - (rps->max_freq & 0xff) * 50, - (params & 0xff) * 50); - rps->max_freq = params & 0xff; - } - } - - /* Finally allow us to boost to max by default */ - rps->boost_freq = rps->max_freq; - rps->idle_freq = rps->min_freq; - rps->cur_freq = rps->idle_freq; -} - -void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) -{ - dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */ - intel_disable_gt_powersave(dev_priv); - - if (INTEL_GEN(dev_priv) >= 11) - gen11_reset_rps_interrupts(dev_priv); - else if (INTEL_GEN(dev_priv) >= 6) - gen6_reset_rps_interrupts(dev_priv); -} - -static void intel_disable_rps(struct drm_i915_private *dev_priv) -{ - lockdep_assert_held(&dev_priv->gt_pm.rps.lock); - - if (!dev_priv->gt_pm.rps.enabled) - return; - - if (INTEL_GEN(dev_priv) >= 9) - gen9_disable_rps(dev_priv); - else if (IS_CHERRYVIEW(dev_priv)) - cherryview_disable_rps(dev_priv); - else if (IS_VALLEYVIEW(dev_priv)) - valleyview_disable_rps(dev_priv); - else if (INTEL_GEN(dev_priv) >= 6) - gen6_disable_rps(dev_priv); - else if (IS_IRONLAKE_M(dev_priv)) - ironlake_disable_drps(dev_priv); - - dev_priv->gt_pm.rps.enabled = false; -} - -void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) -{ - mutex_lock(&dev_priv->gt_pm.rps.lock); - - intel_disable_rps(dev_priv); - if (HAS_LLC(dev_priv)) - intel_llc_disable(&dev_priv->gt.llc); - - mutex_unlock(&dev_priv->gt_pm.rps.lock); -} - -static void intel_enable_rps(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - lockdep_assert_held(&rps->lock); - - if (rps->enabled) - return; - - if (IS_CHERRYVIEW(dev_priv)) { - cherryview_enable_rps(dev_priv); - } else if (IS_VALLEYVIEW(dev_priv)) { - valleyview_enable_rps(dev_priv); - } else if (INTEL_GEN(dev_priv) >= 9) { - gen9_enable_rps(dev_priv); - } else if (IS_BROADWELL(dev_priv)) { - gen8_enable_rps(dev_priv); - } else if (INTEL_GEN(dev_priv) >= 6) { - gen6_enable_rps(dev_priv); - } else if (IS_IRONLAKE_M(dev_priv)) { - ironlake_enable_drps(dev_priv); - intel_init_emon(dev_priv); - } - - WARN_ON(rps->max_freq < rps->min_freq); - WARN_ON(rps->idle_freq > rps->max_freq); - - WARN_ON(rps->efficient_freq < rps->min_freq); - WARN_ON(rps->efficient_freq > rps->max_freq); - - rps->enabled = true; -} - -void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) -{ - /* Powersaving is controlled by the host when inside a VM */ - if (intel_vgpu_active(dev_priv)) - return; - - mutex_lock(&dev_priv->gt_pm.rps.lock); - - if (HAS_RPS(dev_priv)) - intel_enable_rps(dev_priv); - - intel_llc_enable(&dev_priv->gt.llc); - - mutex_unlock(&dev_priv->gt_pm.rps.lock); -} - static void ibx_init_clock_gating(struct drm_i915_private *dev_priv) { /* @@ -8855,90 +7224,8 @@ void intel_init_pm(struct drm_i915_private *dev_priv) } } -static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - /* - * N = val - 0xb7 - * Slow = Fast = GPLL ref * N - */ - return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000); -} - -static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7; -} - -static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - /* - * N = val / 2 - * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2 - */ - return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000); -} - -static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - /* CHV needs even values */ - return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2; -} - -int intel_gpu_freq(struct drm_i915_private *dev_priv, int val) -{ - if (INTEL_GEN(dev_priv) >= 9) - return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER, - GEN9_FREQ_SCALER); - else if (IS_CHERRYVIEW(dev_priv)) - return chv_gpu_freq(dev_priv, val); - else if (IS_VALLEYVIEW(dev_priv)) - return byt_gpu_freq(dev_priv, val); - else - return val * GT_FREQUENCY_MULTIPLIER; -} - -int intel_freq_opcode(struct drm_i915_private *dev_priv, int val) -{ - if (INTEL_GEN(dev_priv) >= 9) - return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER, - GT_FREQUENCY_MULTIPLIER); - else if (IS_CHERRYVIEW(dev_priv)) - return chv_freq_opcode(dev_priv, val); - else if (IS_VALLEYVIEW(dev_priv)) - return byt_freq_opcode(dev_priv, val); - else - return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER); -} - void intel_pm_setup(struct drm_i915_private *dev_priv) { - mutex_init(&dev_priv->gt_pm.rps.lock); - mutex_init(&dev_priv->gt_pm.rps.power.mutex); - - atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0); - dev_priv->runtime_pm.suspended = false; atomic_set(&dev_priv->runtime_pm.wakeref_count, 0); } - -u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat) -{ - u32 cagf; - - if (INTEL_GEN(dev_priv) >= 9) - cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; - else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; - else - cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; - - return cagf; -} diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h index 00a5801dfc06..b579c724b915 100644 --- a/drivers/gpu/drm/i915/intel_pm.h +++ b/drivers/gpu/drm/i915/intel_pm.h @@ -29,15 +29,6 @@ void intel_update_watermarks(struct intel_crtc *crtc); void intel_init_pm(struct drm_i915_private *dev_priv); void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv); void intel_pm_setup(struct drm_i915_private *dev_priv); -void intel_gpu_ips_init(struct drm_i915_private *dev_priv); -void intel_gpu_ips_teardown(void); -void intel_init_gt_powersave(struct drm_i915_private *dev_priv); -void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv); -void intel_enable_gt_powersave(struct drm_i915_private *dev_priv); -void intel_disable_gt_powersave(struct drm_i915_private *dev_priv); -void gen6_rps_busy(struct drm_i915_private *dev_priv); -void gen6_rps_idle(struct drm_i915_private *dev_priv); -void gen6_rps_boost(struct i915_request *rq); void g4x_wm_get_hw_state(struct drm_i915_private *dev_priv); void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv); void ilk_wm_get_hw_state(struct drm_i915_private *dev_priv); @@ -67,19 +58,6 @@ bool ilk_disable_lp_wm(struct drm_device *dev); void intel_init_ipc(struct drm_i915_private *dev_priv); void intel_enable_ipc(struct drm_i915_private *dev_priv); -int intel_gpu_freq(struct drm_i915_private *dev_priv, int val); -int intel_freq_opcode(struct drm_i915_private *dev_priv, int val); - -u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat1); - -unsigned long i915_chipset_val(struct drm_i915_private *dev_priv); -unsigned long i915_mch_val(struct drm_i915_private *dev_priv); -unsigned long i915_gfx_val(struct drm_i915_private *dev_priv); -void i915_update_gfx_val(struct drm_i915_private *dev_priv); - -bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val); -int intel_set_rps(struct drm_i915_private *dev_priv, u8 val); -void intel_rps_mark_interactive(struct drm_i915_private *i915, bool interactive); bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable); #endif /* __INTEL_PM_H__ */ From 9fb94522ddf2d3dca0d723e53db5f3f5ffb5487b Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Thu, 24 Oct 2019 22:16:42 +0100 Subject: [PATCH 084/154] drm/i915: Extract the GuC interrupt handlers Pull the GuC interrupt handlers out of i915_irq.c. They now use the GT interrupt facilities rather than the central dispatch. Based on a patch by Chris Wilson. Signed-off-by: Andi Shyti Cc: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191024211642.7688-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/uc/intel_guc.c | 89 ++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_irq.c | 93 -------------------------- drivers/gpu/drm/i915/i915_irq.h | 9 --- 3 files changed, 89 insertions(+), 102 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 37f7bcbf7dac..f12959182182 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -4,6 +4,8 @@ */ #include "gt/intel_gt.h" +#include "gt/intel_gt_irq.h" +#include "gt/intel_gt_pm_irq.h" #include "intel_guc.h" #include "intel_guc_ads.h" #include "intel_guc_submission.h" @@ -77,6 +79,93 @@ void intel_guc_init_send_regs(struct intel_guc *guc) guc->send_regs.fw_domains = fw_domains; } +static void gen9_reset_guc_interrupts(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + assert_rpm_wakelock_held(>->i915->runtime_pm); + + spin_lock_irq(>->irq_lock); + gen6_gt_pm_reset_iir(gt, gt->pm_guc_events); + spin_unlock_irq(>->irq_lock); +} + +static void gen9_enable_guc_interrupts(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + assert_rpm_wakelock_held(>->i915->runtime_pm); + + spin_lock_irq(>->irq_lock); + if (!guc->interrupts.enabled) { + WARN_ON_ONCE(intel_uncore_read(gt->uncore, GEN8_GT_IIR(2)) & + gt->pm_guc_events); + guc->interrupts.enabled = true; + gen6_gt_pm_enable_irq(gt, gt->pm_guc_events); + } + spin_unlock_irq(>->irq_lock); +} + +static void gen9_disable_guc_interrupts(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + assert_rpm_wakelock_held(>->i915->runtime_pm); + + spin_lock_irq(>->irq_lock); + guc->interrupts.enabled = false; + + gen6_gt_pm_disable_irq(gt, gt->pm_guc_events); + + spin_unlock_irq(>->irq_lock); + intel_synchronize_irq(gt->i915); + + gen9_reset_guc_interrupts(guc); +} + +static void gen11_reset_guc_interrupts(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + spin_lock_irq(>->irq_lock); + gen11_gt_reset_one_iir(gt, 0, GEN11_GUC); + spin_unlock_irq(>->irq_lock); +} + +static void gen11_enable_guc_interrupts(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + spin_lock_irq(>->irq_lock); + if (!guc->interrupts.enabled) { + u32 events = REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST); + + WARN_ON_ONCE(gen11_gt_reset_one_iir(gt, 0, GEN11_GUC)); + intel_uncore_write(gt->uncore, + GEN11_GUC_SG_INTR_ENABLE, events); + intel_uncore_write(gt->uncore, + GEN11_GUC_SG_INTR_MASK, ~events); + guc->interrupts.enabled = true; + } + spin_unlock_irq(>->irq_lock); +} + +static void gen11_disable_guc_interrupts(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + spin_lock_irq(>->irq_lock); + guc->interrupts.enabled = false; + + intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_MASK, ~0); + intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_ENABLE, 0); + + spin_unlock_irq(>->irq_lock); + intel_synchronize_irq(gt->i915); + + gen11_reset_guc_interrupts(guc); +} + void intel_guc_init_early(struct intel_guc *guc) { struct drm_i915_private *i915 = guc_to_gt(guc)->i915; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 8a3aed17c3b7..0a3861c873ee 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -321,99 +321,6 @@ void ilk_update_display_irq(struct drm_i915_private *dev_priv, } } -static i915_reg_t gen6_pm_iir(struct drm_i915_private *dev_priv) -{ - WARN_ON_ONCE(INTEL_GEN(dev_priv) >= 11); - - return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IIR(2) : GEN6_PMIIR; -} - -void gen9_reset_guc_interrupts(struct intel_guc *guc) -{ - struct intel_gt *gt = guc_to_gt(guc); - - assert_rpm_wakelock_held(gt->uncore->rpm); - - spin_lock_irq(>->irq_lock); - gen6_gt_pm_reset_iir(gt, gt->pm_guc_events); - spin_unlock_irq(>->irq_lock); -} - -void gen9_enable_guc_interrupts(struct intel_guc *guc) -{ - struct intel_gt *gt = guc_to_gt(guc); - - assert_rpm_wakelock_held(gt->uncore->rpm); - - spin_lock_irq(>->irq_lock); - if (!guc->interrupts.enabled) { - WARN_ON_ONCE(intel_uncore_read(gt->uncore, - gen6_pm_iir(gt->i915)) & - gt->pm_guc_events); - guc->interrupts.enabled = true; - gen6_gt_pm_enable_irq(gt, gt->pm_guc_events); - } - spin_unlock_irq(>->irq_lock); -} - -void gen9_disable_guc_interrupts(struct intel_guc *guc) -{ - struct intel_gt *gt = guc_to_gt(guc); - - assert_rpm_wakelock_held(gt->uncore->rpm); - - spin_lock_irq(>->irq_lock); - guc->interrupts.enabled = false; - - gen6_gt_pm_disable_irq(gt, gt->pm_guc_events); - - spin_unlock_irq(>->irq_lock); - intel_synchronize_irq(gt->i915); - - gen9_reset_guc_interrupts(guc); -} - -void gen11_reset_guc_interrupts(struct intel_guc *guc) -{ - struct intel_gt *gt = guc_to_gt(guc); - - spin_lock_irq(>->irq_lock); - gen11_gt_reset_one_iir(gt, 0, GEN11_GUC); - spin_unlock_irq(>->irq_lock); -} - -void gen11_enable_guc_interrupts(struct intel_guc *guc) -{ - struct intel_gt *gt = guc_to_gt(guc); - - spin_lock_irq(>->irq_lock); - if (!guc->interrupts.enabled) { - u32 events = REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST); - - WARN_ON_ONCE(gen11_gt_reset_one_iir(gt, 0, GEN11_GUC)); - intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_ENABLE, events); - intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_MASK, ~events); - guc->interrupts.enabled = true; - } - spin_unlock_irq(>->irq_lock); -} - -void gen11_disable_guc_interrupts(struct intel_guc *guc) -{ - struct intel_gt *gt = guc_to_gt(guc); - - spin_lock_irq(>->irq_lock); - guc->interrupts.enabled = false; - - intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_MASK, ~0); - intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_ENABLE, 0); - - spin_unlock_irq(>->irq_lock); - intel_synchronize_irq(gt->i915); - - gen11_reset_guc_interrupts(guc); -} - /** * bdw_update_port_irq - update DE port interrupt * @dev_priv: driver private diff --git a/drivers/gpu/drm/i915/i915_irq.h b/drivers/gpu/drm/i915/i915_irq.h index d0d91c6e00d7..812c47a9c2d6 100644 --- a/drivers/gpu/drm/i915/i915_irq.h +++ b/drivers/gpu/drm/i915/i915_irq.h @@ -17,9 +17,6 @@ struct drm_device; struct drm_display_mode; struct drm_i915_private; struct intel_crtc; -struct intel_crtc; -struct intel_gt; -struct intel_guc; struct intel_uncore; void intel_irq_init(struct drm_i915_private *dev_priv); @@ -103,12 +100,6 @@ void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv, u8 pipe_mask); void gen8_irq_power_well_pre_disable(struct drm_i915_private *dev_priv, u8 pipe_mask); -void gen9_reset_guc_interrupts(struct intel_guc *guc); -void gen9_enable_guc_interrupts(struct intel_guc *guc); -void gen9_disable_guc_interrupts(struct intel_guc *guc); -void gen11_reset_guc_interrupts(struct intel_guc *guc); -void gen11_enable_guc_interrupts(struct intel_guc *guc); -void gen11_disable_guc_interrupts(struct intel_guc *guc); bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, bool in_vblank_irq, int *vpos, int *hpos, From 3fc794f27fec8f020907090fb866602a1c64a73c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 26 Oct 2019 21:20:32 +0100 Subject: [PATCH 085/154] drm/i915: Split memory_region initialisation into its own file Pull the memory region bookkeeping into its file. Let's start clean and see how long it lasts! Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191026202032.4371-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 15 ++++- drivers/gpu/drm/i915/i915_drv.h | 3 - drivers/gpu/drm/i915/i915_gem_gtt.c | 63 ------------------- drivers/gpu/drm/i915/intel_memory_region.c | 56 +++++++++++++++++ drivers/gpu/drm/i915/intel_memory_region.h | 3 + .../gpu/drm/i915/selftests/mock_gem_device.c | 9 ++- 6 files changed, 75 insertions(+), 74 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 355526a35d58..21273b516dbe 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -76,6 +76,7 @@ #include "i915_trace.h" #include "i915_vgpu.h" #include "intel_csr.h" +#include "intel_memory_region.h" #include "intel_pm.h" static struct drm_driver driver; @@ -1172,12 +1173,16 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) if (ret) goto err_ggtt; + ret = intel_memory_regions_hw_probe(dev_priv); + if (ret) + goto err_ggtt; + intel_gt_init_hw_early(dev_priv); ret = i915_ggtt_enable_hw(dev_priv); if (ret) { DRM_ERROR("failed to enable GGTT\n"); - goto err_ggtt; + goto err_mem_regions; } pci_set_master(pdev); @@ -1194,7 +1199,7 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) if (ret) { DRM_ERROR("failed to set DMA mask\n"); - goto err_ggtt; + goto err_mem_regions; } } @@ -1212,7 +1217,7 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) if (ret) { DRM_ERROR("failed to set DMA mask\n"); - goto err_ggtt; + goto err_mem_regions; } } @@ -1264,6 +1269,8 @@ err_msi: if (pdev->msi_enabled) pci_disable_msi(pdev); pm_qos_remove_request(&dev_priv->pm_qos); +err_mem_regions: + intel_memory_regions_driver_release(dev_priv); err_ggtt: i915_ggtt_driver_release(dev_priv); err_perf: @@ -1510,6 +1517,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) out_cleanup_hw: i915_driver_hw_remove(dev_priv); + intel_memory_regions_driver_release(dev_priv); i915_ggtt_driver_release(dev_priv); out_cleanup_mmio: i915_driver_mmio_release(dev_priv); @@ -1567,6 +1575,7 @@ static void i915_driver_release(struct drm_device *dev) i915_gem_driver_release(dev_priv); + intel_memory_regions_driver_release(dev_priv); i915_ggtt_driver_release(dev_priv); i915_driver_mmio_release(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 96fe0e8d468a..a22d969cb352 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1916,9 +1916,6 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm, unsigned int flags); int i915_gem_evict_vm(struct i915_address_space *vm); -void i915_gem_cleanup_memory_regions(struct drm_i915_private *i915); -int i915_gem_init_memory_regions(struct drm_i915_private *i915); - /* i915_gem_internal.c */ struct drm_i915_gem_object * i915_gem_object_create_internal(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 3d3a8db18a07..eebc7fee81e2 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2744,59 +2744,6 @@ int i915_init_ggtt(struct drm_i915_private *i915) return 0; } -void i915_gem_cleanup_memory_regions(struct drm_i915_private *i915) -{ - int i; - - for (i = 0; i < INTEL_REGION_UNKNOWN; i++) { - struct intel_memory_region *region = i915->mm.regions[i]; - - if (region) - intel_memory_region_put(region); - } -} - -int i915_gem_init_memory_regions(struct drm_i915_private *i915) -{ - int err, i; - - for (i = 0; i < INTEL_REGION_UNKNOWN; i++) { - struct intel_memory_region *mem = ERR_PTR(-ENODEV); - u32 type; - - if (!HAS_REGION(i915, BIT(i))) - continue; - - type = MEMORY_TYPE_FROM_REGION(intel_region_map[i]); - switch (type) { - case INTEL_MEMORY_SYSTEM: - mem = i915_gem_shmem_setup(i915); - break; - case INTEL_MEMORY_STOLEN: - mem = i915_gem_stolen_setup(i915); - break; - } - - if (IS_ERR(mem)) { - err = PTR_ERR(mem); - DRM_ERROR("Failed to setup region(%d) type=%d\n", err, type); - goto out_cleanup; - } - - mem->id = intel_region_map[i]; - mem->type = type; - mem->instance = MEMORY_INSTANCE_FROM_REGION(intel_region_map[i]); - - i915->mm.regions[i] = mem; - } - - return 0; - -out_cleanup: - i915_gem_cleanup_memory_regions(i915); - return err; -} - static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) { struct i915_vma *vma, *vn; @@ -2834,8 +2781,6 @@ void i915_ggtt_driver_release(struct drm_i915_private *i915) { struct pagevec *pvec; - i915_gem_cleanup_memory_regions(i915); - fini_aliasing_ppgtt(&i915->ggtt); ggtt_cleanup_hw(&i915->ggtt); @@ -3311,15 +3256,7 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) if (ret) return ret; - ret = i915_gem_init_memory_regions(dev_priv); - if (ret) - goto out_gtt_cleanup; - return 0; - -out_gtt_cleanup: - dev_priv->ggtt.vm.cleanup(&dev_priv->ggtt.vm); - return ret; } int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index 72f98a111de1..a60f77ff58d4 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -207,6 +207,62 @@ void intel_memory_region_put(struct intel_memory_region *mem) kref_put(&mem->kref, __intel_memory_region_destroy); } +/* Global memory region registration -- only slight layer inversions! */ + +int intel_memory_regions_hw_probe(struct drm_i915_private *i915) +{ + int err, i; + + for (i = 0; i < ARRAY_SIZE(i915->mm.regions); i++) { + struct intel_memory_region *mem = ERR_PTR(-ENODEV); + u32 type; + + if (!HAS_REGION(i915, BIT(i))) + continue; + + type = MEMORY_TYPE_FROM_REGION(intel_region_map[i]); + switch (type) { + case INTEL_MEMORY_SYSTEM: + mem = i915_gem_shmem_setup(i915); + break; + case INTEL_MEMORY_STOLEN: + mem = i915_gem_stolen_setup(i915); + break; + } + + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + DRM_ERROR("Failed to setup region(%d) type=%d\n", err, type); + goto out_cleanup; + } + + mem->id = intel_region_map[i]; + mem->type = type; + mem->instance = MEMORY_INSTANCE_FROM_REGION(intel_region_map[i]); + + i915->mm.regions[i] = mem; + } + + return 0; + +out_cleanup: + intel_memory_regions_driver_release(i915); + return err; +} + +void intel_memory_regions_driver_release(struct drm_i915_private *i915) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(i915->mm.regions); i++) { + struct intel_memory_region *region = + fetch_and_zero(&i915->mm.regions[i]); + + if (region) + intel_memory_region_put(region); + } +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/intel_memory_region.c" #include "selftests/mock_region.c" diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 49b059a2be70..19920c256ede 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -117,4 +117,7 @@ struct intel_memory_region * intel_memory_region_get(struct intel_memory_region *mem); void intel_memory_region_put(struct intel_memory_region *mem); +int intel_memory_regions_hw_probe(struct drm_i915_private *i915); +void intel_memory_regions_driver_release(struct drm_i915_private *i915); + #endif diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index c2f5775b6d58..a0da5944dd33 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -28,6 +28,7 @@ #include "gt/intel_gt.h" #include "gt/intel_gt_requests.h" #include "gt/mock_engine.h" +#include "intel_memory_region.h" #include "mock_request.h" #include "mock_gem_device.h" @@ -72,7 +73,7 @@ static void mock_device_release(struct drm_device *dev) mock_fini_ggtt(&i915->ggtt); destroy_workqueue(i915->wq); - i915_gem_cleanup_memory_regions(i915); + intel_memory_regions_driver_release(i915); drm_mode_config_cleanup(&i915->drm); @@ -164,6 +165,7 @@ struct drm_i915_private *mock_gem_device(void) I915_GTT_PAGE_SIZE_2M; mkwrite_device_info(i915)->memory_regions = REGION_SMEM; + intel_memory_regions_hw_probe(i915); mock_uncore_init(&i915->uncore, i915); @@ -198,10 +200,6 @@ struct drm_i915_private *mock_gem_device(void) intel_engines_driver_register(i915); - err = i915_gem_init_memory_regions(i915); - if (err) - goto err_context; - return i915; err_context: @@ -212,6 +210,7 @@ err_unlock: intel_timelines_fini(i915); destroy_workqueue(i915->wq); err_drv: + intel_memory_regions_driver_release(i915); drm_mode_config_cleanup(&i915->drm); drm_dev_fini(&i915->drm); put_device: From d9d54a530a70eee6f003bd3ade38817cf85b9325 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 27 Oct 2019 15:43:14 +0000 Subject: [PATCH 086/154] drm/i915: Put future HW and their uAPIs under STAGING & BROKEN We would like some freedom to break the user API/ABI for future HW but yet still expose the driver for upstream development on that HW. Currently, we have the i915.force_probe module parameter to avoid binding to HW while the driver is under development, but that is still a little too soft with respect to the stringent no-regression rules if we also plan to be redesigning the uAPI to go along with the new HW. To allow the uAPI to be changed during development, only expose that API and in development HW under STAGING (and BROKEN). Hopefully, making it explicit that such interfaces to that HW are under development and not to be blindly enabled by distributions. Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: Joonas Lahtinen Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Dave Airlie Acked-by: Dave Airlie Acked-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20191027154314.11139-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Kconfig | 6 ++++++ drivers/gpu/drm/i915/Kconfig.debug | 1 + drivers/gpu/drm/i915/Kconfig.unstable | 21 +++++++++++++++++++++ 3 files changed, 28 insertions(+) create mode 100644 drivers/gpu/drm/i915/Kconfig.unstable diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 3c6d57df262d..ba9595960bbe 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -148,3 +148,9 @@ menu "drm/i915 Profile Guided Optimisation" depends on DRM_I915 source "drivers/gpu/drm/i915/Kconfig.profile" endmenu + +menu "drm/i915 Unstable Evolution" + visible if EXPERT && STAGING && BROKEN + depends on DRM_I915 + source "drivers/gpu/drm/i915/Kconfig.unstable" +endmenu diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index eea79125b3ea..80815a5229a1 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -36,6 +36,7 @@ config DRM_I915_DEBUG select DRM_I915_SELFTEST select DRM_I915_DEBUG_RUNTIME_PM select DRM_I915_DEBUG_MMIO + select BROKEN # for prototype uAPI default n help Choose this option to turn on extra driver debugging that may affect diff --git a/drivers/gpu/drm/i915/Kconfig.unstable b/drivers/gpu/drm/i915/Kconfig.unstable new file mode 100644 index 000000000000..cf151a297ed7 --- /dev/null +++ b/drivers/gpu/drm/i915/Kconfig.unstable @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0-only +config DRM_I915_UNSTABLE + bool "Enable unstable API for early prototype development" + depends on EXPERT + depends on STAGING + depends on BROKEN # should never be enabled by distros! + # We use the dependency on !COMPILE_TEST to not be enabled in + # allmodconfig or allyesconfig configurations + depends on !COMPILE_TEST + default n + help + Enable prototype uAPI under general discussion before they are + finalized. Such prototypes may be withdrawn or substantially + changed before release. They are only enabled here so that a wide + number of interested parties (userspace driver developers) can + verify that the uAPI meet their expectations. These uAPI should + never be used in production. + + Recommended for driver developers _only_. + + If in the slightest bit of doubt, say "N". From dd095afc88d557f11e5c0613848d97568d675fb7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 26 Oct 2019 21:09:17 +0100 Subject: [PATCH 087/154] drm/i915/rps: Flip interpretation of ips fmin/fmax to max rps ips uses clock delays as opposed to rps frequency bins. To fit the delays into the same rps calculations, we need to invert the ips delays. Fixes: 3e7abf814193 ("drm/i915: Extract GT render power state management") Signed-off-by: Chris Wilson Cc: Andi Shyti Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20191026200917.1780-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_rps.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 30f56c786468..032a0c6389f9 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -180,8 +180,8 @@ static void gen5_rps_init(struct intel_rps *rps) DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", fmax, fmin, fstart); - rps->min_freq = -fstart; - rps->max_freq = -fmin; + rps->min_freq = fmax; + rps->max_freq = fmin; rps->idle_freq = rps->min_freq; rps->cur_freq = rps->idle_freq; @@ -307,7 +307,9 @@ static bool gen5_rps_set(struct intel_rps *rps, u8 val) return false; /* still busy with another command */ } - val = -val; + /* Invert the frequency bin into an ips delay */ + val = rps->max_freq - val; + val = rps->min_freq + val; rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | From 12c4d4c18c5dcc971dc4946cbceb7174320bd978 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 24 Oct 2019 15:21:36 +0300 Subject: [PATCH 088/154] drm/i915: Use _PICK() for CHICKEN_TRANS() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make CHICKEN_TRANS() a bit less special looking by using _PICK(). Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191024122138.25065-1-ville.syrjala@linux.intel.com Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/i915/display/intel_ddi.c | 14 +++++++------- drivers/gpu/drm/i915/display/intel_psr.c | 22 +--------------------- drivers/gpu/drm/i915/i915_reg.h | 13 +++++++++---- 3 files changed, 17 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index c596b1f74fee..281594bcbfae 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3872,12 +3872,12 @@ static i915_reg_t gen9_chicken_trans_reg_by_port(struct drm_i915_private *dev_priv, enum port port) { - static const i915_reg_t regs[] = { - [PORT_A] = CHICKEN_TRANS_EDP, - [PORT_B] = CHICKEN_TRANS_A, - [PORT_C] = CHICKEN_TRANS_B, - [PORT_D] = CHICKEN_TRANS_C, - [PORT_E] = CHICKEN_TRANS_A, + static const enum transcoder trans[] = { + [PORT_A] = TRANSCODER_EDP, + [PORT_B] = TRANSCODER_A, + [PORT_C] = TRANSCODER_B, + [PORT_D] = TRANSCODER_C, + [PORT_E] = TRANSCODER_A, }; WARN_ON(INTEL_GEN(dev_priv) < 9); @@ -3885,7 +3885,7 @@ gen9_chicken_trans_reg_by_port(struct drm_i915_private *dev_priv, if (WARN_ON(port < PORT_A || port > PORT_E)) port = PORT_A; - return regs[port]; + return CHICKEN_TRANS(trans[port]); } static void intel_enable_ddi_hdmi(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 5d3cdd7d4a8d..6a9f322d3fca 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -740,25 +740,6 @@ static void intel_psr_activate(struct intel_dp *intel_dp) dev_priv->psr.active = true; } -static i915_reg_t gen9_chicken_trans_reg(struct drm_i915_private *dev_priv, - enum transcoder cpu_transcoder) -{ - static const i915_reg_t regs[] = { - [TRANSCODER_A] = CHICKEN_TRANS_A, - [TRANSCODER_B] = CHICKEN_TRANS_B, - [TRANSCODER_C] = CHICKEN_TRANS_C, - [TRANSCODER_EDP] = CHICKEN_TRANS_EDP, - }; - - WARN_ON(INTEL_GEN(dev_priv) < 9); - - if (WARN_ON(cpu_transcoder >= ARRAY_SIZE(regs) || - !regs[cpu_transcoder].reg)) - cpu_transcoder = TRANSCODER_A; - - return regs[cpu_transcoder]; -} - static void intel_psr_enable_source(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { @@ -774,8 +755,7 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, if (dev_priv->psr.psr2_enabled && (IS_GEN(dev_priv, 9) && !IS_GEMINILAKE(dev_priv))) { - i915_reg_t reg = gen9_chicken_trans_reg(dev_priv, - cpu_transcoder); + i915_reg_t reg = CHICKEN_TRANS(cpu_transcoder); u32 chicken = I915_READ(reg); chicken |= PSR2_VSC_ENABLE_PROG_HEADER | diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 7f04b6689ee0..446bebf3236c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7627,10 +7627,15 @@ enum { #define BDW_DPRS_MASK_VBLANK_SRD (1 << 0) #define CHICKEN_PIPESL_1(pipe) _MMIO_PIPE(pipe, _CHICKEN_PIPESL_1_A, _CHICKEN_PIPESL_1_B) -#define CHICKEN_TRANS_A _MMIO(0x420c0) -#define CHICKEN_TRANS_B _MMIO(0x420c4) -#define CHICKEN_TRANS_C _MMIO(0x420c8) -#define CHICKEN_TRANS_EDP _MMIO(0x420cc) +#define _CHICKEN_TRANS_A 0x420c0 +#define _CHICKEN_TRANS_B 0x420c4 +#define _CHICKEN_TRANS_C 0x420c8 +#define _CHICKEN_TRANS_EDP 0x420cc +#define CHICKEN_TRANS(trans) _MMIO(_PICK((trans), \ + [TRANSCODER_EDP] = _CHICKEN_TRANS_EDP, \ + [TRANSCODER_A] = _CHICKEN_TRANS_A, \ + [TRANSCODER_B] = _CHICKEN_TRANS_B, \ + [TRANSCODER_C] = _CHICKEN_TRANS_C)) #define VSC_DATA_SEL_SOFTWARE_CONTROL (1 << 25) /* GLK and CNL+ */ #define DDI_TRAINING_OVERRIDE_ENABLE (1 << 19) #define DDI_TRAINING_OVERRIDE_VALUE (1 << 18) From 1d581dc3f50555a36219a192c9ea4d46abc40e70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 24 Oct 2019 15:21:37 +0300 Subject: [PATCH 089/154] drm/i915: Add CHICKEN_TRANS_D MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add CHICKEN_TRANS definition for transcoder D. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191024122138.25065-2-ville.syrjala@linux.intel.com Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/i915/i915_reg.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 446bebf3236c..60dfad33f9ee 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7631,11 +7631,13 @@ enum { #define _CHICKEN_TRANS_B 0x420c4 #define _CHICKEN_TRANS_C 0x420c8 #define _CHICKEN_TRANS_EDP 0x420cc +#define _CHICKEN_TRANS_D 0x420d8 #define CHICKEN_TRANS(trans) _MMIO(_PICK((trans), \ [TRANSCODER_EDP] = _CHICKEN_TRANS_EDP, \ [TRANSCODER_A] = _CHICKEN_TRANS_A, \ [TRANSCODER_B] = _CHICKEN_TRANS_B, \ - [TRANSCODER_C] = _CHICKEN_TRANS_C)) + [TRANSCODER_C] = _CHICKEN_TRANS_C, \ + [TRANSCODER_D] = _CHICKEN_TRANS_D)) #define VSC_DATA_SEL_SOFTWARE_CONTROL (1 << 25) /* GLK and CNL+ */ #define DDI_TRAINING_OVERRIDE_ENABLE (1 << 19) #define DDI_TRAINING_OVERRIDE_VALUE (1 << 18) From 39f9547a339a261c74f845017654ced1de116b5f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Oct 2019 11:22:07 +0000 Subject: [PATCH 090/154] drm/i915/selftests: Measure basic throughput of blit routines We need to verify that our blitter routines perform as expected, so measure it. Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191028112207.5464-1-chris@chris-wilson.co.uk --- .../i915/gem/selftests/i915_gem_object_blt.c | 172 ++++++++++++++++++ 1 file changed, 172 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c index 9666c0aeb6de..ac40c704514e 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c @@ -3,7 +3,10 @@ * Copyright © 2019 Intel Corporation */ +#include + #include "gt/intel_gt.h" +#include "gt/intel_engine_user.h" #include "i915_selftest.h" @@ -14,6 +17,173 @@ #include "huge_gem_object.h" #include "mock_context.h" +static int wrap_ktime_compare(const void *A, const void *B) +{ + const ktime_t *a = A, *b = B; + + return ktime_compare(*a, *b); +} + +static int __perf_fill_blt(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + int inst = 0; + + do { + struct intel_engine_cs *engine; + ktime_t t[5]; + int pass; + int err; + + engine = intel_engine_lookup_user(i915, + I915_ENGINE_CLASS_COPY, + inst++); + if (!engine) + return 0; + + for (pass = 0; pass < ARRAY_SIZE(t); pass++) { + struct intel_context *ce = engine->kernel_context; + ktime_t t0, t1; + + t0 = ktime_get(); + + err = i915_gem_object_fill_blt(obj, ce, 0); + if (err) + return err; + + err = i915_gem_object_wait(obj, + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT); + if (err) + return err; + + t1 = ktime_get(); + t[pass] = ktime_sub(t1, t0); + } + + sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); + pr_info("%s: blt %zd KiB fill: %lld MiB/s\n", + engine->name, + obj->base.size >> 10, + div64_u64(mul_u32_u32(4 * obj->base.size, + 1000 * 1000 * 1000), + t[1] + 2 * t[2] + t[3]) >> 20); + } while (1); +} + +static int perf_fill_blt(void *arg) +{ + struct drm_i915_private *i915 = arg; + static const unsigned long sizes[] = { + SZ_4K, + SZ_64K, + SZ_2M, + SZ_64M + }; + int i; + + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_internal(i915, sizes[i]); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = __perf_fill_blt(obj); + i915_gem_object_put(obj); + if (err) + return err; + } + + return 0; +} + +static int __perf_copy_blt(struct drm_i915_gem_object *src, + struct drm_i915_gem_object *dst) +{ + struct drm_i915_private *i915 = to_i915(src->base.dev); + int inst = 0; + + do { + struct intel_engine_cs *engine; + ktime_t t[5]; + int pass; + + engine = intel_engine_lookup_user(i915, + I915_ENGINE_CLASS_COPY, + inst++); + if (!engine) + return 0; + + for (pass = 0; pass < ARRAY_SIZE(t); pass++) { + struct intel_context *ce = engine->kernel_context; + ktime_t t0, t1; + int err; + + t0 = ktime_get(); + + err = i915_gem_object_copy_blt(src, dst, ce); + if (err) + return err; + + err = i915_gem_object_wait(dst, + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT); + if (err) + return err; + + t1 = ktime_get(); + t[pass] = ktime_sub(t1, t0); + } + + sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); + pr_info("%s: blt %zd KiB copy: %lld MiB/s\n", + engine->name, + src->base.size >> 10, + div64_u64(mul_u32_u32(4 * src->base.size, + 1000 * 1000 * 1000), + t[1] + 2 * t[2] + t[3]) >> 20); + } while (1); +} + +static int perf_copy_blt(void *arg) +{ + struct drm_i915_private *i915 = arg; + static const unsigned long sizes[] = { + SZ_4K, + SZ_64K, + SZ_2M, + SZ_64M + }; + int i; + + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + struct drm_i915_gem_object *src, *dst; + int err; + + src = i915_gem_object_create_internal(i915, sizes[i]); + if (IS_ERR(src)) + return PTR_ERR(src); + + dst = i915_gem_object_create_internal(i915, sizes[i]); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + goto err_src; + } + + err = __perf_copy_blt(src, dst); + + i915_gem_object_put(dst); +err_src: + i915_gem_object_put(src); + if (err) + return err; + } + + return 0; +} + struct igt_thread_arg { struct drm_i915_private *i915; struct rnd_state prng; @@ -335,6 +505,8 @@ static int igt_copy_blt(void *arg) int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { + SUBTEST(perf_fill_blt), + SUBTEST(perf_copy_blt), SUBTEST(igt_fill_blt), SUBTEST(igt_copy_blt), }; From 1f9f6353e8b83676bbf23c2bbb07d447563e5e83 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 27 Oct 2019 22:58:04 +0000 Subject: [PATCH 091/154] drm/i915/selftests: Drop global engine lookup for gt selftests As we are inside the gt, we have a local gt->engine[] lookup we should be using in preference over the i915->engine[] copy. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191027225808.19437-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index ba761fcf397b..b7207b488391 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -1016,7 +1016,7 @@ static int igt_reset_wait(void *arg) { struct intel_gt *gt = arg; struct i915_gpu_error *global = >->i915->gpu_error; - struct intel_engine_cs *engine = gt->i915->engine[RCS0]; + struct intel_engine_cs *engine = gt->engine[RCS0]; struct i915_request *rq; unsigned int reset_count; struct hang h; @@ -1143,7 +1143,7 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, int (*fn)(void *), unsigned int flags) { - struct intel_engine_cs *engine = gt->i915->engine[RCS0]; + struct intel_engine_cs *engine = gt->engine[RCS0]; struct drm_i915_gem_object *obj; struct task_struct *tsk = NULL; struct i915_request *rq; @@ -1493,7 +1493,7 @@ static int igt_handle_error(void *arg) { struct intel_gt *gt = arg; struct i915_gpu_error *global = >->i915->gpu_error; - struct intel_engine_cs *engine = gt->i915->engine[RCS0]; + struct intel_engine_cs *engine = gt->engine[RCS0]; struct hang h; struct i915_request *rq; struct i915_gpu_state *error; From 52aac377e782d0e082fba76ede20ad00000a768b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 27 Oct 2019 22:58:06 +0000 Subject: [PATCH 092/154] drm/i915/selftests: Check all blitter engines for client blt Check all user accessible engines that can blit work with our blitter client. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191027225808.19437-3-chris@chris-wilson.co.uk --- .../i915/gem/selftests/i915_gem_client_blt.c | 30 ++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index d8804a847945..da8edee4fe0a 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -5,6 +5,7 @@ #include "i915_selftest.h" +#include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "selftests/igt_flush_test.h" @@ -12,10 +13,9 @@ #include "huge_gem_object.h" #include "mock_context.h" -static int igt_client_fill(void *arg) +static int __igt_client_fill(struct intel_engine_cs *engine) { - struct drm_i915_private *i915 = arg; - struct intel_context *ce = i915->engine[BCS0]->kernel_context; + struct intel_context *ce = engine->kernel_context; struct drm_i915_gem_object *obj; struct rnd_state prng; IGT_TIMEOUT(end); @@ -37,7 +37,7 @@ static int igt_client_fill(void *arg) pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, phys_sz, sz, val); - obj = huge_gem_object(i915, phys_sz, sz); + obj = huge_gem_object(engine->i915, phys_sz, sz); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto err_flush; @@ -103,6 +103,28 @@ err_flush: return err; } +static int igt_client_fill(void *arg) +{ + int inst = 0; + + do { + struct intel_engine_cs *engine; + int err; + + engine = intel_engine_lookup_user(arg, + I915_ENGINE_CLASS_COPY, + inst++); + if (!engine) + return 0; + + err = __igt_client_fill(engine); + if (err == -ENOMEM) + err = 0; + if (err) + return err; + } while (1); +} + int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { From 96815f3d8b50542ab168d94f018461cfae93eae1 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 24 Oct 2019 10:56:08 +0300 Subject: [PATCH 093/154] drm/i915/bios: add compression parameter block definition Add definition for block 56, the compression parameters. v2: add missing slice_height (Vandita) Cc: Vandita Kulkarni Reviewed-by: Vandita Kulkarni Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20191024075608.11511-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_vbt_defs.h | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h index e3045ced4bfe..69a7cb1fa121 100644 --- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h @@ -114,6 +114,7 @@ enum bdb_block_id { BDB_LVDS_POWER = 44, BDB_MIPI_CONFIG = 52, BDB_MIPI_SEQUENCE = 53, + BDB_COMPRESSION_PARAMETERS = 56, BDB_SKIP = 254, /* VBIOS private block, ignore */ }; @@ -811,4 +812,55 @@ struct bdb_mipi_sequence { u8 data[0]; /* up to 6 variable length blocks */ } __packed; +/* + * Block 56 - Compression Parameters + */ + +#define VBT_RC_BUFFER_BLOCK_SIZE_1KB 0 +#define VBT_RC_BUFFER_BLOCK_SIZE_4KB 1 +#define VBT_RC_BUFFER_BLOCK_SIZE_16KB 2 +#define VBT_RC_BUFFER_BLOCK_SIZE_64KB 3 + +#define VBT_DSC_LINE_BUFFER_DEPTH(vbt_value) ((vbt_value) + 8) /* bits */ +#define VBT_DSC_MAX_BPP(vbt_value) (6 + (vbt_value) * 2) + +struct dsc_compression_parameters_entry { + u8 version_major:4; + u8 version_minor:4; + + u8 rc_buffer_block_size:2; + u8 reserved1:6; + + /* + * Buffer size in bytes: + * + * 4 ^ rc_buffer_block_size * 1024 * (rc_buffer_size + 1) bytes + */ + u8 rc_buffer_size; + u32 slices_per_line; + + u8 line_buffer_depth:4; + u8 reserved2:4; + + /* Flag Bits 1 */ + u8 block_prediction_enable:1; + u8 reserved3:7; + + u8 max_bpp; /* mapping */ + + /* Color depth capabilities */ + u8 reserved4:1; + u8 support_8bpc:1; + u8 support_10bpc:1; + u8 support_12bpc:1; + u8 reserved5:4; + + u16 slice_height; +} __packed; + +struct bdb_compression_parameters { + u16 entry_size; + struct dsc_compression_parameters_entry data[16]; +} __packed; + #endif /* _INTEL_VBT_DEFS_H_ */ From c8c197d42646c2a9ebc0ff114b4e916d12208d09 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 27 Oct 2019 22:58:07 +0000 Subject: [PATCH 094/154] drm/i915/selftests: Use a random engine for GEM coherency tests Select a random user accessible engine for checking coherency results. While we should check all engines, we use a random selection so that over repeated runs we cover all. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191027225808.19437-4-chris@chris-wilson.co.uk --- .../i915/gem/selftests/i915_gem_coherency.c | 168 ++++++++++-------- 1 file changed, 89 insertions(+), 79 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 0877ef4dff63..0a195e5b98e6 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -13,9 +13,12 @@ #include "i915_selftest.h" #include "selftests/i915_random.h" -static int cpu_set(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 v) +struct context { + struct drm_i915_gem_object *obj; + struct intel_engine_cs *engine; +}; + +static int cpu_set(struct context *ctx, unsigned long offset, u32 v) { unsigned int needs_clflush; struct page *page; @@ -23,11 +26,11 @@ static int cpu_set(struct drm_i915_gem_object *obj, u32 *cpu; int err; - err = i915_gem_object_prepare_write(obj, &needs_clflush); + err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush); if (err) return err; - page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT); map = kmap_atomic(page); cpu = map + offset_in_page(offset); @@ -40,14 +43,12 @@ static int cpu_set(struct drm_i915_gem_object *obj, drm_clflush_virt_range(cpu, sizeof(*cpu)); kunmap_atomic(map); - i915_gem_object_finish_access(obj); + i915_gem_object_finish_access(ctx->obj); return 0; } -static int cpu_get(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 *v) +static int cpu_get(struct context *ctx, unsigned long offset, u32 *v) { unsigned int needs_clflush; struct page *page; @@ -55,11 +56,11 @@ static int cpu_get(struct drm_i915_gem_object *obj, u32 *cpu; int err; - err = i915_gem_object_prepare_read(obj, &needs_clflush); + err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush); if (err) return err; - page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT); map = kmap_atomic(page); cpu = map + offset_in_page(offset); @@ -69,26 +70,24 @@ static int cpu_get(struct drm_i915_gem_object *obj, *v = *cpu; kunmap_atomic(map); - i915_gem_object_finish_access(obj); + i915_gem_object_finish_access(ctx->obj); return 0; } -static int gtt_set(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 v) +static int gtt_set(struct context *ctx, unsigned long offset, u32 v) { struct i915_vma *vma; u32 __iomem *map; int err = 0; - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_gtt_domain(obj, true); - i915_gem_object_unlock(obj); + i915_gem_object_lock(ctx->obj); + err = i915_gem_object_set_to_gtt_domain(ctx->obj, true); + i915_gem_object_unlock(ctx->obj); if (err) return err; - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -109,21 +108,19 @@ out_rpm: return err; } -static int gtt_get(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 *v) +static int gtt_get(struct context *ctx, unsigned long offset, u32 *v) { struct i915_vma *vma; u32 __iomem *map; int err = 0; - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_gtt_domain(obj, false); - i915_gem_object_unlock(obj); + i915_gem_object_lock(ctx->obj); + err = i915_gem_object_set_to_gtt_domain(ctx->obj, false); + i915_gem_object_unlock(ctx->obj); if (err) return err; - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -144,73 +141,66 @@ out_rpm: return err; } -static int wc_set(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 v) +static int wc_set(struct context *ctx, unsigned long offset, u32 v) { u32 *map; int err; - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_wc_domain(obj, true); - i915_gem_object_unlock(obj); + i915_gem_object_lock(ctx->obj); + err = i915_gem_object_set_to_wc_domain(ctx->obj, true); + i915_gem_object_unlock(ctx->obj); if (err) return err; - map = i915_gem_object_pin_map(obj, I915_MAP_WC); + map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC); if (IS_ERR(map)) return PTR_ERR(map); map[offset / sizeof(*map)] = v; - i915_gem_object_unpin_map(obj); + i915_gem_object_unpin_map(ctx->obj); return 0; } -static int wc_get(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 *v) +static int wc_get(struct context *ctx, unsigned long offset, u32 *v) { u32 *map; int err; - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_wc_domain(obj, false); - i915_gem_object_unlock(obj); + i915_gem_object_lock(ctx->obj); + err = i915_gem_object_set_to_wc_domain(ctx->obj, false); + i915_gem_object_unlock(ctx->obj); if (err) return err; - map = i915_gem_object_pin_map(obj, I915_MAP_WC); + map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC); if (IS_ERR(map)) return PTR_ERR(map); *v = map[offset / sizeof(*map)]; - i915_gem_object_unpin_map(obj); + i915_gem_object_unpin_map(ctx->obj); return 0; } -static int gpu_set(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 v) +static int gpu_set(struct context *ctx, unsigned long offset, u32 v) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_request *rq; struct i915_vma *vma; u32 *cs; int err; - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_gtt_domain(obj, true); - i915_gem_object_unlock(obj); + i915_gem_object_lock(ctx->obj); + err = i915_gem_object_set_to_gtt_domain(ctx->obj, true); + i915_gem_object_unlock(ctx->obj); if (err) return err; - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0); if (IS_ERR(vma)) return PTR_ERR(vma); - rq = i915_request_create(i915->engine[RCS0]->kernel_context); + rq = i915_request_create(ctx->engine->kernel_context); if (IS_ERR(rq)) { i915_vma_unpin(vma); return PTR_ERR(rq); @@ -223,12 +213,12 @@ static int gpu_set(struct drm_i915_gem_object *obj, return PTR_ERR(cs); } - if (INTEL_GEN(i915) >= 8) { + if (INTEL_GEN(ctx->engine->i915) >= 8) { *cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; *cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset); *cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset); *cs++ = v; - } else if (INTEL_GEN(i915) >= 4) { + } else if (INTEL_GEN(ctx->engine->i915) >= 4) { *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = 0; *cs++ = i915_ggtt_offset(vma) + offset; @@ -253,32 +243,29 @@ static int gpu_set(struct drm_i915_gem_object *obj, return err; } -static bool always_valid(struct drm_i915_private *i915) +static bool always_valid(struct context *ctx) { return true; } -static bool needs_fence_registers(struct drm_i915_private *i915) +static bool needs_fence_registers(struct context *ctx) { - return !intel_gt_is_wedged(&i915->gt); + return !intel_gt_is_wedged(ctx->engine->gt); } -static bool needs_mi_store_dword(struct drm_i915_private *i915) +static bool needs_mi_store_dword(struct context *ctx) { - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(ctx->engine->gt)) return false; - if (!HAS_ENGINE(i915, RCS0)) - return false; - - return intel_engine_can_store_dword(i915->engine[RCS0]); + return intel_engine_can_store_dword(ctx->engine); } static const struct igt_coherency_mode { const char *name; - int (*set)(struct drm_i915_gem_object *, unsigned long offset, u32 v); - int (*get)(struct drm_i915_gem_object *, unsigned long offset, u32 *v); - bool (*valid)(struct drm_i915_private *i915); + int (*set)(struct context *ctx, unsigned long offset, u32 v); + int (*get)(struct context *ctx, unsigned long offset, u32 *v); + bool (*valid)(struct context *ctx); } igt_coherency_mode[] = { { "cpu", cpu_set, cpu_get, always_valid }, { "gtt", gtt_set, gtt_get, needs_fence_registers }, @@ -287,18 +274,37 @@ static const struct igt_coherency_mode { { }, }; +static struct intel_engine_cs * +random_engine(struct drm_i915_private *i915, struct rnd_state *prng) +{ + struct intel_engine_cs *engine; + unsigned int count; + + count = 0; + for_each_uabi_engine(engine, i915) + count++; + + count = i915_prandom_u32_max_state(count, prng); + for_each_uabi_engine(engine, i915) + if (count-- == 0) + return engine; + + return NULL; +} + static int igt_gem_coherency(void *arg) { const unsigned int ncachelines = PAGE_SIZE/64; - I915_RND_STATE(prng); struct drm_i915_private *i915 = arg; const struct igt_coherency_mode *read, *write, *over; - struct drm_i915_gem_object *obj; unsigned long count, n; u32 *offsets, *values; + I915_RND_STATE(prng); + struct context ctx; int err = 0; - /* We repeatedly write, overwrite and read from a sequence of + /* + * We repeatedly write, overwrite and read from a sequence of * cachelines in order to try and detect incoherency (unflushed writes * from either the CPU or GPU). Each setter/getter uses our cache * domain API which should prevent incoherency. @@ -312,31 +318,35 @@ static int igt_gem_coherency(void *arg) values = offsets + ncachelines; + ctx.engine = random_engine(i915, &prng); + GEM_BUG_ON(!ctx.engine); + pr_info("%s: using %s\n", __func__, ctx.engine->name); + for (over = igt_coherency_mode; over->name; over++) { if (!over->set) continue; - if (!over->valid(i915)) + if (!over->valid(&ctx)) continue; for (write = igt_coherency_mode; write->name; write++) { if (!write->set) continue; - if (!write->valid(i915)) + if (!write->valid(&ctx)) continue; for (read = igt_coherency_mode; read->name; read++) { if (!read->get) continue; - if (!read->valid(i915)) + if (!read->valid(&ctx)) continue; for_each_prime_number_from(count, 1, ncachelines) { - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); + ctx.obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(ctx.obj)) { + err = PTR_ERR(ctx.obj); goto free; } @@ -345,7 +355,7 @@ static int igt_gem_coherency(void *arg) values[n] = prandom_u32_state(&prng); for (n = 0; n < count; n++) { - err = over->set(obj, offsets[n], ~values[n]); + err = over->set(&ctx, offsets[n], ~values[n]); if (err) { pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n", n, count, over->name, err); @@ -354,7 +364,7 @@ static int igt_gem_coherency(void *arg) } for (n = 0; n < count; n++) { - err = write->set(obj, offsets[n], values[n]); + err = write->set(&ctx, offsets[n], values[n]); if (err) { pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n", n, count, write->name, err); @@ -365,7 +375,7 @@ static int igt_gem_coherency(void *arg) for (n = 0; n < count; n++) { u32 found; - err = read->get(obj, offsets[n], &found); + err = read->get(&ctx, offsets[n], &found); if (err) { pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n", n, count, read->name, err); @@ -383,7 +393,7 @@ static int igt_gem_coherency(void *arg) } } - i915_gem_object_put(obj); + i915_gem_object_put(ctx.obj); } } } @@ -393,7 +403,7 @@ free: return err; put_object: - i915_gem_object_put(obj); + i915_gem_object_put(ctx.obj); goto free; } From 5a3e2b82af47ce43b7ea975c86d36b6f1226025f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 27 Oct 2019 17:55:05 +0000 Subject: [PATCH 095/154] drm/i915/gt: Tidy up rps irq handler to use intel_gt Since the rps is tied to its intel_gt, use that backpointer to find the right engine rather than delving into i915. Signed-off-by: Chris Wilson Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20191027175505.25470-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_rps.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 032a0c6389f9..f6de19456c54 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1555,11 +1555,9 @@ void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) { - struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_gt *gt = rps_to_gt(rps); if (pm_iir & rps->pm_events) { - struct intel_gt *gt = rps_to_gt(rps); - spin_lock(>->irq_lock); gen6_gt_pm_mask_irq(gt, pm_iir & rps->pm_events); rps->pm_iir |= pm_iir & rps->pm_events; @@ -1567,11 +1565,11 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) spin_unlock(>->irq_lock); } - if (INTEL_GEN(i915) >= 8) + if (INTEL_GEN(gt->i915) >= 8) return; if (pm_iir & PM_VEBOX_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(i915->engine[VECS0]); + intel_engine_breadcrumbs_irq(gt->engine[VECS0]); if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); From 6804da20bb549e3dbe5033b3c7837c23260add5b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 27 Oct 2019 22:58:08 +0000 Subject: [PATCH 096/154] drm/i915/selftests: Select a random engine for testing memory regions Use any blitter engine at random for prefilling the memory region. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191027225808.19437-5-chris@chris-wilson.co.uk --- .../drm/i915/selftests/intel_memory_region.c | 29 +++++++++++++++++-- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 8bc6fadd14fb..19e1cca8f143 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -17,6 +17,7 @@ #include "gem/i915_gem_object_blt.h" #include "gem/selftests/igt_gem_utils.h" #include "gem/selftests/mock_context.h" +#include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "selftests/igt_flush_test.h" #include "selftests/i915_random.h" @@ -442,6 +443,25 @@ out_file: return err; } +static struct intel_engine_cs * +random_engine_class(struct drm_i915_private *i915, + unsigned int class, + struct rnd_state *prng) +{ + struct intel_engine_cs *engine; + unsigned int count; + + count = 0; + for (engine = intel_engine_lookup_user(i915, class, 0); + engine && engine->uabi_class == class; + engine = rb_entry_safe(rb_next(&engine->uabi_node), + typeof(*engine), uabi_node)) + count++; + + count = i915_prandom_u32_max_state(count, prng); + return intel_engine_lookup_user(i915, class, count); +} + static int igt_lmem_write_cpu(void *arg) { struct drm_i915_private *i915 = arg; @@ -458,6 +478,7 @@ static int igt_lmem_write_cpu(void *arg) PAGE_SIZE - sizeof(u64), PAGE_SIZE - 64, }; + struct intel_engine_cs *engine; u32 *vaddr; u32 sz; u32 i; @@ -465,9 +486,12 @@ static int igt_lmem_write_cpu(void *arg) int count; int err; - if (!HAS_ENGINE(i915, BCS0)) + engine = random_engine_class(i915, I915_ENGINE_CLASS_COPY, &prng); + if (!engine) return 0; + pr_info("%s: using %s\n", __func__, engine->name); + sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE); sz = max_t(u32, 2 * PAGE_SIZE, sz); @@ -482,8 +506,7 @@ static int igt_lmem_write_cpu(void *arg) } /* Put the pages into a known state -- from the gpu for added fun */ - err = i915_gem_object_fill_blt(obj, i915->engine[BCS0]->kernel_context, - 0xdeadbeaf); + err = i915_gem_object_fill_blt(obj, engine->kernel_context, 0xdeadbeaf); if (err) goto out_unpin; From 746078b33492a7429ff2a0d8f0474b27005152d7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 27 Oct 2019 22:58:05 +0000 Subject: [PATCH 097/154] drm/i915/selftests: Exercise adjusting rpcs over all render-class engines Iterate over all user-accessible render engines when checking whether they can be adjusted for sseu. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191027225808.19437-2-chris@chris-wilson.co.uk --- .../drm/i915/gem/selftests/i915_gem_context.c | 127 +++++++++--------- 1 file changed, 61 insertions(+), 66 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 2c65c9e95cc5..d1d873f23338 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -1186,93 +1186,90 @@ __igt_ctx_sseu(struct drm_i915_private *i915, const char *name, unsigned int flags) { - struct intel_engine_cs *engine = i915->engine[RCS0]; struct drm_i915_gem_object *obj; - struct i915_gem_context *ctx; - struct intel_context *ce; - struct intel_sseu pg_sseu; - struct drm_file *file; + int inst = 0; int ret; - if (INTEL_GEN(i915) < 9 || !engine) + if (INTEL_GEN(i915) < 9 || !RUNTIME_INFO(i915)->sseu.has_slice_pg) return 0; - if (!RUNTIME_INFO(i915)->sseu.has_slice_pg) - return 0; - - if (hweight32(engine->sseu.slice_mask) < 2) - return 0; - - /* - * Gen11 VME friendly power-gated configuration with half enabled - * sub-slices. - */ - pg_sseu = engine->sseu; - pg_sseu.slice_mask = 1; - pg_sseu.subslice_mask = - ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2)); - - pr_info("SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", - name, flags, hweight32(engine->sseu.slice_mask), - hweight32(pg_sseu.slice_mask)); - - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - if (flags & TEST_RESET) igt_global_reset_lock(&i915->gt); - ctx = live_context(i915, file); - if (IS_ERR(ctx)) { - ret = PTR_ERR(ctx); - goto out_unlock; - } - i915_gem_context_clear_bannable(ctx); /* to reset and beyond! */ - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) { ret = PTR_ERR(obj); goto out_unlock; } - ce = i915_gem_context_get_engine(ctx, RCS0); - if (IS_ERR(ce)) { - ret = PTR_ERR(ce); - goto out_put; - } + do { + struct intel_engine_cs *engine; + struct intel_context *ce; + struct intel_sseu pg_sseu; - ret = intel_context_pin(ce); - if (ret) - goto out_context; + engine = intel_engine_lookup_user(i915, + I915_ENGINE_CLASS_RENDER, + inst++); + if (!engine) + break; - /* First set the default mask. */ - ret = __sseu_test(name, flags, ce, obj, engine->sseu); - if (ret) - goto out_fail; + if (hweight32(engine->sseu.slice_mask) < 2) + continue; - /* Then set a power-gated configuration. */ - ret = __sseu_test(name, flags, ce, obj, pg_sseu); - if (ret) - goto out_fail; + /* + * Gen11 VME friendly power-gated configuration with + * half enabled sub-slices. + */ + pg_sseu = engine->sseu; + pg_sseu.slice_mask = 1; + pg_sseu.subslice_mask = + ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2)); - /* Back to defaults. */ - ret = __sseu_test(name, flags, ce, obj, engine->sseu); - if (ret) - goto out_fail; + pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", + engine->name, name, flags, + hweight32(engine->sseu.slice_mask), + hweight32(pg_sseu.slice_mask)); - /* One last power-gated configuration for the road. */ - ret = __sseu_test(name, flags, ce, obj, pg_sseu); - if (ret) - goto out_fail; + ce = intel_context_create(engine->kernel_context->gem_context, + engine); + if (IS_ERR(ce)) { + ret = PTR_ERR(ce); + goto out_put; + } + + ret = intel_context_pin(ce); + if (ret) + goto out_ce; + + /* First set the default mask. */ + ret = __sseu_test(name, flags, ce, obj, engine->sseu); + if (ret) + goto out_unpin; + + /* Then set a power-gated configuration. */ + ret = __sseu_test(name, flags, ce, obj, pg_sseu); + if (ret) + goto out_unpin; + + /* Back to defaults. */ + ret = __sseu_test(name, flags, ce, obj, engine->sseu); + if (ret) + goto out_unpin; + + /* One last power-gated configuration for the road. */ + ret = __sseu_test(name, flags, ce, obj, pg_sseu); + if (ret) + goto out_unpin; + +out_unpin: + intel_context_unpin(ce); +out_ce: + intel_context_put(ce); + } while (!ret); -out_fail: if (igt_flush_test(i915)) ret = -EIO; - intel_context_unpin(ce); -out_context: - intel_context_put(ce); out_put: i915_gem_object_put(obj); @@ -1280,8 +1277,6 @@ out_unlock: if (flags & TEST_RESET) igt_global_reset_unlock(&i915->gt); - mock_file_free(i915, file); - if (ret) pr_err("%s: Failed with %d!\n", name, ret); From e5df52dcf88cb9e900d6409da035a3c419e242cb Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 24 Oct 2019 10:30:23 -0700 Subject: [PATCH 098/154] drm/i915/tgl: Handle AUX interrupts for TC ports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're currently only processing AUX interrupts on the combo ports; make sure we handle the TC ports as well. v2: Drop stale comment Fixes: f663769a5eef ("drm/i915/tgl: initialize TC and TBT ports") Cc: José Roberto de Souza Cc: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20191024173023.22113-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 10 ++++++++-- drivers/gpu/drm/i915/i915_reg.h | 6 ++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 0a3861c873ee..dae00f7dd7df 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2161,10 +2161,16 @@ static u32 gen8_de_port_aux_mask(struct drm_i915_private *dev_priv) u32 mask; if (INTEL_GEN(dev_priv) >= 12) - /* TODO: Add AUX entries for USBC */ return TGL_DE_PORT_AUX_DDIA | TGL_DE_PORT_AUX_DDIB | - TGL_DE_PORT_AUX_DDIC; + TGL_DE_PORT_AUX_DDIC | + TGL_DE_PORT_AUX_USBC1 | + TGL_DE_PORT_AUX_USBC2 | + TGL_DE_PORT_AUX_USBC3 | + TGL_DE_PORT_AUX_USBC4 | + TGL_DE_PORT_AUX_USBC5 | + TGL_DE_PORT_AUX_USBC6; + mask = GEN8_AUX_CHANNEL_A; if (INTEL_GEN(dev_priv) >= 9) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 60dfad33f9ee..fb33b164ce55 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7439,6 +7439,12 @@ enum { #define GEN8_PORT_DP_A_HOTPLUG (1 << 3) #define BXT_DE_PORT_GMBUS (1 << 1) #define GEN8_AUX_CHANNEL_A (1 << 0) +#define TGL_DE_PORT_AUX_USBC6 (1 << 13) +#define TGL_DE_PORT_AUX_USBC5 (1 << 12) +#define TGL_DE_PORT_AUX_USBC4 (1 << 11) +#define TGL_DE_PORT_AUX_USBC3 (1 << 10) +#define TGL_DE_PORT_AUX_USBC2 (1 << 9) +#define TGL_DE_PORT_AUX_USBC1 (1 << 8) #define TGL_DE_PORT_AUX_DDIC (1 << 2) #define TGL_DE_PORT_AUX_DDIB (1 << 1) #define TGL_DE_PORT_AUX_DDIA (1 << 0) From 13670f4ce9169ddc6793e243635f8400e74bbbf7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Oct 2019 12:18:03 +0000 Subject: [PATCH 099/154] drm/i915/selftests: Check a few more fixed locations within the context image As we use hard coded offsets for a few locations within the context image, include those in the selftests to assert that they are valid. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20191028121803.29408-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 27 +++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index ba7fc4397bd9..9507d750ae14 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -3194,10 +3194,35 @@ static int live_lrc_fixed(void *arg) u32 offset; const char *name; } tbl[] = { + { + i915_mmio_reg_offset(RING_START(engine->mmio_base)), + CTX_RING_BUFFER_START - 1, + "RING_START" + }, + { + i915_mmio_reg_offset(RING_CTL(engine->mmio_base)), + CTX_RING_BUFFER_CONTROL - 1, + "RING_CTL" + }, + { + i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)), + CTX_RING_HEAD - 1, + "RING_HEAD" + }, + { + i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)), + CTX_RING_TAIL - 1, + "RING_TAIL" + }, { i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)), lrc_ring_mi_mode(engine), - "RING_MI_MODE", + "RING_MI_MODE" + }, + { + engine->mmio_base + 0x110, + CTX_BB_STATE - 1, + "BB_STATE" }, { }, }, *t; From a7f328fc789817a6a0e5c46411956810d5ee00ca Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Oct 2019 12:41:25 +0000 Subject: [PATCH 100/154] drm/i915/execlists: Simply walk back along request timeline on reset The request's timeline will only contain requests from this context, in order of execution. Therefore, we can simply look back along this timeline to find the currently executing request. If we do find that the current context has completed its last request, that does not imply that all requests are completed in the context, so only advance the ring->head up to the end of the known completions! Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20191028124125.25176-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 34 ++++++++++++----------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 16340740139d..c153d83511b7 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -250,24 +250,16 @@ static void mark_eio(struct i915_request *rq) i915_request_mark_complete(rq); } -static struct i915_request *active_request(struct i915_request *rq) +static struct i915_request * +active_request(const struct intel_timeline * const tl, struct i915_request *rq) { - const struct intel_context * const ce = rq->hw_context; - struct i915_request *active = NULL; - struct list_head *list; - - if (!i915_request_is_active(rq)) /* unwound, but incomplete! */ - return rq; + struct i915_request *active = rq; rcu_read_lock(); - list = &rcu_dereference(rq->timeline)->requests; - list_for_each_entry_from_reverse(rq, list, link) { + list_for_each_entry_continue_reverse(rq, &tl->requests, link) { if (i915_request_completed(rq)) break; - if (rq->hw_context != ce) - break; - active = rq; } rcu_read_unlock(); @@ -1073,6 +1065,7 @@ static void reset_active(struct i915_request *rq, struct intel_engine_cs *engine) { struct intel_context * const ce = rq->hw_context; + u32 head; /* * The executing context has been cancelled. We want to prevent @@ -1093,11 +1086,11 @@ static void reset_active(struct i915_request *rq, __func__, engine->name, rq->fence.context, rq->fence.seqno); /* On resubmission of the active request, payload will be scrubbed */ - rq = active_request(rq); - if (rq) - ce->ring->head = intel_ring_wrap(ce->ring, rq->head); + if (i915_request_completed(rq)) + head = rq->tail; else - ce->ring->head = ce->ring->tail; + head = active_request(ce->timeline, rq)->head; + ce->ring->head = intel_ring_wrap(ce->ring, head); intel_ring_update_space(ce->ring); /* Scrub the context image to prevent replaying the previous batch */ @@ -2990,16 +2983,17 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) ce = rq->hw_context; GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); - rq = active_request(rq); - if (!rq) { + if (i915_request_completed(rq)) { /* Idle context; tidy up the ring so we can restart afresh */ - ce->ring->head = ce->ring->tail; + ce->ring->head = intel_ring_wrap(ce->ring, rq->tail); goto out_replay; } /* Context has requests still in-flight; it should not be idle! */ GEM_BUG_ON(i915_active_is_idle(&ce->active)); + rq = active_request(ce->timeline, rq); ce->ring->head = intel_ring_wrap(ce->ring, rq->head); + GEM_BUG_ON(ce->ring->head == ce->ring->tail); /* * If this request hasn't started yet, e.g. it is waiting on a @@ -3043,7 +3037,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) restore_default_state(ce, engine); out_replay: - GEM_TRACE("%s replay {head:%04x, tail:%04x\n", + GEM_TRACE("%s replay {head:%04x, tail:%04x}\n", engine->name, ce->ring->head, ce->ring->tail); intel_ring_update_space(ce->ring); __execlists_reset_reg_state(ce, engine); From 370831fcb128f9106e75f13b340bb241fc886242 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 25 Oct 2019 14:59:43 +0100 Subject: [PATCH 101/154] drm/i915/selftests: Initialise err in case there are no engines! drivers/gpu/drm/i915//gt/selftest_engine_heartbeat.c:255 live_heartbeat_fast() error: uninitialized symbol 'err'. drivers/gpu/drm/i915//gt/selftest_engine_heartbeat.c:320 live_heartbeat_off() error: uninitialized symbol 'err'. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20191025135943.12524-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index 768f032e6578..155c508024df 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -240,7 +240,7 @@ static int live_heartbeat_fast(void *arg) struct intel_gt *gt = arg; struct intel_engine_cs *engine; enum intel_engine_id id; - int err; + int err = 0; /* Check that the heartbeat ticks at the desired rate. */ if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) @@ -302,7 +302,7 @@ static int live_heartbeat_off(void *arg) struct intel_gt *gt = arg; struct intel_engine_cs *engine; enum intel_engine_id id; - int err; + int err = 0; /* Check that we can turn off heartbeat and not interrupt VIP */ if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) From e7f536000c4c293f4abae3adc25c7442386c5ab2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Oct 2019 14:26:51 +0000 Subject: [PATCH 102/154] drm/i915/selftests: Initialise ret Keep smatch quiet, drivers/gpu/drm/i915//gem/selftests/i915_gem_context.c:1268 __igt_ctx_sseu() error: uninitialized symbol 'ret'. drivers/gpu/drm/i915//gem/selftests/i915_gem_context.c:1280 __igt_ctx_sseu() error: uninitialized symbol 'ret'. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20191028142652.1987-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index d1d873f23338..c6e61564bb5e 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -1188,7 +1188,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915, { struct drm_i915_gem_object *obj; int inst = 0; - int ret; + int ret = 0; if (INTEL_GEN(i915) < 9 || !RUNTIME_INFO(i915)->sseu.has_slice_pg) return 0; From f9d9fece29b8b5cd4943eccd7bd73b6241d424fa Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Oct 2019 14:26:52 +0000 Subject: [PATCH 103/154] drm/i915/display: Mark conn as initialised by iterator smatch complains about drivers/gpu/drm/i915//display/intel_display.c:14403 intel_set_dp_tp_ctl_normal() error: uninitialized symbol 'conn'. because it has no way to determine that the loop must have an entry. Tell the static analysers to ignore the local, it will always be set. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20191028142652.1987-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/display/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 0f0c582a56d5..9dce2e9e5376 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -14391,8 +14391,8 @@ static void intel_crtc_enable_trans_port_sync(struct intel_crtc *crtc, static void intel_set_dp_tp_ctl_normal(struct intel_crtc *crtc, struct intel_atomic_state *state) { + struct drm_connector *uninitialized_var(conn); struct drm_connector_state *conn_state; - struct drm_connector *conn; struct intel_dp *intel_dp; int i; From 19c17b763f0598baa72210dd3e5235ca243f0b6c Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 28 Oct 2019 16:45:20 +0000 Subject: [PATCH 104/154] drm/i915/execlists: Use vfunc to check engine submission mode While processing CSB there is no need to look at GuC submission settings, just check if engine is configured for execlists mode. While today GuC submission is disabled it's settings are still based on modparam values that might not correctly reflect actual submission status in case of any fallback. Until that is fully fixed, use alternate method to confirm that engine really runs in execlists mode by comparing set_default_submission vfunc. v2: add other immediate use of new helper Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Cc: Janusz Krzysztofik Reviewed-by: Janusz Krzysztofik Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191028164520.31772-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/gt/intel_lrc.c | 9 ++++++++- drivers/gpu/drm/i915/gt/intel_lrc.h | 3 +++ drivers/gpu/drm/i915/i915_perf.c | 10 +++++----- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index c153d83511b7..2f474c1f5c54 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2015,7 +2015,7 @@ static void process_csb(struct intel_engine_cs *engine) */ GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) && !reset_in_progress(execlists)); - GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915)); + GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine)); /* * Note that csb_write, csb_status may be either in HWSP or mmio. @@ -4705,6 +4705,13 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, __execlists_update_reg_state(ce, engine); } +bool +intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine) +{ + return engine->set_default_submission == + intel_execlists_set_default_submission; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_lrc.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index 99dc576a4e25..7860787cb856 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -145,4 +145,7 @@ struct intel_engine_cs * intel_virtual_engine_get_sibling(struct intel_engine_cs *engine, unsigned int sibling); +bool +intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine); + #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 38d3de2dfaa6..a807b6f0dfa3 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1261,7 +1261,11 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) case 8: case 9: case 10: - if (USES_GUC_SUBMISSION(ce->engine->i915)) { + if (intel_engine_in_execlists_submission_mode(ce->engine)) { + stream->specific_ctx_id_mask = + (1U << GEN8_CTX_ID_WIDTH) - 1; + stream->specific_ctx_id = stream->specific_ctx_id_mask; + } else { /* * When using GuC, the context descriptor we write in * i915 is read by GuC and rewritten before it's @@ -1281,10 +1285,6 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) */ stream->specific_ctx_id_mask = (1U << (GEN8_CTX_ID_WIDTH - 1)) - 1; - } else { - stream->specific_ctx_id_mask = - (1U << GEN8_CTX_ID_WIDTH) - 1; - stream->specific_ctx_id = stream->specific_ctx_id_mask; } break; From 953d57eba5192aab638f2d102e0ee7af0c7b970c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Oct 2019 20:30:12 +0000 Subject: [PATCH 105/154] drm/i915/gem: Limit the blitter sizes to ensure low preemption latency Currently we insert a arbitration point every 128MiB during a blitter copy. At 8GiB/s, this is around 30ms. This is a little on the large side if we need to inject a high priority work, so reduced it down to 8MiB or roughly 1ms. v2: Don't forget both fill/copy. Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191028203012.14566-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_object_blt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index 516e61e99212..51acffd31575 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -17,7 +17,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, u32 value) { struct drm_i915_private *i915 = ce->vm->i915; - const u32 block_size = S16_MAX * PAGE_SIZE; + const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */ struct intel_engine_pool_node *pool; struct i915_vma *batch; u64 offset; @@ -201,7 +201,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, struct i915_vma *dst) { struct drm_i915_private *i915 = ce->vm->i915; - const u32 block_size = S16_MAX * PAGE_SIZE; + const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */ struct intel_engine_pool_node *pool; struct i915_vma *batch; u64 src_offset, dst_offset; From 3df2c830bfc4d7d5d1682d382dfd853e246fc884 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 28 Oct 2019 22:03:25 +0000 Subject: [PATCH 106/154] drm/i915/blt: fixup block_size rounding There is nothing to say that the obj->base.size is actually a multiple of the block_size. v2: Use round_up() as block_size is a power-of-two Reported-by: Chris Wilson Signed-off-by: Matthew Auld Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191028220325.9325-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_object_blt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index 51acffd31575..70809d8897cd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -30,7 +30,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, GEM_BUG_ON(intel_engine_is_virtual(ce->engine)); intel_engine_pm_get(ce->engine); - count = div_u64(vma->size, block_size); + count = div_u64(round_up(vma->size, block_size), block_size); size = (1 + 8 * count) * sizeof(u32); size = round_up(size, PAGE_SIZE); pool = intel_engine_get_pool(ce->engine, size); @@ -214,7 +214,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, GEM_BUG_ON(intel_engine_is_virtual(ce->engine)); intel_engine_pm_get(ce->engine); - count = div_u64(dst->size, block_size); + count = div_u64(round_up(dst->size, block_size), block_size); size = (1 + 11 * count) * sizeof(u32); size = round_up(size, PAGE_SIZE); pool = intel_engine_get_pool(ce->engine, size); From 773ed805b5ee3c88a889622fac48ce8e3640bcd9 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 29 Oct 2019 09:58:50 +0000 Subject: [PATCH 107/154] drm/i915: define i915_ggtt_has_aperture The following patches in the series will use it to avoid certain operations when the mappable aperture is not available in HW. Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Matthew Auld Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191029095856.25431-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index f074f1de66e8..402283ce2864 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -575,6 +575,11 @@ void i915_ggtt_disable_guc(struct i915_ggtt *ggtt); int i915_init_ggtt(struct drm_i915_private *dev_priv); void i915_ggtt_driver_release(struct drm_i915_private *dev_priv); +static inline bool i915_ggtt_has_aperture(const struct i915_ggtt *ggtt) +{ + return ggtt->mappable_end > 0; +} + int i915_ppgtt_init_hw(struct intel_gt *gt); struct i915_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv); From 54b512cd7a6d5a14938e6a28335ce33942fdb2bb Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 29 Oct 2019 09:58:51 +0000 Subject: [PATCH 108/154] drm/i915: do not map aperture if it is not available. Skip both setup and cleanup of the aperture mapping if the HW doesn't have an aperture bar. Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Matthew Auld Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191029095856.25431-2-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 35 +++++++++++++++++++---------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index eebc7fee81e2..43fad44aa674 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2770,7 +2770,9 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) i915_address_space_fini(&ggtt->vm); arch_phys_wc_del(ggtt->mtrr); - io_mapping_fini(&ggtt->iomap); + + if (ggtt->iomap.size) + io_mapping_fini(&ggtt->iomap); } /** @@ -2988,6 +2990,12 @@ static void setup_private_pat(struct intel_uncore *uncore) bdw_setup_private_ppat(uncore); } +static struct resource pci_resource(struct pci_dev *pdev, int bar) +{ + return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar), + pci_resource_len(pdev, bar)); +} + static int gen8_gmch_probe(struct i915_ggtt *ggtt) { struct drm_i915_private *dev_priv = ggtt->vm.i915; @@ -2997,10 +3005,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) int err; /* TODO: We're not aware of mappable constraints on gen8 yet */ - ggtt->gmadr = - (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2), - pci_resource_len(pdev, 2)); - ggtt->mappable_end = resource_size(&ggtt->gmadr); + if (!IS_DGFX(dev_priv)) { + ggtt->gmadr = pci_resource(pdev, 2); + ggtt->mappable_end = resource_size(&ggtt->gmadr); + } err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39)); if (!err) @@ -3223,14 +3231,17 @@ static int ggtt_init_hw(struct i915_ggtt *ggtt) if (!HAS_LLC(i915) && !HAS_PPGTT(i915)) ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust; - if (!io_mapping_init_wc(&ggtt->iomap, - ggtt->gmadr.start, - ggtt->mappable_end)) { - ggtt->vm.cleanup(&ggtt->vm); - return -EIO; - } + if (ggtt->mappable_end) { + if (!io_mapping_init_wc(&ggtt->iomap, + ggtt->gmadr.start, + ggtt->mappable_end)) { + ggtt->vm.cleanup(&ggtt->vm); + return -EIO; + } - ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end); + ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, + ggtt->mappable_end); + } i915_ggtt_init_fences(ggtt); From cd20c70bb05e31a57bc5ccd10f8f188bb3508de3 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 29 Oct 2019 09:58:52 +0000 Subject: [PATCH 109/154] drm/i915: set num_fence_regs to 0 if there is no aperture We can't fence anything without aperture. Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Stuart Summers Signed-off-by: Matthew Auld Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191029095856.25431-3-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index 321189e1b0f2..71efccfde122 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -846,8 +846,10 @@ void i915_ggtt_init_fences(struct i915_ggtt *ggtt) detect_bit_6_swizzle(ggtt); - if (INTEL_GEN(i915) >= 7 && - !(IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))) + if (!i915_ggtt_has_aperture(ggtt)) + num_fences = 0; + else if (INTEL_GEN(i915) >= 7 && + !(IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))) num_fences = 32; else if (INTEL_GEN(i915) >= 4 || IS_I945G(i915) || IS_I945GM(i915) || From 895d8ebeaa92435e4214a026fa22763645564fab Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 29 Oct 2019 09:58:53 +0000 Subject: [PATCH 110/154] drm/i915: error capture with no ggtt slot If the aperture is not available in HW we can't use a ggtt slot and wc copy, so fall back to regular kmap. Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Abdiel Janulgue Signed-off-by: Matthew Auld Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191029095856.25431-4-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 19 ++++---- drivers/gpu/drm/i915/i915_gpu_error.c | 66 ++++++++++++++++++++++----- 2 files changed, 65 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 43fad44aa674..88179202c556 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2661,7 +2661,8 @@ static void ggtt_release_guc_top(struct i915_ggtt *ggtt) static void cleanup_init_ggtt(struct i915_ggtt *ggtt) { ggtt_release_guc_top(ggtt); - drm_mm_remove_node(&ggtt->error_capture); + if (drm_mm_node_allocated(&ggtt->error_capture)) + drm_mm_remove_node(&ggtt->error_capture); } static int init_ggtt(struct i915_ggtt *ggtt) @@ -2692,13 +2693,15 @@ static int init_ggtt(struct i915_ggtt *ggtt) if (ret) return ret; - /* Reserve a mappable slot for our lockless error capture */ - ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, &ggtt->error_capture, - PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, - 0, ggtt->mappable_end, - DRM_MM_INSERT_LOW); - if (ret) - return ret; + if (ggtt->mappable_end) { + /* Reserve a mappable slot for our lockless error capture */ + ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, &ggtt->error_capture, + PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, + 0, ggtt->mappable_end, + DRM_MM_INSERT_LOW); + if (ret) + return ret; + } /* * The upper portion of the GuC address space has a sizeable hole diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 9bcdcebd2948..7672d4c4552f 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -40,6 +40,7 @@ #include "display/intel_overlay.h" #include "gem/i915_gem_context.h" +#include "gem/i915_gem_lmem.h" #include "i915_drv.h" #include "i915_gpu_error.h" @@ -235,6 +236,7 @@ struct compress { struct pagevec pool; struct z_stream_s zstream; void *tmp; + bool wc; }; static bool compress_init(struct compress *c) @@ -292,7 +294,7 @@ static int compress_page(struct compress *c, struct z_stream_s *zstream = &c->zstream; zstream->next_in = src; - if (c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE)) + if (c->wc && c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE)) zstream->next_in = c->tmp; zstream->avail_in = PAGE_SIZE; @@ -367,6 +369,7 @@ static void err_compression_marker(struct drm_i915_error_state_buf *m) struct compress { struct pagevec pool; + bool wc; }; static bool compress_init(struct compress *c) @@ -389,7 +392,7 @@ static int compress_page(struct compress *c, if (!ptr) return -ENOMEM; - if (!i915_memcpy_from_wc(ptr, src, PAGE_SIZE)) + if (!(c->wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE))) memcpy(ptr, src, PAGE_SIZE); dst->pages[dst->page_count++] = ptr; @@ -966,7 +969,6 @@ i915_error_object_create(struct drm_i915_private *i915, struct drm_i915_error_object *dst; unsigned long num_pages; struct sgt_iter iter; - dma_addr_t dma; int ret; might_sleep(); @@ -992,17 +994,54 @@ i915_error_object_create(struct drm_i915_private *i915, dst->page_count = 0; dst->unused = 0; + compress->wc = i915_gem_object_is_lmem(vma->obj) || + drm_mm_node_allocated(&ggtt->error_capture); + ret = -EINVAL; - for_each_sgt_daddr(dma, iter, vma->pages) { + if (drm_mm_node_allocated(&ggtt->error_capture)) { void __iomem *s; + dma_addr_t dma; - ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0); + for_each_sgt_daddr(dma, iter, vma->pages) { + ggtt->vm.insert_page(&ggtt->vm, dma, slot, + I915_CACHE_NONE, 0); - s = io_mapping_map_wc(&ggtt->iomap, slot, PAGE_SIZE); - ret = compress_page(compress, (void __force *)s, dst); - io_mapping_unmap(s); - if (ret) - break; + s = io_mapping_map_wc(&ggtt->iomap, slot, PAGE_SIZE); + ret = compress_page(compress, (void __force *)s, dst); + io_mapping_unmap(s); + if (ret) + break; + } + } else if (i915_gem_object_is_lmem(vma->obj)) { + struct intel_memory_region *mem = vma->obj->mm.region; + dma_addr_t dma; + + for_each_sgt_daddr(dma, iter, vma->pages) { + void __iomem *s; + + s = io_mapping_map_atomic_wc(&mem->iomap, dma); + ret = compress_page(compress, (void __force *)s, dst); + io_mapping_unmap_atomic(s); + if (ret) + break; + } + } else { + struct page *page; + + for_each_sgt_page(page, iter, vma->pages) { + void *s; + + drm_clflush_pages(&page, 1); + + s = kmap_atomic(page); + ret = compress_page(compress, s, dst); + kunmap_atomic(s); + + drm_clflush_pages(&page, 1); + + if (ret) + break; + } } if (ret || compress_flush(compress, dst)) { @@ -1657,9 +1696,12 @@ static void capture_params(struct i915_gpu_state *error) static void capture_finish(struct i915_gpu_state *error) { struct i915_ggtt *ggtt = &error->i915->ggtt; - const u64 slot = ggtt->error_capture.start; - ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE); + if (drm_mm_node_allocated(&ggtt->error_capture)) { + const u64 slot = ggtt->error_capture.start; + + ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE); + } } #define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x)) From 4dc0a7cae212a3a9fb292e19b91f06012fc70d65 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 29 Oct 2019 09:58:54 +0000 Subject: [PATCH 111/154] drm/i915: Don't try to place HWS in non-existing mappable region HWS placement restrictions can't just rely on HAS_LLC flag. Signed-off-by: Michal Wajdeczko Signed-off-by: Matthew Auld Cc: Daniele Ceraolo Spurio Acked-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191029095856.25431-5-matthew.auld@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 9cc1ea6519ec..355523114c71 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -528,7 +528,7 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine, unsigned int flags; flags = PIN_GLOBAL; - if (!HAS_LLC(engine->i915)) + if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt)) /* * On g33, we cannot place HWS above 256MiB, so * restrict its pinning to the low mappable arena. From 34a6baa2df9db77dc65979e9c334a3097f6b7d9f Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 29 Oct 2019 09:58:55 +0000 Subject: [PATCH 112/154] drm/i915: don't allocate the ring in stolen if we lack aperture Since we have no way access it from the CPU. For such cases just fallback to internal objects. Signed-off-by: Matthew Auld Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191029095856.25431-6-matthew.auld@intel.com --- drivers/gpu/drm/i915/gt/intel_ring.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index fa01c1407760..ece20504d240 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -108,7 +108,9 @@ static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) struct drm_i915_gem_object *obj; struct i915_vma *vma; - obj = i915_gem_object_create_stolen(i915, size); + obj = ERR_PTR(-ENODEV); + if (i915_ggtt_has_aperture(ggtt)) + obj = i915_gem_object_create_stolen(i915, size); if (IS_ERR(obj)) obj = i915_gem_object_create_internal(i915, size); if (IS_ERR(obj)) From e60f7bb7ea68c46b006aa8f3d562933e7b1d4c9e Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 29 Oct 2019 09:58:56 +0000 Subject: [PATCH 113/154] drm/i915/selftests: check for missing aperture We may be missing support for the mappable aperture on some platforms. Signed-off-by: Matthew Auld Cc: Daniele Ceraolo Spurio Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191029095856.25431-7-matthew.auld@intel.com --- .../drm/i915/gem/selftests/i915_gem_coherency.c | 7 ++++++- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 6 ++++++ drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 14 ++++++++++---- drivers/gpu/drm/i915/selftests/i915_gem.c | 4 ++++ drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 3 +++ 5 files changed, 29 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 0a195e5b98e6..2b29f6b4e1dd 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -250,7 +250,12 @@ static bool always_valid(struct context *ctx) static bool needs_fence_registers(struct context *ctx) { - return !intel_gt_is_wedged(ctx->engine->gt); + struct intel_gt *gt = ctx->engine->gt; + + if (intel_gt_is_wedged(gt)) + return false; + + return gt->ggtt->num_fences; } static bool needs_mi_store_dword(struct context *ctx) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index d45a93928ff5..29b2077b73d2 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -301,6 +301,9 @@ static int igt_partial_tiling(void *arg) int tiling; int err; + if (!i915_ggtt_has_aperture(&i915->ggtt)) + return 0; + /* We want to check the page mapping and fencing of a large object * mmapped through the GTT. The object we create is larger than can * possibly be mmaped as a whole, and so we must use partial GGTT vma. @@ -431,6 +434,9 @@ static int igt_smoke_tiling(void *arg) IGT_TIMEOUT(end); int err; + if (!i915_ggtt_has_aperture(&i915->ggtt)) + return 0; + /* * igt_partial_tiling() does an exhastive check of partial tiling * chunking, but will undoubtably run out of time. Here, we do a diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index b7207b488391..8abc0a1d692b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -1149,8 +1149,12 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, struct i915_request *rq; struct evict_vma arg; struct hang h; + unsigned int pin_flags; int err; + if (!gt->ggtt->num_fences && flags & EXEC_OBJECT_NEEDS_FENCE) + return 0; + if (!engine || !intel_engine_can_store_dword(engine)) return 0; @@ -1186,10 +1190,12 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, goto out_obj; } - err = i915_vma_pin(arg.vma, 0, 0, - i915_vma_is_ggtt(arg.vma) ? - PIN_GLOBAL | PIN_MAPPABLE : - PIN_USER); + pin_flags = i915_vma_is_ggtt(arg.vma) ? PIN_GLOBAL : PIN_USER; + + if (flags & EXEC_OBJECT_NEEDS_FENCE) + pin_flags |= PIN_MAPPABLE; + + err = i915_vma_pin(arg.vma, 0, 0, pin_flags); if (err) { i915_request_add(rq); goto out_obj; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index 6d22567ad620..e378543ed453 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -45,6 +45,10 @@ static void trash_stolen(struct drm_i915_private *i915) unsigned long page; u32 prng = 0x12345678; + /* XXX: fsck. needs some more thought... */ + if (!i915_ggtt_has_aperture(ggtt)) + return; + for (page = 0; page < size; page += PAGE_SIZE) { const dma_addr_t dma = i915->dsm.start + page; u32 __iomem *s; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index a1cb072e4a1b..3f7e80fb3bbd 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1149,6 +1149,9 @@ static int igt_ggtt_page(void *arg) unsigned int *order, n; int err; + if (!i915_ggtt_has_aperture(ggtt)) + return 0; + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); From fc21523041107a4b2d47b05e1ad2a360659dbc4f Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Fri, 25 Oct 2019 12:37:45 -0700 Subject: [PATCH 114/154] drm/i915/perf: Add helper macros for comparing with whitelisted registers Add helper macros for range and equality comparisons and use them to check with whitelisted registers in oa configurations. Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Lionel Landwerlin Signed-off-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20191025193746.47155-1-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 54 +++++++++++++++++--------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index a807b6f0dfa3..83327841a927 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -3515,56 +3515,58 @@ static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr) return false; } +#define ADDR_IN_RANGE(addr, start, end) \ + ((addr) >= (start) && \ + (addr) <= (end)) + +#define REG_IN_RANGE(addr, start, end) \ + ((addr) >= i915_mmio_reg_offset(start) && \ + (addr) <= i915_mmio_reg_offset(end)) + +#define REG_EQUAL(addr, mmio) \ + ((addr) == i915_mmio_reg_offset(mmio)) + static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) { - return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) && - addr <= i915_mmio_reg_offset(OASTARTTRIG8)) || - (addr >= i915_mmio_reg_offset(OAREPORTTRIG1) && - addr <= i915_mmio_reg_offset(OAREPORTTRIG8)) || - (addr >= i915_mmio_reg_offset(OACEC0_0) && - addr <= i915_mmio_reg_offset(OACEC7_1)); + return REG_IN_RANGE(addr, OASTARTTRIG1, OASTARTTRIG8) || + REG_IN_RANGE(addr, OAREPORTTRIG1, OAREPORTTRIG8) || + REG_IN_RANGE(addr, OACEC0_0, OACEC7_1); } static bool gen7_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) || - (addr >= i915_mmio_reg_offset(MICRO_BP0_0) && - addr <= i915_mmio_reg_offset(NOA_WRITE)) || - (addr >= i915_mmio_reg_offset(OA_PERFCNT1_LO) && - addr <= i915_mmio_reg_offset(OA_PERFCNT2_HI)) || - (addr >= i915_mmio_reg_offset(OA_PERFMATRIX_LO) && - addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI)); + return REG_EQUAL(addr, HALF_SLICE_CHICKEN2) || + REG_IN_RANGE(addr, MICRO_BP0_0, NOA_WRITE) || + REG_IN_RANGE(addr, OA_PERFCNT1_LO, OA_PERFCNT2_HI) || + REG_IN_RANGE(addr, OA_PERFMATRIX_LO, OA_PERFMATRIX_HI); } static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { return gen7_is_valid_mux_addr(perf, addr) || - addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) || - (addr >= i915_mmio_reg_offset(RPM_CONFIG0) && - addr <= i915_mmio_reg_offset(NOA_CONFIG(8))); + REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) || + REG_IN_RANGE(addr, RPM_CONFIG0, NOA_CONFIG(8)); } static bool gen10_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { return gen8_is_valid_mux_addr(perf, addr) || - addr == i915_mmio_reg_offset(GEN10_NOA_WRITE_HIGH) || - (addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) && - addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI)); + REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) || + REG_IN_RANGE(addr, OA_PERFCNT3_LO, OA_PERFCNT4_HI); } static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { return gen7_is_valid_mux_addr(perf, addr) || - (addr >= 0x25100 && addr <= 0x2FF90) || - (addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) && - addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) || - addr == i915_mmio_reg_offset(HSW_MBVID2_MISR0); + ADDR_IN_RANGE(addr, 0x25100, 0x2FF90) || + REG_IN_RANGE(addr, HSW_MBVID2_NOA0, HSW_MBVID2_NOA9) || + REG_EQUAL(addr, HSW_MBVID2_MISR0); } static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { return gen7_is_valid_mux_addr(perf, addr) || - (addr >= 0x182300 && addr <= 0x1823A4); + ADDR_IN_RANGE(addr, 0x182300, 0x1823A4); } static u32 mask_reg_value(u32 reg, u32 val) @@ -3573,14 +3575,14 @@ static u32 mask_reg_value(u32 reg, u32 val) * WaDisableSTUnitPowerOptimization workaround. Make sure the value * programmed by userspace doesn't change this. */ - if (i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) == reg) + if (REG_EQUAL(reg, HALF_SLICE_CHICKEN2)) val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE); /* WAIT_FOR_RC6_EXIT has only one bit fullfilling the function * indicated by its name and a bunch of selection fields used by OA * configs. */ - if (i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) == reg) + if (REG_EQUAL(reg, WAIT_FOR_RC6_EXIT)) val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE); return val; From 00a7f0d7155c28ab18600bcf3f62d7cade2a870d Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 25 Oct 2019 12:37:46 -0700 Subject: [PATCH 115/154] drm/i915/tgl: Add perf support on TGL The design of the OA unit has been split into several units. We now have a global unit (OAG) and a render specific unit (OAR). This leads to some changes on how we program things. Some details : OAR: - has its own set of counter registers, they are per-context saved/restored - counters are not written to the circular OA buffer - a snapshot of the counters can be acquired with MI_RECORD_PERF_COUNT, or a single counter can be read with MI_STORE_REGISTER_MEM. OAG: - has global counters that increment across context switches - counters are written into the circular OA buffer (if requested) v2: Fix checkpatch warnings on code style (Lucas) v3: (Umesh) - Update register from which tail, status and head are read - Update logic to sample context reports - Update whitelist mux and b counter regs v4: Fix a bug when updating context image for new contexts (Umesh) v5: Squash patch enabling save/restore of counters into context image We want this so we can preempt performance queries and keep the system responsive even when long running queries are ongoing. We avoid doing it for all contexts. - use LRI to modify context control (Chris) - use MASKED_FIELD to program just the masked bits (Chris) - disable save/restore of counters on cleanup (Chris) v6: Do not use implicit parameters (Chris) BSpec: 28727, 30021 Signed-off-by: Lionel Landwerlin Signed-off-by: Umesh Nerlige Ramappa Signed-off-by: Lucas De Marchi Acked-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191025193746.47155-2-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/Makefile | 3 +- drivers/gpu/drm/i915/gt/intel_lrc.h | 1 + drivers/gpu/drm/i915/i915_perf.c | 351 +++++++++++++++++++++++--- drivers/gpu/drm/i915/i915_reg.h | 103 ++++++++ drivers/gpu/drm/i915/oa/i915_oa_tgl.c | 121 +++++++++ drivers/gpu/drm/i915/oa/i915_oa_tgl.h | 16 ++ 6 files changed, 560 insertions(+), 35 deletions(-) create mode 100644 drivers/gpu/drm/i915/oa/i915_oa_tgl.c create mode 100644 drivers/gpu/drm/i915/oa/i915_oa_tgl.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 526f881325c8..90dcf09f52cc 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -247,7 +247,8 @@ i915-y += \ oa/i915_oa_cflgt2.o \ oa/i915_oa_cflgt3.o \ oa/i915_oa_cnl.o \ - oa/i915_oa_icl.o + oa/i915_oa_icl.o \ + oa/i915_oa_tgl.o i915-y += i915_perf.o # Post-mortem debug and GPU hang state capture diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index 7860787cb856..faa2d56c279b 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -43,6 +43,7 @@ struct intel_engine_cs; #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) #define CTX_CTRL_RS_CTX_ENABLE (1 << 1) #define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2) +#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE (1 << 8) #define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0) #define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510) #define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 83327841a927..a8c2318d3d5e 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -218,6 +218,7 @@ #include "oa/i915_oa_cflgt3.h" #include "oa/i915_oa_cnl.h" #include "oa/i915_oa_icl.h" +#include "oa/i915_oa_tgl.h" /* HW requires this to be a power of two, between 128k and 16M, though driver * is currently generally designed assuming the largest 16M size is used such @@ -294,6 +295,7 @@ static u32 i915_perf_stream_paranoid = true; /* On Gen8+ automatically triggered OA reports include a 'reason' field... */ #define OAREPORT_REASON_MASK 0x3f +#define OAREPORT_REASON_MASK_EXTENDED 0x7f #define OAREPORT_REASON_SHIFT 19 #define OAREPORT_REASON_TIMER (1<<0) #define OAREPORT_REASON_CTX_SWITCH (1<<3) @@ -339,6 +341,10 @@ static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = { [I915_OA_FORMAT_C4_B8] = { 7, 64 }, }; +static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = { + [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, +}; + #define SAMPLE_OA_REPORT (1<<0) /** @@ -419,6 +425,14 @@ static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo) kfree(oa_bo); } +static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; + + return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) & + GEN12_OAG_OATAILPTR_MASK; +} + static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; @@ -539,7 +553,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) aging_tail = hw_tail; stream->oa_buffer.aging_timestamp = now; } else { - DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n", + DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %x\n", hw_tail); } } @@ -741,7 +755,9 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * it to userspace... */ reason = ((report32[0] >> OAREPORT_REASON_SHIFT) & - OAREPORT_REASON_MASK); + (IS_GEN(stream->perf->i915, 12) ? + OAREPORT_REASON_MASK_EXTENDED : + OAREPORT_REASON_MASK)); if (reason == 0) { if (__ratelimit(&stream->perf->spurious_report_rs)) DRM_NOTE("Skipping spurious, invalid OA report\n"); @@ -758,7 +774,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * Note: that we don't clear the valid_ctx_bit so userspace can * understand that the ID has been squashed by the kernel. */ - if (!(report32[0] & stream->perf->gen8_valid_ctx_bit)) + if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) && + INTEL_GEN(stream->perf->i915) <= 11) ctx_id = report32[2] = INVALID_CTX_ID; /* @@ -825,6 +842,11 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, } if (start_offset != *offset) { + i915_reg_t oaheadptr; + + oaheadptr = IS_GEN(stream->perf->i915, 12) ? + GEN12_OAG_OAHEADPTR : GEN8_OAHEADPTR; + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); /* @@ -832,9 +854,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * relative to oa_buf_base so put back here... */ head += gtt_offset; - - intel_uncore_write(uncore, GEN8_OAHEADPTR, - head & GEN8_OAHEADPTR_MASK); + intel_uncore_write(uncore, oaheadptr, + head & GEN12_OAG_OAHEADPTR_MASK); stream->oa_buffer.head = head; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); @@ -870,12 +891,16 @@ static int gen8_oa_read(struct i915_perf_stream *stream, { struct intel_uncore *uncore = stream->uncore; u32 oastatus; + i915_reg_t oastatus_reg; int ret; if (WARN_ON(!stream->oa_buffer.vaddr)) return -EIO; - oastatus = intel_uncore_read(uncore, GEN8_OASTATUS); + oastatus_reg = IS_GEN(stream->perf->i915, 12) ? + GEN12_OAG_OASTATUS : GEN8_OASTATUS; + + oastatus = intel_uncore_read(uncore, oastatus_reg); /* * We treat OABUFFER_OVERFLOW as a significant error: @@ -907,7 +932,7 @@ static int gen8_oa_read(struct i915_perf_stream *stream, * Note: .oa_enable() is expected to re-init the oabuffer and * reset GEN8_OASTATUS for us */ - oastatus = intel_uncore_read(uncore, GEN8_OASTATUS); + oastatus = intel_uncore_read(uncore, oastatus_reg); } if (oastatus & GEN8_OASTATUS_REPORT_LOST) { @@ -915,7 +940,7 @@ static int gen8_oa_read(struct i915_perf_stream *stream, DRM_I915_PERF_RECORD_OA_REPORT_LOST); if (ret) return ret; - intel_uncore_write(uncore, GEN8_OASTATUS, + intel_uncore_write(uncore, oastatus_reg, oastatus & ~GEN8_OASTATUS_REPORT_LOST); } @@ -1489,6 +1514,63 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream) stream->pollin = false; } +static void gen12_init_oa_buffer(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; + u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); + unsigned long flags; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); + + intel_uncore_write(uncore, GEN12_OAG_OASTATUS, 0); + intel_uncore_write(uncore, GEN12_OAG_OAHEADPTR, + gtt_offset & GEN12_OAG_OAHEADPTR_MASK); + stream->oa_buffer.head = gtt_offset; + + /* + * PRM says: + * + * "This MMIO must be set before the OATAILPTR + * register and after the OAHEADPTR register. This is + * to enable proper functionality of the overflow + * bit." + */ + intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset | + OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT); + intel_uncore_write(uncore, GEN12_OAG_OATAILPTR, + gtt_offset & GEN12_OAG_OATAILPTR_MASK); + + /* Mark that we need updated tail pointers to read from... */ + stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR; + stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR; + + /* + * Reset state used to recognise context switches, affecting which + * reports we will forward to userspace while filtering for a single + * context. + */ + stream->oa_buffer.last_ctx_id = INVALID_CTX_ID; + + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); + + /* + * NB: although the OA buffer will initially be allocated + * zeroed via shmfs (and so this memset is redundant when + * first allocating), we may re-init the OA buffer, either + * when re-enabling a stream or in error/reset paths. + * + * The reason we clear the buffer for each re-init is for the + * sanity check in gen8_append_oa_reports() that looks at the + * reason field to make sure it's non-zero which relies on + * the assumption that new reports are being written to zeroed + * memory... + */ + memset(stream->oa_buffer.vaddr, 0, + stream->oa_buffer.vma->size); + + stream->pollin = false; +} + static int alloc_oa_buffer(struct i915_perf_stream *stream) { struct drm_i915_gem_object *bo; @@ -1991,12 +2073,20 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce, u32 *reg_state = ce->lrc_reg_state; int i; - reg_state[ctx_oactxctrl + 1] = - (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | - (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | - GEN8_OA_COUNTER_RESUME; + if (IS_GEN(stream->perf->i915, 12)) { + u32 format = stream->oa_buffer.format; - for (i = 0; i < ARRAY_SIZE(flex_regs); i++) + reg_state[ctx_oactxctrl + 1] = + (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | + (stream->oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0); + } else { + reg_state[ctx_oactxctrl + 1] = + (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | + (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | + GEN8_OA_COUNTER_RESUME; + } + + for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++) reg_state[ctx_flexeu0 + i * 2 + 1] = oa_config_flex_reg(stream->oa_config, flex_regs[i]); @@ -2129,6 +2219,36 @@ static int gen8_configure_context(struct i915_gem_context *ctx, return err; } +static int gen12_emit_oar_config(struct intel_context *ce, bool enable) +{ + struct i915_request *rq; + u32 *cs; + int err = 0; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto out; + } + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base)); + *cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, + enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0); + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + +out: + i915_request_add(rq); + + return err; +} + /* * Manages updating the per-context aspects of the OA stream * configuration across all contexts. @@ -2153,8 +2273,8 @@ static int gen8_configure_context(struct i915_gem_context *ctx, * * Note: it's only the RCS/Render context that has any OA state. */ -static int gen8_configure_all_contexts(struct i915_perf_stream *stream, - const struct i915_oa_config *oa_config) +static int lrc_configure_all_contexts(struct i915_perf_stream *stream, + const struct i915_oa_config *oa_config) { struct drm_i915_private *i915 = stream->perf->i915; /* The MMIO offsets for Flex EU registers aren't contiguous */ @@ -2166,11 +2286,9 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, CTX_R_PWR_CLK_STATE, }, { - GEN8_OACTXCONTROL, + IS_GEN(i915, 12) ? + GEN12_OAR_OACONTROL : GEN8_OACTXCONTROL, stream->perf->ctx_oactxctrl_offset + 1, - ((stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | - (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | - GEN8_OA_COUNTER_RESUME) }, { EU_PERF_CNTL0, ctx_flexeuN(0) }, { EU_PERF_CNTL1, ctx_flexeuN(1) }, @@ -2183,9 +2301,23 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, #undef ctx_flexeuN struct intel_engine_cs *engine; struct i915_gem_context *ctx, *cn; + size_t array_size = IS_GEN(i915, 12) ? 2 : ARRAY_SIZE(regs); int i, err; - for (i = 2; i < ARRAY_SIZE(regs); i++) + if (IS_GEN(i915, 12)) { + u32 format = stream->oa_buffer.format; + + regs[1].value = + (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | + (oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0); + } else { + regs[1].value = + (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | + (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | + GEN8_OA_COUNTER_RESUME; + } + + for (i = 2; !!ctx_flexeu0 && i < array_size; i++) regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); lockdep_assert_held(&stream->perf->lock); @@ -2216,7 +2348,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, spin_unlock(&i915->gem.contexts.lock); - err = gen8_configure_context(ctx, regs, ARRAY_SIZE(regs)); + err = gen8_configure_context(ctx, regs, array_size); if (err) { i915_gem_context_put(ctx); return err; @@ -2241,7 +2373,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu); - err = gen8_modify_self(ce, regs, ARRAY_SIZE(regs)); + err = gen8_modify_self(ce, regs, array_size); if (err) return err; } @@ -2289,19 +2421,69 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) * to make sure all slices/subslices are ON before writing to NOA * registers. */ - ret = gen8_configure_all_contexts(stream, oa_config); + ret = lrc_configure_all_contexts(stream, oa_config); if (ret) return ret; return emit_oa_config(stream, oa_config, oa_context(stream)); } +static int gen12_enable_metric_set(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; + struct i915_oa_config *oa_config = stream->oa_config; + bool periodic = stream->periodic; + u32 period_exponent = stream->period_exponent; + int ret; + + intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG, + /* Disable clk ratio reports, like previous Gens. */ + _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | + GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO) | + /* + * If the user didn't require OA reports, instruct the + * hardware not to emit ctx switch reports. + */ + !(stream->sample_flags & SAMPLE_OA_REPORT) ? + _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS) : + _MASKED_BIT_DISABLE(GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS)); + + intel_uncore_write(uncore, GEN12_OAG_OAGLBCTXCTRL, periodic ? + (GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME | + GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE | + (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT)) + : 0); + + /* + * Update all contexts prior writing the mux configurations as we need + * to make sure all slices/subslices are ON before writing to NOA + * registers. + */ + ret = lrc_configure_all_contexts(stream, oa_config); + if (ret) + return ret; + + /* + * For Gen12, performance counters are context + * saved/restored. Only enable it for the context that + * requested this. + */ + if (stream->ctx) { + ret = gen12_emit_oar_config(stream->pinned_ctx, + oa_config != NULL); + if (ret) + return ret; + } + + return emit_oa_config(stream, oa_config, oa_context(stream)); +} + static void gen8_disable_metric_set(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; /* Reset all contexts' slices/subslices configurations. */ - gen8_configure_all_contexts(stream, NULL); + lrc_configure_all_contexts(stream, NULL); intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0); } @@ -2311,7 +2493,22 @@ static void gen10_disable_metric_set(struct i915_perf_stream *stream) struct intel_uncore *uncore = stream->uncore; /* Reset all contexts' slices/subslices configurations. */ - gen8_configure_all_contexts(stream, NULL); + lrc_configure_all_contexts(stream, NULL); + + /* Make sure we disable noa to save power. */ + intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); +} + +static void gen12_disable_metric_set(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; + + /* Reset all contexts' slices/subslices configurations. */ + lrc_configure_all_contexts(stream, NULL); + + /* disable the context save/restore or OAR counters */ + if (stream->ctx) + gen12_emit_oar_config(stream->pinned_ctx, false); /* Make sure we disable noa to save power. */ intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); @@ -2373,6 +2570,25 @@ static void gen8_oa_enable(struct i915_perf_stream *stream) GEN8_OA_COUNTER_ENABLE); } +static void gen12_oa_enable(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; + u32 report_format = stream->oa_buffer.format; + + /* + * If we don't want OA reports from the OA buffer, then we don't even + * need to program the OAG unit. + */ + if (!(stream->sample_flags & SAMPLE_OA_REPORT)) + return; + + gen12_init_oa_buffer(stream); + + intel_uncore_write(uncore, GEN12_OAG_OACONTROL, + (report_format << GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT) | + GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE); +} + /** * i915_oa_stream_enable - handle `I915_PERF_IOCTL_ENABLE` for OA stream * @stream: An i915 perf stream opened for OA metrics @@ -2414,6 +2630,18 @@ static void gen8_oa_disable(struct i915_perf_stream *stream) DRM_ERROR("wait for OA to be disabled timed out\n"); } +static void gen12_oa_disable(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; + + intel_uncore_write(uncore, GEN12_OAG_OACONTROL, 0); + if (intel_wait_for_register(uncore, + GEN12_OAG_OACONTROL, + GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE, 0, + 50)) + DRM_ERROR("wait for OA to be disabled timed out\n"); +} + /** * i915_oa_stream_disable - handle `I915_PERF_IOCTL_DISABLE` for OA stream * @stream: An i915 perf stream opened for OA metrics @@ -2615,7 +2843,7 @@ void i915_oa_init_reg_state(const struct intel_context *ce, { struct i915_perf_stream *stream; - /* perf.exclusive_stream serialised by gen8_configure_all_contexts() */ + /* perf.exclusive_stream serialised by lrc_configure_all_contexts() */ if (engine->class != RENDER_CLASS) return; @@ -3094,16 +3322,24 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, * rest of the system, which we consider acceptable for a * non-privileged client. * - * For Gen8+ the OA unit no longer supports clock gating off for a + * For Gen8->11 the OA unit no longer supports clock gating off for a * specific context and the kernel can't securely stop the counters * from updating as system-wide / global values. Even though we can * filter reports based on the included context ID we can't block * clients from seeing the raw / global counter values via * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to * enable the OA unit by default. + * + * For Gen12+ we gain a new OAR unit that only monitors the RCS on a + * per context basis. So we can relax requirements there if the user + * doesn't request global stream access (i.e. query based sampling + * using MI_RECORD_PERF_COUNT. */ if (IS_HASWELL(perf->i915) && specific_ctx && !props->hold_preemption) privileged_op = false; + else if (IS_GEN(perf->i915, 12) && specific_ctx && + (props->sample_flags & SAMPLE_OA_REPORT) == 0) + privileged_op = false; /* Similar to perf's kernel.perf_paranoid_cpu sysctl option * we check a dev.i915.perf_stream_paranoid sysctl option @@ -3418,7 +3654,9 @@ void i915_perf_register(struct drm_i915_private *i915) sysfs_attr_init(&perf->test_config.sysfs_metric_id.attr); - if (INTEL_GEN(i915) >= 11) { + if (IS_TIGERLAKE(i915)) { + i915_perf_load_test_config_tgl(i915); + } else if (INTEL_GEN(i915) >= 11) { i915_perf_load_test_config_icl(i915); } else if (IS_CANNONLAKE(i915)) { i915_perf_load_test_config_cnl(i915); @@ -3569,6 +3807,28 @@ static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr) ADDR_IN_RANGE(addr, 0x182300, 0x1823A4); } +static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) +{ + return REG_IN_RANGE(addr, GEN12_OAG_OASTARTTRIG1, GEN12_OAG_OASTARTTRIG8) || + REG_IN_RANGE(addr, GEN12_OAG_OAREPORTTRIG1, GEN12_OAG_OAREPORTTRIG8) || + REG_IN_RANGE(addr, GEN12_OAG_CEC0_0, GEN12_OAG_CEC7_1) || + REG_IN_RANGE(addr, GEN12_OAG_SCEC0_0, GEN12_OAG_SCEC7_1) || + REG_EQUAL(addr, GEN12_OAA_DBG_REG) || + REG_EQUAL(addr, GEN12_OAG_OA_PESS) || + REG_EQUAL(addr, GEN12_OAG_SPCTR_CNF); +} + +static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr) +{ + return REG_EQUAL(addr, NOA_WRITE) || + REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) || + REG_EQUAL(addr, GDT_CHICKEN_BITS) || + REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) || + REG_EQUAL(addr, RPM_CONFIG0) || + REG_EQUAL(addr, RPM_CONFIG1) || + REG_IN_RANGE(addr, NOA_CONFIG(0), NOA_CONFIG(8)); +} + static u32 mask_reg_value(u32 reg, u32 val) { /* HALF_SLICE_CHICKEN2 is programmed with a the @@ -3961,14 +4221,11 @@ void i915_perf_init(struct drm_i915_private *i915) * worth the complexity to maintain now that BDW+ enable * execlist mode by default. */ - perf->oa_formats = gen8_plus_oa_formats; - - perf->ops.oa_enable = gen8_oa_enable; - perf->ops.oa_disable = gen8_oa_disable; perf->ops.read = gen8_oa_read; - perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; if (IS_GEN_RANGE(i915, 8, 9)) { + perf->oa_formats = gen8_plus_oa_formats; + perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; perf->ops.is_valid_mux_reg = @@ -3981,8 +4238,11 @@ void i915_perf_init(struct drm_i915_private *i915) chv_is_valid_mux_addr; } + perf->ops.oa_enable = gen8_oa_enable; + perf->ops.oa_disable = gen8_oa_disable; perf->ops.enable_metric_set = gen8_enable_metric_set; perf->ops.disable_metric_set = gen8_disable_metric_set; + perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; if (IS_GEN(i915, 8)) { perf->ctx_oactxctrl_offset = 0x120; @@ -3996,6 +4256,8 @@ void i915_perf_init(struct drm_i915_private *i915) perf->gen8_valid_ctx_bit = BIT(16); } } else if (IS_GEN_RANGE(i915, 10, 11)) { + perf->oa_formats = gen8_plus_oa_formats; + perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; perf->ops.is_valid_mux_reg = @@ -4003,8 +4265,11 @@ void i915_perf_init(struct drm_i915_private *i915) perf->ops.is_valid_flex_reg = gen8_is_valid_flex_addr; + perf->ops.oa_enable = gen8_oa_enable; + perf->ops.oa_disable = gen8_oa_disable; perf->ops.enable_metric_set = gen8_enable_metric_set; perf->ops.disable_metric_set = gen10_disable_metric_set; + perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; if (IS_GEN(i915, 10)) { perf->ctx_oactxctrl_offset = 0x128; @@ -4014,6 +4279,24 @@ void i915_perf_init(struct drm_i915_private *i915) perf->ctx_flexeu0_offset = 0x78e; } perf->gen8_valid_ctx_bit = BIT(16); + } else if (IS_GEN(i915, 12)) { + perf->oa_formats = gen12_oa_formats; + + perf->ops.is_valid_b_counter_reg = + gen12_is_valid_b_counter_addr; + perf->ops.is_valid_mux_reg = + gen12_is_valid_mux_addr; + perf->ops.is_valid_flex_reg = + gen8_is_valid_flex_addr; + + perf->ops.oa_enable = gen12_oa_enable; + perf->ops.oa_disable = gen12_oa_disable; + perf->ops.enable_metric_set = gen12_enable_metric_set; + perf->ops.disable_metric_set = gen12_disable_metric_set; + perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read; + + perf->ctx_flexeu0_offset = 0; + perf->ctx_oactxctrl_offset = 0x144; } } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index fb33b164ce55..42d2c0b08eff 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -684,6 +684,45 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OABUFFER_SIZE_8M (6 << 3) #define OABUFFER_SIZE_16M (7 << 3) +/* Gen12 OAR unit */ +#define GEN12_OAR_OACONTROL _MMIO(0x2960) +#define GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1 +#define GEN12_OAR_OACONTROL_COUNTER_ENABLE (1 << 0) + +#define GEN12_OACTXCONTROL _MMIO(0x2360) +#define GEN12_OAR_OASTATUS _MMIO(0x2968) + +/* Gen12 OAG unit */ +#define GEN12_OAG_OAHEADPTR _MMIO(0xdb00) +#define GEN12_OAG_OAHEADPTR_MASK 0xffffffc0 +#define GEN12_OAG_OATAILPTR _MMIO(0xdb04) +#define GEN12_OAG_OATAILPTR_MASK 0xffffffc0 + +#define GEN12_OAG_OABUFFER _MMIO(0xdb08) +#define GEN12_OAG_OABUFFER_BUFFER_SIZE_MASK (0x7) +#define GEN12_OAG_OABUFFER_BUFFER_SIZE_SHIFT (3) +#define GEN12_OAG_OABUFFER_MEMORY_SELECT (1 << 0) /* 0: PPGTT, 1: GGTT */ + +#define GEN12_OAG_OAGLBCTXCTRL _MMIO(0x2b28) +#define GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT 2 +#define GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE (1 << 1) +#define GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME (1 << 0) + +#define GEN12_OAG_OACONTROL _MMIO(0xdaf4) +#define GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT 2 +#define GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE (1 << 0) + +#define GEN12_OAG_OA_DEBUG _MMIO(0xdaf8) +#define GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO (1 << 6) +#define GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS (1 << 5) +#define GEN12_OAG_OA_DEBUG_DISABLE_GO_1_0_REPORTS (1 << 2) +#define GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS (1 << 1) + +#define GEN12_OAG_OASTATUS _MMIO(0xdafc) +#define GEN12_OAG_OASTATUS_COUNTER_OVERFLOW (1 << 2) +#define GEN12_OAG_OASTATUS_BUFFER_OVERFLOW (1 << 1) +#define GEN12_OAG_OASTATUS_REPORT_LOST (1 << 0) + /* * Flexible, Aggregate EU Counter Registers. * Note: these aren't contiguous @@ -920,6 +959,26 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OAREPORTTRIG8_NOA_SELECT_6_SHIFT 24 #define OAREPORTTRIG8_NOA_SELECT_7_SHIFT 28 +/* Same layout as OASTARTTRIGX */ +#define GEN12_OAG_OASTARTTRIG1 _MMIO(0xd900) +#define GEN12_OAG_OASTARTTRIG2 _MMIO(0xd904) +#define GEN12_OAG_OASTARTTRIG3 _MMIO(0xd908) +#define GEN12_OAG_OASTARTTRIG4 _MMIO(0xd90c) +#define GEN12_OAG_OASTARTTRIG5 _MMIO(0xd910) +#define GEN12_OAG_OASTARTTRIG6 _MMIO(0xd914) +#define GEN12_OAG_OASTARTTRIG7 _MMIO(0xd918) +#define GEN12_OAG_OASTARTTRIG8 _MMIO(0xd91c) + +/* Same layout as OAREPORTTRIGX */ +#define GEN12_OAG_OAREPORTTRIG1 _MMIO(0xd920) +#define GEN12_OAG_OAREPORTTRIG2 _MMIO(0xd924) +#define GEN12_OAG_OAREPORTTRIG3 _MMIO(0xd928) +#define GEN12_OAG_OAREPORTTRIG4 _MMIO(0xd92c) +#define GEN12_OAG_OAREPORTTRIG5 _MMIO(0xd930) +#define GEN12_OAG_OAREPORTTRIG6 _MMIO(0xd934) +#define GEN12_OAG_OAREPORTTRIG7 _MMIO(0xd938) +#define GEN12_OAG_OAREPORTTRIG8 _MMIO(0xd93c) + /* CECX_0 */ #define OACEC_COMPARE_LESS_OR_EQUAL 6 #define OACEC_COMPARE_NOT_EQUAL 5 @@ -936,6 +995,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OACEC_SELECT_PREV (1 << 19) #define OACEC_SELECT_BOOLEAN (2 << 19) +/* 11-bit array 0: pass-through, 1: negated */ +#define GEN12_OASCEC_NEGATE_MASK 0x7ff +#define GEN12_OASCEC_NEGATE_SHIFT 21 + /* CECX_1 */ #define OACEC_MASK_MASK 0xffff #define OACEC_CONSIDERATIONS_MASK 0xffff @@ -958,6 +1021,42 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OACEC7_0 _MMIO(0x27a8) #define OACEC7_1 _MMIO(0x27ac) +/* Same layout as CECX_Y */ +#define GEN12_OAG_CEC0_0 _MMIO(0xd940) +#define GEN12_OAG_CEC0_1 _MMIO(0xd944) +#define GEN12_OAG_CEC1_0 _MMIO(0xd948) +#define GEN12_OAG_CEC1_1 _MMIO(0xd94c) +#define GEN12_OAG_CEC2_0 _MMIO(0xd950) +#define GEN12_OAG_CEC2_1 _MMIO(0xd954) +#define GEN12_OAG_CEC3_0 _MMIO(0xd958) +#define GEN12_OAG_CEC3_1 _MMIO(0xd95c) +#define GEN12_OAG_CEC4_0 _MMIO(0xd960) +#define GEN12_OAG_CEC4_1 _MMIO(0xd964) +#define GEN12_OAG_CEC5_0 _MMIO(0xd968) +#define GEN12_OAG_CEC5_1 _MMIO(0xd96c) +#define GEN12_OAG_CEC6_0 _MMIO(0xd970) +#define GEN12_OAG_CEC6_1 _MMIO(0xd974) +#define GEN12_OAG_CEC7_0 _MMIO(0xd978) +#define GEN12_OAG_CEC7_1 _MMIO(0xd97c) + +/* Same layout as CECX_Y + negate 11-bit array */ +#define GEN12_OAG_SCEC0_0 _MMIO(0xdc00) +#define GEN12_OAG_SCEC0_1 _MMIO(0xdc04) +#define GEN12_OAG_SCEC1_0 _MMIO(0xdc08) +#define GEN12_OAG_SCEC1_1 _MMIO(0xdc0c) +#define GEN12_OAG_SCEC2_0 _MMIO(0xdc10) +#define GEN12_OAG_SCEC2_1 _MMIO(0xdc14) +#define GEN12_OAG_SCEC3_0 _MMIO(0xdc18) +#define GEN12_OAG_SCEC3_1 _MMIO(0xdc1c) +#define GEN12_OAG_SCEC4_0 _MMIO(0xdc20) +#define GEN12_OAG_SCEC4_1 _MMIO(0xdc24) +#define GEN12_OAG_SCEC5_0 _MMIO(0xdc28) +#define GEN12_OAG_SCEC5_1 _MMIO(0xdc2c) +#define GEN12_OAG_SCEC6_0 _MMIO(0xdc30) +#define GEN12_OAG_SCEC6_1 _MMIO(0xdc34) +#define GEN12_OAG_SCEC7_0 _MMIO(0xdc38) +#define GEN12_OAG_SCEC7_1 _MMIO(0xdc3c) + /* OA perf counters */ #define OA_PERFCNT1_LO _MMIO(0x91B8) #define OA_PERFCNT1_HI _MMIO(0x91BC) @@ -1038,6 +1137,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MICRO_BP3_COUNT_STATUS23 _MMIO(0x9838) #define MICRO_BP_FIRED_ARMED _MMIO(0x983C) +#define GEN12_OAA_DBG_REG _MMIO(0xdc44) +#define GEN12_OAG_OA_PESS _MMIO(0x2b2c) +#define GEN12_OAG_SPCTR_CNF _MMIO(0xdc40) + #define GDT_CHICKEN_BITS _MMIO(0x9840) #define GT_NOA_ENABLE 0x00000080 diff --git a/drivers/gpu/drm/i915/oa/i915_oa_tgl.c b/drivers/gpu/drm/i915/oa/i915_oa_tgl.c new file mode 100644 index 000000000000..a29d93707345 --- /dev/null +++ b/drivers/gpu/drm/i915/oa/i915_oa_tgl.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2018 Intel Corporation + * + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + */ + +#include + +#include "i915_drv.h" +#include "i915_oa_tgl.h" + +static const struct i915_oa_reg b_counter_config_test_oa[] = { + { _MMIO(0xD920), 0x00000000 }, + { _MMIO(0xD900), 0x00000000 }, + { _MMIO(0xD904), 0xF0800000 }, + { _MMIO(0xD910), 0x00000000 }, + { _MMIO(0xD914), 0xF0800000 }, + { _MMIO(0xDC40), 0x00FF0000 }, + { _MMIO(0xD940), 0x00000004 }, + { _MMIO(0xD944), 0x0000FFFF }, + { _MMIO(0xDC00), 0x00000004 }, + { _MMIO(0xDC04), 0x0000FFFF }, + { _MMIO(0xD948), 0x00000003 }, + { _MMIO(0xD94C), 0x0000FFFF }, + { _MMIO(0xDC08), 0x00000003 }, + { _MMIO(0xDC0C), 0x0000FFFF }, + { _MMIO(0xD950), 0x00000007 }, + { _MMIO(0xD954), 0x0000FFFF }, + { _MMIO(0xDC10), 0x00000007 }, + { _MMIO(0xDC14), 0x0000FFFF }, + { _MMIO(0xD958), 0x00100002 }, + { _MMIO(0xD95C), 0x0000FFF7 }, + { _MMIO(0xDC18), 0x00100002 }, + { _MMIO(0xDC1C), 0x0000FFF7 }, + { _MMIO(0xD960), 0x00100002 }, + { _MMIO(0xD964), 0x0000FFCF }, + { _MMIO(0xDC20), 0x00100002 }, + { _MMIO(0xDC24), 0x0000FFCF }, + { _MMIO(0xD968), 0x00100082 }, + { _MMIO(0xD96C), 0x0000FFEF }, + { _MMIO(0xDC28), 0x00100082 }, + { _MMIO(0xDC2C), 0x0000FFEF }, + { _MMIO(0xD970), 0x001000C2 }, + { _MMIO(0xD974), 0x0000FFE7 }, + { _MMIO(0xDC30), 0x001000C2 }, + { _MMIO(0xDC34), 0x0000FFE7 }, + { _MMIO(0xD978), 0x00100001 }, + { _MMIO(0xD97C), 0x0000FFE7 }, + { _MMIO(0xDC38), 0x00100001 }, + { _MMIO(0xDC3C), 0x0000FFE7 }, +}; + +static const struct i915_oa_reg flex_eu_config_test_oa[] = { +}; + +static const struct i915_oa_reg mux_config_test_oa[] = { + { _MMIO(0x0D04), 0x00000200 }, + { _MMIO(0x9840), 0x00000000 }, + { _MMIO(0x9884), 0x00000000 }, + { _MMIO(0x9888), 0x280E0000 }, + { _MMIO(0x9888), 0x1E0E0147 }, + { _MMIO(0x9888), 0x180E0000 }, + { _MMIO(0x9888), 0x160E0000 }, + { _MMIO(0x9888), 0x1E0F1000 }, + { _MMIO(0x9888), 0x1E104000 }, + { _MMIO(0x9888), 0x2E020100 }, + { _MMIO(0x9888), 0x2C030004 }, + { _MMIO(0x9888), 0x38003000 }, + { _MMIO(0x9888), 0x1E0A8000 }, + { _MMIO(0x9884), 0x00000003 }, + { _MMIO(0x9888), 0x49110000 }, + { _MMIO(0x9888), 0x5D101400 }, + { _MMIO(0x9888), 0x1D140020 }, + { _MMIO(0x9888), 0x1D1103A3 }, + { _MMIO(0x9888), 0x01110000 }, + { _MMIO(0x9888), 0x61111000 }, + { _MMIO(0x9888), 0x1F128000 }, + { _MMIO(0x9888), 0x17100000 }, + { _MMIO(0x9888), 0x55100630 }, + { _MMIO(0x9888), 0x57100000 }, + { _MMIO(0x9888), 0x31100000 }, + { _MMIO(0x9884), 0x00000003 }, + { _MMIO(0x9888), 0x65100002 }, + { _MMIO(0x9884), 0x00000000 }, + { _MMIO(0x9888), 0x42000001 }, +}; + +static ssize_t +show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "1\n"); +} + +void +i915_perf_load_test_config_tgl(struct drm_i915_private *dev_priv) +{ + strlcpy(dev_priv->perf.test_config.uuid, + "80a833f0-2504-4321-8894-e9277844ce7b", + sizeof(dev_priv->perf.test_config.uuid)); + dev_priv->perf.test_config.id = 1; + + dev_priv->perf.test_config.mux_regs = mux_config_test_oa; + dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); + + dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa; + dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); + + dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa; + dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); + + dev_priv->perf.test_config.sysfs_metric.name = "80a833f0-2504-4321-8894-e9277844ce7b"; + dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs; + + dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr; + + dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id; +} diff --git a/drivers/gpu/drm/i915/oa/i915_oa_tgl.h b/drivers/gpu/drm/i915/oa/i915_oa_tgl.h new file mode 100644 index 000000000000..4c25f0be825c --- /dev/null +++ b/drivers/gpu/drm/i915/oa/i915_oa_tgl.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2018 Intel Corporation + * + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + */ + +#ifndef __I915_OA_TGL_H__ +#define __I915_OA_TGL_H__ + +struct drm_i915_private; + +void i915_perf_load_test_config_tgl(struct drm_i915_private *dev_priv); + +#endif From 7f9d4c08846e6c34f82627fbc108a65664f33964 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 28 Oct 2019 13:30:31 +0200 Subject: [PATCH 116/154] drm/i915: Fix i845/i865 cursor width MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The change from the uapi coordinates to the internal coordinates broke the cursor on i845/i865 due to src and dst getting swapped. Fix it. Cc: Maarten Lankhorst Fixes: 3a612765f423 ("drm/i915: Remove cursor use of properties for coordinates") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191028113036.27553-1-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/display/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 9dce2e9e5376..e56a75c07043 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -10947,7 +10947,7 @@ static void i845_update_cursor(struct intel_plane *plane, unsigned long irqflags; if (plane_state && plane_state->base.visible) { - unsigned int width = drm_rect_width(&plane_state->base.src); + unsigned int width = drm_rect_width(&plane_state->base.dst); unsigned int height = drm_rect_height(&plane_state->base.dst); cntl = plane_state->ctl | From dd6e38dfc1286d307489a1ad890a87e096d9873b Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Tue, 29 Oct 2019 11:20:35 +0100 Subject: [PATCH 117/154] drm/i915: Fix i915_inject_load_error() name to read *_probe_* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 50d84418f586 ("drm/i915: Add i915 to i915_inject_probe_failure") introduced new functions unfortunately named incompatibly with rules established by commit f2db53f14d3d ("drm/i915: Replace "_load" with "_probe" consequently"). Fix it for consistency. Suggested-by: Michał Wajdeczko Signed-off-by: Janusz Krzysztofik Cc: Michał Wajdeczko Cc: Michał Winiarski Cc: Piotr Piórkowski Cc: Tomasz Lis Cc: Joonas Lahtinen Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191029102036.6326-2-janusz.krzysztofik@linux.intel.com --- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_huc.c | 4 ++-- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 6 +++--- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 20 ++++++++++--------- drivers/gpu/drm/i915/i915_gem.c | 4 ++-- drivers/gpu/drm/i915/i915_utils.c | 4 ++-- drivers/gpu/drm/i915/i915_utils.h | 12 +++++------ 7 files changed, 27 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index b7f8514e9b1d..2498c55e0ea5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1126,7 +1126,7 @@ int intel_guc_submission_enable(struct intel_guc *guc) enum intel_engine_id id; int err; - err = i915_inject_load_error(gt->i915, -ENXIO); + err = i915_inject_probe_error(gt->i915, -ENXIO); if (err) return err; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index 8be515c8d0f0..32a069841c14 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -63,7 +63,7 @@ static int intel_huc_rsa_data_create(struct intel_huc *huc) void *vaddr; int err; - err = i915_inject_load_error(gt->i915, -ENXIO); + err = i915_inject_probe_error(gt->i915, -ENXIO); if (err) return err; @@ -161,7 +161,7 @@ int intel_huc_auth(struct intel_huc *huc) if (!intel_uc_fw_is_loaded(&huc->fw)) return -ENOEXEC; - ret = i915_inject_load_error(gt->i915, -ENXIO); + ret = i915_inject_probe_error(gt->i915, -ENXIO); if (ret) goto fail; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 3fdbc935d155..629b19377a29 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -20,7 +20,7 @@ static int __intel_uc_reset_hw(struct intel_uc *uc) int ret; u32 guc_status; - ret = i915_inject_load_error(gt->i915, -ENXIO); + ret = i915_inject_probe_error(gt->i915, -ENXIO); if (ret) return ret; @@ -197,7 +197,7 @@ static int guc_enable_communication(struct intel_guc *guc) GEM_BUG_ON(guc_communication_enabled(guc)); - ret = i915_inject_load_error(i915, -ENXIO); + ret = i915_inject_probe_error(i915, -ENXIO); if (ret) return ret; @@ -372,7 +372,7 @@ static int uc_init_wopcm(struct intel_uc *uc) GEM_BUG_ON(!(size & GUC_WOPCM_SIZE_MASK)); GEM_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK); - err = i915_inject_load_error(gt->i915, -ENXIO); + err = i915_inject_probe_error(gt->i915, -ENXIO); if (err) return err; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index bb4889d2346d..26beccff0908 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -220,29 +220,31 @@ static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw, { bool user = e == -EINVAL; - if (i915_inject_load_error(i915, e)) { + if (i915_inject_probe_error(i915, e)) { /* non-existing blob */ uc_fw->path = ""; uc_fw->user_overridden = user; - } else if (i915_inject_load_error(i915, e)) { + } else if (i915_inject_probe_error(i915, e)) { /* require next major version */ uc_fw->major_ver_wanted += 1; uc_fw->minor_ver_wanted = 0; uc_fw->user_overridden = user; - } else if (i915_inject_load_error(i915, e)) { + } else if (i915_inject_probe_error(i915, e)) { /* require next minor version */ uc_fw->minor_ver_wanted += 1; uc_fw->user_overridden = user; - } else if (uc_fw->major_ver_wanted && i915_inject_load_error(i915, e)) { + } else if (uc_fw->major_ver_wanted && + i915_inject_probe_error(i915, e)) { /* require prev major version */ uc_fw->major_ver_wanted -= 1; uc_fw->minor_ver_wanted = 0; uc_fw->user_overridden = user; - } else if (uc_fw->minor_ver_wanted && i915_inject_load_error(i915, e)) { + } else if (uc_fw->minor_ver_wanted && + i915_inject_probe_error(i915, e)) { /* require prev minor version - hey, this should work! */ uc_fw->minor_ver_wanted -= 1; uc_fw->user_overridden = user; - } else if (user && i915_inject_load_error(i915, e)) { + } else if (user && i915_inject_probe_error(i915, e)) { /* officially unsupported platform */ uc_fw->major_ver_wanted = 0; uc_fw->minor_ver_wanted = 0; @@ -271,7 +273,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915) GEM_BUG_ON(!i915->wopcm.size); GEM_BUG_ON(!intel_uc_fw_is_enabled(uc_fw)); - err = i915_inject_load_error(i915, -ENXIO); + err = i915_inject_probe_error(i915, -ENXIO); if (err) return err; @@ -432,7 +434,7 @@ static int uc_fw_xfer(struct intel_uc_fw *uc_fw, struct intel_gt *gt, u64 offset; int ret; - ret = i915_inject_load_error(gt->i915, -ETIMEDOUT); + ret = i915_inject_probe_error(gt->i915, -ETIMEDOUT); if (ret) return ret; @@ -493,7 +495,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt, /* make sure the status was cleared the last time we reset the uc */ GEM_BUG_ON(intel_uc_fw_is_loaded(uc_fw)); - err = i915_inject_load_error(gt->i915, -ENOEXEC); + err = i915_inject_probe_error(gt->i915, -ENOEXEC); if (err) return err; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0f271a53012d..b1574ab104d7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1300,11 +1300,11 @@ int i915_gem_init(struct drm_i915_private *dev_priv) if (ret) goto err_gt; - ret = i915_inject_load_error(dev_priv, -ENODEV); + ret = i915_inject_probe_error(dev_priv, -ENODEV); if (ret) goto err_gt; - ret = i915_inject_load_error(dev_priv, -EIO); + ret = i915_inject_probe_error(dev_priv, -EIO); if (ret) goto err_gt; diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c index 02e969b64505..abe81ddde20e 100644 --- a/drivers/gpu/drm/i915/i915_utils.c +++ b/drivers/gpu/drm/i915/i915_utils.c @@ -54,8 +54,8 @@ __i915_printk(struct drm_i915_private *dev_priv, const char *level, #if IS_ENABLED(CONFIG_DRM_I915_DEBUG) static unsigned int i915_probe_fail_count; -int __i915_inject_load_error(struct drm_i915_private *i915, int err, - const char *func, int line) +int __i915_inject_probe_error(struct drm_i915_private *i915, int err, + const char *func, int line) { if (i915_probe_fail_count >= i915_modparams.inject_load_failure) return 0; diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 00b55252ef51..04139ba1191e 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -61,20 +61,20 @@ __i915_printk(struct drm_i915_private *dev_priv, const char *level, #if IS_ENABLED(CONFIG_DRM_I915_DEBUG) -int __i915_inject_load_error(struct drm_i915_private *i915, int err, - const char *func, int line); -#define i915_inject_load_error(_i915, _err) \ - __i915_inject_load_error((_i915), (_err), __func__, __LINE__) +int __i915_inject_probe_error(struct drm_i915_private *i915, int err, + const char *func, int line); +#define i915_inject_probe_error(_i915, _err) \ + __i915_inject_probe_error((_i915), (_err), __func__, __LINE__) bool i915_error_injected(void); #else -#define i915_inject_load_error(_i915, _err) 0 +#define i915_inject_probe_error(_i915, _err) 0 #define i915_error_injected() false #endif -#define i915_inject_probe_failure(i915) i915_inject_load_error((i915), -ENODEV) +#define i915_inject_probe_failure(i915) i915_inject_probe_error((i915), -ENODEV) #define i915_probe_error(i915, fmt, ...) \ __i915_printk(i915, i915_error_injected() ? KERN_DEBUG : KERN_ERR, \ From 4ec37538a6670a4f0291e4a1de9394908e0f0d08 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Tue, 29 Oct 2019 11:20:36 +0100 Subject: [PATCH 118/154] drm/i915: Rename "inject_load_failure" module parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit f2db53f14d3d ("drm/i915: Replace "_load" with "_probe" consequently") deliberately left the name of the module parameter unchanged as that would require a corresponding change on IGT size. Now as the IGT side change has been submitted, complete the switch to the "probe" nomenclature. Suggested-by: Joonas Lahtinen Signed-off-by: Janusz Krzysztofik Cc: Michał Wajdeczko Cc: Michał Winiarski Cc: Piotr Piórkowski Cc: Tomasz Lis Cc: Joonas Lahtinen Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191029102036.6326-3-janusz.krzysztofik@linux.intel.com --- drivers/gpu/drm/i915/i915_params.c | 2 +- drivers/gpu/drm/i915/i915_params.h | 2 +- drivers/gpu/drm/i915/i915_utils.c | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 4f1806f65040..3fa79adb2c1c 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -166,7 +166,7 @@ i915_param_named_unsafe(enable_dp_mst, bool, 0600, "Enable multi-stream transport (MST) for new DisplayPort sinks. (default: true)"); #if IS_ENABLED(CONFIG_DRM_I915_DEBUG) -i915_param_named_unsafe(inject_load_failure, uint, 0400, +i915_param_named_unsafe(inject_probe_failure, uint, 0400, "Force an error after a number of failure check points (0:disabled (default), N:force failure at the Nth failure check point)"); #endif diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 56058978bb27..a276317ad74b 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -62,7 +62,7 @@ struct drm_printer; param(int, mmio_debug, -IS_ENABLED(CONFIG_DRM_I915_DEBUG_MMIO)) \ param(int, edp_vswing, 0) \ param(int, reset, 3) \ - param(unsigned int, inject_load_failure, 0) \ + param(unsigned int, inject_probe_failure, 0) \ param(int, fastboot, -1) \ param(int, enable_dpcd_backlight, 0) \ param(char *, force_probe, CONFIG_DRM_I915_FORCE_PROBE) \ diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c index abe81ddde20e..0348c6d0ef5f 100644 --- a/drivers/gpu/drm/i915/i915_utils.c +++ b/drivers/gpu/drm/i915/i915_utils.c @@ -57,22 +57,22 @@ static unsigned int i915_probe_fail_count; int __i915_inject_probe_error(struct drm_i915_private *i915, int err, const char *func, int line) { - if (i915_probe_fail_count >= i915_modparams.inject_load_failure) + if (i915_probe_fail_count >= i915_modparams.inject_probe_failure) return 0; - if (++i915_probe_fail_count < i915_modparams.inject_load_failure) + if (++i915_probe_fail_count < i915_modparams.inject_probe_failure) return 0; __i915_printk(i915, KERN_INFO, "Injecting failure %d at checkpoint %u [%s:%d]\n", - err, i915_modparams.inject_load_failure, func, line); - i915_modparams.inject_load_failure = 0; + err, i915_modparams.inject_probe_failure, func, line); + i915_modparams.inject_probe_failure = 0; return err; } bool i915_error_injected(void) { - return i915_probe_fail_count && !i915_modparams.inject_load_failure; + return i915_probe_fail_count && !i915_modparams.inject_probe_failure; } #endif From b79029b2e859d8cef534643a1254a833459038f1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 29 Oct 2019 09:16:32 +0000 Subject: [PATCH 119/154] drm/i915/gt: Make timeslice duration configurable Execlists uses a scheduling quantum (a timeslice) to alternate execution between ready-to-run contexts of equal priority. This ensures that all users (though only if they of equal importance) have the opportunity to run and prevents livelocks where contexts may have implicit ordering due to userspace semaphores. However, not all workloads necessarily benefit from timeslicing and in the extreme some sysadmin may want to disable or reduce the timeslicing granularity. The timeslicing mechanism can be compiled out^W^W disabled (but should DCE!) with ./scripts/config --set-val DRM_I915_TIMESLICE_DURATION 0 Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20191029091632.26281-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Kconfig.profile | 15 ++++++++ drivers/gpu/drm/i915/gt/intel_engine.h | 13 ++++++- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 + drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 + drivers/gpu/drm/i915/gt/intel_lrc.c | 39 ++++++++++++++------ drivers/gpu/drm/i915/gt/selftest_lrc.c | 17 +++++++-- 6 files changed, 69 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile index 8ab7af5eb311..1799537a3228 100644 --- a/drivers/gpu/drm/i915/Kconfig.profile +++ b/drivers/gpu/drm/i915/Kconfig.profile @@ -59,3 +59,18 @@ config DRM_I915_STOP_TIMEOUT damage as the system is reset in order to recover. The corollary is that the reset itself may take longer and so be more disruptive to interactive or low latency workloads. + +config DRM_I915_TIMESLICE_DURATION + int "Scheduling quantum for userspace batches (ms, jiffy granularity)" + default 1 # milliseconds + help + When two user batches of equal priority are executing, we will + alternate execution of each batch to ensure forward progress of + all users. This is necessary in some cases where there may be + an implicit dependency between those batches that requires + concurrent execution in order for them to proceed, e.g. they + interact with each other via userspace semaphores. Each context + is scheduled for execution for the timeslice duration, before + switching to the next context. + + May be 0 to disable timeslicing. diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index c6895938b626..bc3b72bfa9e3 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -329,10 +329,19 @@ void intel_engine_init_active(struct intel_engine_cs *engine, static inline bool intel_engine_has_preempt_reset(const struct intel_engine_cs *engine) { - if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) - return 0; + if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) + return false; return intel_engine_has_preemption(engine); } +static inline bool +intel_engine_has_timeslices(const struct intel_engine_cs *engine) +{ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return false; + + return intel_engine_has_semaphores(engine); +} + #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 355523114c71..f8113bc756c6 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -315,6 +315,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) CONFIG_DRM_I915_PREEMPT_TIMEOUT; engine->props.stop_timeout_ms = CONFIG_DRM_I915_STOP_TIMEOUT; + engine->props.timeslice_duration_ms = + CONFIG_DRM_I915_TIMESLICE_DURATION; /* * To be overridden by the backend on setup. However to facilitate diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index e8ea12b96755..c5d1047a4bc5 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -523,6 +523,7 @@ struct intel_engine_cs { unsigned long heartbeat_interval_ms; unsigned long preempt_timeout_ms; unsigned long stop_timeout_ms; + unsigned long timeslice_duration_ms; } props; }; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 2f474c1f5c54..6fb3def5ba16 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1467,7 +1467,7 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq) { int hint; - if (!intel_engine_has_semaphores(engine)) + if (!intel_engine_has_timeslices(engine)) return false; if (list_is_last(&rq->sched.link, &engine->active.requests)) @@ -1488,15 +1488,32 @@ switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq) return rq_prio(list_next_entry(rq, sched.link)); } -static bool -enable_timeslice(const struct intel_engine_execlists *execlists) +static inline unsigned long +timeslice(const struct intel_engine_cs *engine) { - const struct i915_request *rq = *execlists->active; + return READ_ONCE(engine->props.timeslice_duration_ms); +} + +static unsigned long +active_timeslice(const struct intel_engine_cs *engine) +{ + const struct i915_request *rq = *engine->execlists.active; if (i915_request_completed(rq)) - return false; + return 0; - return execlists->switch_priority_hint >= effective_prio(rq); + if (engine->execlists.switch_priority_hint < effective_prio(rq)) + return 0; + + return timeslice(engine); +} + +static void set_timeslice(struct intel_engine_cs *engine) +{ + if (!intel_engine_has_timeslices(engine)) + return; + + set_timer_ms(&engine->execlists.timer, active_timeslice(engine)); } static void record_preemption(struct intel_engine_execlists *execlists) @@ -1667,8 +1684,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine) */ if (!execlists->timer.expires && need_timeslice(engine, last)) - mod_timer(&execlists->timer, - jiffies + 1); + set_timer_ms(&execlists->timer, + timeslice(engine)); + return; } @@ -2092,10 +2110,7 @@ static void process_csb(struct intel_engine_cs *engine) execlists_num_ports(execlists) * sizeof(*execlists->pending)); - if (enable_timeslice(execlists)) - mod_timer(&execlists->timer, jiffies + 1); - else - cancel_timer(&execlists->timer); + set_timeslice(engine); WRITE_ONCE(execlists->pending[0], NULL); } else { diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 9507d750ae14..5e8c365548f0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -440,6 +440,8 @@ static int live_timeslice_preempt(void *arg) * need to preempt the current task and replace it with another * ready task. */ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return 0; obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(obj)) @@ -514,6 +516,11 @@ static void wait_for_submit(struct intel_engine_cs *engine, } while (!i915_request_is_active(rq)); } +static long timeslice_threshold(const struct intel_engine_cs *engine) +{ + return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1; +} + static int live_timeslice_queue(void *arg) { struct intel_gt *gt = arg; @@ -531,6 +538,8 @@ static int live_timeslice_queue(void *arg) * ELSP[1] is already occupied, so must rely on timeslicing to * eject ELSP[0] in favour of the queue.) */ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return 0; obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(obj)) @@ -608,8 +617,8 @@ static int live_timeslice_queue(void *arg) err = -EINVAL; } - /* Timeslice every jiffie, so within 2 we should signal */ - if (i915_request_wait(rq, 0, 3) < 0) { + /* Timeslice every jiffy, so within 2 we should signal */ + if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); @@ -1383,7 +1392,7 @@ static int __cancel_hostile(struct live_preempt_cancel *arg) int err; /* Preempt cancel non-preemptible spinner in ELSP0 */ - if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) + if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) return 0; GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); @@ -2030,7 +2039,7 @@ static int live_preempt_timeout(void *arg) * Check that we force preemption to occur by cancelling the previous * context if it refuses to yield the GPU. */ - if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) + if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) return 0; if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) From 47c41af7069668eed9bf7e51467557bd6cb3d599 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 25 Oct 2019 22:12:26 -0700 Subject: [PATCH 120/154] drm/i915: Drop unused AUX register offsets We reference DP AUX registers via the DP_AUX_CH_CTL() and DP_AUX_CH_DATA() macros that calculate all the register offsets for us automatically; there's no need to explicitly define every offset in i915_reg.h if they're never going to be used by the driver code. v2: Apparently GVT was directly using these raw definitions in a couple places. Switch GVT code over to using our preferred macros. Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi #v1 Link: https://patchwork.freedesktop.org/patch/msgid/20191026051226.30807-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gvt/handlers.c | 17 ++++++++------ drivers/gpu/drm/i915/i915_reg.h | 36 ----------------------------- 2 files changed, 10 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 45a9124e53b6..afd7f66bdc2d 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -819,13 +819,16 @@ static int trigger_aux_channel_interrupt(struct intel_vgpu *vgpu, struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; enum intel_gvt_event_type event; - if (reg == _DPA_AUX_CH_CTL) + if (reg == i915_mmio_reg_offset(DP_AUX_CH_CTL(AUX_CH_A))) event = AUX_CHANNEL_A; - else if (reg == _PCH_DPB_AUX_CH_CTL || reg == _DPB_AUX_CH_CTL) + else if (reg == _PCH_DPB_AUX_CH_CTL || + reg == i915_mmio_reg_offset(DP_AUX_CH_CTL(AUX_CH_B))) event = AUX_CHANNEL_B; - else if (reg == _PCH_DPC_AUX_CH_CTL || reg == _DPC_AUX_CH_CTL) + else if (reg == _PCH_DPC_AUX_CH_CTL || + reg == i915_mmio_reg_offset(DP_AUX_CH_CTL(AUX_CH_C))) event = AUX_CHANNEL_C; - else if (reg == _PCH_DPD_AUX_CH_CTL || reg == _DPD_AUX_CH_CTL) + else if (reg == _PCH_DPD_AUX_CH_CTL || + reg == i915_mmio_reg_offset(DP_AUX_CH_CTL(AUX_CH_D))) event = AUX_CHANNEL_D; else { WARN_ON(true); @@ -2872,11 +2875,11 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_DH(FORCEWAKE_MEDIA_GEN9, D_SKL_PLUS, NULL, mul_force_wake_write); MMIO_DH(FORCEWAKE_ACK_MEDIA_GEN9, D_SKL_PLUS, NULL, NULL); - MMIO_F(_MMIO(_DPB_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, + MMIO_F(DP_AUX_CH_CTL(AUX_CH_B), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, dp_aux_ch_ctl_mmio_write); - MMIO_F(_MMIO(_DPC_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, + MMIO_F(DP_AUX_CH_CTL(AUX_CH_C), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, dp_aux_ch_ctl_mmio_write); - MMIO_F(_MMIO(_DPD_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, + MMIO_F(DP_AUX_CH_CTL(AUX_CH_D), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, dp_aux_ch_ctl_mmio_write); MMIO_D(HSW_PWR_WELL_CTL1, D_SKL_PLUS); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 42d2c0b08eff..ad646d740049 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5641,45 +5641,9 @@ enum { */ #define _DPA_AUX_CH_CTL (DISPLAY_MMIO_BASE(dev_priv) + 0x64010) #define _DPA_AUX_CH_DATA1 (DISPLAY_MMIO_BASE(dev_priv) + 0x64014) -#define _DPA_AUX_CH_DATA2 (DISPLAY_MMIO_BASE(dev_priv) + 0x64018) -#define _DPA_AUX_CH_DATA3 (DISPLAY_MMIO_BASE(dev_priv) + 0x6401c) -#define _DPA_AUX_CH_DATA4 (DISPLAY_MMIO_BASE(dev_priv) + 0x64020) -#define _DPA_AUX_CH_DATA5 (DISPLAY_MMIO_BASE(dev_priv) + 0x64024) #define _DPB_AUX_CH_CTL (DISPLAY_MMIO_BASE(dev_priv) + 0x64110) #define _DPB_AUX_CH_DATA1 (DISPLAY_MMIO_BASE(dev_priv) + 0x64114) -#define _DPB_AUX_CH_DATA2 (DISPLAY_MMIO_BASE(dev_priv) + 0x64118) -#define _DPB_AUX_CH_DATA3 (DISPLAY_MMIO_BASE(dev_priv) + 0x6411c) -#define _DPB_AUX_CH_DATA4 (DISPLAY_MMIO_BASE(dev_priv) + 0x64120) -#define _DPB_AUX_CH_DATA5 (DISPLAY_MMIO_BASE(dev_priv) + 0x64124) - -#define _DPC_AUX_CH_CTL (DISPLAY_MMIO_BASE(dev_priv) + 0x64210) -#define _DPC_AUX_CH_DATA1 (DISPLAY_MMIO_BASE(dev_priv) + 0x64214) -#define _DPC_AUX_CH_DATA2 (DISPLAY_MMIO_BASE(dev_priv) + 0x64218) -#define _DPC_AUX_CH_DATA3 (DISPLAY_MMIO_BASE(dev_priv) + 0x6421c) -#define _DPC_AUX_CH_DATA4 (DISPLAY_MMIO_BASE(dev_priv) + 0x64220) -#define _DPC_AUX_CH_DATA5 (DISPLAY_MMIO_BASE(dev_priv) + 0x64224) - -#define _DPD_AUX_CH_CTL (DISPLAY_MMIO_BASE(dev_priv) + 0x64310) -#define _DPD_AUX_CH_DATA1 (DISPLAY_MMIO_BASE(dev_priv) + 0x64314) -#define _DPD_AUX_CH_DATA2 (DISPLAY_MMIO_BASE(dev_priv) + 0x64318) -#define _DPD_AUX_CH_DATA3 (DISPLAY_MMIO_BASE(dev_priv) + 0x6431c) -#define _DPD_AUX_CH_DATA4 (DISPLAY_MMIO_BASE(dev_priv) + 0x64320) -#define _DPD_AUX_CH_DATA5 (DISPLAY_MMIO_BASE(dev_priv) + 0x64324) - -#define _DPE_AUX_CH_CTL (DISPLAY_MMIO_BASE(dev_priv) + 0x64410) -#define _DPE_AUX_CH_DATA1 (DISPLAY_MMIO_BASE(dev_priv) + 0x64414) -#define _DPE_AUX_CH_DATA2 (DISPLAY_MMIO_BASE(dev_priv) + 0x64418) -#define _DPE_AUX_CH_DATA3 (DISPLAY_MMIO_BASE(dev_priv) + 0x6441c) -#define _DPE_AUX_CH_DATA4 (DISPLAY_MMIO_BASE(dev_priv) + 0x64420) -#define _DPE_AUX_CH_DATA5 (DISPLAY_MMIO_BASE(dev_priv) + 0x64424) - -#define _DPF_AUX_CH_CTL (DISPLAY_MMIO_BASE(dev_priv) + 0x64510) -#define _DPF_AUX_CH_DATA1 (DISPLAY_MMIO_BASE(dev_priv) + 0x64514) -#define _DPF_AUX_CH_DATA2 (DISPLAY_MMIO_BASE(dev_priv) + 0x64518) -#define _DPF_AUX_CH_DATA3 (DISPLAY_MMIO_BASE(dev_priv) + 0x6451c) -#define _DPF_AUX_CH_DATA4 (DISPLAY_MMIO_BASE(dev_priv) + 0x64520) -#define _DPF_AUX_CH_DATA5 (DISPLAY_MMIO_BASE(dev_priv) + 0x64524) #define DP_AUX_CH_CTL(aux_ch) _MMIO_PORT(aux_ch, _DPA_AUX_CH_CTL, _DPB_AUX_CH_CTL) #define DP_AUX_CH_DATA(aux_ch, i) _MMIO(_PORT(aux_ch, _DPA_AUX_CH_DATA1, _DPB_AUX_CH_DATA1) + (i) * 4) /* 5 registers */ From 6a3552527d431ae3281ce0dfa25107e71cc681e2 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 25 Oct 2019 16:06:23 -0700 Subject: [PATCH 121/154] drm/i915/tgl: Add AUX B & C to DC_OFF_POWER_DOMAINS Our TGL CI platforms are running into cases where aux transactions have failed to complete or declare a timeout well after the timeout limit that the hardware is supposed to enforce. From the logs it appears that these failures arise when aux transactions happen after we've entered DC6: <7> [622.523650] [drm:skl_enable_dc6 [i915]] Enabling DC6 <7> [622.523685] [drm:gen9_set_dc_state [i915]] Setting DC state from 00 to 02 ... <3> [622.535753] [drm:intel_dp_aux_xfer [i915]] *ERROR* dp aux hw did not signal timeout! <3> [622.547745] [drm:intel_dp_aux_xfer [i915]] *ERROR* dp aux hw did not signal timeout! <3> [622.559746] [drm:intel_dp_aux_xfer [i915]] *ERROR* dp aux hw did not signal timeout! <3> [622.571744] [drm:intel_dp_aux_xfer [i915]] *ERROR* dp aux hw did not signal timeout! <3> [622.583743] [drm:intel_dp_aux_xfer [i915]] *ERROR* dp aux hw did not signal timeout! <3> [622.583780] [drm:intel_dp_aux_xfer [i915]] *ERROR* dp_aux_ch not done status 0xad400bff <7> [622.863725] [drm:drm_dp_dpcd_access] Too many retries, giving up. First error: -110 On TGL AUX B & C are in PG1 (managed by the DMC firmware) rather than PG3 as they were on ICL, so allowing DC6 means the DMC firmware might shut off the power wells behind our backs when we're trying to use them. Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20191025230623.27829-6-matthew.d.roper@intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/display/intel_display_power.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 6f9e7927e248..707ac110e271 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -2682,6 +2682,8 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, TGL_PW_2_POWER_DOMAINS | \ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define TGL_DDI_IO_D_TC1_POWER_DOMAINS ( \ From bf96b515082c4b93ed706aa5edf518772fd7d394 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 29 Oct 2019 16:28:26 +0200 Subject: [PATCH 122/154] drm/i915/perf: ensure selftests select valid format Gen12 only support a single report format : I915_OA_FORMAT_A32u40_A4u32_B8_C8 Signed-off-by: Lionel Landwerlin Fixes: 00a7f0d7155c ("drm/i915/tgl: Add perf support on TGL") Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191029142826.20014-1-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/selftests/i915_perf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c index dc6d689e4251..aabd07f67e49 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf.c +++ b/drivers/gpu/drm/i915/selftests/i915_perf.c @@ -23,7 +23,8 @@ test_stream(struct i915_perf *perf) I915_ENGINE_CLASS_RENDER, 0), .sample_flags = SAMPLE_OA_REPORT, - .oa_format = I915_OA_FORMAT_C4_B8, + .oa_format = IS_GEN(perf->i915, 12) ? + I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8, .metrics_set = 1, }; struct i915_perf_stream *stream; From a20e26d8421a4318b08d7561c817af477d3ba783 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Fri, 25 Oct 2019 17:13:19 -0700 Subject: [PATCH 123/154] drm/i915: Add two spaces before the SKL_DFSM registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The next patches are going to touch this registers so here already fixing it for older registers and make it consistent with most of the other registers in this file. Cc: Ramalingam C Signed-off-by: José Roberto de Souza Reviewed-by: Radhakrishna Sripada Link: https://patchwork.freedesktop.org/patch/msgid/20191026001323.216052-1-jose.souza@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ad646d740049..2ddeeb2c414d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7743,15 +7743,15 @@ enum { #define CNL_DDI_CLOCK_REG_ACCESS_ON (1 << 7) #define SKL_DFSM _MMIO(0x51000) -#define SKL_DFSM_CDCLK_LIMIT_MASK (3 << 23) -#define SKL_DFSM_CDCLK_LIMIT_675 (0 << 23) -#define SKL_DFSM_CDCLK_LIMIT_540 (1 << 23) -#define SKL_DFSM_CDCLK_LIMIT_450 (2 << 23) -#define SKL_DFSM_CDCLK_LIMIT_337_5 (3 << 23) -#define SKL_DFSM_PIPE_A_DISABLE (1 << 30) -#define SKL_DFSM_PIPE_B_DISABLE (1 << 21) -#define SKL_DFSM_PIPE_C_DISABLE (1 << 28) -#define TGL_DFSM_PIPE_D_DISABLE (1 << 22) +#define SKL_DFSM_CDCLK_LIMIT_MASK (3 << 23) +#define SKL_DFSM_CDCLK_LIMIT_675 (0 << 23) +#define SKL_DFSM_CDCLK_LIMIT_540 (1 << 23) +#define SKL_DFSM_CDCLK_LIMIT_450 (2 << 23) +#define SKL_DFSM_CDCLK_LIMIT_337_5 (3 << 23) +#define SKL_DFSM_PIPE_A_DISABLE (1 << 30) +#define SKL_DFSM_PIPE_B_DISABLE (1 << 21) +#define SKL_DFSM_PIPE_C_DISABLE (1 << 28) +#define TGL_DFSM_PIPE_D_DISABLE (1 << 22) #define SKL_DSSM _MMIO(0x51004) #define CNL_DSSM_CDCLK_PLL_REFCLK_24MHz (1 << 31) From 74393109a8c3965596f953a535c8fe7064201033 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Fri, 25 Oct 2019 17:13:20 -0700 Subject: [PATCH 124/154] drm/i915/display: Handle fused off HDCP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HDCP could be fused off, so not all GEN9+ platforms will support it. Cc: Ville Syrjälä Cc: Martin Peres Reviewed-by: Ramalingam C Reviewed-by: Ville Syrjälä Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20191026001323.216052-2-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_hdcp.c | 2 +- drivers/gpu/drm/i915/i915_pci.c | 2 ++ drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_device_info.c | 3 +++ drivers/gpu/drm/i915/intel_device_info.h | 1 + 5 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index e69fa34528df..f1f41ca8402b 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -922,7 +922,7 @@ static void intel_hdcp_prop_work(struct work_struct *work) bool is_hdcp_supported(struct drm_i915_private *dev_priv, enum port port) { /* PORT E doesn't have HDCP, and PORT F is disabled */ - return INTEL_GEN(dev_priv) >= 9 && port < PORT_E; + return INTEL_INFO(dev_priv)->display.has_hdcp && port < PORT_E; } static int diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 04307e111f57..430da2d4082a 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -612,6 +612,7 @@ static const struct intel_device_info intel_cherryview_info = { .has_logical_ring_preemption = 1, \ .display.has_csr = 1, \ .has_gt_uc = 1, \ + .display.has_hdcp = 1, \ .display.has_ipc = 1, \ .ddb_size = 896 @@ -655,6 +656,7 @@ static const struct intel_device_info intel_skylake_gt4_info = { .display.has_ddi = 1, \ .has_fpga_dbg = 1, \ .display.has_fbc = 1, \ + .display.has_hdcp = 1, \ .display.has_psr = 1, \ .has_runtime_pm = 1, \ .display.has_csr = 1, \ diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2ddeeb2c414d..d90adf02e0a5 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7743,6 +7743,7 @@ enum { #define CNL_DDI_CLOCK_REG_ACCESS_ON (1 << 7) #define SKL_DFSM _MMIO(0x51000) +#define SKL_DFSM_DISPLAY_HDCP_DISABLE (1 << 25) #define SKL_DFSM_CDCLK_LIMIT_MASK (3 << 23) #define SKL_DFSM_CDCLK_LIMIT_675 (0 << 23) #define SKL_DFSM_CDCLK_LIMIT_540 (1 << 23) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 85e480bdc673..e7dd6092c105 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -981,6 +981,9 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) enabled_mask); else info->pipe_mask = enabled_mask; + + if (dfsm & SKL_DFSM_DISPLAY_HDCP_DISABLE) + info->display.has_hdcp = 0; } /* Initialize slice/subslice/EU info */ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 78a383f63957..64e4f1923c68 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -139,6 +139,7 @@ enum intel_ppgtt_type { func(has_dsb); \ func(has_fbc); \ func(has_gmch); \ + func(has_hdcp); \ func(has_hotplug); \ func(has_ipc); \ func(has_modular_fia); \ From 7a40aac1d77ab05ca375ac34de44e7972cc61dc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Fri, 25 Oct 2019 17:13:21 -0700 Subject: [PATCH 125/154] drm/i915/display: Check if FBC is fused off MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check if FBC is fused off and handle it. Cc: Ville Syrjälä Cc: Martin Peres Reviewed-by: Ramalingam C Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20191026001323.216052-3-jose.souza@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_device_info.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d90adf02e0a5..ecbcc0e33850 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7743,6 +7743,7 @@ enum { #define CNL_DDI_CLOCK_REG_ACCESS_ON (1 << 7) #define SKL_DFSM _MMIO(0x51000) +#define SKL_DFSM_DISPLAY_PM_DISABLE (1 << 27) #define SKL_DFSM_DISPLAY_HDCP_DISABLE (1 << 25) #define SKL_DFSM_CDCLK_LIMIT_MASK (3 << 23) #define SKL_DFSM_CDCLK_LIMIT_675 (0 << 23) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index e7dd6092c105..a3e90714cfa2 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -984,6 +984,9 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) if (dfsm & SKL_DFSM_DISPLAY_HDCP_DISABLE) info->display.has_hdcp = 0; + + if (dfsm & SKL_DFSM_DISPLAY_PM_DISABLE) + info->display.has_fbc = 0; } /* Initialize slice/subslice/EU info */ From ee595888e1c25fb31bbc10a317a576311356babd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Fri, 25 Oct 2019 17:13:22 -0700 Subject: [PATCH 126/154] drm/i915/display/icl+: Check if DMC is fused off MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check if DMC is fused off and handle it. Cc: Ville Syrjälä Cc: Martin Peres Reviewed-by: Ramalingam C Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20191026001323.216052-4-jose.souza@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_device_info.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ecbcc0e33850..32a5371fff4f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7750,6 +7750,7 @@ enum { #define SKL_DFSM_CDCLK_LIMIT_540 (1 << 23) #define SKL_DFSM_CDCLK_LIMIT_450 (2 << 23) #define SKL_DFSM_CDCLK_LIMIT_337_5 (3 << 23) +#define ICL_DFSM_DMC_DISABLE (1 << 23) #define SKL_DFSM_PIPE_A_DISABLE (1 << 30) #define SKL_DFSM_PIPE_B_DISABLE (1 << 21) #define SKL_DFSM_PIPE_C_DISABLE (1 << 28) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index a3e90714cfa2..fa6464879142 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -987,6 +987,9 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) if (dfsm & SKL_DFSM_DISPLAY_PM_DISABLE) info->display.has_fbc = 0; + + if (INTEL_GEN(dev_priv) >= 11 && (dfsm & ICL_DFSM_DMC_DISABLE)) + info->display.has_csr = 0; } /* Initialize slice/subslice/EU info */ From 0f9ed3b2c9ecb727d6cea803def2998e1a6e625e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Fri, 25 Oct 2019 17:13:23 -0700 Subject: [PATCH 127/154] drm/i915/display/cnl+: Handle fused off DSC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DSC could be fused off, so not all GEN10+ platforms will support it. Cc: Manasi Navare Cc: Martin Peres Reviewed-by: Ramalingam C Reviewed-by: Manasi Navare Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20191026001323.216052-5-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 3 +++ drivers/gpu/drm/i915/i915_pci.c | 1 + drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_device_info.c | 4 ++++ drivers/gpu/drm/i915/intel_device_info.h | 1 + 5 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 521ce23f38ac..c24c6d3e4ab5 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1888,6 +1888,9 @@ static bool intel_dp_source_supports_dsc(struct intel_dp *intel_dp, { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + if (!INTEL_INFO(dev_priv)->display.has_dsc) + return false; + /* On TGL, DSC is supported on all Pipes */ if (INTEL_GEN(dev_priv) >= 12) return true; diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 430da2d4082a..1bb701d32a5d 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -737,6 +737,7 @@ static const struct intel_device_info intel_coffeelake_gt3_info = { GEN9_FEATURES, \ GEN(10), \ .ddb_size = 1024, \ + .display.has_dsc = 1, \ .has_coherent_ggtt = false, \ GLK_COLORS diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 32a5371fff4f..5894e46ef68b 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7755,6 +7755,7 @@ enum { #define SKL_DFSM_PIPE_B_DISABLE (1 << 21) #define SKL_DFSM_PIPE_C_DISABLE (1 << 28) #define TGL_DFSM_PIPE_D_DISABLE (1 << 22) +#define CNL_DFSM_DISPLAY_DSC_DISABLE (1 << 7) #define SKL_DSSM _MMIO(0x51004) #define CNL_DSSM_CDCLK_PLL_REFCLK_24MHz (1 << 31) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index fa6464879142..a5b571364cf6 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -990,6 +990,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) >= 11 && (dfsm & ICL_DFSM_DMC_DISABLE)) info->display.has_csr = 0; + + if (INTEL_GEN(dev_priv) >= 10 && + (dfsm & CNL_DFSM_DISPLAY_DSC_DISABLE)) + info->display.has_dsc = 0; } /* Initialize slice/subslice/EU info */ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 64e4f1923c68..4bdf8a6cfb47 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -137,6 +137,7 @@ enum intel_ppgtt_type { func(has_ddi); \ func(has_dp_mst); \ func(has_dsb); \ + func(has_dsc); \ func(has_fbc); \ func(has_gmch); \ func(has_hdcp); \ From 5451646467436695f90a23274845c8d54d950f19 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 29 Oct 2019 10:31:02 -0700 Subject: [PATCH 128/154] drm/i915: Provide more information on DP AUX failures We're seeing some failures where an aux transaction still shows as 'busy' well after the timeout limit that the hardware is supposed to enforce. Improve the error message so that we can see exactly which aux channel this error happened on and what the status bits were during this case that isn't supposed to happen. v2: - Make timeout a const variable so that the timeout & message will match if we decide to change it in the future. (Lucas) - Don't bother testing intel_dp->aux.name for NULL. (Lucas) Cc: Lucas De Marchi Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20191029173102.9451-1-matthew.d.roper@intel.com Reviewed-by: Lucas De Marchi --- drivers/gpu/drm/i915/display/intel_dp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index c24c6d3e4ab5..325b70bb140d 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1179,18 +1179,20 @@ intel_dp_aux_wait_done(struct intel_dp *intel_dp) { struct drm_i915_private *i915 = dp_to_i915(intel_dp); i915_reg_t ch_ctl = intel_dp->aux_ch_ctl_reg(intel_dp); + const unsigned int timeout_ms = 10; u32 status; bool done; #define C (((status = intel_uncore_read_notrace(&i915->uncore, ch_ctl)) & DP_AUX_CH_CTL_SEND_BUSY) == 0) done = wait_event_timeout(i915->gmbus_wait_queue, C, - msecs_to_jiffies_timeout(10)); + msecs_to_jiffies_timeout(timeout_ms)); /* just trace the final value */ trace_i915_reg_rw(false, ch_ctl, status, sizeof(status), true); if (!done) - DRM_ERROR("dp aux hw did not signal timeout!\n"); + DRM_ERROR("%s did not complete or timeout within %ums (status 0x%08x)\n", + intel_dp->aux.name, timeout_ms, status); #undef C return status; From a0e047156cdebbccf253768b39d7e1dbf954c449 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 29 Oct 2019 20:23:38 +0000 Subject: [PATCH 129/154] drm/i915/gem: Make context persistence optional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our existing behaviour is to allow contexts and their GPU requests to persist past the point of closure until the requests are complete. This allows clients to operate in a 'fire-and-forget' manner where they can setup a rendering pipeline and hand it over to the display server and immediately exit. As the rendering pipeline is kept alive until completion, the display server (or other consumer) can use the results in the future and present them to the user. The compute model is a little different. They have little to no buffer sharing between processes as their kernels tend to operate on a continuous stream, feeding the results back to the client application. These kernels operate for an indeterminate length of time, with many clients wishing that the kernel was always running for as long as they keep feeding in the data, i.e. acting like a DSP. Not all clients want this persistent "desktop" behaviour and would prefer that the contexts are cleaned up immediately upon closure. This ensures that when clients are run without hangchecking (e.g. for compute kernels of indeterminate runtime), any GPU hang or other unexpected workloads are terminated with the process and does not continue to hog resources. The default behaviour for new contexts is the legacy persistence mode, as some desktop applications are dependent upon the existing behaviour. New clients will have to opt in to immediate cleanup on context closure. If the hangchecking modparam is disabled, so is persistent context support -- all contexts will be terminated on closure. We expect this behaviour change to be welcomed by compute users, who have often been caught between a rock and a hard place. They disable hangchecking to avoid their kernels being "unfairly" declared hung, but have also experienced true hangs that the system was then unable to clean up. Naturally, this leads to bug reports. Testcase: igt/gem_ctx_persistence Link: https://github.com/intel/compute-runtime/pull/228 Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Michał Winiarski Cc: Jon Bloomfield Reviewed-by: Jon Bloomfield Reviewed-by: Tvrtko Ursulin Reviewed-by: Joonas Lahtinen Acked-by: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20191029202338.8841-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 50 ++++++++++++++++++- drivers/gpu/drm/i915/gem/i915_gem_context.h | 15 ++++++ .../gpu/drm/i915/gem/i915_gem_context_types.h | 1 + .../gpu/drm/i915/gem/selftests/mock_context.c | 2 + include/uapi/drm/i915_drm.h | 15 ++++++ 5 files changed, 82 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 414fc55c9dd0..cbdf2fb32636 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -438,12 +438,39 @@ static void context_close(struct i915_gem_context *ctx) * case we opt to forcibly kill off all remaining requests on * context close. */ - if (!i915_modparams.enable_hangcheck) + if (!i915_gem_context_is_persistent(ctx) || + !i915_modparams.enable_hangcheck) kill_context(ctx); i915_gem_context_put(ctx); } +static int __context_set_persistence(struct i915_gem_context *ctx, bool state) +{ + if (i915_gem_context_is_persistent(ctx) == state) + return 0; + + if (state) { + /* + * Only contexts that are short-lived [that will expire or be + * reset] are allowed to survive past termination. We require + * hangcheck to ensure that the persistent requests are healthy. + */ + if (!i915_modparams.enable_hangcheck) + return -EINVAL; + + i915_gem_context_set_persistence(ctx); + } else { + /* To cancel a context we use "preempt-to-idle" */ + if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) + return -ENODEV; + + i915_gem_context_clear_persistence(ctx); + } + + return 0; +} + static struct i915_gem_context * __create_context(struct drm_i915_private *i915) { @@ -478,6 +505,7 @@ __create_context(struct drm_i915_private *i915) i915_gem_context_set_bannable(ctx); i915_gem_context_set_recoverable(ctx); + __context_set_persistence(ctx, true /* cgroup hook? */); for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; @@ -634,6 +662,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) return ctx; i915_gem_context_clear_bannable(ctx); + i915_gem_context_set_persistence(ctx); ctx->sched.priority = I915_USER_PRIORITY(prio); GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); @@ -1744,6 +1773,16 @@ err_free: return err; } +static int +set_persistence(struct i915_gem_context *ctx, + const struct drm_i915_gem_context_param *args) +{ + if (args->size) + return -EINVAL; + + return __context_set_persistence(ctx, args->value); +} + static int ctx_setparam(struct drm_i915_file_private *fpriv, struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args) @@ -1821,6 +1860,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ret = set_engines(ctx, args); break; + case I915_CONTEXT_PARAM_PERSISTENCE: + ret = set_persistence(ctx, args); + break; + case I915_CONTEXT_PARAM_BAN_PERIOD: default: ret = -EINVAL; @@ -2273,6 +2316,11 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, ret = get_engines(ctx, args); break; + case I915_CONTEXT_PARAM_PERSISTENCE: + args->size = 0; + args->value = i915_gem_context_is_persistent(ctx); + break; + case I915_CONTEXT_PARAM_BAN_PERIOD: default: ret = -EINVAL; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index cfe80590f0ed..18e50a769a6e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -76,6 +76,21 @@ static inline void i915_gem_context_clear_recoverable(struct i915_gem_context *c clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags); } +static inline bool i915_gem_context_is_persistent(const struct i915_gem_context *ctx) +{ + return test_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags); +} + +static inline void i915_gem_context_set_persistence(struct i915_gem_context *ctx) +{ + set_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags); +} + +static inline void i915_gem_context_clear_persistence(struct i915_gem_context *ctx) +{ + clear_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags); +} + static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx) { return test_bit(CONTEXT_BANNED, &ctx->flags); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index fe97b8ba4fda..861d7d92fe9f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -137,6 +137,7 @@ struct i915_gem_context { #define UCONTEXT_NO_ERROR_CAPTURE 1 #define UCONTEXT_BANNABLE 2 #define UCONTEXT_RECOVERABLE 3 +#define UCONTEXT_PERSISTENCE 4 /** * @flags: small set of booleans diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index 74ddd682c9cd..29b8984f0e47 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -22,6 +22,8 @@ mock_context(struct drm_i915_private *i915, INIT_LIST_HEAD(&ctx->link); ctx->i915 = i915; + i915_gem_context_set_persistence(ctx); + mutex_init(&ctx->engines_mutex); e = default_engines(ctx); if (IS_ERR(e)) diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 63d40cba97e0..5400d7e057f1 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1572,6 +1572,21 @@ struct drm_i915_gem_context_param { * i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND) */ #define I915_CONTEXT_PARAM_ENGINES 0xa + +/* + * I915_CONTEXT_PARAM_PERSISTENCE: + * + * Allow the context and active rendering to survive the process until + * completion. Persistence allows fire-and-forget clients to queue up a + * bunch of work, hand the output over to a display server and then quit. + * If the context is marked as not persistent, upon closing (either via + * an explicit DRM_I915_GEM_CONTEXT_DESTROY or implicitly from file closure + * or process termination), the context and any outstanding requests will be + * cancelled (and exported fences for cancelled requests marked as -EIO). + * + * By default, new contexts allow persistence. + */ +#define I915_CONTEXT_PARAM_PERSISTENCE 0xb /* Must be kept compact -- no holes and well documented */ __u64 value; From b3545e086877557b8b933c0fbf64dca065a1f5f6 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 28 Oct 2019 20:50:49 -0700 Subject: [PATCH 130/154] drm/i915/tgl: add support to one DP-MST stream MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the minimum change to support 1 (and only 1) DP-MST monitor connected on Tiger Lake. This change was isolated from previous patch from José. In order to support more streams we will need to create a master-slave relation on the transcoders and that is not currently working yet. v2: remove unused macro and use REG_FIELD_PREP() (Ville) Signed-off-by: Lucas De Marchi Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20191029035049.5907-1-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 3 +++ drivers/gpu/drm/i915/i915_reg.h | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 281594bcbfae..fed7fc56dd92 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -1905,6 +1905,9 @@ intel_ddi_transcoder_func_reg_val_get(const struct intel_crtc_state *crtc_state) } else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) { temp |= TRANS_DDI_MODE_SELECT_DP_MST; temp |= DDI_PORT_WIDTH(crtc_state->lane_count); + + if (INTEL_GEN(dev_priv) >= 12) + temp |= TRANS_DDI_MST_TRANSPORT_SELECT(crtc_state->cpu_transcoder); } else { temp |= TRANS_DDI_MODE_SELECT_DP_SST; temp |= DDI_PORT_WIDTH(crtc_state->lane_count); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 5894e46ef68b..55de9052add7 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -9657,6 +9657,9 @@ enum skl_power_gate { #define TRANS_DDI_EDP_INPUT_A_ONOFF (4 << 12) #define TRANS_DDI_EDP_INPUT_B_ONOFF (5 << 12) #define TRANS_DDI_EDP_INPUT_C_ONOFF (6 << 12) +#define TRANS_DDI_MST_TRANSPORT_SELECT_MASK REG_GENMASK(12, 10) +#define TRANS_DDI_MST_TRANSPORT_SELECT(trans) \ + REG_FIELD_PREP(TRANS_DDI_MST_TRANSPORT_SELECT_MASK, trans) #define TRANS_DDI_HDCP_SIGNALLING (1 << 9) #define TRANS_DDI_DP_VC_PAYLOAD_ALLOC (1 << 8) #define TRANS_DDI_HDMI_SCRAMBLER_CTS_ENABLE (1 << 7) From e50dbdbfd9fb2c9b151b0474205f57f2c267f9b6 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 29 Oct 2019 18:38:40 +0200 Subject: [PATCH 131/154] drm/i915/tgl: Add SFC instdone to error state On debugging media workload hangs, sfc instdone might prove useful in future. Be prepared. Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191029163841.5224-1-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/i915_gpu_error.c | 15 +++++++++++++++ drivers/gpu/drm/i915/i915_gpu_error.h | 1 + drivers/gpu/drm/i915/i915_reg.h | 3 +++ 3 files changed, 19 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 7672d4c4552f..8298e7ca2f71 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -740,6 +740,14 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, if (IS_GEN(m->i915, 12)) err_printf(m, "AUX_ERR_DBG: 0x%08x\n", error->aux_err); + if (INTEL_GEN(m->i915) >= 12) { + int i; + + for (i = 0; i < GEN12_SFC_DONE_MAX; i++) + err_printf(m, " SFC_DONE[%d]: 0x%08x\n", i, + error->sfc_done[i]); + } + for (ee = error->engine; ee; ee = ee->next) error_print_engine(m, ee, error->capture); @@ -1599,6 +1607,13 @@ static void capture_reg_state(struct i915_gpu_state *error) if (IS_GEN(i915, 12)) error->aux_err = intel_uncore_read(uncore, GEN12_AUX_ERR_DBG); + if (INTEL_GEN(i915) >= 12) { + for (i = 0; i < GEN12_SFC_DONE_MAX; i++) { + error->sfc_done[i] = + intel_uncore_read(uncore, GEN12_SFC_DONE(i)); + } + } + /* 4: Everything else */ if (INTEL_GEN(i915) >= 11) { error->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index c7f36be2a38e..0fede700e920 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -75,6 +75,7 @@ struct i915_gpu_state { u32 gfx_mode; u32 gtt_cache; u32 aux_err; /* gen12 */ + u32 sfc_done[GEN12_SFC_DONE_MAX]; /* gen12 */ u32 nfence; u64 fence[I915_MAX_NUM_FENCES]; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 55de9052add7..046b71bb36fc 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -413,6 +413,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN11_VECS_SFC_USAGE(engine) _MMIO((engine)->mmio_base + 0x2014) #define GEN11_VECS_SFC_USAGE_BIT (1 << 0) +#define GEN12_SFC_DONE(n) _MMIO(0x1cc00 + (n) * 0x100) +#define GEN12_SFC_DONE_MAX 4 + #define RING_PP_DIR_BASE(base) _MMIO((base) + 0x228) #define RING_PP_DIR_BASE_READ(base) _MMIO((base) + 0x518) #define RING_PP_DIR_DCLV(base) _MMIO((base) + 0x220) From 811bb3db25ace1aa6cfa35cfb347c542a32f5a13 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 29 Oct 2019 18:38:41 +0200 Subject: [PATCH 132/154] drm/i915/tgl: Add gam instdone This has been asked from us already. Prepare for the next time. Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191029163841.5224-2-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/i915_gpu_error.c | 4 ++++ drivers/gpu/drm/i915/i915_gpu_error.h | 1 + drivers/gpu/drm/i915/i915_reg.h | 1 + 3 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 8298e7ca2f71..e8b67f5e521d 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -746,6 +746,8 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, for (i = 0; i < GEN12_SFC_DONE_MAX; i++) err_printf(m, " SFC_DONE[%d]: 0x%08x\n", i, error->sfc_done[i]); + + err_printf(m, " GAM_DONE: 0x%08x\n", error->gam_done); } for (ee = error->engine; ee; ee = ee->next) @@ -1612,6 +1614,8 @@ static void capture_reg_state(struct i915_gpu_state *error) error->sfc_done[i] = intel_uncore_read(uncore, GEN12_SFC_DONE(i)); } + + error->gam_done = intel_uncore_read(uncore, GEN12_GAM_DONE); } /* 4: Everything else */ diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 0fede700e920..5d2c3372ff99 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -76,6 +76,7 @@ struct i915_gpu_state { u32 gtt_cache; u32 aux_err; /* gen12 */ u32 sfc_done[GEN12_SFC_DONE_MAX]; /* gen12 */ + u32 gam_done; /* gen12 */ u32 nfence; u64 fence[I915_MAX_NUM_FENCES]; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 046b71bb36fc..53c280c4e741 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2561,6 +2561,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define RING_FAULT_FAULT_TYPE(x) (((x) >> 1) & 0x3) #define RING_FAULT_VALID (1 << 0) #define DONE_REG _MMIO(0x40b0) +#define GEN12_GAM_DONE _MMIO(0xcf68) #define GEN8_PRIVATE_PAT_LO _MMIO(0x40e0) #define GEN8_PRIVATE_PAT_HI _MMIO(0x40e0 + 4) #define GEN10_PAT_INDEX(index) _MMIO(0x40e0 + (index) * 4) From a06375a9ac98a162ce2f7ef074fc60dee851bb18 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 30 Oct 2019 10:38:23 +0000 Subject: [PATCH 133/154] drm/i915/gt: Always track callers to intel_rps_mark_interactive() During startup, we may find ourselves in an interesting position where we haven't fully enabled RPS before the display starts trying to use it. This may lead to an imbalance in our "interactive" counter: <3>[ 4.813326] intel_rps_mark_interactive:652 GEM_BUG_ON(!rps->power.interactive) <4>[ 4.813396] ------------[ cut here ]------------ <2>[ 4.813398] kernel BUG at drivers/gpu/drm/i915/gt/intel_rps.c:652! <4>[ 4.813430] invalid opcode: 0000 [#1] PREEMPT SMP PTI <4>[ 4.813438] CPU: 1 PID: 18 Comm: kworker/1:0H Not tainted 5.4.0-rc5-CI-CI_DRM_7209+ #1 <4>[ 4.813447] Hardware name: /NUC7i5BNB, BIOS BNKBL357.86A.0054.2017.1025.1822 10/25/2017 <4>[ 4.813525] Workqueue: events_highpri intel_atomic_cleanup_work [i915] <4>[ 4.813589] RIP: 0010:intel_rps_mark_interactive+0xb3/0xc0 [i915] <4>[ 4.813597] Code: bc 3f de e0 48 8b 35 84 2e 24 00 49 c7 c0 f3 d4 4e a0 b9 8c 02 00 00 48 c7 c2 80 9c 48 a0 48 c7 c7 3e 73 34 a0 e8 8d 3b e5 e0 <0f> 0b 90 66 2e 0f 1f 84 00 00 00 00 00 80 bf c0 00 00 00 00 74 32 <4>[ 4.813616] RSP: 0018:ffffc900000efe00 EFLAGS: 00010286 <4>[ 4.813623] RAX: 000000000000000e RBX: ffff8882583cc7f0 RCX: 0000000000000000 <4>[ 4.813631] RDX: 0000000000000001 RSI: 0000000000000008 RDI: ffff888275969c00 <4>[ 4.813639] RBP: 0000000000000000 R08: 0000000000000008 R09: ffff888275ace000 <4>[ 4.813646] R10: ffffc900000efe00 R11: ffff888275969c00 R12: ffff8882583cc8d8 <4>[ 4.813654] R13: ffff888276abce00 R14: 0000000000000000 R15: ffff88825e878860 <4>[ 4.813662] FS: 0000000000000000(0000) GS:ffff888276a80000(0000) knlGS:0000000000000000 <4>[ 4.813672] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 4.813678] CR2: 00007f051d5ca0a8 CR3: 0000000262f48001 CR4: 00000000003606e0 <4>[ 4.813686] Call Trace: <4>[ 4.813755] intel_cleanup_plane_fb+0x4e/0x60 [i915] <4>[ 4.813764] drm_atomic_helper_cleanup_planes+0x4d/0x70 <4>[ 4.813833] intel_atomic_cleanup_work+0x15/0x80 [i915] <4>[ 4.813842] process_one_work+0x26a/0x620 <4>[ 4.813850] worker_thread+0x37/0x380 <4>[ 4.813857] ? process_one_work+0x620/0x620 <4>[ 4.813864] kthread+0x119/0x130 <4>[ 4.813870] ? kthread_park+0x80/0x80 <4>[ 4.813878] ret_from_fork+0x3a/0x50 <4>[ 4.813887] Modules linked in: i915(+) mei_hdcp x86_pkg_temp_thermal coretemp crct10dif_pclmul crc32_pclmul btusb btrtl btbcm btintel snd_hda_intel snd_intel_nhlt snd_hda_codec bluetooth snd_hwdep snd_hda_core ghash_clmulni_intel snd_pcm e1000e ecdh_generic ecc ptp pps_core mei_me mei prime_numbers <4>[ 4.813934] ---[ end trace c13289af88174ffc ]--- The solution employed is to not worry about RPS state and keep the tally of the interactive counter separate. When we do enable RPS, we will then take the display activity into account. Fixes: 3e7abf814193 ("drm/i915: Extract GT render power state management") Signed-off-by: Chris Wilson Cc: Andi Shyti Acked-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20191030103827.2413-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_gt.c | 2 ++ drivers/gpu/drm/i915/gt/intel_rps.c | 12 ++++++------ drivers/gpu/drm/i915/gt/intel_rps.h | 1 + 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index f2440113fdf8..898662c158ad 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -26,6 +26,8 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) intel_gt_init_reset(gt); intel_gt_init_requests(gt); intel_gt_pm_init_early(gt); + + intel_rps_init_early(>->rps); intel_uc_init_early(>->uc); } diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index f6de19456c54..20d6ee148afc 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -641,9 +641,6 @@ static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val) void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive) { - if (!rps->enabled) - return; - mutex_lock(&rps->power.mutex); if (interactive) { if (!rps->power.interactive++ && rps->active) @@ -1609,16 +1606,19 @@ void gen5_rps_irq_handler(struct intel_rps *rps) spin_unlock(&mchdev_lock); } -void intel_rps_init(struct intel_rps *rps) +void intel_rps_init_early(struct intel_rps *rps) { - struct drm_i915_private *i915 = rps_to_i915(rps); - mutex_init(&rps->lock); mutex_init(&rps->power.mutex); INIT_WORK(&rps->work, rps_work); atomic_set(&rps->num_waiters, 0); +} + +void intel_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); if (IS_CHERRYVIEW(i915)) chv_rps_init(rps); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index 997a4b4e0207..9518c66c9792 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -11,6 +11,7 @@ struct i915_request; +void intel_rps_init_early(struct intel_rps *rps); void intel_rps_init(struct intel_rps *rps); void intel_rps_driver_register(struct intel_rps *rps); From a8ddac7c9f06a12227a4f5febd1cbe0575a33179 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 28 Oct 2019 20:15:17 +0200 Subject: [PATCH 134/154] drm/i915: Avoid HPD poll detect triggering a new detect cycle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For the HPD interrupt functionality the HW depends on power wells in the display core domain to be on. Accordingly when enabling these power wells the HPD polling logic will force an HPD detection cycle to account for hotplug events that may have happened when such a power well was off. Thus a detect cycle started by polling could start a new detect cycle if a power well in the display core domain gets enabled during detect and stays enabled after detect completes. That in turn can lead to a detection cycle runaway. To prevent re-triggering a poll-detect cycle make sure we drop all power references we acquired during detect synchronously by the end of detect. This will let the poll-detect logic continue with polling (matching the off state of the corresponding power wells) instead of scheduling a new detection cycle. Fixes: 6cfe7ec02e85 ("drm/i915: Remove the unneeded AUX power ref from intel_dp_detect()") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=112125 Reported-and-tested-by: Val Kulkov Reported-and-tested-by: wangqr Cc: Val Kulkov Cc: wangqr Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191028181517.22602-1-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_crt.c | 7 +++++++ drivers/gpu/drm/i915/display/intel_dp.c | 6 ++++++ drivers/gpu/drm/i915/display/intel_hdmi.c | 6 ++++++ 3 files changed, 19 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index ff6126ea793c..834bf1d43bb8 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -864,6 +864,13 @@ load_detect: out: intel_display_power_put(dev_priv, intel_encoder->power_domain, wakeref); + + /* + * Make sure the refs for power wells enabled during detect are + * dropped to avoid a new detect cycle triggered by HPD polling. + */ + intel_display_power_flush_work(dev_priv); + return status; } diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 325b70bb140d..077cc25b7051 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5693,6 +5693,12 @@ out: if (status != connector_status_connected && !intel_dp->is_mst) intel_dp_unset_edid(intel_dp); + /* + * Make sure the refs for power wells enabled during detect are + * dropped to avoid a new detect cycle triggered by HPD polling. + */ + intel_display_power_flush_work(dev_priv); + return status; } diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index b54ccbb5aad5..ff71a4da3d00 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -2626,6 +2626,12 @@ out: if (status != connector_status_connected) cec_notifier_phys_addr_invalidate(intel_hdmi->cec_notifier); + /* + * Make sure the refs for power wells enabled during detect are + * dropped to avoid a new detect cycle triggered by HPD polling. + */ + intel_display_power_flush_work(dev_priv); + return status; } From 25f899544fb47490896ec92024d3c9dedc226528 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 29 Oct 2019 15:23:23 +0200 Subject: [PATCH 135/154] drm/i915: Nuke 'mode' argument to intel_get_load_detect_pipe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We always pass mode==NULL to intel_get_load_detect_pipe(). Remove the pointless function argument. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191029132323.18113-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_crt.c | 2 +- drivers/gpu/drm/i915/display/intel_display.c | 7 ++----- drivers/gpu/drm/i915/display/intel_display.h | 1 - drivers/gpu/drm/i915/display/intel_tv.c | 2 +- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index 834bf1d43bb8..bf576b1e56ae 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -844,7 +844,7 @@ load_detect: } /* for pre-945g platforms use load detect */ - ret = intel_get_load_detect_pipe(connector, NULL, &tmp, ctx); + ret = intel_get_load_detect_pipe(connector, &tmp, ctx); if (ret > 0) { if (intel_crt_detect_ddc(connector)) status = connector_status_connected; diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index e56a75c07043..1862546356fd 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -11328,7 +11328,6 @@ static int intel_modeset_disable_planes(struct drm_atomic_state *state, } int intel_get_load_detect_pipe(struct drm_connector *connector, - const struct drm_display_mode *mode, struct intel_load_detect_pipe *old, struct drm_modeset_acquire_ctx *ctx) { @@ -11435,10 +11434,8 @@ found: crtc_state->base.active = crtc_state->base.enable = true; - if (!mode) - mode = &load_detect_mode; - - ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode); + ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, + &load_detect_mode); if (ret) goto fail; diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index ca7ca2804d8b..355c50088589 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -509,7 +509,6 @@ void vlv_wait_port_ready(struct drm_i915_private *dev_priv, struct intel_digital_port *dport, unsigned int expected_mask); int intel_get_load_detect_pipe(struct drm_connector *connector, - const struct drm_display_mode *mode, struct intel_load_detect_pipe *old, struct drm_modeset_acquire_ctx *ctx); void intel_release_load_detect_pipe(struct drm_connector *connector, diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index 70726b481244..5556d8300a88 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -1701,7 +1701,7 @@ intel_tv_detect(struct drm_connector *connector, struct intel_load_detect_pipe tmp; int ret; - ret = intel_get_load_detect_pipe(connector, NULL, &tmp, ctx); + ret = intel_get_load_detect_pipe(connector, &tmp, ctx); if (ret < 0) return ret; From 4e380d080be46079cc4a9a578e757ea902d81d08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 29 Oct 2019 16:55:26 +0200 Subject: [PATCH 136/154] drm/i915: Stop frobbing crtc->base.mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The core no longer uses drm_crtc_state::mode with atomic drivers, so let's stop frobbing it in the driver. For the user mode readout we'll just use an on stack mode. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191029145526.10308-1-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/display/intel_display.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 1862546356fd..a23375621185 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -14124,9 +14124,6 @@ static void intel_pipe_fastset(const struct intel_crtc_state *old_crtc_state, struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - /* drm_atomic_helper_update_legacy_modeset_state might not be called. */ - crtc->base.mode = new_crtc_state->base.mode; - /* * Update pipe size and adjust fitter if needed: the reason for this is * that in compute_mode_changes we check the native mode (not the pfit @@ -17391,13 +17388,16 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) struct intel_plane *plane; int min_cdclk = 0; - memset(&crtc->base.mode, 0, sizeof(crtc->base.mode)); if (crtc_state->base.active) { - intel_mode_from_pipe_config(&crtc->base.mode, crtc_state); - crtc->base.mode.hdisplay = crtc_state->pipe_src_w; - crtc->base.mode.vdisplay = crtc_state->pipe_src_h; - intel_mode_from_pipe_config(&crtc_state->base.adjusted_mode, crtc_state); - WARN_ON(drm_atomic_set_mode_for_crtc(&crtc_state->base, &crtc->base.mode)); + struct drm_display_mode mode; + + intel_mode_from_pipe_config(&crtc_state->base.adjusted_mode, + crtc_state); + + mode = crtc_state->base.adjusted_mode; + mode.hdisplay = crtc_state->pipe_src_w; + mode.vdisplay = crtc_state->pipe_src_h; + WARN_ON(drm_atomic_set_mode_for_crtc(&crtc_state->base, &mode)); /* * The initial mode needs to be set in order to keep From 2d9c19044122b22624839fb5880885cbeb1832b7 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 25 Oct 2019 17:35:06 -0700 Subject: [PATCH 137/154] drm/i915/uc: define GuC and HuC binaries for TGL GuC 35.2.0 and HuC 7.0.3 are the first production releases for TGL. GuC 35.2 for Gen12 is interface-compatible with 33.0 on older Gens, because the differences are related to additional blocks/commands in the interface to support new Gen12 features. These parts of the interface will be added when the relevant features are enabled. v2: fix typos (Michal) Signed-off-by: Daniele Ceraolo Spurio Cc: Michal Wajdeczko Cc: Anusha Srivatsa Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20191026003507.21769-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 26beccff0908..66a30ab7044a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -37,8 +37,13 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, /* * List of required GuC and HuC binaries per-platform. * Must be ordered based on platform + revid, from newer to older. + * + * TGL 35.2 is interface-compatible with 33.0 for previous Gens. The deltas + * between 33.0 and 35.2 are only related to new additions to support new Gen12 + * features. */ #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ + fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 3)) \ fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \ fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \ fw_def(COFFEELAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \ From 2b0b27418a72f9d61562269a1b3072657ae937d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 2 Oct 2019 19:25:01 +0300 Subject: [PATCH 138/154] drm/i915: Simplify LVDS crtc_mask setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't need to special case PCH vs. gen4 when setting up the LVDS crtc_mask. Just claim pipes A|B|C work and intel_encoder_possible_crtcs() will drop out any crtc that doesn't exist. v2: Put the special case first to match what most other encoders do Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191002162505.30716-2-ville.syrjala@linux.intel.com Reviewed-by: Juha-Pekka Heikkila --- drivers/gpu/drm/i915/display/intel_lvds.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index 13841d7c455b..cf1cd2295b9b 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -899,12 +899,10 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = PORT_NONE; intel_encoder->cloneable = 0; - if (HAS_PCH_SPLIT(dev_priv)) - intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); - else if (IS_GEN(dev_priv, 4)) - intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B); - else + if (INTEL_GEN(dev_priv) < 4) intel_encoder->crtc_mask = BIT(PIPE_B); + else + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); drm_connector_helper_add(connector, &intel_lvds_connector_helper_funcs); connector->display_info.subpixel_order = SubPixelHorizontalRGB; From 981329ce3c3e13af6b9a11ec4948df45f5f4847e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 2 Oct 2019 19:25:02 +0300 Subject: [PATCH 139/154] drm/i915: s/crtc_mask/pipe_mask/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename the encoder->crtc_mask to encoder->pipe_mask to better reflect what it actually contains. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191002162505.30716-3-ville.syrjala@linux.intel.com Reviewed-by: Lucas De Marchi --- drivers/gpu/drm/i915/display/icl_dsi.c | 2 +- drivers/gpu/drm/i915/display/intel_crt.c | 4 ++-- drivers/gpu/drm/i915/display/intel_ddi.c | 2 +- drivers/gpu/drm/i915/display/intel_display.c | 2 +- drivers/gpu/drm/i915/display/intel_display_types.h | 4 ++-- drivers/gpu/drm/i915/display/intel_dp.c | 6 +++--- drivers/gpu/drm/i915/display/intel_dp_mst.c | 2 +- drivers/gpu/drm/i915/display/intel_dvo.c | 2 +- drivers/gpu/drm/i915/display/intel_hdmi.c | 6 +++--- drivers/gpu/drm/i915/display/intel_lvds.c | 4 ++-- drivers/gpu/drm/i915/display/intel_sdvo.c | 2 +- drivers/gpu/drm/i915/display/intel_tv.c | 2 +- drivers/gpu/drm/i915/display/vlv_dsi.c | 6 +++--- 13 files changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 6e398c33a524..4cea8ed2bd31 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1584,7 +1584,7 @@ void icl_dsi_init(struct drm_i915_private *dev_priv) encoder->get_hw_state = gen11_dsi_get_hw_state; encoder->type = INTEL_OUTPUT_DSI; encoder->cloneable = 0; - encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); + encoder->pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); encoder->power_domain = POWER_DOMAIN_PORT_DSI; encoder->get_power_domains = gen11_dsi_get_power_domains; diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index bf576b1e56ae..b704d2f9fae0 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -1001,9 +1001,9 @@ void intel_crt_init(struct drm_i915_private *dev_priv) crt->base.type = INTEL_OUTPUT_ANALOG; crt->base.cloneable = (1 << INTEL_OUTPUT_DVO) | (1 << INTEL_OUTPUT_HDMI); if (IS_I830(dev_priv)) - crt->base.crtc_mask = BIT(PIPE_A); + crt->base.pipe_mask = BIT(PIPE_A); else - crt->base.crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); + crt->base.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); if (IS_GEN(dev_priv, 2)) connector->interlace_allowed = 0; diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index fed7fc56dd92..e4f9868a4add 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -4741,7 +4741,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) intel_encoder->port = port; intel_encoder->cloneable = 0; for_each_pipe(dev_priv, pipe) - intel_encoder->crtc_mask |= BIT(pipe); + intel_encoder->pipe_mask |= BIT(pipe); if (INTEL_GEN(dev_priv) >= 11) intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index a23375621185..348ce0456696 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -15842,7 +15842,7 @@ static u32 intel_encoder_possible_crtcs(struct intel_encoder *encoder) u32 possible_crtcs = 0; for_each_intel_crtc(dev, crtc) { - if (encoder->crtc_mask & BIT(crtc->pipe)) + if (encoder->pipe_mask & BIT(crtc->pipe)) possible_crtcs |= drm_crtc_mask(&crtc->base); } diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 40184e823c84..4341bd66a418 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -128,7 +128,8 @@ struct intel_encoder { enum intel_output_type type; enum port port; - unsigned int cloneable; + u16 cloneable; + u8 pipe_mask; enum intel_hotplug_state (*hotplug)(struct intel_encoder *encoder, struct intel_connector *connector, bool irq_received); @@ -187,7 +188,6 @@ struct intel_encoder { * device interrupts are disabled. */ void (*suspend)(struct intel_encoder *); - int crtc_mask; enum hpd_pin hpd_pin; enum intel_display_power_domain power_domain; /* for communication with audio component; protected by av_mutex */ diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 077cc25b7051..ec3b37f23894 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -7571,11 +7571,11 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, intel_encoder->power_domain = intel_port_to_power_domain(port); if (IS_CHERRYVIEW(dev_priv)) { if (port == PORT_D) - intel_encoder->crtc_mask = BIT(PIPE_C); + intel_encoder->pipe_mask = BIT(PIPE_C); else - intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B); + intel_encoder->pipe_mask = BIT(PIPE_A) | BIT(PIPE_B); } else { - intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); + intel_encoder->pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); } intel_encoder->cloneable = 0; intel_encoder->port = port; diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index bbcab27644dc..17f3693fae7e 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -618,7 +618,7 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum intel_encoder->port = intel_dig_port->base.port; intel_encoder->cloneable = 0; for_each_pipe(dev_priv, pipe_iter) - intel_encoder->crtc_mask |= BIT(pipe_iter); + intel_encoder->pipe_mask |= BIT(pipe_iter); intel_encoder->compute_config = intel_dp_mst_compute_config; intel_encoder->disable = intel_mst_disable_dp; diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c index 9827f99491d1..a6c165d857e0 100644 --- a/drivers/gpu/drm/i915/display/intel_dvo.c +++ b/drivers/gpu/drm/i915/display/intel_dvo.c @@ -505,7 +505,7 @@ void intel_dvo_init(struct drm_i915_private *dev_priv) intel_encoder->type = INTEL_OUTPUT_DVO; intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = port; - intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B); + intel_encoder->pipe_mask = BIT(PIPE_A) | BIT(PIPE_B); switch (dvo->type) { case INTEL_DVO_CHIP_TMDS: diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index ff71a4da3d00..7605337105b9 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -3283,11 +3283,11 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv, intel_encoder->port = port; if (IS_CHERRYVIEW(dev_priv)) { if (port == PORT_D) - intel_encoder->crtc_mask = BIT(PIPE_C); + intel_encoder->pipe_mask = BIT(PIPE_C); else - intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B); + intel_encoder->pipe_mask = BIT(PIPE_A) | BIT(PIPE_B); } else { - intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); + intel_encoder->pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); } intel_encoder->cloneable = 1 << INTEL_OUTPUT_ANALOG; /* diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index cf1cd2295b9b..e358a6da5aaf 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -900,9 +900,9 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) intel_encoder->port = PORT_NONE; intel_encoder->cloneable = 0; if (INTEL_GEN(dev_priv) < 4) - intel_encoder->crtc_mask = BIT(PIPE_B); + intel_encoder->pipe_mask = BIT(PIPE_B); else - intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); + intel_encoder->pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); drm_connector_helper_add(connector, &intel_lvds_connector_helper_funcs); connector->display_info.subpixel_order = SubPixelHorizontalRGB; diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 47f5d87a938a..fed738effbc3 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -2921,7 +2921,7 @@ intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) bytes[0], bytes[1]); return false; } - intel_sdvo->base.crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); + intel_sdvo->base.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); return true; } diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index 5556d8300a88..a329cfcb501d 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -1947,7 +1947,7 @@ intel_tv_init(struct drm_i915_private *dev_priv) intel_encoder->type = INTEL_OUTPUT_TVOUT; intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = PORT_NONE; - intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B); + intel_encoder->pipe_mask = BIT(PIPE_A) | BIT(PIPE_B); intel_encoder->cloneable = 0; intel_tv->type = DRM_MODE_CONNECTOR_Unknown; diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 50064cde0724..11a7c77d64bc 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -1870,11 +1870,11 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) * port C. BXT isn't limited like this. */ if (IS_GEN9_LP(dev_priv)) - intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); + intel_encoder->pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); else if (port == PORT_A) - intel_encoder->crtc_mask = BIT(PIPE_A); + intel_encoder->pipe_mask = BIT(PIPE_A); else - intel_encoder->crtc_mask = BIT(PIPE_B); + intel_encoder->pipe_mask = BIT(PIPE_B); if (dev_priv->vbt.dsi.config->dual_link) intel_dsi->ports = BIT(PORT_A) | BIT(PORT_C); From 4d19505ed28e68973ee68771e63cf4e4c03c5c05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 2 Oct 2019 19:25:03 +0300 Subject: [PATCH 140/154] drm/i915: Allow ICL+ DSI on any pipe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are no longer any pipe<->DSI port limitations on icl+. Populate the pipe_mask accordingly. Cc: José Roberto de Souza Cc: Lucas De Marchi Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191002162505.30716-4-ville.syrjala@linux.intel.com Reviewed-by: Lucas De Marchi --- drivers/gpu/drm/i915/display/icl_dsi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 4cea8ed2bd31..99a75c611387 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1552,6 +1552,7 @@ void icl_dsi_init(struct drm_i915_private *dev_priv) struct drm_connector *connector; struct drm_display_mode *fixed_mode; enum port port; + enum pipe pipe; if (!intel_bios_is_dsi_present(dev_priv, &port)) return; @@ -1584,7 +1585,8 @@ void icl_dsi_init(struct drm_i915_private *dev_priv) encoder->get_hw_state = gen11_dsi_get_hw_state; encoder->type = INTEL_OUTPUT_DSI; encoder->cloneable = 0; - encoder->pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); + for_each_pipe(dev_priv, pipe) + encoder->pipe_mask |= BIT(pipe); encoder->power_domain = POWER_DOMAIN_PORT_DSI; encoder->get_power_domains = gen11_dsi_get_power_domains; From 34053ee18974102152128df22f5596fb74229a90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 2 Oct 2019 19:25:04 +0300 Subject: [PATCH 141/154] drm/i915: Simplify pipe_mask setup even further MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just set pipe_mask=~0 for the non-special cases where any pipe will do. intel_encoder_possible_crtcs() will anyway drop out anything that doesn't exist. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191002162505.30716-5-ville.syrjala@linux.intel.com Reviewed-by: Juha-Pekka Heikkila --- drivers/gpu/drm/i915/display/icl_dsi.c | 4 +--- drivers/gpu/drm/i915/display/intel_crt.c | 2 +- drivers/gpu/drm/i915/display/intel_ddi.c | 4 +--- drivers/gpu/drm/i915/display/intel_dp.c | 2 +- drivers/gpu/drm/i915/display/intel_dp_mst.c | 5 +---- drivers/gpu/drm/i915/display/intel_dvo.c | 2 +- drivers/gpu/drm/i915/display/intel_hdmi.c | 2 +- drivers/gpu/drm/i915/display/intel_lvds.c | 2 +- drivers/gpu/drm/i915/display/intel_sdvo.c | 2 +- drivers/gpu/drm/i915/display/intel_tv.c | 2 +- drivers/gpu/drm/i915/display/vlv_dsi.c | 2 +- 11 files changed, 11 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 99a75c611387..325df29b0447 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1552,7 +1552,6 @@ void icl_dsi_init(struct drm_i915_private *dev_priv) struct drm_connector *connector; struct drm_display_mode *fixed_mode; enum port port; - enum pipe pipe; if (!intel_bios_is_dsi_present(dev_priv, &port)) return; @@ -1585,8 +1584,7 @@ void icl_dsi_init(struct drm_i915_private *dev_priv) encoder->get_hw_state = gen11_dsi_get_hw_state; encoder->type = INTEL_OUTPUT_DSI; encoder->cloneable = 0; - for_each_pipe(dev_priv, pipe) - encoder->pipe_mask |= BIT(pipe); + encoder->pipe_mask = ~0; encoder->power_domain = POWER_DOMAIN_PORT_DSI; encoder->get_power_domains = gen11_dsi_get_power_domains; diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index b704d2f9fae0..39cc6d79dc85 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -1003,7 +1003,7 @@ void intel_crt_init(struct drm_i915_private *dev_priv) if (IS_I830(dev_priv)) crt->base.pipe_mask = BIT(PIPE_A); else - crt->base.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); + crt->base.pipe_mask = ~0; if (IS_GEN(dev_priv, 2)) connector->interlace_allowed = 0; diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index e4f9868a4add..b51f244ad7a5 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -4688,7 +4688,6 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) struct intel_encoder *intel_encoder; struct drm_encoder *encoder; bool init_hdmi, init_dp, init_lspcon = false; - enum pipe pipe; enum phy phy = intel_port_to_phy(dev_priv, port); init_hdmi = port_info->supports_dvi || port_info->supports_hdmi; @@ -4740,8 +4739,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) intel_encoder->power_domain = intel_port_to_power_domain(port); intel_encoder->port = port; intel_encoder->cloneable = 0; - for_each_pipe(dev_priv, pipe) - intel_encoder->pipe_mask |= BIT(pipe); + intel_encoder->pipe_mask = ~0; if (INTEL_GEN(dev_priv) >= 11) intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index ec3b37f23894..c80621ad138b 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -7575,7 +7575,7 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, else intel_encoder->pipe_mask = BIT(PIPE_A) | BIT(PIPE_B); } else { - intel_encoder->pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); + intel_encoder->pipe_mask = ~0; } intel_encoder->cloneable = 0; intel_encoder->port = port; diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 17f3693fae7e..7370c9dfa8a1 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -598,8 +598,6 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum struct intel_dp_mst_encoder *intel_mst; struct intel_encoder *intel_encoder; struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - enum pipe pipe_iter; intel_mst = kzalloc(sizeof(*intel_mst), GFP_KERNEL); @@ -617,8 +615,7 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum intel_encoder->power_domain = intel_dig_port->base.power_domain; intel_encoder->port = intel_dig_port->base.port; intel_encoder->cloneable = 0; - for_each_pipe(dev_priv, pipe_iter) - intel_encoder->pipe_mask |= BIT(pipe_iter); + intel_encoder->pipe_mask = ~0; intel_encoder->compute_config = intel_dp_mst_compute_config; intel_encoder->disable = intel_mst_disable_dp; diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c index a6c165d857e0..bcfbcb743e7d 100644 --- a/drivers/gpu/drm/i915/display/intel_dvo.c +++ b/drivers/gpu/drm/i915/display/intel_dvo.c @@ -505,7 +505,7 @@ void intel_dvo_init(struct drm_i915_private *dev_priv) intel_encoder->type = INTEL_OUTPUT_DVO; intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = port; - intel_encoder->pipe_mask = BIT(PIPE_A) | BIT(PIPE_B); + intel_encoder->pipe_mask = ~0; switch (dvo->type) { case INTEL_DVO_CHIP_TMDS: diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 7605337105b9..f6f5312205c4 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -3287,7 +3287,7 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv, else intel_encoder->pipe_mask = BIT(PIPE_A) | BIT(PIPE_B); } else { - intel_encoder->pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); + intel_encoder->pipe_mask = ~0; } intel_encoder->cloneable = 1 << INTEL_OUTPUT_ANALOG; /* diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index e358a6da5aaf..b1bc78623647 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -902,7 +902,7 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) < 4) intel_encoder->pipe_mask = BIT(PIPE_B); else - intel_encoder->pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); + intel_encoder->pipe_mask = ~0; drm_connector_helper_add(connector, &intel_lvds_connector_helper_funcs); connector->display_info.subpixel_order = SubPixelHorizontalRGB; diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index fed738effbc3..5b7f4baf7348 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -2921,7 +2921,7 @@ intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) bytes[0], bytes[1]); return false; } - intel_sdvo->base.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); + intel_sdvo->base.pipe_mask = ~0; return true; } diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index a329cfcb501d..9983fadf6c28 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -1947,7 +1947,7 @@ intel_tv_init(struct drm_i915_private *dev_priv) intel_encoder->type = INTEL_OUTPUT_TVOUT; intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = PORT_NONE; - intel_encoder->pipe_mask = BIT(PIPE_A) | BIT(PIPE_B); + intel_encoder->pipe_mask = ~0; intel_encoder->cloneable = 0; intel_tv->type = DRM_MODE_CONNECTOR_Unknown; diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 11a7c77d64bc..0ca49b1604c6 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -1870,7 +1870,7 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) * port C. BXT isn't limited like this. */ if (IS_GEN9_LP(dev_priv)) - intel_encoder->pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); + intel_encoder->pipe_mask = ~0; else if (port == PORT_A) intel_encoder->pipe_mask = BIT(PIPE_A); else From 29b27657dbae8d4c4bdc6bfbf6dd8c69f648e075 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 2 Oct 2019 19:25:05 +0300 Subject: [PATCH 142/154] drm/i915/mst: Document the userspace fail with possible_crtcs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To avoid accidentally breaking things in the future add a comment explaining why we misconfigure the pipe_mask. Also toss in a TODO for investigating a single encoder approach as opposed to the encoder-per-pipe approach. v2: Drop a bogus TODO comment Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191002162505.30716-6-ville.syrjala@linux.intel.com Reviewed-by: Juha-Pekka Heikkila --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 7370c9dfa8a1..715b7109c388 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -615,6 +615,14 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum intel_encoder->power_domain = intel_dig_port->base.power_domain; intel_encoder->port = intel_dig_port->base.port; intel_encoder->cloneable = 0; + /* + * This is wrong, but broken userspace uses the intersection + * of possible_crtcs of all the encoders of a given connector + * to figure out which crtcs can drive said connector. What + * should be used instead is the union of possible_crtcs. + * To keep such userspace functioning we must misconfigure + * this to make sure the intersection is not empty :( + */ intel_encoder->pipe_mask = ~0; intel_encoder->compute_config = intel_dp_mst_compute_config; From 1db257c55f0c9f54a429eb603ffa30bd8b0e06e6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 31 Oct 2019 09:42:59 +0000 Subject: [PATCH 143/154] drm/i915/selftests: Assert that the idle_pulse is sent When checking the heartbeat pulse, we expect it to have been sent by the time we have slept. We can verify this by checking the engine serial number to see if that matches the predicted pulse serial. It will always be true if, and only if, the pulse was sent by itself (as designed by the test). Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20191031094259.23028-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index 155c508024df..8453bf4a8b8e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -97,6 +97,8 @@ static int __live_idle_pulse(struct intel_engine_cs *engine, goto out; } + GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial); + pulse_unlock_wait(p); /* synchronize with the retirement callback */ if (!i915_active_is_idle(&p->active)) { @@ -337,7 +339,7 @@ int intel_heartbeat_live_selftests(struct drm_i915_private *i915) saved_hangcheck = i915_modparams.enable_hangcheck; i915_modparams.enable_hangcheck = INT_MAX; - err = intel_gt_live_subtests(tests, &i915->gt); + err = intel_gt_live_subtests(tests, &i915->gt); i915_modparams.enable_hangcheck = saved_hangcheck; return err; From 164a4128869ffcef33dfed82b641471b14e48b5d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 31 Oct 2019 10:11:16 +0000 Subject: [PATCH 144/154] drm/i915/selftests: Pretty print the i915_active If the idle_pulse fails to flush the i915_active, dump the tree to see if that has any clues. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20191031101116.19894-1-chris@chris-wilson.co.uk --- .../drm/i915/gt/selftest_engine_heartbeat.c | 4 ++ drivers/gpu/drm/i915/i915_active.h | 2 + drivers/gpu/drm/i915/selftests/i915_active.c | 45 +++++++++++++++++++ 3 files changed, 51 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index 8453bf4a8b8e..e864406bd2d9 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -102,8 +102,12 @@ static int __live_idle_pulse(struct intel_engine_cs *engine, pulse_unlock_wait(p); /* synchronize with the retirement callback */ if (!i915_active_is_idle(&p->active)) { + struct drm_printer m = drm_err_printer("pulse"); + pr_err("%s: heartbeat pulse did not flush idle tasks\n", engine->name); + i915_active_print(&p->active, &m); + err = -EINVAL; goto out; } diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index 4f52fe6146d2..44859356ce97 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -214,4 +214,6 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, void i915_active_acquire_barrier(struct i915_active *ref); void i915_request_add_active_barriers(struct i915_request *rq); +void i915_active_print(struct i915_active *ref, struct drm_printer *m); + #endif /* _I915_ACTIVE_H_ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c index 96513a7d4739..260b0ee5d1e3 100644 --- a/drivers/gpu/drm/i915/selftests/i915_active.c +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -205,3 +205,48 @@ int i915_active_live_selftests(struct drm_i915_private *i915) return i915_subtests(tests, i915); } + +static struct intel_engine_cs *node_to_barrier(struct active_node *it) +{ + struct intel_engine_cs *engine; + + if (!is_barrier(&it->base)) + return NULL; + + engine = __barrier_to_engine(it); + smp_rmb(); /* serialise with add_active_barriers */ + if (!is_barrier(&it->base)) + return NULL; + + return engine; +} + +void i915_active_print(struct i915_active *ref, struct drm_printer *m) +{ + drm_printf(m, "active %pS:%pS\n", ref->active, ref->retire); + drm_printf(m, "\tcount: %d\n", atomic_read(&ref->count)); + drm_printf(m, "\tpreallocated barriers? %s\n", + yesno(!llist_empty(&ref->preallocated_barriers))); + + if (i915_active_acquire_if_busy(ref)) { + struct active_node *it, *n; + + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { + struct intel_engine_cs *engine; + + engine = node_to_barrier(it); + if (engine) { + drm_printf(m, "\tbarrier: %s\n", engine->name); + continue; + } + + if (i915_active_fence_isset(&it->base)) { + drm_printf(m, + "\ttimeline: %llx\n", it->timeline); + continue; + } + } + + i915_active_release(ref); + } +} From dde01d943559f6b853d97a2744433d9ad1b12ace Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 30 Oct 2019 19:21:49 +0000 Subject: [PATCH 145/154] drm/i915: Split detaching and removing the vma In order to keep the assert_bind_count() valid, we need to hold the vma page reference until after we drop the bind count. However, we must also keep the drm_mm_remove_node() as the last action of i915_vma_unbind() so that it serialises with the unlocked check inside i915_vma_destroy(). So we need to split up i915_vma_remove() so that we order the detach, drop pages and remove as required during unbind. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=112067 Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191030192159.18404-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_vma.c | 37 +++++++++++++++------------------ 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index d733bcf262f0..e5512f26e20a 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -700,41 +700,35 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color)); - list_add_tail(&vma->vm_link, &vma->vm->bound_list); - if (vma->obj) { - atomic_inc(&vma->obj->bind_count); - assert_bind_count(vma->obj); + struct drm_i915_gem_object *obj = vma->obj; + + atomic_inc(&obj->bind_count); + assert_bind_count(obj); } + list_add_tail(&vma->vm_link, &vma->vm->bound_list); return 0; } static void -i915_vma_remove(struct i915_vma *vma) +i915_vma_detach(struct i915_vma *vma) { GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); - list_del(&vma->vm_link); - /* - * Since the unbound list is global, only move to that list if - * no more VMAs exist. + * And finally now the object is completely decoupled from this + * vma, we can drop its hold on the backing storage and allow + * it to be reaped by the shrinker. */ + list_del(&vma->vm_link); if (vma->obj) { struct drm_i915_gem_object *obj = vma->obj; - /* - * And finally now the object is completely decoupled from this - * vma, we can drop its hold on the backing storage and allow - * it to be reaped by the shrinker. - */ - atomic_dec(&obj->bind_count); assert_bind_count(obj); + atomic_dec(&obj->bind_count); } - - drm_mm_remove_node(&vma->node); } static bool try_qad_pin(struct i915_vma *vma, unsigned int flags) @@ -929,8 +923,10 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); err_remove: - if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK)) - i915_vma_remove(vma); + if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK)) { + i915_vma_detach(vma); + drm_mm_remove_node(&vma->node); + } err_active: i915_active_release(&vma->active); err_unlock: @@ -1187,9 +1183,10 @@ int __i915_vma_unbind(struct i915_vma *vma) } atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR), &vma->flags); + i915_vma_detach(vma); vma_unbind_pages(vma); - i915_vma_remove(vma); + drm_mm_remove_node(&vma->node); /* pairs with i915_vma_destroy() */ return 0; } From 9f37940756b1b7f56bcfb53de17b78f55fb18e48 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 30 Oct 2019 18:30:39 -0700 Subject: [PATCH 146/154] drm/i915: drop lrc header page Recent GuC binaries (including all the ones we're currently using) don't require this shared area anymore, having moved the relevant entries into the stage pool instead. i915 itself doesn't write anything into it either, so we can safely drop it. Signed-off-by: Daniele Ceraolo Spurio Cc: Chris Wilson Cc: Michal Wajdeczko Cc: John Harrison Cc: Matthew Brost Acked-by: Chris Wilson Reviewed-by: Matthew Brost Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191031013040.25803-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/intel_lrc.c | 22 +++------------------ drivers/gpu/drm/i915/gt/intel_lrc.h | 23 ++-------------------- drivers/gpu/drm/i915/gt/selftest_context.c | 3 --- drivers/gpu/drm/i915/gvt/scheduler.c | 4 ++-- 4 files changed, 7 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 6fb3def5ba16..51aef2a233cb 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -471,8 +471,7 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) if (IS_GEN(engine->i915, 8)) desc |= GEN8_CTX_L3LLC_COHERENT; - desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE; - /* bits 12-31 */ + desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */ /* * The following 32bits are copied into the OA reports (dword 2). * Consider updating oa_get_render_ctx_id in i915_perf.c when changing @@ -2316,7 +2315,6 @@ set_redzone(void *vaddr, const struct intel_engine_cs *engine) if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) return; - vaddr += LRC_HEADER_PAGES * PAGE_SIZE; vaddr += engine->context_size; memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE); @@ -2328,7 +2326,6 @@ check_redzone(const void *vaddr, const struct intel_engine_cs *engine) if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) return; - vaddr += LRC_HEADER_PAGES * PAGE_SIZE; vaddr += engine->context_size; if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE)) @@ -3995,12 +3992,6 @@ populate_lr_context(struct intel_context *ce, set_redzone(vaddr, engine); if (engine->default_state) { - /* - * We only want to copy over the template context state; - * skipping over the headers reserved for GuC communication, - * leaving those as zero. - */ - const unsigned long start = LRC_HEADER_PAGES * PAGE_SIZE; void *defaults; defaults = i915_gem_object_pin_map(engine->default_state, @@ -4010,7 +4001,7 @@ populate_lr_context(struct intel_context *ce, goto err_unpin_ctx; } - memcpy(vaddr + start, defaults + start, engine->context_size); + memcpy(vaddr, defaults, engine->context_size); i915_gem_object_unpin_map(engine->default_state); inhibit = false; } @@ -4025,9 +4016,7 @@ populate_lr_context(struct intel_context *ce, ret = 0; err_unpin_ctx: - __i915_gem_object_flush_map(ctx_obj, - LRC_HEADER_PAGES * PAGE_SIZE, - engine->context_size); + __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size); i915_gem_object_unpin_map(ctx_obj); return ret; } @@ -4044,11 +4033,6 @@ static int __execlists_context_alloc(struct intel_context *ce, GEM_BUG_ON(ce->state); context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); - /* - * Before the actual start of the context image, we insert a few pages - * for our own use and for sharing with the GuC. - */ - context_size += LRC_HEADER_PAGES * PAGE_SIZE; if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) context_size += I915_GTT_PAGE_SIZE; /* for redzone */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index faa2d56c279b..04511d8ebdc1 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -86,31 +86,12 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine); int intel_execlists_submission_init(struct intel_engine_cs *engine); /* Logical Ring Contexts */ - -/* - * We allocate a header at the start of the context image for our own - * use, therefore the actual location of the logical state is offset - * from the start of the VMA. The layout is - * - * | [guc] | [hwsp] [logical state] | - * |<- our header ->|<- context image ->| - * - */ -/* The first page is used for sharing data with the GuC */ -#define LRC_GUCSHR_PN (0) -#define LRC_GUCSHR_SZ (1) /* At the start of the context image is its per-process HWS page */ -#define LRC_PPHWSP_PN (LRC_GUCSHR_PN + LRC_GUCSHR_SZ) +#define LRC_PPHWSP_PN (0) #define LRC_PPHWSP_SZ (1) -/* Finally we have the logical state for the context */ +/* After the PPHWSP we have the logical state for the context */ #define LRC_STATE_PN (LRC_PPHWSP_PN + LRC_PPHWSP_SZ) -/* - * Currently we include the PPHWSP in __intel_engine_context_size() so - * the size of the header is synonymous with the start of the PPHWSP. - */ -#define LRC_HEADER_PAGES LRC_PPHWSP_PN - /* Space within PPHWSP reserved to be used as scratch */ #define LRC_PPHWSP_SCRATCH 0x34 #define LRC_PPHWSP_SCRATCH_ADDR (LRC_PPHWSP_SCRATCH * sizeof(u32)) diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index f63a26a3e620..bc720defc6b8 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -103,9 +103,6 @@ static int __live_context_size(struct intel_engine_cs *engine, * * TLDR; this overlaps with the execlists redzone. */ - if (HAS_EXECLISTS(engine->i915)) - vaddr += LRC_HEADER_PAGES * PAGE_SIZE; - vaddr += engine->context_size - I915_GTT_PAGE_SIZE; memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 377811f8853f..5b2a7d072ec9 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -195,7 +195,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) return -EFAULT; } - page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i); + page = i915_gem_object_get_page(ctx_obj, i); dst = kmap(page); intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst, I915_GTT_PAGE_SIZE); @@ -835,7 +835,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) return; } - page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i); + page = i915_gem_object_get_page(ctx_obj, i); src = kmap(page); intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src, I915_GTT_PAGE_SIZE); From 034982cff1a12f9030f2f25dfbbedf81c57c08d2 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 30 Oct 2019 18:30:40 -0700 Subject: [PATCH 147/154] drm/i915/guc: drop guc shared area Recent GuC doesn't require the shared area. We still have one user in i915 (engine reset via guc) because we haven't updated the command to match the current guc submission flow [1]. Since the flow in guc is about to change again, just disable the command for now and add a note that we'll implement it as part of the new flow. [1] https://patchwork.freedesktop.org/patch/295038/ Signed-off-by: Daniele Ceraolo Spurio Cc: Michal Wajdeczko Cc: John Harrison Cc: Matthew Brost Cc: Fernando Pacheco Acked-by: Chris Wilson Reviewed-by: Matthew Brost Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191031013040.25803-2-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc.c | 50 ++------------------------ drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2 -- 2 files changed, 3 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index f12959182182..019ae6486e8d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -192,32 +192,6 @@ void intel_guc_init_early(struct intel_guc *guc) } } -static int guc_shared_data_create(struct intel_guc *guc) -{ - struct i915_vma *vma; - void *vaddr; - - vma = intel_guc_allocate_vma(guc, PAGE_SIZE); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - i915_vma_unpin_and_release(&vma, 0); - return PTR_ERR(vaddr); - } - - guc->shared_data = vma; - guc->shared_data_vaddr = vaddr; - - return 0; -} - -static void guc_shared_data_destroy(struct intel_guc *guc) -{ - i915_vma_unpin_and_release(&guc->shared_data, I915_VMA_RELEASE_MAP); -} - static u32 guc_ctl_debug_flags(struct intel_guc *guc) { u32 level = intel_guc_log_get_level(&guc->log); @@ -364,14 +338,9 @@ int intel_guc_init(struct intel_guc *guc) if (ret) goto err_fetch; - ret = guc_shared_data_create(guc); - if (ret) - goto err_fw; - GEM_BUG_ON(!guc->shared_data); - ret = intel_guc_log_create(&guc->log); if (ret) - goto err_shared; + goto err_fw; ret = intel_guc_ads_create(guc); if (ret) @@ -406,8 +375,6 @@ err_ads: intel_guc_ads_destroy(guc); err_log: intel_guc_log_destroy(&guc->log); -err_shared: - guc_shared_data_destroy(guc); err_fw: intel_uc_fw_fini(&guc->fw); err_fetch: @@ -432,7 +399,6 @@ void intel_guc_fini(struct intel_guc *guc) intel_guc_ads_destroy(guc); intel_guc_log_destroy(&guc->log); - guc_shared_data_destroy(guc); intel_uc_fw_fini(&guc->fw); intel_uc_fw_cleanup_fetch(&guc->fw); } @@ -628,19 +594,9 @@ int intel_guc_suspend(struct intel_guc *guc) int intel_guc_reset_engine(struct intel_guc *guc, struct intel_engine_cs *engine) { - u32 data[7]; + /* XXX: to be implemented with submission interface rework */ - GEM_BUG_ON(!guc->execbuf_client); - - data[0] = INTEL_GUC_ACTION_REQUEST_ENGINE_RESET; - data[1] = engine->guc_id; - data[2] = 0; - data[3] = 0; - data[4] = 0; - data[5] = guc->execbuf_client->stage_id; - data[6] = intel_guc_ggtt_offset(guc, guc->shared_data); - - return intel_guc_send(guc, data, ARRAY_SIZE(data)); + return -ENODEV; } /** diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 2b2f046d3cc3..e6400204a2bd 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -47,8 +47,6 @@ struct intel_guc { struct i915_vma *stage_desc_pool; void *stage_desc_pool_vaddr; struct ida stage_ids; - struct i915_vma *shared_data; - void *shared_data_vaddr; struct intel_guc_client *execbuf_client; From 49748264826ff4cc7f0ebbdd6b0d1a36b13b1cee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Mon, 28 Oct 2019 18:10:14 -0700 Subject: [PATCH 148/154] drm/i915/dp: Do not switch aux to TBT mode for non-TC ports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Non-TC ports always have tc_mode == TC_PORT_TBT_ALT so it was switching aux to TBT mode for all combo-phy ports, happily this did not caused any issue but is better follow BSpec. Also this is reserved bit before ICL. Cc: Imre Deak Signed-off-by: José Roberto de Souza Fixes: e9b7e1422d40 ("drm/i915: Sanitize the terminology used for TypeC port modes") Reviewed-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20191029011014.286885-1-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index c80621ad138b..d958e789ab96 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1293,6 +1293,9 @@ static u32 skl_get_aux_send_ctl(struct intel_dp *intel_dp, u32 unused) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); + struct drm_i915_private *i915 = + to_i915(intel_dig_port->base.base.dev); + enum phy phy = intel_port_to_phy(i915, intel_dig_port->base.port); u32 ret; ret = DP_AUX_CH_CTL_SEND_BUSY | @@ -1305,7 +1308,8 @@ static u32 skl_get_aux_send_ctl(struct intel_dp *intel_dp, DP_AUX_CH_CTL_FW_SYNC_PULSE_SKL(32) | DP_AUX_CH_CTL_SYNC_PULSE_SKL(32); - if (intel_dig_port->tc_mode == TC_PORT_TBT_ALT) + if (intel_phy_is_tc(i915, phy) && + intel_dig_port->tc_mode == TC_PORT_TBT_ALT) ret |= DP_AUX_CH_CTL_TBT_IO; return ret; From 1629224324b6cab6f7f96e839c9b57b74cfd8349 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 30 Oct 2019 17:33:20 +0000 Subject: [PATCH 149/154] drm/i915/lmem: add the fake lmem region Intended for upstream testing so that we can still exercise the LMEM plumbing and !i915_ggtt_has_aperture paths. Smoke tested on Skull Canyon device. This works by allocating an intel_memory_region for a reserved portion of system memory, which we treat like LMEM. For the LMEMBAR we steal the aperture and 1:1 it map to the stolen region. To enable simply set the i915 modparam fake_lmem_start= on the kernel cmdline with the start of reserved region(see memmap=). The size of the region we can use is determined by the size of the mappable aperture, so the size of reserved region should be >= mappable_end. For now we only enable for the selftests. Depends on CONFIG_DRM_I915_UNSTABLE being enabled. eg. memmap=2G$16G i915.fake_lmem_start=0x400000000 v2: make fake_lmem_start an i915 modparam Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Abdiel Janulgue Cc: Arkadiusz Hiler Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191030173320.8850-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/Kconfig.unstable | 8 ++ drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 3 + drivers/gpu/drm/i915/i915_drv.c | 15 ++++ drivers/gpu/drm/i915/i915_params.c | 7 ++ drivers/gpu/drm/i915/i915_params.h | 1 + drivers/gpu/drm/i915/intel_memory_region.c | 3 + drivers/gpu/drm/i915/intel_memory_region.h | 6 ++ drivers/gpu/drm/i915/intel_region_lmem.c | 92 ++++++++++++++++++++++ drivers/gpu/drm/i915/intel_region_lmem.h | 5 ++ 9 files changed, 140 insertions(+) diff --git a/drivers/gpu/drm/i915/Kconfig.unstable b/drivers/gpu/drm/i915/Kconfig.unstable index cf151a297ed7..0c2276155c2b 100644 --- a/drivers/gpu/drm/i915/Kconfig.unstable +++ b/drivers/gpu/drm/i915/Kconfig.unstable @@ -19,3 +19,11 @@ config DRM_I915_UNSTABLE Recommended for driver developers _only_. If in the slightest bit of doubt, say "N". + +config DRM_I915_UNSTABLE_FAKE_LMEM + bool "Enable the experimental fake lmem" + depends on DRM_I915_UNSTABLE + default n + help + Convert some system memory into a fake local memory region for + testing. diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index 926f6c940e0d..0e2bf6b7e143 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -24,6 +24,7 @@ i915_gem_object_lmem_io_map_page(struct drm_i915_gem_object *obj, resource_size_t offset; offset = i915_gem_object_get_dma_address(obj, n); + offset -= obj->mm.region->region.start; return io_mapping_map_wc(&obj->mm.region->iomap, offset, PAGE_SIZE); } @@ -35,6 +36,7 @@ i915_gem_object_lmem_io_map_page_atomic(struct drm_i915_gem_object *obj, resource_size_t offset; offset = i915_gem_object_get_dma_address(obj, n); + offset -= obj->mm.region->region.start; return io_mapping_map_atomic_wc(&obj->mm.region->iomap, offset); } @@ -49,6 +51,7 @@ i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, GEM_BUG_ON(!i915_gem_object_is_contiguous(obj)); offset = i915_gem_object_get_dma_address(obj, n); + offset -= obj->mm.region->region.start; return io_mapping_map_wc(&obj->mm.region->iomap, offset, size); } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 21273b516dbe..480e2054f628 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1483,6 +1483,21 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!i915_modparams.nuclear_pageflip && match_info->gen < 5) dev_priv->drm.driver_features &= ~DRIVER_ATOMIC; + /* + * Check if we support fake LMEM -- for now we only unleash this for + * the live selftests(test-and-exit). + */ + if (IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM)) { + if (INTEL_GEN(dev_priv) >= 9 && i915_selftest.live < 0 && + i915_modparams.fake_lmem_start) { + mkwrite_device_info(dev_priv)->memory_regions = + REGION_SMEM | REGION_LMEM | REGION_STOLEN; + mkwrite_device_info(dev_priv)->is_dgfx = true; + GEM_BUG_ON(!HAS_LMEM(dev_priv)); + GEM_BUG_ON(!IS_DGFX(dev_priv)); + } + } + ret = pci_enable_device(pdev); if (ret) goto out_fini; diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 3fa79adb2c1c..1dd1f3652795 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -179,6 +179,11 @@ i915_param_named(enable_gvt, bool, 0400, "Enable support for Intel GVT-g graphics virtualization host support(default:false)"); #endif +#if IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM) +i915_param_named_unsafe(fake_lmem_start, ulong, 0600, + "Fake LMEM start offset (default: 0)"); +#endif + static __always_inline void _print_param(struct drm_printer *p, const char *name, const char *type, @@ -190,6 +195,8 @@ static __always_inline void _print_param(struct drm_printer *p, drm_printf(p, "i915.%s=%d\n", name, *(const int *)x); else if (!__builtin_strcmp(type, "unsigned int")) drm_printf(p, "i915.%s=%u\n", name, *(const unsigned int *)x); + else if (!__builtin_strcmp(type, "unsigned long")) + drm_printf(p, "i915.%s=%lu\n", name, *(const unsigned long *)x); else if (!__builtin_strcmp(type, "char *")) drm_printf(p, "i915.%s=%s\n", name, *(const char **)x); else diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index a276317ad74b..31b88f297fbc 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -66,6 +66,7 @@ struct drm_printer; param(int, fastboot, -1) \ param(int, enable_dpcd_backlight, 0) \ param(char *, force_probe, CONFIG_DRM_I915_FORCE_PROBE) \ + param(unsigned long, fake_lmem_start, 0) \ /* leave bools at the end to not create holes */ \ param(bool, alpha_support, IS_ENABLED(CONFIG_DRM_I915_ALPHA_SUPPORT)) \ param(bool, enable_hangcheck, true) \ diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index a60f77ff58d4..baaeaecc64af 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -228,6 +228,9 @@ int intel_memory_regions_hw_probe(struct drm_i915_private *i915) case INTEL_MEMORY_STOLEN: mem = i915_gem_stolen_setup(i915); break; + case INTEL_MEMORY_LOCAL: + mem = intel_setup_fake_lmem(i915); + break; } if (IS_ERR(mem)) { diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 19920c256ede..238722009677 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -10,6 +10,7 @@ #include #include #include +#include #include "i915_buddy.h" @@ -71,6 +72,9 @@ struct intel_memory_region { struct io_mapping iomap; struct resource region; + /* For fake LMEM */ + struct drm_mm_node fake_mappable; + struct i915_buddy_mm mm; struct mutex mm_lock; @@ -83,6 +87,8 @@ struct intel_memory_region { unsigned int instance; unsigned int id; + dma_addr_t remap_addr; + struct { struct mutex lock; /* Protects access to objects */ struct list_head list; diff --git a/drivers/gpu/drm/i915/intel_region_lmem.c b/drivers/gpu/drm/i915/intel_region_lmem.c index 9a351af45ce6..583118095635 100644 --- a/drivers/gpu/drm/i915/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/intel_region_lmem.c @@ -9,9 +9,62 @@ #include "gem/i915_gem_region.h" #include "intel_region_lmem.h" +static int init_fake_lmem_bar(struct intel_memory_region *mem) +{ + struct drm_i915_private *i915 = mem->i915; + struct i915_ggtt *ggtt = &i915->ggtt; + unsigned long n; + int ret; + + /* We want to 1:1 map the mappable aperture to our reserved region */ + + mem->fake_mappable.start = 0; + mem->fake_mappable.size = resource_size(&mem->region); + mem->fake_mappable.color = I915_COLOR_UNEVICTABLE; + + ret = drm_mm_reserve_node(&ggtt->vm.mm, &mem->fake_mappable); + if (ret) + return ret; + + mem->remap_addr = dma_map_resource(&i915->drm.pdev->dev, + mem->region.start, + mem->fake_mappable.size, + PCI_DMA_BIDIRECTIONAL, + DMA_ATTR_FORCE_CONTIGUOUS); + if (dma_mapping_error(&i915->drm.pdev->dev, mem->remap_addr)) { + drm_mm_remove_node(&mem->fake_mappable); + return -EINVAL; + } + + for (n = 0; n < mem->fake_mappable.size >> PAGE_SHIFT; ++n) { + ggtt->vm.insert_page(&ggtt->vm, + mem->remap_addr + (n << PAGE_SHIFT), + n << PAGE_SHIFT, + I915_CACHE_NONE, 0); + } + + mem->region = (struct resource)DEFINE_RES_MEM(mem->remap_addr, + mem->fake_mappable.size); + + return 0; +} + +static void release_fake_lmem_bar(struct intel_memory_region *mem) +{ + if (drm_mm_node_allocated(&mem->fake_mappable)) + drm_mm_remove_node(&mem->fake_mappable); + + dma_unmap_resource(&mem->i915->drm.pdev->dev, + mem->remap_addr, + mem->fake_mappable.size, + PCI_DMA_BIDIRECTIONAL, + DMA_ATTR_FORCE_CONTIGUOUS); +} + static void region_lmem_release(struct intel_memory_region *mem) { + release_fake_lmem_bar(mem); io_mapping_fini(&mem->iomap); intel_memory_region_release_buddy(mem); } @@ -21,6 +74,11 @@ region_lmem_init(struct intel_memory_region *mem) { int ret; + if (i915_modparams.fake_lmem_start) { + ret = init_fake_lmem_bar(mem); + GEM_BUG_ON(ret); + } + if (!io_mapping_init_wc(&mem->iomap, mem->io_start, resource_size(&mem->region))) @@ -38,3 +96,37 @@ const struct intel_memory_region_ops intel_region_lmem_ops = { .release = region_lmem_release, .create_object = __i915_gem_lmem_object_create, }; + +struct intel_memory_region * +intel_setup_fake_lmem(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + struct intel_memory_region *mem; + resource_size_t mappable_end; + resource_size_t io_start; + resource_size_t start; + + GEM_BUG_ON(i915_ggtt_has_aperture(&i915->ggtt)); + GEM_BUG_ON(!i915_modparams.fake_lmem_start); + + /* Your mappable aperture belongs to me now! */ + mappable_end = pci_resource_len(pdev, 2); + io_start = pci_resource_start(pdev, 2), + start = i915_modparams.fake_lmem_start; + + mem = intel_memory_region_create(i915, + start, + mappable_end, + PAGE_SIZE, + io_start, + &intel_region_lmem_ops); + if (!IS_ERR(mem)) { + DRM_INFO("Intel graphics fake LMEM: %pR\n", &mem->region); + DRM_INFO("Intel graphics fake LMEM IO start: %llx\n", + (u64)mem->io_start); + DRM_INFO("Intel graphics fake LMEM size: %llx\n", + (u64)resource_size(&mem->region)); + } + + return mem; +} diff --git a/drivers/gpu/drm/i915/intel_region_lmem.h b/drivers/gpu/drm/i915/intel_region_lmem.h index ed2a3bab6443..213def7c7b8a 100644 --- a/drivers/gpu/drm/i915/intel_region_lmem.h +++ b/drivers/gpu/drm/i915/intel_region_lmem.h @@ -6,6 +6,11 @@ #ifndef __INTEL_REGION_LMEM_H #define __INTEL_REGION_LMEM_H +struct drm_i915_private; + extern const struct intel_memory_region_ops intel_region_lmem_ops; +struct intel_memory_region * +intel_setup_fake_lmem(struct drm_i915_private *i915); + #endif /* !__INTEL_REGION_LMEM_H */ From 2b73b3503b0a1393ff1e4a214a3f815d2c6dbffb Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Fri, 1 Nov 2019 09:04:29 +0200 Subject: [PATCH 150/154] drm/i915: Update DRIVER_DATE to 20191101 Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a22d969cb352..4bfd842e2914 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -110,8 +110,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20191021" -#define DRIVER_TIMESTAMP 1571651766 +#define DRIVER_DATE "20191101" +#define DRIVER_TIMESTAMP 1572591869 struct drm_i915_gem_object; From 4a3174152147da1159f7135e90e1831fba74da34 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 31 Oct 2019 09:01:04 +0000 Subject: [PATCH 151/154] drm/i915/gem: Refine occupancy test in kill_context() Don't just look at the very last request in a queue when deciding if we need to evict the context from the GPU, as that request may still be in the submission queue while the rest of the context is running! Instead, walk back along the queued requests looking for the active request and checking that. Fixes: 2e0986a58cc4 ("drm/i915/gem: Cancel contexts when hangchecking is disabled") Testcase: igt/gem_ctx_persistence/queued Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191031090104.22245-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 47 ++++++++++++++------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index cbdf2fb32636..de6e55af82cf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -333,10 +333,8 @@ static bool __cancel_engine(struct intel_engine_cs *engine) return __reset_engine(engine); } -static struct intel_engine_cs * -active_engine(struct dma_fence *fence, struct intel_context *ce) +static struct intel_engine_cs *__active_engine(struct i915_request *rq) { - struct i915_request *rq = to_request(fence); struct intel_engine_cs *engine, *locked; /* @@ -360,6 +358,29 @@ active_engine(struct dma_fence *fence, struct intel_context *ce) return engine; } +static struct intel_engine_cs *active_engine(struct intel_context *ce) +{ + struct intel_engine_cs *engine = NULL; + struct i915_request *rq; + + if (!ce->timeline) + return NULL; + + rcu_read_lock(); + list_for_each_entry_reverse(rq, &ce->timeline->requests, link) { + if (i915_request_completed(rq)) + break; + + /* Check with the backend if the request is inflight */ + engine = __active_engine(rq); + if (engine) + break; + } + rcu_read_unlock(); + + return engine; +} + static void kill_context(struct i915_gem_context *ctx) { struct i915_gem_engines_iter it; @@ -383,17 +404,15 @@ static void kill_context(struct i915_gem_context *ctx) */ for_each_gem_engine(ce, __context_engines_static(ctx), it) { struct intel_engine_cs *engine; - struct dma_fence *fence; - if (!ce->timeline) - continue; - - fence = i915_active_fence_get(&ce->timeline->last_request); - if (!fence) - continue; - - /* Check with the backend if the request is still inflight */ - engine = active_engine(fence, ce); + /* + * Check the current active state of this context; if we + * are currently executing on the GPU we need to evict + * ourselves. On the other hand, if we haven't yet been + * submitted to the GPU or if everything is complete, + * we have nothing to do. + */ + engine = active_engine(ce); /* First attempt to gracefully cancel the context */ if (engine && !__cancel_engine(engine)) @@ -403,8 +422,6 @@ static void kill_context(struct i915_gem_context *ctx) * reset. We hope the collateral damage is worth it. */ __reset_context(ctx, engine); - - dma_fence_put(fence); } } From 292a27b0a807615f4c140b990802f109c72d6c8c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 Nov 2019 09:51:47 +0000 Subject: [PATCH 152/154] drm/i915/lmem: Check against i915_selftest only under CONFIG_SELFTEST The i915_selftest module parameters only exist when CONFIG_DRM_I915_SELFTEST is set. Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191101095147.9769-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 480e2054f628..3340485c12e3 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1487,6 +1487,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * Check if we support fake LMEM -- for now we only unleash this for * the live selftests(test-and-exit). */ +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) if (IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM)) { if (INTEL_GEN(dev_priv) >= 9 && i915_selftest.live < 0 && i915_modparams.fake_lmem_start) { @@ -1497,6 +1498,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) GEM_BUG_ON(!IS_DGFX(dev_priv)); } } +#endif ret = pci_enable_device(pdev); if (ret) From e5661c6ab0efd0dd0140a0f521b6e9f6a26a5071 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 Nov 2019 08:49:40 +0000 Subject: [PATCH 153/154] drm/i915/selftests: Start kthreads before stopping An interesting observation made with our parallel selftests was that on our small/single cpu systems we would call kthread_stop() before the kthreads were spawned. If this happens, the kthread is never run at all; completely bypassing the test. A simple yield() from the parent will ensure that all children have the opportunity to start before we reap them. Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191101084940.31838-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 2 ++ drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c | 2 ++ drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 2 ++ drivers/gpu/drm/i915/gt/selftest_lrc.c | 2 ++ drivers/gpu/drm/i915/selftests/i915_request.c | 4 ++++ 5 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index c6e61564bb5e..62fabc023a83 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -345,6 +345,8 @@ static int live_parallel_switch(void *arg) get_task_struct(data[n].tsk); } + yield(); /* start all threads before we kthread_stop() */ + for (n = 0; n < count; n++) { int status; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c index ac40c704514e..e8132aca0bb6 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c @@ -473,6 +473,8 @@ static int igt_threaded_blt(struct drm_i915_private *i915, get_task_struct(tsk[i]); } + yield(); /* start all threads before we kthread_stop() */ + for (i = 0; i < n_cpus; ++i) { int status; diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 8abc0a1d692b..85e9ccf5c304 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -826,6 +826,8 @@ static int __igt_reset_engines(struct intel_gt *gt, get_task_struct(tsk); } + yield(); /* start all threads before we begin */ + intel_engine_pm_get(engine); set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 5e8c365548f0..eb71ac2f992c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -2252,6 +2252,8 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) get_task_struct(tsk[id]); } + yield(); /* start all threads before we kthread_stop() */ + count = 0; for_each_engine(engine, smoke->gt, id) { int status; diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 30ae34f62176..8618a4dc0701 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -464,6 +464,7 @@ static int mock_breadcrumbs_smoketest(void *arg) get_task_struct(threads[n]); } + yield(); /* start all threads before we begin */ msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); for (n = 0; n < ncpus; n++) { @@ -1158,6 +1159,8 @@ static int live_parallel_engines(void *arg) get_task_struct(tsk[idx++]); } + yield(); /* start all threads before we kthread_stop() */ + idx = 0; for_each_uabi_engine(engine, i915) { int status; @@ -1314,6 +1317,7 @@ static int live_breadcrumbs_smoketest(void *arg) idx++; } + yield(); /* start all threads before we begin */ msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); out_flush: From 1883e2999f045e1fd6f76a7f30288a5312085289 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Fri, 1 Nov 2019 12:41:13 +0200 Subject: [PATCH 154/154] drm/i915: Update DRIVER_DATE to 20191101 Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4bfd842e2914..1e6118f62b29 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -111,7 +111,7 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" #define DRIVER_DATE "20191101" -#define DRIVER_TIMESTAMP 1572591869 +#define DRIVER_TIMESTAMP 1572604873 struct drm_i915_gem_object;