diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 0f2c22a3bcb6..35871c8a42a6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -646,7 +646,7 @@ static void init_contexts(struct drm_i915_private *i915) static bool needs_preempt_context(struct drm_i915_private *i915) { - return HAS_EXECLISTS(i915); + return USES_GUC_SUBMISSION(i915); } int i915_gem_contexts_init(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 08049ee91cee..4c0e211c715d 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -13,6 +13,7 @@ #include #include "i915_active_types.h" +#include "i915_utils.h" #include "intel_engine_types.h" #include "intel_sseu.h" @@ -38,6 +39,10 @@ struct intel_context { struct i915_gem_context *gem_context; struct intel_engine_cs *engine; struct intel_engine_cs *inflight; +#define intel_context_inflight(ce) ptr_mask_bits((ce)->inflight, 2) +#define intel_context_inflight_count(ce) ptr_unmask_bits((ce)->inflight, 2) +#define intel_context_inflight_inc(ce) ptr_count_inc(&(ce)->inflight) +#define intel_context_inflight_dec(ce) ptr_count_dec(&(ce)->inflight) struct list_head signal_link; struct list_head signals; diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 2f1c6871ee95..9bb6ff76680e 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -125,71 +125,26 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a) void intel_engines_set_scheduler_caps(struct drm_i915_private *i915); -static inline void -execlists_set_active(struct intel_engine_execlists *execlists, - unsigned int bit) -{ - __set_bit(bit, (unsigned long *)&execlists->active); -} - -static inline bool -execlists_set_active_once(struct intel_engine_execlists *execlists, - unsigned int bit) -{ - return !__test_and_set_bit(bit, (unsigned long *)&execlists->active); -} - -static inline void -execlists_clear_active(struct intel_engine_execlists *execlists, - unsigned int bit) -{ - __clear_bit(bit, (unsigned long *)&execlists->active); -} - -static inline void -execlists_clear_all_active(struct intel_engine_execlists *execlists) -{ - execlists->active = 0; -} - -static inline bool -execlists_is_active(const struct intel_engine_execlists *execlists, - unsigned int bit) -{ - return test_bit(bit, (unsigned long *)&execlists->active); -} - -void execlists_user_begin(struct intel_engine_execlists *execlists, - const struct execlist_port *port); -void execlists_user_end(struct intel_engine_execlists *execlists); - -void -execlists_cancel_port_requests(struct intel_engine_execlists * const execlists); - -struct i915_request * -execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); - static inline unsigned int execlists_num_ports(const struct intel_engine_execlists * const execlists) { return execlists->port_mask + 1; } -static inline struct execlist_port * -execlists_port_complete(struct intel_engine_execlists * const execlists, - struct execlist_port * const port) +static inline struct i915_request * +execlists_active(const struct intel_engine_execlists *execlists) { - const unsigned int m = execlists->port_mask; - - GEM_BUG_ON(port_index(port, execlists) != 0); - GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); - - memmove(port, port + 1, m * sizeof(struct execlist_port)); - memset(port + m, 0, sizeof(struct execlist_port)); - - return port; + GEM_BUG_ON(execlists->active - execlists->inflight > + execlists_num_ports(execlists)); + return READ_ONCE(*execlists->active); } +void +execlists_cancel_port_requests(struct intel_engine_execlists * const execlists); + +struct i915_request * +execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); + static inline u32 intel_read_status_page(const struct intel_engine_cs *engine, int reg) { diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 7fd33e81c2d9..d45328e254dc 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -508,6 +508,10 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine) GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists))); GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); + memset(execlists->pending, 0, sizeof(execlists->pending)); + execlists->active = + memset(execlists->inflight, 0, sizeof(execlists->inflight)); + execlists->queue_priority_hint = INT_MIN; execlists->queue = RB_ROOT_CACHED; } @@ -1152,7 +1156,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) return true; /* Waiting to drain ELSP? */ - if (READ_ONCE(engine->execlists.active)) { + if (execlists_active(&engine->execlists)) { struct tasklet_struct *t = &engine->execlists.tasklet; synchronize_hardirq(engine->i915->drm.irq); @@ -1169,7 +1173,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) /* Otherwise flush the tasklet if it was on another cpu */ tasklet_unlock_wait(t); - if (READ_ONCE(engine->execlists.active)) + if (execlists_active(&engine->execlists)) return false; } @@ -1367,6 +1371,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, } if (HAS_EXECLISTS(dev_priv)) { + struct i915_request * const *port, *rq; const u32 *hws = &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; const u8 num_entries = execlists->csb_size; @@ -1399,27 +1404,33 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, } spin_lock_irqsave(&engine->active.lock, flags); - for (idx = 0; idx < execlists_num_ports(execlists); idx++) { - struct i915_request *rq; - unsigned int count; + for (port = execlists->active; (rq = *port); port++) { + char hdr[80]; + int len; + + len = snprintf(hdr, sizeof(hdr), + "\t\tActive[%d: ", + (int)(port - execlists->active)); + if (!i915_request_signaled(rq)) + len += snprintf(hdr + len, sizeof(hdr) - len, + "ring:{start:%08x, hwsp:%08x, seqno:%08x}, ", + i915_ggtt_offset(rq->ring->vma), + rq->timeline->hwsp_offset, + hwsp_seqno(rq)); + snprintf(hdr + len, sizeof(hdr) - len, "rq: "); + print_request(m, rq, hdr); + } + for (port = execlists->pending; (rq = *port); port++) { char hdr[80]; - rq = port_unpack(&execlists->port[idx], &count); - if (!rq) { - drm_printf(m, "\t\tELSP[%d] idle\n", idx); - } else if (!i915_request_signaled(rq)) { - snprintf(hdr, sizeof(hdr), - "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ", - idx, count, - i915_ggtt_offset(rq->ring->vma), - rq->timeline->hwsp_offset, - hwsp_seqno(rq)); - print_request(m, rq, hdr); - } else { - print_request(m, rq, "\t\tELSP[%d] rq: "); - } + snprintf(hdr, sizeof(hdr), + "\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ", + (int)(port - execlists->pending), + i915_ggtt_offset(rq->ring->vma), + rq->timeline->hwsp_offset, + hwsp_seqno(rq)); + print_request(m, rq, hdr); } - drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active); spin_unlock_irqrestore(&engine->active.lock, flags); } else if (INTEL_GEN(dev_priv) > 6) { drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", @@ -1583,15 +1594,19 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) } if (engine->stats.enabled++ == 0) { - const struct execlist_port *port = execlists->port; - unsigned int num_ports = execlists_num_ports(execlists); + struct i915_request * const *port; + struct i915_request *rq; engine->stats.enabled_at = ktime_get(); /* XXX submission method oblivious? */ - while (num_ports-- && port_isset(port)) { + for (port = execlists->active; (rq = *port); port++) engine->stats.active++; - port++; + + for (port = execlists->pending; (rq = *port); port++) { + /* Exclude any contexts already counted in active */ + if (intel_context_inflight_count(rq->hw_context) == 1) + engine->stats.active++; } if (engine->stats.active) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 43e975a26016..b4f7b81a3c3e 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -172,51 +172,28 @@ struct intel_engine_execlists { */ u32 __iomem *ctrl_reg; - /** - * @port: execlist port states - * - * For each hardware ELSP (ExecList Submission Port) we keep - * track of the last request and the number of times we submitted - * that port to hw. We then count the number of times the hw reports - * a context completion or preemption. As only one context can - * be active on hw, we limit resubmission of context to port[0]. This - * is called Lite Restore, of the context. - */ - struct execlist_port { - /** - * @request_count: combined request and submission count - */ - struct i915_request *request_count; -#define EXECLIST_COUNT_BITS 2 -#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS) -#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS) -#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS) -#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS) -#define port_set(p, packed) ((p)->request_count = (packed)) -#define port_isset(p) ((p)->request_count) -#define port_index(p, execlists) ((p) - (execlists)->port) - - /** - * @context_id: context ID for port - */ - GEM_DEBUG_DECL(u32 context_id); - #define EXECLIST_MAX_PORTS 2 - } port[EXECLIST_MAX_PORTS]; - /** - * @active: is the HW active? We consider the HW as active after - * submitting any context for execution and until we have seen the - * last context completion event. After that, we do not expect any - * more events until we submit, and so can park the HW. - * - * As we have a small number of different sources from which we feed - * the HW, we track the state of each inside a single bitfield. + * @active: the currently known context executing on HW */ - unsigned int active; -#define EXECLISTS_ACTIVE_USER 0 -#define EXECLISTS_ACTIVE_PREEMPT 1 -#define EXECLISTS_ACTIVE_HWACK 2 + struct i915_request * const *active; + /** + * @inflight: the set of contexts submitted and acknowleged by HW + * + * The set of inflight contexts is managed by reading CS events + * from the HW. On a context-switch event (not preemption), we + * know the HW has transitioned from port0 to port1, and we + * advance our inflight/active tracking accordingly. + */ + struct i915_request *inflight[EXECLIST_MAX_PORTS + 1 /* sentinel */]; + /** + * @pending: the next set of contexts submitted to ELSP + * + * We store the array of contexts that we submit to HW (via ELSP) and + * promote them to the inflight array once HW has signaled the + * preemption or idle-to-active event. + */ + struct i915_request *pending[EXECLIST_MAX_PORTS + 1]; /** * @port_mask: number of execlist ports - 1 @@ -257,11 +234,6 @@ struct intel_engine_execlists { */ u32 *csb_status; - /** - * @preempt_complete_status: expected CSB upon completing preemption - */ - u32 preempt_complete_status; - /** * @csb_size: context status buffer FIFO size */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 82b7ace62d97..cb9d285bd00a 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -161,6 +161,8 @@ #define GEN8_CTX_STATUS_COMPLETED_MASK \ (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED) +#define CTX_DESC_FORCE_RESTORE BIT_ULL(2) + /* Typical size of the average request (2 pipecontrols and a MI_BB) */ #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ #define WA_TAIL_DWORDS 2 @@ -221,6 +223,25 @@ static void execlists_init_reg_state(u32 *reg_state, struct intel_engine_cs *engine, struct intel_ring *ring); +static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine) +{ + return (i915_ggtt_offset(engine->status_page.vma) + + I915_GEM_HWS_PREEMPT_ADDR); +} + +static inline void +ring_set_paused(const struct intel_engine_cs *engine, int state) +{ + /* + * We inspect HWS_PREEMPT with a semaphore inside + * engine->emit_fini_breadcrumb. If the dword is true, + * the ring is paused as the semaphore will busywait + * until the dword is false. + */ + engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state; + wmb(); +} + static inline struct i915_priolist *to_priolist(struct rb_node *rb) { return rb_entry(rb, struct i915_priolist, node); @@ -271,12 +292,6 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, { int last_prio; - if (!engine->preempt_context) - return false; - - if (i915_request_completed(rq)) - return false; - /* * Check if the current priority hint merits a preemption attempt. * @@ -338,9 +353,6 @@ __maybe_unused static inline bool assert_priority_queue(const struct i915_request *prev, const struct i915_request *next) { - const struct intel_engine_execlists *execlists = - &prev->engine->execlists; - /* * Without preemption, the prev may refer to the still active element * which we refuse to let go. @@ -348,7 +360,7 @@ assert_priority_queue(const struct i915_request *prev, * Even with preemption, there are times when we think it is better not * to preempt and leave an ostensibly lower priority request in flight. */ - if (port_request(execlists->port) == prev) + if (i915_request_is_active(prev)) return true; return rq_prio(prev) >= rq_prio(next); @@ -442,13 +454,11 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) struct intel_engine_cs *owner; if (i915_request_completed(rq)) - break; + continue; /* XXX */ __i915_request_unsubmit(rq); unwind_wa_tail(rq); - GEM_BUG_ON(rq->hw_context->inflight); - /* * Push the request back into the queue for later resubmission. * If this request is not native to this physical engine (i.e. @@ -500,32 +510,32 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status) status, rq); } -inline void -execlists_user_begin(struct intel_engine_execlists *execlists, - const struct execlist_port *port) +static inline struct i915_request * +execlists_schedule_in(struct i915_request *rq, int idx) { - execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER); + struct intel_context *ce = rq->hw_context; + int count; + + trace_i915_request_in(rq, idx); + + count = intel_context_inflight_count(ce); + if (!count) { + intel_context_get(ce); + ce->inflight = rq->engine; + + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); + intel_engine_context_in(ce->inflight); + } + + intel_context_inflight_inc(ce); + GEM_BUG_ON(intel_context_inflight(ce) != rq->engine); + + return i915_request_get(rq); } -inline void -execlists_user_end(struct intel_engine_execlists *execlists) +static void kick_siblings(struct i915_request *rq, struct intel_context *ce) { - execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER); -} - -static inline void -execlists_context_schedule_in(struct i915_request *rq) -{ - GEM_BUG_ON(rq->hw_context->inflight); - - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); - intel_engine_context_in(rq->engine); - rq->hw_context->inflight = rq->engine; -} - -static void kick_siblings(struct i915_request *rq) -{ - struct virtual_engine *ve = to_virtual_engine(rq->hw_context->engine); + struct virtual_engine *ve = container_of(ce, typeof(*ve), context); struct i915_request *next = READ_ONCE(ve->request); if (next && next->execution_mask & ~rq->execution_mask) @@ -533,30 +543,43 @@ static void kick_siblings(struct i915_request *rq) } static inline void -execlists_context_schedule_out(struct i915_request *rq, unsigned long status) -{ - rq->hw_context->inflight = NULL; - intel_engine_context_out(rq->engine); - execlists_context_status_change(rq, status); - trace_i915_request_out(rq); - - /* - * If this is part of a virtual engine, its next request may have - * been blocked waiting for access to the active context. We have - * to kick all the siblings again in case we need to switch (e.g. - * the next request is not runnable on this engine). Hopefully, - * we will already have submitted the next request before the - * tasklet runs and do not need to rebuild each virtual tree - * and kick everyone again. - */ - if (rq->engine != rq->hw_context->engine) - kick_siblings(rq); -} - -static u64 execlists_update_context(struct i915_request *rq) +execlists_schedule_out(struct i915_request *rq) { struct intel_context *ce = rq->hw_context; + GEM_BUG_ON(!intel_context_inflight_count(ce)); + + trace_i915_request_out(rq); + + intel_context_inflight_dec(ce); + if (!intel_context_inflight_count(ce)) { + intel_engine_context_out(ce->inflight); + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); + + ce->inflight = NULL; + intel_context_put(ce); + + /* + * If this is part of a virtual engine, its next request may + * have been blocked waiting for access to the active context. + * We have to kick all the siblings again in case we need to + * switch (e.g. the next request is not runnable on this + * engine). Hopefully, we will already have submitted the next + * request before the tasklet runs and do not need to rebuild + * each virtual tree and kick everyone again. + */ + if (rq->engine != ce->engine) + kick_siblings(rq, ce); + } + + i915_request_put(rq); +} + +static u64 execlists_update_context(const struct i915_request *rq) +{ + struct intel_context *ce = rq->hw_context; + u64 desc; + ce->lrc_reg_state[CTX_RING_TAIL + 1] = intel_ring_set_tail(rq->ring, rq->tail); @@ -576,7 +599,11 @@ static u64 execlists_update_context(struct i915_request *rq) * wmb). */ mb(); - return ce->lrc_desc; + + desc = ce->lrc_desc; + ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE; + + return desc; } static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port) @@ -590,12 +617,62 @@ static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc } } +static __maybe_unused void +trace_ports(const struct intel_engine_execlists *execlists, + const char *msg, + struct i915_request * const *ports) +{ + const struct intel_engine_cs *engine = + container_of(execlists, typeof(*engine), execlists); + + GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n", + engine->name, msg, + ports[0]->fence.context, + ports[0]->fence.seqno, + i915_request_completed(ports[0]) ? "!" : + i915_request_started(ports[0]) ? "*" : + "", + ports[1] ? ports[1]->fence.context : 0, + ports[1] ? ports[1]->fence.seqno : 0); +} + +static __maybe_unused bool +assert_pending_valid(const struct intel_engine_execlists *execlists, + const char *msg) +{ + struct i915_request * const *port, *rq; + struct intel_context *ce = NULL; + + trace_ports(execlists, msg, execlists->pending); + + if (execlists->pending[execlists_num_ports(execlists)]) + return false; + + for (port = execlists->pending; (rq = *port); port++) { + if (ce == rq->hw_context) + return false; + + ce = rq->hw_context; + if (i915_request_completed(rq)) + continue; + + if (i915_active_is_idle(&ce->active)) + return false; + + if (!i915_vma_is_pinned(ce->state)) + return false; + } + + return ce; +} + static void execlists_submit_ports(struct intel_engine_cs *engine) { struct intel_engine_execlists *execlists = &engine->execlists; - struct execlist_port *port = execlists->port; unsigned int n; + GEM_BUG_ON(!assert_pending_valid(execlists, "submit")); + /* * We can skip acquiring intel_runtime_pm_get() here as it was taken * on our behalf by the request (see i915_gem_mark_busy()) and it will @@ -613,38 +690,16 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) * of elsq entries, keep this in mind before changing the loop below. */ for (n = execlists_num_ports(execlists); n--; ) { - struct i915_request *rq; - unsigned int count; - u64 desc; + struct i915_request *rq = execlists->pending[n]; - rq = port_unpack(&port[n], &count); - if (rq) { - GEM_BUG_ON(count > !n); - if (!count++) - execlists_context_schedule_in(rq); - port_set(&port[n], port_pack(rq, count)); - desc = execlists_update_context(rq); - GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - - GEM_TRACE("%s in[%d]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n", - engine->name, n, - port[n].context_id, count, - rq->fence.context, rq->fence.seqno, - hwsp_seqno(rq), - rq_prio(rq)); - } else { - GEM_BUG_ON(!n); - desc = 0; - } - - write_desc(execlists, desc, n); + write_desc(execlists, + rq ? execlists_update_context(rq) : 0, + n); } /* we need to manually load the submit queue */ if (execlists->ctrl_reg) writel(EL_CTRL_LOAD, execlists->ctrl_reg); - - execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); } static bool ctx_single_port_submission(const struct intel_context *ce) @@ -668,6 +723,7 @@ static bool can_merge_ctx(const struct intel_context *prev, static bool can_merge_rq(const struct i915_request *prev, const struct i915_request *next) { + GEM_BUG_ON(prev == next); GEM_BUG_ON(!assert_priority_queue(prev, next)); if (!can_merge_ctx(prev->hw_context, next->hw_context)) @@ -676,58 +732,6 @@ static bool can_merge_rq(const struct i915_request *prev, return true; } -static void port_assign(struct execlist_port *port, struct i915_request *rq) -{ - GEM_BUG_ON(rq == port_request(port)); - - if (port_isset(port)) - i915_request_put(port_request(port)); - - port_set(port, port_pack(i915_request_get(rq), port_count(port))); -} - -static void inject_preempt_context(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists *execlists = &engine->execlists; - struct intel_context *ce = engine->preempt_context; - unsigned int n; - - GEM_BUG_ON(execlists->preempt_complete_status != - upper_32_bits(ce->lrc_desc)); - - /* - * Switch to our empty preempt context so - * the state of the GPU is known (idle). - */ - GEM_TRACE("%s\n", engine->name); - for (n = execlists_num_ports(execlists); --n; ) - write_desc(execlists, 0, n); - - write_desc(execlists, ce->lrc_desc, n); - - /* we need to manually load the submit queue */ - if (execlists->ctrl_reg) - writel(EL_CTRL_LOAD, execlists->ctrl_reg); - - execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); - execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT); - - (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); -} - -static void complete_preempt_context(struct intel_engine_execlists *execlists) -{ - GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)); - - if (inject_preempt_hang(execlists)) - return; - - execlists_cancel_port_requests(execlists); - __unwind_incomplete_requests(container_of(execlists, - struct intel_engine_cs, - execlists)); -} - static void virtual_update_register_offsets(u32 *regs, struct intel_engine_cs *engine) { @@ -792,7 +796,7 @@ static bool virtual_matches(const struct virtual_engine *ve, * we reuse the register offsets). This is a very small * hystersis on the greedy seelction algorithm. */ - inflight = READ_ONCE(ve->context.inflight); + inflight = intel_context_inflight(&ve->context); if (inflight && inflight != engine) return false; @@ -815,13 +819,23 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, spin_unlock(&old->breadcrumbs.irq_lock); } +static struct i915_request * +last_active(const struct intel_engine_execlists *execlists) +{ + struct i915_request * const *last = execlists->active; + + while (*last && i915_request_completed(*last)) + last++; + + return *last; +} + static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - struct execlist_port *port = execlists->port; - const struct execlist_port * const last_port = - &execlists->port[execlists->port_mask]; - struct i915_request *last = port_request(port); + struct i915_request **port = execlists->pending; + struct i915_request ** const last_port = port + execlists->port_mask; + struct i915_request *last; struct rb_node *rb; bool submit = false; @@ -867,65 +881,72 @@ static void execlists_dequeue(struct intel_engine_cs *engine) break; } + /* + * If the queue is higher priority than the last + * request in the currently active context, submit afresh. + * We will resubmit again afterwards in case we need to split + * the active context to interject the preemption request, + * i.e. we will retrigger preemption following the ack in case + * of trouble. + */ + last = last_active(execlists); if (last) { - /* - * Don't resubmit or switch until all outstanding - * preemptions (lite-restore) are seen. Then we - * know the next preemption status we see corresponds - * to this ELSP update. - */ - GEM_BUG_ON(!execlists_is_active(execlists, - EXECLISTS_ACTIVE_USER)); - GEM_BUG_ON(!port_count(&port[0])); - - /* - * If we write to ELSP a second time before the HW has had - * a chance to respond to the previous write, we can confuse - * the HW and hit "undefined behaviour". After writing to ELSP, - * we must then wait until we see a context-switch event from - * the HW to indicate that it has had a chance to respond. - */ - if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) - return; - if (need_preempt(engine, last, rb)) { - inject_preempt_context(engine); - return; + GEM_TRACE("%s: preempting last=%llx:%lld, prio=%d, hint=%d\n", + engine->name, + last->fence.context, + last->fence.seqno, + last->sched.attr.priority, + execlists->queue_priority_hint); + /* + * Don't let the RING_HEAD advance past the breadcrumb + * as we unwind (and until we resubmit) so that we do + * not accidentally tell it to go backwards. + */ + ring_set_paused(engine, 1); + + /* + * Note that we have not stopped the GPU at this point, + * so we are unwinding the incomplete requests as they + * remain inflight and so by the time we do complete + * the preemption, some of the unwound requests may + * complete! + */ + __unwind_incomplete_requests(engine); + + /* + * If we need to return to the preempted context, we + * need to skip the lite-restore and force it to + * reload the RING_TAIL. Otherwise, the HW has a + * tendency to ignore us rewinding the TAIL to the + * end of an earlier request. + */ + last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE; + last = NULL; + } else { + /* + * Otherwise if we already have a request pending + * for execution after the current one, we can + * just wait until the next CS event before + * queuing more. In either case we will force a + * lite-restore preemption event, but if we wait + * we hopefully coalesce several updates into a single + * submission. + */ + if (!list_is_last(&last->sched.link, + &engine->active.requests)) + return; + + /* + * WaIdleLiteRestore:bdw,skl + * Apply the wa NOOPs to prevent + * ring:HEAD == rq:TAIL as we resubmit the + * request. See gen8_emit_fini_breadcrumb() for + * where we prepare the padding after the + * end of the request. + */ + last->tail = last->wa_tail; } - - /* - * In theory, we could coalesce more requests onto - * the second port (the first port is active, with - * no preemptions pending). However, that means we - * then have to deal with the possible lite-restore - * of the second port (as we submit the ELSP, there - * may be a context-switch) but also we may complete - * the resubmission before the context-switch. Ergo, - * coalescing onto the second port will cause a - * preemption event, but we cannot predict whether - * that will affect port[0] or port[1]. - * - * If the second port is already active, we can wait - * until the next context-switch before contemplating - * new requests. The GPU will be busy and we should be - * able to resubmit the new ELSP before it idles, - * avoiding pipeline bubbles (momentary pauses where - * the driver is unable to keep up the supply of new - * work). However, we have to double check that the - * priorities of the ports haven't been switch. - */ - if (port_count(&port[1])) - return; - - /* - * WaIdleLiteRestore:bdw,skl - * Apply the wa NOOPs to prevent - * ring:HEAD == rq:TAIL as we resubmit the - * request. See gen8_emit_fini_breadcrumb() for - * where we prepare the padding after the - * end of the request. - */ - last->tail = last->wa_tail; } while (rb) { /* XXX virtual is always taking precedence */ @@ -955,9 +976,24 @@ static void execlists_dequeue(struct intel_engine_cs *engine) continue; } + if (i915_request_completed(rq)) { + ve->request = NULL; + ve->base.execlists.queue_priority_hint = INT_MIN; + rb_erase_cached(rb, &execlists->virtual); + RB_CLEAR_NODE(rb); + + rq->engine = engine; + __i915_request_submit(rq); + + spin_unlock(&ve->base.active.lock); + + rb = rb_first_cached(&execlists->virtual); + continue; + } + if (last && !can_merge_rq(last, rq)) { spin_unlock(&ve->base.active.lock); - return; /* leave this rq for another engine */ + return; /* leave this for another */ } GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n", @@ -1006,9 +1042,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } __i915_request_submit(rq); - trace_i915_request_in(rq, port_index(port, execlists)); - submit = true; - last = rq; + if (!i915_request_completed(rq)) { + submit = true; + last = rq; + } } spin_unlock(&ve->base.active.lock); @@ -1021,6 +1058,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine) int i; priolist_for_each_request_consume(rq, rn, p, i) { + if (i915_request_completed(rq)) + goto skip; + /* * Can we combine this request with the current port? * It has to be the same context/ringbuffer and not @@ -1060,19 +1100,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine) ctx_single_port_submission(rq->hw_context)) goto done; - - if (submit) - port_assign(port, last); + *port = execlists_schedule_in(last, port - execlists->pending); port++; - - GEM_BUG_ON(port_isset(port)); } - __i915_request_submit(rq); - trace_i915_request_in(rq, port_index(port, execlists)); - last = rq; submit = true; +skip: + __i915_request_submit(rq); } rb_erase_cached(&p->node, &execlists->queue); @@ -1097,54 +1132,30 @@ done: * interrupt for secondary ports). */ execlists->queue_priority_hint = queue_prio(execlists); + GEM_TRACE("%s: queue_priority_hint:%d, submit:%s\n", + engine->name, execlists->queue_priority_hint, + yesno(submit)); if (submit) { - port_assign(port, last); + *port = execlists_schedule_in(last, port - execlists->pending); + memset(port + 1, 0, (last_port - port) * sizeof(*port)); execlists_submit_ports(engine); } - - /* We must always keep the beast fed if we have work piled up */ - GEM_BUG_ON(rb_first_cached(&execlists->queue) && - !port_isset(execlists->port)); - - /* Re-evaluate the executing context setup after each preemptive kick */ - if (last) - execlists_user_begin(execlists, execlists->port); - - /* If the engine is now idle, so should be the flag; and vice versa. */ - GEM_BUG_ON(execlists_is_active(&engine->execlists, - EXECLISTS_ACTIVE_USER) == - !port_isset(engine->execlists.port)); } void execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) { - struct execlist_port *port = execlists->port; - unsigned int num_ports = execlists_num_ports(execlists); + struct i915_request * const *port, *rq; - while (num_ports-- && port_isset(port)) { - struct i915_request *rq = port_request(port); + for (port = execlists->pending; (rq = *port); port++) + execlists_schedule_out(rq); + memset(execlists->pending, 0, sizeof(execlists->pending)); - GEM_TRACE("%s:port%u fence %llx:%lld, (current %d)\n", - rq->engine->name, - (unsigned int)(port - execlists->port), - rq->fence.context, rq->fence.seqno, - hwsp_seqno(rq)); - - GEM_BUG_ON(!execlists->active); - execlists_context_schedule_out(rq, - i915_request_completed(rq) ? - INTEL_CONTEXT_SCHEDULE_OUT : - INTEL_CONTEXT_SCHEDULE_PREEMPTED); - - i915_request_put(rq); - - memset(port, 0, sizeof(*port)); - port++; - } - - execlists_clear_all_active(execlists); + for (port = execlists->active; (rq = *port); port++) + execlists_schedule_out(rq); + execlists->active = + memset(execlists->inflight, 0, sizeof(execlists->inflight)); } static inline void @@ -1163,7 +1174,6 @@ reset_in_progress(const struct intel_engine_execlists *execlists) static void process_csb(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - struct execlist_port *port = execlists->port; const u32 * const buf = execlists->csb_status; const u8 num_entries = execlists->csb_size; u8 head, tail; @@ -1198,9 +1208,7 @@ static void process_csb(struct intel_engine_cs *engine) rmb(); do { - struct i915_request *rq; unsigned int status; - unsigned int count; if (++head == num_entries) head = 0; @@ -1223,68 +1231,38 @@ static void process_csb(struct intel_engine_cs *engine) * status notifier. */ - GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n", + GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x\n", engine->name, head, - buf[2 * head + 0], buf[2 * head + 1], - execlists->active); + buf[2 * head + 0], buf[2 * head + 1]); status = buf[2 * head]; - if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE | - GEN8_CTX_STATUS_PREEMPTED)) - execlists_set_active(execlists, - EXECLISTS_ACTIVE_HWACK); - if (status & GEN8_CTX_STATUS_ACTIVE_IDLE) - execlists_clear_active(execlists, - EXECLISTS_ACTIVE_HWACK); + if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) { + GEM_BUG_ON(*execlists->active); +promote: + GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); + execlists->active = + memcpy(execlists->inflight, + execlists->pending, + execlists_num_ports(execlists) * + sizeof(*execlists->pending)); + execlists->pending[0] = NULL; - if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) - continue; + if (!inject_preempt_hang(execlists)) + ring_set_paused(engine, 0); + } else if (status & GEN8_CTX_STATUS_PREEMPTED) { + struct i915_request * const *port = execlists->active; - /* We should never get a COMPLETED | IDLE_ACTIVE! */ - GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE); + trace_ports(execlists, "preempted", execlists->active); - if (status & GEN8_CTX_STATUS_COMPLETE && - buf[2*head + 1] == execlists->preempt_complete_status) { - GEM_TRACE("%s preempt-idle\n", engine->name); - complete_preempt_context(execlists); - continue; - } + while (*port) + execlists_schedule_out(*port++); - if (status & GEN8_CTX_STATUS_PREEMPTED && - execlists_is_active(execlists, - EXECLISTS_ACTIVE_PREEMPT)) - continue; + goto promote; + } else if (*execlists->active) { + struct i915_request *rq = *execlists->active++; - GEM_BUG_ON(!execlists_is_active(execlists, - EXECLISTS_ACTIVE_USER)); - - rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n", - engine->name, - port->context_id, count, - rq ? rq->fence.context : 0, - rq ? rq->fence.seqno : 0, - rq ? hwsp_seqno(rq) : 0, - rq ? rq_prio(rq) : 0); - - /* Check the context/desc id for this event matches */ - GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); - - GEM_BUG_ON(count == 0); - if (--count == 0) { - /* - * On the final event corresponding to the - * submission of this context, we expect either - * an element-switch event or a completion - * event (and on completion, the active-idle - * marker). No more preemptions, lite-restore - * or otherwise. - */ - GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); - GEM_BUG_ON(port_isset(&port[1]) && - !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); - GEM_BUG_ON(!port_isset(&port[1]) && - !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); + trace_ports(execlists, "completed", + execlists->active - 1); /* * We rely on the hardware being strongly @@ -1293,21 +1271,10 @@ static void process_csb(struct intel_engine_cs *engine) * user interrupt and CSB is processed. */ GEM_BUG_ON(!i915_request_completed(rq)); + execlists_schedule_out(rq); - execlists_context_schedule_out(rq, - INTEL_CONTEXT_SCHEDULE_OUT); - i915_request_put(rq); - - GEM_TRACE("%s completed ctx=%d\n", - engine->name, port->context_id); - - port = execlists_port_complete(execlists, port); - if (port_isset(port)) - execlists_user_begin(execlists, port); - else - execlists_user_end(execlists); - } else { - port_set(port, port_pack(rq, count)); + GEM_BUG_ON(execlists->active - execlists->inflight > + execlists_num_ports(execlists)); } } while (head != tail); @@ -1332,7 +1299,7 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) lockdep_assert_held(&engine->active.lock); process_csb(engine); - if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT)) + if (!engine->execlists.pending[0]) execlists_dequeue(engine); } @@ -1345,11 +1312,6 @@ static void execlists_submission_tasklet(unsigned long data) struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; unsigned long flags; - GEM_TRACE("%s awake?=%d, active=%x\n", - engine->name, - !!intel_wakeref_active(&engine->wakeref), - engine->execlists.active); - spin_lock_irqsave(&engine->active.lock, flags); __execlists_submission_tasklet(engine); spin_unlock_irqrestore(&engine->active.lock, flags); @@ -1376,12 +1338,16 @@ static void __submit_queue_imm(struct intel_engine_cs *engine) tasklet_hi_schedule(&execlists->tasklet); } -static void submit_queue(struct intel_engine_cs *engine, int prio) +static void submit_queue(struct intel_engine_cs *engine, + const struct i915_request *rq) { - if (prio > engine->execlists.queue_priority_hint) { - engine->execlists.queue_priority_hint = prio; - __submit_queue_imm(engine); - } + struct intel_engine_execlists *execlists = &engine->execlists; + + if (rq_prio(rq) <= execlists->queue_priority_hint) + return; + + execlists->queue_priority_hint = rq_prio(rq); + __submit_queue_imm(engine); } static void execlists_submit_request(struct i915_request *request) @@ -1397,7 +1363,7 @@ static void execlists_submit_request(struct i915_request *request) GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); GEM_BUG_ON(list_empty(&request->sched.link)); - submit_queue(engine, rq_prio(request)); + submit_queue(engine, request); spin_unlock_irqrestore(&engine->active.lock, flags); } @@ -2048,27 +2014,13 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) spin_unlock_irqrestore(&engine->active.lock, flags); } -static bool lrc_regs_ok(const struct i915_request *rq) -{ - const struct intel_ring *ring = rq->ring; - const u32 *regs = rq->hw_context->lrc_reg_state; - - /* Quick spot check for the common signs of context corruption */ - - if (regs[CTX_RING_BUFFER_CONTROL + 1] != - (RING_CTL_SIZE(ring->size) | RING_VALID)) - return false; - - if (regs[CTX_RING_BUFFER_START + 1] != i915_ggtt_offset(ring->vma)) - return false; - - return true; -} - -static void reset_csb_pointers(struct intel_engine_execlists *execlists) +static void reset_csb_pointers(struct intel_engine_cs *engine) { + struct intel_engine_execlists * const execlists = &engine->execlists; const unsigned int reset_value = execlists->csb_size - 1; + ring_set_paused(engine, 0); + /* * After a reset, the HW starts writing into CSB entry [0]. We * therefore have to set our HEAD pointer back one entry so that @@ -2115,18 +2067,21 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) process_csb(engine); /* drain preemption events */ /* Following the reset, we need to reload the CSB read/write pointers */ - reset_csb_pointers(&engine->execlists); + reset_csb_pointers(engine); /* * Save the currently executing context, even if we completed * its request, it was still running at the time of the * reset and will have been clobbered. */ - if (!port_isset(execlists->port)) - goto out_clear; + rq = execlists_active(execlists); + if (!rq) + return; - rq = port_request(execlists->port); ce = rq->hw_context; + GEM_BUG_ON(i915_active_is_idle(&ce->active)); + GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); + rq = active_request(rq); /* * Catch up with any missed context-switch interrupts. @@ -2139,9 +2094,12 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) */ execlists_cancel_port_requests(execlists); - rq = active_request(rq); - if (!rq) + if (!rq) { + ce->ring->head = ce->ring->tail; goto out_replay; + } + + ce->ring->head = intel_ring_wrap(ce->ring, rq->head); /* * If this request hasn't started yet, e.g. it is waiting on a @@ -2155,7 +2113,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * Otherwise, if we have not started yet, the request should replay * perfectly and we do not need to flag the result as being erroneous. */ - if (!i915_request_started(rq) && lrc_regs_ok(rq)) + if (!i915_request_started(rq)) goto out_replay; /* @@ -2170,7 +2128,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * image back to the expected values to skip over the guilty request. */ i915_reset_request(rq, stalled); - if (!stalled && lrc_regs_ok(rq)) + if (!stalled) goto out_replay; /* @@ -2190,17 +2148,13 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) execlists_init_reg_state(regs, ce, engine, ce->ring); out_replay: - /* Rerun the request; its payload has been neutered (if guilty). */ - ce->ring->head = - rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail; + GEM_TRACE("%s replay {head:%04x, tail:%04x\n", + engine->name, ce->ring->head, ce->ring->tail); intel_ring_update_space(ce->ring); __execlists_update_reg_state(ce, engine); /* Push back any incomplete requests for replay after the reset. */ __unwind_incomplete_requests(engine); - -out_clear: - execlists_clear_all_active(execlists); } static void execlists_reset(struct intel_engine_cs *engine, bool stalled) @@ -2296,7 +2250,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) execlists->queue_priority_hint = INT_MIN; execlists->queue = RB_ROOT_CACHED; - GEM_BUG_ON(port_isset(execlists->port)); GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet)); execlists->tasklet.func = nop_submission_tasklet; @@ -2514,15 +2467,29 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs) return cs; } +static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs) +{ + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = intel_hws_preempt_address(request->engine); + *cs++ = 0; + + return cs; +} + static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) { cs = gen8_emit_ggtt_write(cs, request->fence.seqno, request->timeline->hwsp_offset, 0); - *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + cs = emit_preempt_busywait(request, cs); request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); @@ -2543,9 +2510,10 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL, 0); - *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + cs = emit_preempt_busywait(request, cs); request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); @@ -2594,8 +2562,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->flags |= I915_ENGINE_SUPPORTS_STATS; if (!intel_vgpu_active(engine->i915)) engine->flags |= I915_ENGINE_HAS_SEMAPHORES; - if (engine->preempt_context && - HAS_LOGICAL_RING_PREEMPTION(engine->i915)) + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) engine->flags |= I915_ENGINE_HAS_PREEMPTION; } @@ -2718,11 +2685,6 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine) i915_mmio_reg_offset(RING_ELSP(base)); } - execlists->preempt_complete_status = ~0u; - if (engine->preempt_context) - execlists->preempt_complete_status = - upper_32_bits(engine->preempt_context->lrc_desc); - execlists->csb_status = &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; @@ -2734,7 +2696,7 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine) else execlists->csb_size = GEN11_CSB_ENTRIES; - reset_csb_pointers(execlists); + reset_csb_pointers(engine); return 0; } @@ -2917,11 +2879,6 @@ populate_lr_context(struct intel_context *ce, if (!engine->default_state) regs[CTX_CONTEXT_CONTROL + 1] |= _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); - if (ce->gem_context == engine->i915->preempt_context && - INTEL_GEN(engine->i915) < 11) - regs[CTX_CONTEXT_CONTROL + 1] |= - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | - CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT); ret = 0; err_unpin_ctx: diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index b7e9fddef270..a497cf7acb6a 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1248,10 +1248,10 @@ static void error_record_engine_registers(struct i915_gpu_state *error, } } -static void record_request(struct i915_request *request, +static void record_request(const struct i915_request *request, struct drm_i915_error_request *erq) { - struct i915_gem_context *ctx = request->gem_context; + const struct i915_gem_context *ctx = request->gem_context; erq->flags = request->fence.flags; erq->context = request->fence.context; @@ -1315,20 +1315,15 @@ static void engine_record_requests(struct intel_engine_cs *engine, ee->num_requests = count; } -static void error_record_engine_execlists(struct intel_engine_cs *engine, +static void error_record_engine_execlists(const struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) { const struct intel_engine_execlists * const execlists = &engine->execlists; - unsigned int n; + struct i915_request * const *port = execlists->active; + unsigned int n = 0; - for (n = 0; n < execlists_num_ports(execlists); n++) { - struct i915_request *rq = port_request(&execlists->port[n]); - - if (!rq) - break; - - record_request(rq, &ee->execlist[n]); - } + while (*port) + record_request(*port++, &ee->execlist[n++]); ee->num_ports = n; } diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 7083e6ab92c5..0c99694faab7 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -276,6 +276,12 @@ static bool i915_request_retire(struct i915_request *rq) local_irq_disable(); + /* + * We only loosely track inflight requests across preemption, + * and so we may find ourselves attempting to retire a _completed_ + * request that we have removed from the HW and put back on a run + * queue. + */ spin_lock(&rq->engine->active.lock); list_del(&rq->sched.link); spin_unlock(&rq->engine->active.lock); diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index edbbdfec24ab..bebc1e9b4a5e 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -28,6 +28,7 @@ #include #include +#include "gt/intel_context_types.h" #include "gt/intel_engine_types.h" #include "i915_gem.h" diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 2e9b38bdc33c..b1ba3e65cd52 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -179,8 +179,7 @@ static inline int rq_prio(const struct i915_request *rq) static void kick_submission(struct intel_engine_cs *engine, int prio) { - const struct i915_request *inflight = - port_request(engine->execlists.port); + const struct i915_request *inflight = *engine->execlists.active; /* * If we are already the currently executing context, don't diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 2987219a6300..4920ff9aba62 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -131,6 +131,18 @@ __check_struct_size(size_t base, size_t arr, size_t count, size_t *size) ((typeof(ptr))((unsigned long)(ptr) | __bits)); \ }) +#define ptr_count_dec(p_ptr) do { \ + typeof(p_ptr) __p = (p_ptr); \ + unsigned long __v = (unsigned long)(*__p); \ + *__p = (typeof(*p_ptr))(--__v); \ +} while (0) + +#define ptr_count_inc(p_ptr) do { \ + typeof(p_ptr) __p = (p_ptr); \ + unsigned long __v = (unsigned long)(*__p); \ + *__p = (typeof(*p_ptr))(++__v); \ +} while (0) + #define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT) #define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT) #define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT) diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index db531ebc7704..12c22359fdac 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -32,7 +32,11 @@ #include "intel_guc_submission.h" #include "i915_drv.h" -#define GUC_PREEMPT_FINISHED 0x1 +enum { + GUC_PREEMPT_NONE = 0, + GUC_PREEMPT_INPROGRESS, + GUC_PREEMPT_FINISHED, +}; #define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8 #define GUC_PREEMPT_BREADCRUMB_BYTES \ (sizeof(u32) * GUC_PREEMPT_BREADCRUMB_DWORDS) @@ -537,15 +541,11 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc); u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); - spin_lock(&client->wq_lock); - guc_wq_item_append(client, engine->guc_id, ctx_desc, ring_tail, rq->fence.seqno); guc_ring_doorbell(client); client->submissions[engine->id] += 1; - - spin_unlock(&client->wq_lock); } /* @@ -631,8 +631,9 @@ static void inject_preempt_context(struct work_struct *work) data[6] = intel_guc_ggtt_offset(guc, guc->shared_data); if (WARN_ON(intel_guc_send(guc, data, ARRAY_SIZE(data)))) { - execlists_clear_active(&engine->execlists, - EXECLISTS_ACTIVE_PREEMPT); + intel_write_status_page(engine, + I915_GEM_HWS_PREEMPT, + GUC_PREEMPT_NONE); tasklet_schedule(&engine->execlists.tasklet); } @@ -672,8 +673,6 @@ static void complete_preempt_context(struct intel_engine_cs *engine) { struct intel_engine_execlists *execlists = &engine->execlists; - GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)); - if (inject_preempt_hang(execlists)) return; @@ -681,89 +680,90 @@ static void complete_preempt_context(struct intel_engine_cs *engine) execlists_unwind_incomplete_requests(execlists); wait_for_guc_preempt_report(engine); - intel_write_status_page(engine, I915_GEM_HWS_PREEMPT, 0); + intel_write_status_page(engine, I915_GEM_HWS_PREEMPT, GUC_PREEMPT_NONE); } -/** - * guc_submit() - Submit commands through GuC - * @engine: engine associated with the commands - * - * The only error here arises if the doorbell hardware isn't functioning - * as expected, which really shouln't happen. - */ -static void guc_submit(struct intel_engine_cs *engine) +static void guc_submit(struct intel_engine_cs *engine, + struct i915_request **out, + struct i915_request **end) { struct intel_guc *guc = &engine->i915->guc; - struct intel_engine_execlists * const execlists = &engine->execlists; - struct execlist_port *port = execlists->port; - unsigned int n; + struct intel_guc_client *client = guc->execbuf_client; - for (n = 0; n < execlists_num_ports(execlists); n++) { - struct i915_request *rq; - unsigned int count; + spin_lock(&client->wq_lock); - rq = port_unpack(&port[n], &count); - if (rq && count == 0) { - port_set(&port[n], port_pack(rq, ++count)); + do { + struct i915_request *rq = *out++; - flush_ggtt_writes(rq->ring->vma); + flush_ggtt_writes(rq->ring->vma); + guc_add_request(guc, rq); + } while (out != end); - guc_add_request(guc, rq); - } - } -} - -static void port_assign(struct execlist_port *port, struct i915_request *rq) -{ - GEM_BUG_ON(port_isset(port)); - - port_set(port, i915_request_get(rq)); + spin_unlock(&client->wq_lock); } static inline int rq_prio(const struct i915_request *rq) { - return rq->sched.attr.priority; + return rq->sched.attr.priority | __NO_PREEMPTION; } -static inline int port_prio(const struct execlist_port *port) +static struct i915_request *schedule_in(struct i915_request *rq, int idx) { - return rq_prio(port_request(port)) | __NO_PREEMPTION; + trace_i915_request_in(rq, idx); + + if (!rq->hw_context->inflight) + rq->hw_context->inflight = rq->engine; + intel_context_inflight_inc(rq->hw_context); + + return i915_request_get(rq); } -static bool __guc_dequeue(struct intel_engine_cs *engine) +static void schedule_out(struct i915_request *rq) +{ + trace_i915_request_out(rq); + + intel_context_inflight_dec(rq->hw_context); + if (!intel_context_inflight_count(rq->hw_context)) + rq->hw_context->inflight = NULL; + + i915_request_put(rq); +} + +static void __guc_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - struct execlist_port *port = execlists->port; - struct i915_request *last = NULL; - const struct execlist_port * const last_port = - &execlists->port[execlists->port_mask]; + struct i915_request **first = execlists->inflight; + struct i915_request ** const last_port = first + execlists->port_mask; + struct i915_request *last = first[0]; + struct i915_request **port; bool submit = false; struct rb_node *rb; lockdep_assert_held(&engine->active.lock); - if (port_isset(port)) { + if (last) { if (intel_engine_has_preemption(engine)) { struct guc_preempt_work *preempt_work = &engine->i915->guc.preempt_work[engine->id]; int prio = execlists->queue_priority_hint; - if (i915_scheduler_need_preempt(prio, - port_prio(port))) { - execlists_set_active(execlists, - EXECLISTS_ACTIVE_PREEMPT); + if (i915_scheduler_need_preempt(prio, rq_prio(last))) { + intel_write_status_page(engine, + I915_GEM_HWS_PREEMPT, + GUC_PREEMPT_INPROGRESS); queue_work(engine->i915->guc.preempt_wq, &preempt_work->work); - return false; + return; } } - port++; - if (port_isset(port)) - return false; - } - GEM_BUG_ON(port_isset(port)); + if (*++first) + return; + last = NULL; + } + + port = first; while ((rb = rb_first_cached(&execlists->queue))) { struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; @@ -774,18 +774,15 @@ static bool __guc_dequeue(struct intel_engine_cs *engine) if (port == last_port) goto done; - if (submit) - port_assign(port, last); + *port = schedule_in(last, + port - execlists->inflight); port++; } list_del_init(&rq->sched.link); - __i915_request_submit(rq); - trace_i915_request_in(rq, port_index(port, execlists)); - - last = rq; submit = true; + last = rq; } rb_erase_cached(&p->node, &execlists->queue); @@ -794,58 +791,41 @@ static bool __guc_dequeue(struct intel_engine_cs *engine) done: execlists->queue_priority_hint = rb ? to_priolist(rb)->priority : INT_MIN; - if (submit) - port_assign(port, last); - if (last) - execlists_user_begin(execlists, execlists->port); - - /* We must always keep the beast fed if we have work piled up */ - GEM_BUG_ON(port_isset(execlists->port) && - !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); - GEM_BUG_ON(rb_first_cached(&execlists->queue) && - !port_isset(execlists->port)); - - return submit; -} - -static void guc_dequeue(struct intel_engine_cs *engine) -{ - if (__guc_dequeue(engine)) - guc_submit(engine); + if (submit) { + *port = schedule_in(last, port - execlists->inflight); + *++port = NULL; + guc_submit(engine, first, port); + } + execlists->active = execlists->inflight; } static void guc_submission_tasklet(unsigned long data) { struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; struct intel_engine_execlists * const execlists = &engine->execlists; - struct execlist_port *port = execlists->port; - struct i915_request *rq; + struct i915_request **port, *rq; unsigned long flags; spin_lock_irqsave(&engine->active.lock, flags); - rq = port_request(port); - while (rq && i915_request_completed(rq)) { - trace_i915_request_out(rq); - i915_request_put(rq); + for (port = execlists->inflight; (rq = *port); port++) { + if (!i915_request_completed(rq)) + break; - port = execlists_port_complete(execlists, port); - if (port_isset(port)) { - execlists_user_begin(execlists, port); - rq = port_request(port); - } else { - execlists_user_end(execlists); - rq = NULL; - } + schedule_out(rq); + } + if (port != execlists->inflight) { + int idx = port - execlists->inflight; + int rem = ARRAY_SIZE(execlists->inflight) - idx; + memmove(execlists->inflight, port, rem * sizeof(*port)); } - if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) && - intel_read_status_page(engine, I915_GEM_HWS_PREEMPT) == + if (intel_read_status_page(engine, I915_GEM_HWS_PREEMPT) == GUC_PREEMPT_FINISHED) complete_preempt_context(engine); - if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)) - guc_dequeue(engine); + if (!intel_read_status_page(engine, I915_GEM_HWS_PREEMPT)) + __guc_dequeue(engine); spin_unlock_irqrestore(&engine->active.lock, flags); } @@ -959,7 +939,6 @@ static void guc_cancel_requests(struct intel_engine_cs *engine) execlists->queue_priority_hint = INT_MIN; execlists->queue = RB_ROOT_CACHED; - GEM_BUG_ON(port_isset(execlists->port)); spin_unlock_irqrestore(&engine->active.lock, flags); } @@ -1422,7 +1401,7 @@ int intel_guc_submission_enable(struct intel_guc *guc) * and it is guaranteed that it will remove the work item from the * queue before our request is completed. */ - BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.port) * + BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.inflight) * sizeof(struct guc_wq_item) * I915_NUM_ENGINES > GUC_WQ_SIZE); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 298bb7116c51..1a5b9e284ca9 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -366,13 +366,15 @@ static int __igt_breadcrumbs_smoketest(void *arg) if (!wait_event_timeout(wait->wait, i915_sw_fence_done(wait), - HZ / 2)) { + 5 * HZ)) { struct i915_request *rq = requests[count - 1]; - pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n", - count, + pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n", + atomic_read(&wait->pending), count, rq->fence.context, rq->fence.seqno, t->engine->name); + GEM_TRACE_DUMP(); + i915_gem_set_wedged(t->engine->i915); GEM_BUG_ON(!i915_request_completed(rq)); i915_sw_fence_wait(wait);