1
0
Fork 0

drm/i915/execlists: Use per-process HWSP as scratch

Some of our commands (MI_FLUSH_DW / PIPE_CONTROL) require a post-sync write
operation to be performed. Currently we're using dedicated VMA for
PIPE_CONTROL and global HWSP for MI_FLUSH_DW.
On execlists platforms, each of our contexts has an area that can be
used as scratch space. Let's use that instead.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20190926133142.2838-2-chris@chris-wilson.co.uk
alistair/sunxi64-5.5-dsi
Michał Winiarski 2019-09-26 14:31:41 +01:00 committed by Chris Wilson
parent 5311f5171e
commit e123752374
3 changed files with 17 additions and 35 deletions

View File

@ -89,9 +89,6 @@ enum intel_gt_scratch_field {
/* 8 bytes */
INTEL_GT_SCRATCH_FIELD_DEFAULT = 0,
/* 8 bytes */
INTEL_GT_SCRATCH_FIELD_CLEAR_SLM_WA = 128,
/* 8 bytes */
INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH = 128,

View File

@ -2308,12 +2308,6 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
return batch;
}
static u32 slm_offset(struct intel_engine_cs *engine)
{
return intel_gt_scratch_offset(engine->gt,
INTEL_GT_SCRATCH_FIELD_CLEAR_SLM_WA);
}
/*
* Typically we only have one indirect_ctx and per_ctx batch buffer which are
* initialized at the beginning and shared across all contexts but this field
@ -2342,10 +2336,10 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
/* Actual scratch location is at 128 bytes offset */
batch = gen8_emit_pipe_control(batch,
PIPE_CONTROL_FLUSH_L3 |
PIPE_CONTROL_GLOBAL_GTT_IVB |
PIPE_CONTROL_STORE_DATA_INDEX |
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_QW_WRITE,
slm_offset(engine));
LRC_PPHWSP_SCRATCH_ADDR);
*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
@ -3052,7 +3046,7 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode)
}
*cs++ = cmd;
*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
*cs++ = 0; /* upper addr */
*cs++ = 0; /* value */
intel_ring_advance(request, cs);
@ -3063,10 +3057,6 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode)
static int gen8_emit_flush_render(struct i915_request *request,
u32 mode)
{
struct intel_engine_cs *engine = request->engine;
u32 scratch_addr =
intel_gt_scratch_offset(engine->gt,
INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
bool vf_flush_wa = false, dc_flush_wa = false;
u32 *cs, flags = 0;
int len;
@ -3088,7 +3078,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_QW_WRITE;
flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
flags |= PIPE_CONTROL_STORE_DATA_INDEX;
/*
* On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
@ -3121,7 +3111,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
0);
cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
if (dc_flush_wa)
cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
@ -3134,11 +3124,6 @@ static int gen8_emit_flush_render(struct i915_request *request,
static int gen11_emit_flush_render(struct i915_request *request,
u32 mode)
{
struct intel_engine_cs *engine = request->engine;
const u32 scratch_addr =
intel_gt_scratch_offset(engine->gt,
INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
if (mode & EMIT_FLUSH) {
u32 *cs;
u32 flags = 0;
@ -3151,13 +3136,13 @@ static int gen11_emit_flush_render(struct i915_request *request,
flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
flags |= PIPE_CONTROL_FLUSH_ENABLE;
flags |= PIPE_CONTROL_QW_WRITE;
flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
flags |= PIPE_CONTROL_STORE_DATA_INDEX;
cs = intel_ring_begin(request, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
intel_ring_advance(request, cs);
}
@ -3175,13 +3160,13 @@ static int gen11_emit_flush_render(struct i915_request *request,
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_QW_WRITE;
flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
flags |= PIPE_CONTROL_STORE_DATA_INDEX;
cs = intel_ring_begin(request, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
intel_ring_advance(request, cs);
}
@ -3196,10 +3181,6 @@ static u32 preparser_disable(bool state)
static int gen12_emit_flush_render(struct i915_request *request,
u32 mode)
{
const u32 scratch_addr =
intel_gt_scratch_offset(request->engine->gt,
INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
if (mode & EMIT_FLUSH) {
u32 flags = 0;
u32 *cs;
@ -3210,7 +3191,7 @@ static int gen12_emit_flush_render(struct i915_request *request,
flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
flags |= PIPE_CONTROL_FLUSH_ENABLE;
flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
flags |= PIPE_CONTROL_STORE_DATA_INDEX;
flags |= PIPE_CONTROL_QW_WRITE;
flags |= PIPE_CONTROL_CS_STALL;
@ -3219,7 +3200,7 @@ static int gen12_emit_flush_render(struct i915_request *request,
if (IS_ERR(cs))
return PTR_ERR(cs);
cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
intel_ring_advance(request, cs);
}
@ -3235,7 +3216,7 @@ static int gen12_emit_flush_render(struct i915_request *request,
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
flags |= PIPE_CONTROL_STORE_DATA_INDEX;
flags |= PIPE_CONTROL_QW_WRITE;
flags |= PIPE_CONTROL_CS_STALL;
@ -3251,7 +3232,7 @@ static int gen12_emit_flush_render(struct i915_request *request,
*/
*cs++ = preparser_disable(true);
cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
*cs++ = preparser_disable(false);
intel_ring_advance(request, cs);

View File

@ -104,6 +104,10 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine);
*/
#define LRC_HEADER_PAGES LRC_PPHWSP_PN
/* Space within PPHWSP reserved to be used as scratch */
#define LRC_PPHWSP_SCRATCH 0x34
#define LRC_PPHWSP_SCRATCH_ADDR (LRC_PPHWSP_SCRATCH * sizeof(u32))
void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
void intel_lr_context_reset(struct intel_engine_cs *engine,